/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at http://mozilla.org/MPL/2.0/. */ #include <algorithm> #include <map> #include <sys/stat.h> #include <string> #include <sstream> #include <cstring> #include <cstdlib> #include <zlib.h> #include <fcntl.h> #include <errno.h> #include "mozilla/Assertions.h" #include "mozilla/Scoped.h" #include "mozilla/UniquePtr.h" #include "SeekableZStream.h" #include "Utils.h" #include "Logging.h" Logging Logging::Singleton; const char *filterName[] = { "none", "thumb", "arm", "x86", "auto" }; /* Maximum supported size for chunkSize */ static const size_t maxChunkSize = 1 << (8 * std::min(sizeof(((SeekableZStreamHeader *)nullptr)->chunkSize), sizeof(((SeekableZStreamHeader *)nullptr)->lastChunkSize)) - 1); class Buffer: public MappedPtr { public: virtual ~Buffer() { } virtual bool Resize(size_t size) { MemoryRange buf = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (buf == MAP_FAILED) return false; if (*this != MAP_FAILED) memcpy(buf, *this, std::min(size, GetLength())); Assign(buf); return true; } bool Fill(Buffer &other) { size_t size = other.GetLength(); if (!size || !Resize(size)) return false; memcpy(static_cast<void *>(*this), static_cast<void *>(other), size); return true; } }; class FileBuffer: public Buffer { public: bool Init(const char *name, bool writable_ = false) { fd = open(name, writable_ ? O_RDWR | O_CREAT | O_TRUNC : O_RDONLY, 0666); if (fd == -1) return false; writable = writable_; return true; } virtual bool Resize(size_t size) { if (writable) { if (ftruncate(fd, size) == -1) return false; } Assign(MemoryRange::mmap(nullptr, size, PROT_READ | (writable ? PROT_WRITE : 0), writable ? MAP_SHARED : MAP_PRIVATE, fd, 0)); return this != MAP_FAILED; } int getFd() { return fd; } private: AutoCloseFD fd; bool writable; }; class FilteredBuffer: public Buffer { public: void Filter(Buffer &other, SeekableZStream::FilterId filter, size_t chunkSize) { SeekableZStream::ZStreamFilter filterCB = SeekableZStream::GetFilter(filter); MOZ_ASSERT(filterCB); Fill(other); size_t size = other.GetLength(); Bytef *data = reinterpret_cast<Bytef *>(static_cast<void *>(*this)); size_t avail = 0; /* Filter needs to be applied in chunks. */ while (size) { avail = std::min(size, chunkSize); filterCB(data - static_cast<unsigned char *>(static_cast<void *>(*this)), SeekableZStream::FILTER, data, avail); size -= avail; data += avail; } } }; template <typename T> class Dictionary: public Buffer { typedef T piece; typedef std::pair<piece, int> stat_pair; static bool stat_cmp(stat_pair a, stat_pair b) { return a.second < b.second; } public: Dictionary(Buffer &inBuf, size_t size) { if (!size || !Resize(size)) return; DEBUG_LOG("Creating dictionary"); piece *origBufPieces = reinterpret_cast<piece *>( static_cast<void *>(inBuf)); std::map<piece, int> stats; for (unsigned int i = 0; i < inBuf.GetLength() / sizeof(piece); i++) { stats[origBufPieces[i]]++; } std::vector<stat_pair> statsVec(stats.begin(), stats.end()); std::sort(statsVec.begin(), statsVec.end(), stat_cmp); piece *dictPieces = reinterpret_cast<piece *>( static_cast<void *>(*this)); typename std::vector<stat_pair>::reverse_iterator it = statsVec.rbegin(); for (int i = size / sizeof(piece); i > 0 && it < statsVec.rend(); i--, ++it) { dictPieces[i - 1] = it->first; } } }; class SzipAction { public: virtual int run(const char *name, Buffer &origBuf, const char *outName, Buffer &outBuf) = 0; virtual ~SzipAction() {} }; class SzipDecompress: public SzipAction { public: int run(const char *name, Buffer &origBuf, const char *outName, Buffer &outBuf); }; class SzipCompress: public SzipAction { public: int run(const char *name, Buffer &origBuf, const char *outName, Buffer &outBuf); SzipCompress(size_t aChunkSize, SeekableZStream::FilterId aFilter, size_t aDictSize) : chunkSize(aChunkSize ? aChunkSize : 16384) , filter(aFilter) , dictSize(aDictSize) {} const static signed char winSizeLog = 15; const static size_t winSize = 1 << winSizeLog; const static SeekableZStream::FilterId DEFAULT_FILTER = #if defined(TARGET_THUMB) SeekableZStream::BCJ_THUMB; #elif defined(TARGET_ARM) SeekableZStream::BCJ_ARM; #elif defined(TARGET_X86) SeekableZStream::BCJ_X86; #else SeekableZStream::NONE; #endif private: int do_compress(Buffer &origBuf, Buffer &outBuf, const unsigned char *aDict, size_t aDictSize, SeekableZStream::FilterId aFilter); size_t chunkSize; SeekableZStream::FilterId filter; size_t dictSize; }; /* Decompress a seekable compressed stream */ int SzipDecompress::run(const char *name, Buffer &origBuf, const char *outName, Buffer &outBuf) { size_t origSize = origBuf.GetLength(); if (origSize < sizeof(SeekableZStreamHeader)) { ERROR("%s is not compressed", name); return 0; } SeekableZStream zstream; if (!zstream.Init(origBuf, origSize)) return 0; size_t size = zstream.GetUncompressedSize(); /* Give enough room for the uncompressed data */ if (!outBuf.Resize(size)) { ERROR("Error resizing %s: %s", outName, strerror(errno)); return 1; } if (!zstream.Decompress(outBuf, 0, size)) return 1; return 0; } /* Generate a seekable compressed stream. */ int SzipCompress::run(const char *name, Buffer &origBuf, const char *outName, Buffer &outBuf) { size_t origSize = origBuf.GetLength(); if (origSize == 0) { ERROR("Won't compress %s: it's empty", name); return 1; } if (SeekableZStreamHeader::validate(origBuf)) { WARN("Skipping %s: it's already a szip", name); return 0; } bool compressed = false; LOG("Size = %" PRIuSize, origSize); /* Allocate a buffer the size of the uncompressed data: we don't want * a compressed file larger than that anyways. */ if (!outBuf.Resize(origSize)) { ERROR("Couldn't allocate output buffer: %s", strerror(errno)); return 1; } /* Find the most appropriate filter */ SeekableZStream::FilterId firstFilter, lastFilter; bool scanFilters; if (filter == SeekableZStream::FILTER_MAX) { firstFilter = SeekableZStream::NONE; lastFilter = SeekableZStream::FILTER_MAX; scanFilters = true; } else { firstFilter = lastFilter = filter; ++lastFilter; scanFilters = false; } mozilla::UniquePtr<Buffer> filteredBuf; Buffer *origData; for (SeekableZStream::FilterId f = firstFilter; f < lastFilter; ++f) { mozilla::UniquePtr<FilteredBuffer> filteredTmp; Buffer tmpBuf; if (f != SeekableZStream::NONE) { DEBUG_LOG("Applying filter \"%s\"", filterName[f]); filteredTmp = mozilla::MakeUnique<FilteredBuffer>(); filteredTmp->Filter(origBuf, f, chunkSize); origData = filteredTmp.get(); } else { origData = &origBuf; } if (dictSize && !scanFilters) { filteredBuf = mozilla::Move(filteredTmp); break; } DEBUG_LOG("Compressing with no dictionary"); if (do_compress(*origData, tmpBuf, nullptr, 0, f) == 0) { if (tmpBuf.GetLength() < outBuf.GetLength()) { outBuf.Fill(tmpBuf); compressed = true; filter = f; filteredBuf = mozilla::Move(filteredTmp); continue; } } } origData = filteredBuf ? filteredBuf.get() : &origBuf; if (dictSize) { Dictionary<uint64_t> dict(*origData, dictSize ? SzipCompress::winSize : 0); /* Find the most appropriate dictionary size */ size_t firstDictSize, lastDictSize; if (dictSize == (size_t) -1) { /* If we scanned for filters, we effectively already tried dictSize=0 */ firstDictSize = scanFilters ? 4096 : 0; lastDictSize = SzipCompress::winSize; } else { firstDictSize = lastDictSize = dictSize; } Buffer tmpBuf; for (size_t d = firstDictSize; d <= lastDictSize; d += 4096) { DEBUG_LOG("Compressing with dictionary of size %" PRIuSize, d); if (do_compress(*origData, tmpBuf, static_cast<unsigned char *>(dict) + SzipCompress::winSize - d, d, filter)) continue; if (!compressed || tmpBuf.GetLength() < outBuf.GetLength()) { outBuf.Fill(tmpBuf); compressed = true; dictSize = d; } } } if (!compressed) { outBuf.Fill(origBuf); LOG("Not compressed"); return 0; } if (dictSize == (size_t) -1) dictSize = 0; DEBUG_LOG("Used filter \"%s\" and dictionary size of %" PRIuSize, filterName[filter], dictSize); LOG("Compressed size is %" PRIuSize, outBuf.GetLength()); /* Sanity check */ Buffer tmpBuf; SzipDecompress decompress; if (decompress.run("buffer", outBuf, "buffer", tmpBuf)) return 1; size_t size = tmpBuf.GetLength(); if (size != origSize) { ERROR("Compression error: %" PRIuSize " != %" PRIuSize, size, origSize); return 1; } if (memcmp(static_cast<void *>(origBuf), static_cast<void *>(tmpBuf), size)) { ERROR("Compression error: content mismatch"); return 1; } return 0; } int SzipCompress::do_compress(Buffer &origBuf, Buffer &outBuf, const unsigned char *aDict, size_t aDictSize, SeekableZStream::FilterId aFilter) { size_t origSize = origBuf.GetLength(); MOZ_ASSERT(origSize != 0); /* Expected total number of chunks */ size_t nChunks = ((origSize + chunkSize - 1) / chunkSize); /* The first chunk is going to be stored after the header, the dictionary * and the offset table */ size_t offset = sizeof(SeekableZStreamHeader) + aDictSize + nChunks * sizeof(uint32_t); if (offset >= origSize) return 1; /* Allocate a buffer the size of the uncompressed data: we don't want * a compressed file larger than that anyways. */ if (!outBuf.Resize(origSize)) { ERROR("Couldn't allocate output buffer: %s", strerror(errno)); return 1; } SeekableZStreamHeader *header = new (outBuf) SeekableZStreamHeader; unsigned char *dictionary = static_cast<unsigned char *>( outBuf + sizeof(SeekableZStreamHeader)); le_uint32 *entry = reinterpret_cast<le_uint32 *>(dictionary + aDictSize); /* Initialize header */ header->chunkSize = chunkSize; header->dictSize = aDictSize; header->totalSize = offset; header->windowBits = -SzipCompress::winSizeLog; // Raw stream, // window size of 32k. header->filter = aFilter; if (aDictSize) memcpy(dictionary, aDict, aDictSize); /* Initialize zlib structure */ z_stream zStream; memset(&zStream, 0, sizeof(zStream)); zStream.avail_out = origSize - offset; zStream.next_out = static_cast<Bytef*>(outBuf) + offset; size_t avail = 0; size_t size = origSize; unsigned char *data = reinterpret_cast<unsigned char *>( static_cast<void *>(origBuf)); while (size) { avail = std::min(size, chunkSize); /* Compress chunk */ int ret = deflateInit2(&zStream, 9, Z_DEFLATED, header->windowBits, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY); if (aDictSize) deflateSetDictionary(&zStream, dictionary, aDictSize); MOZ_ASSERT(ret == Z_OK); zStream.avail_in = avail; zStream.next_in = data; ret = deflate(&zStream, Z_FINISH); /* Under normal conditions, deflate returns Z_STREAM_END. If there is not * enough room to compress, deflate returns Z_OK and avail_out is 0. We * still want to deflateEnd in that case, so fall through. It will bail * on the avail_out test that follows. */ MOZ_ASSERT(ret == Z_STREAM_END || ret == Z_OK); ret = deflateEnd(&zStream); MOZ_ASSERT(ret == Z_OK); if (zStream.avail_out <= 0) return 1; size_t len = origSize - offset - zStream.avail_out; /* Adjust headers */ header->totalSize += len; *entry++ = offset; header->nChunks++; /* Prepare for next iteration */ size -= avail; data += avail; offset += len; } header->lastChunkSize = avail; MOZ_ASSERT(header->totalSize == offset); MOZ_ASSERT(header->nChunks == nChunks); if (!outBuf.Resize(offset)) { ERROR("Error truncating output: %s", strerror(errno)); return 1; } return 0; } bool GetSize(const char *str, size_t *out) { char *end; MOZ_ASSERT(out); errno = 0; *out = strtol(str, &end, 10); return (!errno && !*end); } int main(int argc, char* argv[]) { mozilla::UniquePtr<SzipAction> action; char **firstArg; bool compress = true; size_t chunkSize = 0; SeekableZStream::FilterId filter = SzipCompress::DEFAULT_FILTER; size_t dictSize = (size_t) 0; Logging::Init(); for (firstArg = &argv[1]; argc > 2; argc--, firstArg++) { if (!firstArg[0] || firstArg[0][0] != '-') break; if (strcmp(firstArg[0], "-d") == 0) { compress = false; } else if (strcmp(firstArg[0], "-c") == 0) { firstArg++; argc--; if (!firstArg[0]) break; if (!GetSize(firstArg[0], &chunkSize) || !chunkSize || (chunkSize % 4096) || (chunkSize > maxChunkSize)) { ERROR("Invalid chunk size"); return 1; } } else if (strcmp(firstArg[0], "-f") == 0) { firstArg++; argc--; if (!firstArg[0]) break; bool matched = false; for (unsigned int i = 0; i < sizeof(filterName) / sizeof(char *); ++i) { if (strcmp(firstArg[0], filterName[i]) == 0) { filter = static_cast<SeekableZStream::FilterId>(i); matched = true; break; } } if (!matched) { ERROR("Invalid filter"); return 1; } } else if (strcmp(firstArg[0], "-D") == 0) { firstArg++; argc--; if (!firstArg[0]) break; if (strcmp(firstArg[0], "auto") == 0) { dictSize = -1; } else if (!GetSize(firstArg[0], &dictSize) || (dictSize >= 1 << 16)) { ERROR("Invalid dictionary size"); return 1; } } } if (argc != 2 || !firstArg[0]) { LOG("usage: %s [-d] [-c CHUNKSIZE] [-f FILTER] [-D DICTSIZE] file", argv[0]); return 1; } if (compress) { action.reset(new SzipCompress(chunkSize, filter, dictSize)); } else { if (chunkSize) { ERROR("-c is incompatible with -d"); return 1; } if (dictSize) { ERROR("-D is incompatible with -d"); return 1; } action.reset(new SzipDecompress()); } std::stringstream tmpOutStream; tmpOutStream << firstArg[0] << ".sz." << getpid(); std::string tmpOut(tmpOutStream.str()); int ret; struct stat st; { FileBuffer origBuf; if (!origBuf.Init(firstArg[0])) { ERROR("Couldn't open %s: %s", firstArg[0], strerror(errno)); return 1; } ret = fstat(origBuf.getFd(), &st); if (ret == -1) { ERROR("Couldn't stat %s: %s", firstArg[0], strerror(errno)); return 1; } size_t origSize = st.st_size; /* Mmap the original file */ if (!origBuf.Resize(origSize)) { ERROR("Couldn't mmap %s: %s", firstArg[0], strerror(errno)); return 1; } /* Create the compressed file */ FileBuffer outBuf; if (!outBuf.Init(tmpOut.c_str(), true)) { ERROR("Couldn't open %s: %s", tmpOut.c_str(), strerror(errno)); return 1; } ret = action->run(firstArg[0], origBuf, tmpOut.c_str(), outBuf); if ((ret == 0) && (fstat(outBuf.getFd(), &st) == -1)) { st.st_size = 0; } } if ((ret == 0) && st.st_size) { rename(tmpOut.c_str(), firstArg[0]); } else { unlink(tmpOut.c_str()); } return ret; }