Diffstat (limited to 'mozglue/linker/szip.cpp')
-rw-r--r--  mozglue/linker/szip.cpp  593
1 file changed, 593 insertions, 0 deletions
diff --git a/mozglue/linker/szip.cpp b/mozglue/linker/szip.cpp
new file mode 100644
index 000000000..bfc882fbe
--- /dev/null
+++ b/mozglue/linker/szip.cpp
@@ -0,0 +1,593 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <algorithm>
+#include <map>
+#include <vector>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <string>
+#include <sstream>
+#include <cstring>
+#include <cstdlib>
+#include <zlib.h>
+#include <fcntl.h>
+#include <errno.h>
+#include "mozilla/Assertions.h"
+#include "mozilla/Scoped.h"
+#include "mozilla/UniquePtr.h"
+#include "SeekableZStream.h"
+#include "Utils.h"
+#include "Logging.h"
+
+Logging Logging::Singleton;
+
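+/* Human-readable filter names; keep this order in sync with
+ * SeekableZStream::FilterId, as the index of the matched name is used as
+ * the filter id when parsing -f. */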
+const char *filterName[] = {
+ "none",
+ "thumb",
+ "arm",
+ "x86",
+ "auto"
+};
+
+/* Maximum supported size for chunkSize: the largest power of two that fits
+ * in the narrower of the chunkSize and lastChunkSize header fields. */
+static const size_t maxChunkSize =
+ 1 << (8 * std::min(sizeof(((SeekableZStreamHeader *)nullptr)->chunkSize),
+ sizeof(((SeekableZStreamHeader *)nullptr)->lastChunkSize)) - 1);
+
+class Buffer: public MappedPtr
+{
+public:
+ virtual ~Buffer() { }
+
+ virtual bool Resize(size_t size)
+ {
+    MemoryRange buf = MemoryRange::mmap(nullptr, size, PROT_READ | PROT_WRITE,
+                                        MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (buf == MAP_FAILED)
+ return false;
+ if (*this != MAP_FAILED)
+ memcpy(buf, *this, std::min(size, GetLength()));
+ Assign(buf);
+ return true;
+ }
+
+ bool Fill(Buffer &other)
+ {
+ size_t size = other.GetLength();
+ if (!size || !Resize(size))
+ return false;
+ memcpy(static_cast<void *>(*this), static_cast<void *>(other), size);
+ return true;
+ }
+};
+
+class FileBuffer: public Buffer
+{
+public:
+ bool Init(const char *name, bool writable_ = false)
+ {
+ fd = open(name, writable_ ? O_RDWR | O_CREAT | O_TRUNC : O_RDONLY, 0666);
+ if (fd == -1)
+ return false;
+ writable = writable_;
+ return true;
+ }
+
+ virtual bool Resize(size_t size)
+ {
+ if (writable) {
+ if (ftruncate(fd, size) == -1)
+ return false;
+ }
+ Assign(MemoryRange::mmap(nullptr, size,
+ PROT_READ | (writable ? PROT_WRITE : 0),
+ writable ? MAP_SHARED : MAP_PRIVATE, fd, 0));
+    return *this != MAP_FAILED;
+ }
+
+ int getFd()
+ {
+ return fd;
+ }
+
+private:
+ AutoCloseFD fd;
+ bool writable;
+};
+
+class FilteredBuffer: public Buffer
+{
+public:
+ void Filter(Buffer &other, SeekableZStream::FilterId filter, size_t chunkSize)
+ {
+ SeekableZStream::ZStreamFilter filterCB =
+ SeekableZStream::GetFilter(filter);
+ MOZ_ASSERT(filterCB);
+ Fill(other);
+ size_t size = other.GetLength();
+ Bytef *data = reinterpret_cast<Bytef *>(static_cast<void *>(*this));
+ size_t avail = 0;
+ /* Filter needs to be applied in chunks. */
+ while (size) {
+ avail = std::min(size, chunkSize);
+ filterCB(data - static_cast<unsigned char *>(static_cast<void *>(*this)),
+ SeekableZStream::FILTER, data, avail);
+ size -= avail;
+ data += avail;
+ }
+ }
+};
+
+template <typename T>
+class Dictionary: public Buffer
+{
+ typedef T piece;
+ typedef std::pair<piece, int> stat_pair;
+
+ static bool stat_cmp(stat_pair a, stat_pair b)
+ {
+ return a.second < b.second;
+ }
+
+public:
+ Dictionary(Buffer &inBuf, size_t size)
+ {
+ if (!size || !Resize(size))
+ return;
+ DEBUG_LOG("Creating dictionary");
+ piece *origBufPieces = reinterpret_cast<piece *>(
+ static_cast<void *>(inBuf));
+ std::map<piece, int> stats;
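+    /* Count how many times each piece occurs in the input buffer. */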
+ for (unsigned int i = 0; i < inBuf.GetLength() / sizeof(piece); i++) {
+ stats[origBufPieces[i]]++;
+ }
+ std::vector<stat_pair> statsVec(stats.begin(), stats.end());
+ std::sort(statsVec.begin(), statsVec.end(), stat_cmp);
+
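+    /* Fill the dictionary back to front with the most frequent pieces:
+     * zlib favors matches against data near the end of a preset
+     * dictionary. */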
+ piece *dictPieces = reinterpret_cast<piece *>(
+ static_cast<void *>(*this));
+ typename std::vector<stat_pair>::reverse_iterator it = statsVec.rbegin();
+ for (int i = size / sizeof(piece); i > 0 && it < statsVec.rend();
+ i--, ++it) {
+ dictPieces[i - 1] = it->first;
+ }
+ }
+};
+
+class SzipAction
+{
+public:
+ virtual int run(const char *name, Buffer &origBuf,
+ const char *outName, Buffer &outBuf) = 0;
+
+ virtual ~SzipAction() {}
+};
+
+class SzipDecompress: public SzipAction
+{
+public:
+ int run(const char *name, Buffer &origBuf,
+ const char *outName, Buffer &outBuf);
+};
+
+
+class SzipCompress: public SzipAction
+{
+public:
+ int run(const char *name, Buffer &origBuf,
+ const char *outName, Buffer &outBuf);
+
+ SzipCompress(size_t aChunkSize, SeekableZStream::FilterId aFilter,
+ size_t aDictSize)
+ : chunkSize(aChunkSize ? aChunkSize : 16384)
+ , filter(aFilter)
+ , dictSize(aDictSize)
+ {}
+
+ const static signed char winSizeLog = 15;
+ const static size_t winSize = 1 << winSizeLog;
+
+ const static SeekableZStream::FilterId DEFAULT_FILTER =
+#if defined(TARGET_THUMB)
+ SeekableZStream::BCJ_THUMB;
+#elif defined(TARGET_ARM)
+ SeekableZStream::BCJ_ARM;
+#elif defined(TARGET_X86)
+ SeekableZStream::BCJ_X86;
+#else
+ SeekableZStream::NONE;
+#endif
+
+private:
+
+ int do_compress(Buffer &origBuf, Buffer &outBuf, const unsigned char *aDict,
+ size_t aDictSize, SeekableZStream::FilterId aFilter);
+
+ size_t chunkSize;
+ SeekableZStream::FilterId filter;
+ size_t dictSize;
+};
+
+/* Decompress a seekable compressed stream */
+int SzipDecompress::run(const char *name, Buffer &origBuf,
+ const char *outName, Buffer &outBuf)
+{
+ size_t origSize = origBuf.GetLength();
+ if (origSize < sizeof(SeekableZStreamHeader)) {
+ ERROR("%s is not compressed", name);
+ return 0;
+ }
+
+ SeekableZStream zstream;
+ if (!zstream.Init(origBuf, origSize))
+ return 0;
+
+ size_t size = zstream.GetUncompressedSize();
+
+ /* Give enough room for the uncompressed data */
+ if (!outBuf.Resize(size)) {
+ ERROR("Error resizing %s: %s", outName, strerror(errno));
+ return 1;
+ }
+
+ if (!zstream.Decompress(outBuf, 0, size))
+ return 1;
+
+ return 0;
+}
+
+/* Generate a seekable compressed stream. */
+int SzipCompress::run(const char *name, Buffer &origBuf,
+ const char *outName, Buffer &outBuf)
+{
+ size_t origSize = origBuf.GetLength();
+ if (origSize == 0) {
+ ERROR("Won't compress %s: it's empty", name);
+ return 1;
+ }
+ if (SeekableZStreamHeader::validate(origBuf)) {
+ WARN("Skipping %s: it's already a szip", name);
+ return 0;
+ }
+ bool compressed = false;
+ LOG("Size = %" PRIuSize, origSize);
+
+ /* Allocate a buffer the size of the uncompressed data: we don't want
+ * a compressed file larger than that anyways. */
+ if (!outBuf.Resize(origSize)) {
+ ERROR("Couldn't allocate output buffer: %s", strerror(errno));
+ return 1;
+ }
+
+ /* Find the most appropriate filter */
+ SeekableZStream::FilterId firstFilter, lastFilter;
+ bool scanFilters;
+ if (filter == SeekableZStream::FILTER_MAX) {
+ firstFilter = SeekableZStream::NONE;
+ lastFilter = SeekableZStream::FILTER_MAX;
+ scanFilters = true;
+ } else {
+ firstFilter = lastFilter = filter;
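+    /* The filter loop below uses a half-open range, so point lastFilter one
+     * past the requested filter. */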
+ ++lastFilter;
+ scanFilters = false;
+ }
+
+ mozilla::UniquePtr<Buffer> filteredBuf;
+ Buffer *origData;
+ for (SeekableZStream::FilterId f = firstFilter; f < lastFilter; ++f) {
+ mozilla::UniquePtr<FilteredBuffer> filteredTmp;
+ Buffer tmpBuf;
+ if (f != SeekableZStream::NONE) {
+ DEBUG_LOG("Applying filter \"%s\"", filterName[f]);
+ filteredTmp = mozilla::MakeUnique<FilteredBuffer>();
+ filteredTmp->Filter(origBuf, f, chunkSize);
+ origData = filteredTmp.get();
+ } else {
+ origData = &origBuf;
+ }
+ if (dictSize && !scanFilters) {
+ filteredBuf = mozilla::Move(filteredTmp);
+ break;
+ }
+ DEBUG_LOG("Compressing with no dictionary");
+ if (do_compress(*origData, tmpBuf, nullptr, 0, f) == 0) {
+ if (tmpBuf.GetLength() < outBuf.GetLength()) {
+ outBuf.Fill(tmpBuf);
+ compressed = true;
+ filter = f;
+ filteredBuf = mozilla::Move(filteredTmp);
+ continue;
+ }
+ }
+ }
+
+ origData = filteredBuf ? filteredBuf.get() : &origBuf;
+
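+  /* Build a window-sized dictionary from the most frequent 64-bit pieces of
+   * the (possibly filtered) data; do_compress is handed its last dictSize
+   * bytes. */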
+ if (dictSize) {
+ Dictionary<uint64_t> dict(*origData, dictSize ? SzipCompress::winSize : 0);
+
+ /* Find the most appropriate dictionary size */
+ size_t firstDictSize, lastDictSize;
+ if (dictSize == (size_t) -1) {
+ /* If we scanned for filters, we effectively already tried dictSize=0 */
+ firstDictSize = scanFilters ? 4096 : 0;
+ lastDictSize = SzipCompress::winSize;
+ } else {
+ firstDictSize = lastDictSize = dictSize;
+ }
+
+ Buffer tmpBuf;
+ for (size_t d = firstDictSize; d <= lastDictSize; d += 4096) {
+ DEBUG_LOG("Compressing with dictionary of size %" PRIuSize, d);
+ if (do_compress(*origData, tmpBuf, static_cast<unsigned char *>(dict)
+ + SzipCompress::winSize - d, d, filter))
+ continue;
+ if (!compressed || tmpBuf.GetLength() < outBuf.GetLength()) {
+ outBuf.Fill(tmpBuf);
+ compressed = true;
+ dictSize = d;
+ }
+ }
+ }
+
+ if (!compressed) {
+ outBuf.Fill(origBuf);
+ LOG("Not compressed");
+ return 0;
+ }
+
+ if (dictSize == (size_t) -1)
+ dictSize = 0;
+
+ DEBUG_LOG("Used filter \"%s\" and dictionary size of %" PRIuSize,
+ filterName[filter], dictSize);
+ LOG("Compressed size is %" PRIuSize, outBuf.GetLength());
+
+ /* Sanity check */
+ Buffer tmpBuf;
+ SzipDecompress decompress;
+ if (decompress.run("buffer", outBuf, "buffer", tmpBuf))
+ return 1;
+
+ size_t size = tmpBuf.GetLength();
+ if (size != origSize) {
+ ERROR("Compression error: %" PRIuSize " != %" PRIuSize, size, origSize);
+ return 1;
+ }
+ if (memcmp(static_cast<void *>(origBuf), static_cast<void *>(tmpBuf), size)) {
+ ERROR("Compression error: content mismatch");
+ return 1;
+ }
+ return 0;
+}
+
+int SzipCompress::do_compress(Buffer &origBuf, Buffer &outBuf,
+ const unsigned char *aDict, size_t aDictSize,
+ SeekableZStream::FilterId aFilter)
+{
+ size_t origSize = origBuf.GetLength();
+ MOZ_ASSERT(origSize != 0);
+
+ /* Expected total number of chunks */
+ size_t nChunks = ((origSize + chunkSize - 1) / chunkSize);
+
+ /* The first chunk is going to be stored after the header, the dictionary
+ * and the offset table */
+ size_t offset = sizeof(SeekableZStreamHeader) + aDictSize
+ + nChunks * sizeof(uint32_t);
+
+ if (offset >= origSize)
+ return 1;
+
+ /* Allocate a buffer the size of the uncompressed data: we don't want
+ * a compressed file larger than that anyways. */
+ if (!outBuf.Resize(origSize)) {
+ ERROR("Couldn't allocate output buffer: %s", strerror(errno));
+ return 1;
+ }
+
+ SeekableZStreamHeader *header = new (outBuf) SeekableZStreamHeader;
+ unsigned char *dictionary = static_cast<unsigned char *>(
+ outBuf + sizeof(SeekableZStreamHeader));
+ le_uint32 *entry =
+ reinterpret_cast<le_uint32 *>(dictionary + aDictSize);
+
+ /* Initialize header */
+ header->chunkSize = chunkSize;
+ header->dictSize = aDictSize;
+ header->totalSize = offset;
+ header->windowBits = -SzipCompress::winSizeLog; // Raw stream,
+ // window size of 32k.
+ header->filter = aFilter;
+ if (aDictSize)
+ memcpy(dictionary, aDict, aDictSize);
+
+ /* Initialize zlib structure */
+ z_stream zStream;
+ memset(&zStream, 0, sizeof(zStream));
+ zStream.avail_out = origSize - offset;
+ zStream.next_out = static_cast<Bytef*>(outBuf) + offset;
+
+ size_t avail = 0;
+ size_t size = origSize;
+ unsigned char *data = reinterpret_cast<unsigned char *>(
+ static_cast<void *>(origBuf));
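+  /* Compress each chunk as an independent raw deflate stream so that any
+   * chunk can later be inflated without touching the chunks before it. */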
+ while (size) {
+ avail = std::min(size, chunkSize);
+
+ /* Compress chunk */
+ int ret = deflateInit2(&zStream, 9, Z_DEFLATED, header->windowBits,
+ MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
+ if (aDictSize)
+ deflateSetDictionary(&zStream, dictionary, aDictSize);
+ MOZ_ASSERT(ret == Z_OK);
+ zStream.avail_in = avail;
+ zStream.next_in = data;
+ ret = deflate(&zStream, Z_FINISH);
+ /* Under normal conditions, deflate returns Z_STREAM_END. If there is not
+ * enough room to compress, deflate returns Z_OK and avail_out is 0. We
+ * still want to deflateEnd in that case, so fall through. It will bail
+ * on the avail_out test that follows. */
+ MOZ_ASSERT(ret == Z_STREAM_END || ret == Z_OK);
+ ret = deflateEnd(&zStream);
+ MOZ_ASSERT(ret == Z_OK);
+ if (zStream.avail_out <= 0)
+ return 1;
+
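+    /* Size of the compressed chunk just produced. */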
+ size_t len = origSize - offset - zStream.avail_out;
+
+ /* Adjust headers */
+ header->totalSize += len;
+ *entry++ = offset;
+ header->nChunks++;
+
+ /* Prepare for next iteration */
+ size -= avail;
+ data += avail;
+ offset += len;
+ }
+ header->lastChunkSize = avail;
+ MOZ_ASSERT(header->totalSize == offset);
+ MOZ_ASSERT(header->nChunks == nChunks);
+
+ if (!outBuf.Resize(offset)) {
+ ERROR("Error truncating output: %s", strerror(errno));
+ return 1;
+ }
+
+ return 0;
+}
+
+bool GetSize(const char *str, size_t *out)
+{
+ char *end;
+ MOZ_ASSERT(out);
+ errno = 0;
+ *out = strtol(str, &end, 10);
+ return (!errno && !*end);
+}
+
+int main(int argc, char* argv[])
+{
+ mozilla::UniquePtr<SzipAction> action;
+ char **firstArg;
+ bool compress = true;
+ size_t chunkSize = 0;
+ SeekableZStream::FilterId filter = SzipCompress::DEFAULT_FILTER;
+ size_t dictSize = (size_t) 0;
+
+ Logging::Init();
+
+ for (firstArg = &argv[1]; argc > 2; argc--, firstArg++) {
+ if (!firstArg[0] || firstArg[0][0] != '-')
+ break;
+ if (strcmp(firstArg[0], "-d") == 0) {
+ compress = false;
+ } else if (strcmp(firstArg[0], "-c") == 0) {
+ firstArg++;
+ argc--;
+ if (!firstArg[0])
+ break;
+ if (!GetSize(firstArg[0], &chunkSize) || !chunkSize ||
+ (chunkSize % 4096) || (chunkSize > maxChunkSize)) {
+ ERROR("Invalid chunk size");
+ return 1;
+ }
+ } else if (strcmp(firstArg[0], "-f") == 0) {
+ firstArg++;
+ argc--;
+ if (!firstArg[0])
+ break;
+ bool matched = false;
+ for (unsigned int i = 0; i < sizeof(filterName) / sizeof(char *); ++i) {
+ if (strcmp(firstArg[0], filterName[i]) == 0) {
+ filter = static_cast<SeekableZStream::FilterId>(i);
+ matched = true;
+ break;
+ }
+ }
+ if (!matched) {
+ ERROR("Invalid filter");
+ return 1;
+ }
+ } else if (strcmp(firstArg[0], "-D") == 0) {
+ firstArg++;
+ argc--;
+ if (!firstArg[0])
+ break;
+ if (strcmp(firstArg[0], "auto") == 0) {
+ dictSize = -1;
+ } else if (!GetSize(firstArg[0], &dictSize) || (dictSize >= 1 << 16)) {
+ ERROR("Invalid dictionary size");
+ return 1;
+ }
+ }
+ }
+
+ if (argc != 2 || !firstArg[0]) {
+ LOG("usage: %s [-d] [-c CHUNKSIZE] [-f FILTER] [-D DICTSIZE] file",
+ argv[0]);
+ return 1;
+ }
+
+ if (compress) {
+ action.reset(new SzipCompress(chunkSize, filter, dictSize));
+ } else {
+ if (chunkSize) {
+ ERROR("-c is incompatible with -d");
+ return 1;
+ }
+ if (dictSize) {
+ ERROR("-D is incompatible with -d");
+ return 1;
+ }
+ action.reset(new SzipDecompress());
+ }
+
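+  /* Work on a temporary file derived from the input name and the pid; it
+   * only replaces the input if the action succeeds (see the rename/unlink
+   * below). */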
+ std::stringstream tmpOutStream;
+ tmpOutStream << firstArg[0] << ".sz." << getpid();
+ std::string tmpOut(tmpOutStream.str());
+ int ret;
+ struct stat st;
+ {
+ FileBuffer origBuf;
+ if (!origBuf.Init(firstArg[0])) {
+ ERROR("Couldn't open %s: %s", firstArg[0], strerror(errno));
+ return 1;
+ }
+
+ ret = fstat(origBuf.getFd(), &st);
+ if (ret == -1) {
+ ERROR("Couldn't stat %s: %s", firstArg[0], strerror(errno));
+ return 1;
+ }
+
+ size_t origSize = st.st_size;
+
+ /* Mmap the original file */
+ if (!origBuf.Resize(origSize)) {
+ ERROR("Couldn't mmap %s: %s", firstArg[0], strerror(errno));
+ return 1;
+ }
+
+ /* Create the compressed file */
+ FileBuffer outBuf;
+ if (!outBuf.Init(tmpOut.c_str(), true)) {
+ ERROR("Couldn't open %s: %s", tmpOut.c_str(), strerror(errno));
+ return 1;
+ }
+
+ ret = action->run(firstArg[0], origBuf, tmpOut.c_str(), outBuf);
+ if ((ret == 0) && (fstat(outBuf.getFd(), &st) == -1)) {
+ st.st_size = 0;
+ }
+ }
+
+ if ((ret == 0) && st.st_size) {
+ rename(tmpOut.c_str(), firstArg[0]);
+ } else {
+ unlink(tmpOut.c_str());
+ }
+ return ret;
+}