From 2c8dc0b855c38c5204d398ad306fa9cf43be1ada Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Mr=C3=A1zek?= Date: Thu, 26 Sep 2013 02:58:09 +0200 Subject: Compression algo dependencies, still need hackery... --- CMakeLists.txt | 11 +- depends/lzma/CMakeLists.txt | 54 + depends/lzma/LICENSE.txt | 9 + depends/lzma/easylzma_test.c | 282 ++ depends/lzma/elzma.c | 557 ++++ depends/lzma/include/common.h | 118 + depends/lzma/include/compress.h | 77 + depends/lzma/include/decompress.h | 58 + depends/lzma/include/simple.h | 37 + depends/lzma/pavlov/7zCrc.c | 35 + depends/lzma/pavlov/7zCrc.h | 24 + depends/lzma/pavlov/LzFind.c | 779 +++++ depends/lzma/pavlov/LzFind.h | 107 + depends/lzma/pavlov/LzHash.h | 62 + depends/lzma/pavlov/LzmaDec.c | 1076 +++++++ depends/lzma/pavlov/LzmaDec.h | 220 ++ depends/lzma/pavlov/LzmaEnc.c | 2349 +++++++++++++++ depends/lzma/pavlov/LzmaEnc.h | 71 + depends/lzma/pavlov/LzmaLib.c | 41 + depends/lzma/pavlov/LzmaLib.h | 137 + depends/lzma/pavlov/Types.h | 87 + depends/lzma/wrapper/common_internal.c | 46 + depends/lzma/wrapper/common_internal.h | 60 + depends/lzma/wrapper/compress.c | 297 ++ depends/lzma/wrapper/decompress.c | 263 ++ depends/lzma/wrapper/lzip_header.c | 96 + depends/lzma/wrapper/lzip_header.h | 11 + depends/lzma/wrapper/lzma_header.c | 134 + depends/lzma/wrapper/lzma_header.h | 10 + depends/lzma/wrapper/simple.c | 139 + depends/pack200/CMakeLists.txt | 43 + depends/pack200/include/unpack200.h | 1 + depends/pack200/src/bands.cpp | 451 +++ depends/pack200/src/bands.h | 492 +++ depends/pack200/src/bytes.cpp | 217 ++ depends/pack200/src/bytes.h | 284 ++ depends/pack200/src/coding.cpp | 1049 +++++++ depends/pack200/src/coding.h | 270 ++ depends/pack200/src/constants.h | 442 +++ depends/pack200/src/defines.h | 136 + depends/pack200/src/main.cpp | 489 +++ depends/pack200/src/unpack.cpp | 5105 ++++++++++++++++++++++++++++++++ depends/pack200/src/unpack.h | 585 ++++ depends/pack200/src/utils.cpp | 91 + depends/pack200/src/utils.h | 54 + 
depends/pack200/src/zip.cpp | 610 ++++ depends/pack200/src/zip.h | 130 + logic/OneSixUpdate.cpp | 5 +- logic/lists/MinecraftVersionList.cpp | 2 +- logic/net/ByteArrayDownload.cpp | 2 +- logic/net/DownloadJob.cpp | 95 +- logic/net/DownloadJob.h | 9 +- 52 files changed, 17769 insertions(+), 40 deletions(-) create mode 100644 depends/lzma/CMakeLists.txt create mode 100644 depends/lzma/LICENSE.txt create mode 100644 depends/lzma/easylzma_test.c create mode 100644 depends/lzma/elzma.c create mode 100644 depends/lzma/include/common.h create mode 100644 depends/lzma/include/compress.h create mode 100644 depends/lzma/include/decompress.h create mode 100644 depends/lzma/include/simple.h create mode 100755 depends/lzma/pavlov/7zCrc.c create mode 100755 depends/lzma/pavlov/7zCrc.h create mode 100755 depends/lzma/pavlov/LzFind.c create mode 100755 depends/lzma/pavlov/LzFind.h create mode 100755 depends/lzma/pavlov/LzHash.h create mode 100755 depends/lzma/pavlov/LzmaDec.c create mode 100755 depends/lzma/pavlov/LzmaDec.h create mode 100755 depends/lzma/pavlov/LzmaEnc.c create mode 100755 depends/lzma/pavlov/LzmaEnc.h create mode 100755 depends/lzma/pavlov/LzmaLib.c create mode 100755 depends/lzma/pavlov/LzmaLib.h create mode 100755 depends/lzma/pavlov/Types.h create mode 100644 depends/lzma/wrapper/common_internal.c create mode 100644 depends/lzma/wrapper/common_internal.h create mode 100644 depends/lzma/wrapper/compress.c create mode 100644 depends/lzma/wrapper/decompress.c create mode 100644 depends/lzma/wrapper/lzip_header.c create mode 100644 depends/lzma/wrapper/lzip_header.h create mode 100644 depends/lzma/wrapper/lzma_header.c create mode 100644 depends/lzma/wrapper/lzma_header.h create mode 100644 depends/lzma/wrapper/simple.c create mode 100644 depends/pack200/CMakeLists.txt create mode 100644 depends/pack200/include/unpack200.h create mode 100644 depends/pack200/src/bands.cpp create mode 100644 depends/pack200/src/bands.h create mode 100644 
depends/pack200/src/bytes.cpp create mode 100644 depends/pack200/src/bytes.h create mode 100644 depends/pack200/src/coding.cpp create mode 100644 depends/pack200/src/coding.h create mode 100644 depends/pack200/src/constants.h create mode 100644 depends/pack200/src/defines.h create mode 100644 depends/pack200/src/main.cpp create mode 100644 depends/pack200/src/unpack.cpp create mode 100644 depends/pack200/src/unpack.h create mode 100644 depends/pack200/src/utils.cpp create mode 100644 depends/pack200/src/utils.h create mode 100644 depends/pack200/src/zip.cpp create mode 100644 depends/pack200/src/zip.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 04886184..f16a5620 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,6 +43,10 @@ include_directories(${Qt5Widgets_INCLUDE_DIRS}) add_subdirectory(depends/quazip) include_directories(depends/quazip) +# Add lzma +add_subdirectory(depends/lzma) +include_directories(depends/lzma/include) + # Add the java launcher add_subdirectory(depends/launcher) @@ -60,6 +64,8 @@ include_directories(${LIBSETTINGS_INCLUDE_DIR}) add_subdirectory(depends/groupview) include_directories(${LIBGROUPVIEW_INCLUDE_DIR}) +#pack 200 +add_subdirectory(depends/pack200) ################################ SET UP BUILD OPTIONS ################################ @@ -345,8 +351,9 @@ ADD_EXECUTABLE(MultiMC MACOSX_BUNDLE WIN32 # Link QT5_USE_MODULES(MultiMC Widgets Network Xml) -TARGET_LINK_LIBRARIES(MultiMC quazip libUtil libSettings libGroupView ${MultiMC_LINK_ADDITIONAL_LIBS}) -ADD_DEPENDENCIES(MultiMC MultiMCLauncher libUtil libSettings libGroupView) +TARGET_LINK_LIBRARIES(MultiMC quazip lzma libUtil libSettings libGroupView +${MultiMC_LINK_ADDITIONAL_LIBS}) +#ADD_DEPENDENCIES(MultiMC MultiMCLauncher libUtil libSettings libGroupView) option(BUILD_KEYRING_TEST "Build the simple keyring test binary" OFF) diff --git a/depends/lzma/CMakeLists.txt b/depends/lzma/CMakeLists.txt new file mode 100644 index 00000000..4df2b762 --- /dev/null +++ 
b/depends/lzma/CMakeLists.txt @@ -0,0 +1,54 @@ +CMAKE_MINIMUM_REQUIRED(VERSION 2.6) + +PROJECT(lzma) + +IF (WIN32) + ADD_DEFINITIONS(-DWIN32) +ENDIF (WIN32) + +SET(SRCS +# original code by Igor Pavlov +# Lzma version 4.63 +# Minified ~_~ +pavlov/7zCrc.c +pavlov/7zCrc.h +pavlov/LzFind.c +pavlov/LzFind.h +pavlov/LzHash.h +pavlov/LzmaDec.c +pavlov/LzmaDec.h +pavlov/LzmaEnc.c +pavlov/LzmaEnc.h +pavlov/LzmaLib.c +pavlov/LzmaLib.h +pavlov/Types.h + +# Public headers +include/common.h +include/compress.h +include/decompress.h +include/simple.h + +# Wrapper by Lloyd Hilaiel (lloyd@hilaiel.com) +wrapper/common_internal.c +wrapper/common_internal.h +wrapper/compress.c +wrapper/decompress.c +wrapper/simple.c +wrapper/lzip_header.c +wrapper/lzip_header.h +wrapper/lzma_header.c +wrapper/lzma_header.h +) + +# an include directory to allow easylzma implementation to find public +# headers +INCLUDE_DIRECTORIES(include) +ADD_LIBRARY(lzma STATIC ${SRCS}) + +# lzma compress/decompress tool +ADD_EXECUTABLE(elzma elzma.c) +TARGET_LINK_LIBRARIES(elzma lzma) +# a simple test... +ADD_EXECUTABLE(easylzma_test easylzma_test.c) +TARGET_LINK_LIBRARIES(easylzma_test lzma) diff --git a/depends/lzma/LICENSE.txt b/depends/lzma/LICENSE.txt new file mode 100644 index 00000000..a8a34e6a --- /dev/null +++ b/depends/lzma/LICENSE.txt @@ -0,0 +1,9 @@ +# Written in 2009 by Lloyd Hilaiel +# Butchered in 2013 by Petr Mrazek +# +# License +# +# All the cruft you find here is public domain. You don't have to credit +# anyone to use this code, but my personal request is that you mention +# Igor Pavlov for his hard, high quality work. +# diff --git a/depends/lzma/easylzma_test.c b/depends/lzma/easylzma_test.c new file mode 100644 index 00000000..69858728 --- /dev/null +++ b/depends/lzma/easylzma_test.c @@ -0,0 +1,282 @@ +/* + * Written in 2009 by Lloyd Hilaiel + * + * License + * + * All the cruft you find here is public domain. 
You don't have to credit + * anyone to use this code, but my personal request is that you mention + * Igor Pavlov for his hard, high quality work. + * + * Various compiled-in tests for the easylzma library which excercise + * API correctness and handling of corrupt data. + */ + +#include "simple.h" + +#include +#include + +static const char *sampleData = + "Overview\n" + "\n" + "Easylzma is a C library and command line tools for LZMA compression and \n" + "decompression. It uses a Igor Pavlov's reference implementation and SDK\n" + "written in C.\n" + "\n" + "License\n" + "\n" + "All the cruft you find here is public domain. You don't have to credit\n" + "anyone to use this code, but my personal request is that you mention\n" + "Igor Pavlov for his hard, high quality work.\n" + "\n" + "Project Goals\n" + "\n" + "1. A tiny C wrapper and portable build system around a subset of\n" + " Igor Pavlov's public domain LZMA compression and decompression\n" + " implementation.\n" + "2. A tiny and straighforward API\n" + "3. Support for multiple different prominent LZMA file formats (see section on\n" + " file formats below)\n" + "4. easy to build and use everywhere (doze and nix alike)\n" + "5. public domain licensing through and through. (hats off to Igor)\n" + "\n" + "Current State:\n" + "\n" + "THIS IS A WORK IN PROGRESS. The code here should be considered pre-alpha,\n" + "and this should only be used by tinkerers or hackers at this point. Once\n" + "feature completion is attained this message will be updated. 
See the\n" + "TODO file distributed with the source for remaining work to be done.\n" + "\n" + "Platforms Supported\n" + "\n" + "0.0.2 has been successfully compiled and run basic round trip testing\n" + "on the following platforms & compilers:\n" + "\n" + " * win32 - visual studio 2005\n" + " * osx - 10.4 & 10.5 (intel)\n" + " * netbsd ppc - 4.0.1 with gcc 4.1.2\n" + " (NOTE: memory allocation errors when dict size is default)\n" + " * freebsd 6.1 - amd64 gcc 3.4.4\n" + "\n" + "Features\n" + "\n" + "XXX: write me (and the code)\n" + "\n" + "Usage\n" + "\n" + "XXX: write me (and the code)\n" + "\n" + "The Saga of LZMA File Formats, and a couple cents.\n" + "\n" + "As far as I can tell, there are at least four different ways to put LZMA\n" + "compressed data in a stream:\n" + "\n" + "1. The LZMA-Alone format, which consists of a 13 byte header including\n" + " compression properties, dictionary size, and the uncompressed size of\n" + " the file, followed by compressed data. This format has some support\n" + " in Igor Pavlov's reference implementation and is in widespread use, as\n" + " it's supported by lzmautils: http://tukaani.org/lzma/\n" + "\n" + " The canonical (afaict) implementation of this format (lzmautis) is\n" + " BSD licensed.\n" + "\n" + "2. The lzip format (http://www.nongnu.org/lzip/lzip.html) - which\n" + " includes a CRC footer and leading \"magic number\". The former\n" + " affords data integrity gaurantees, while the latter simplifies\n" + " heuristic determination of file format. This format looks to have\n" + " reasonably widespread usage, though not quite as significant as\n" + " LZMA-Alone.\n" + "\n" + " The only implementation of this format I can find (lzip) is GPL licensed.\n" + "\n" + "3. the xz format ( http://tukaani.org/xz/xz-file-format.txt ) which is\n" + " a more complex representation that includes CRC support and a magic\n" + " number. 
This format is to be supported by the next iteration of\n" + " XZ Utils which is currently in beta. The source may be obtained\n" + " here: git://ctrl.tukaani.org/xz.git\n" + "\n" + " This format will address some criticisms to the LZMA-Alone format and\n" + " was developed collaboratively by Lasse Collin (the current maintainer\n" + " of XZ utils) and Igor Pavlov (the author of 7zip and the refrence\n" + " implementation of LZMA).\n" + "\n" + " The xz format will employ LZMA2 which consists of extensions on top\n" + " of LZMA, in the xz utils maintainer's words:\n" + "\n" + " \"The primary compression algorithm in .xz is currently LZMA2, which\n" + " is an extension on top of the orignal LZMA to fix a few practical\n" + " issues, like adding support for flushing the encoder (equivalent\n" + " to zlib's Z_SYNC_FLUSH), which isn't possible with the original\n" + " LZMA.\"\n" + "\n" + " Again, maintainers words, regarding licensing:\n" + "\n" + " \"XZ Utils currently contains a zlib-like compression library and a \n" + " gzip-like command line tool. It's currently under LGPLv2.1+ but I will \n" + " put it into the public domain before the first stable release.\"\n" + "\n" + "4. 
The 7zip disk format which can contain multiple files possibly stored in\n" + " LZMA compressed format.\n" + "\n" + "Given the state of things, the goal of this project is to develop something\n" + "based on the existing formats, and quickly leverage code generated by the XZ\n" + "Utils project, or simply kill this thing if that project produces something\n" + "that's easy to embed and has a clean API at a similar level of abstraction\n" + "as easylzma.\n" + "\n" + "lloyd - sometime in oh nine.\n"; + +/* a test that we can round trip compress/decompress data using LZMA or LZIP + * formats */ +static int roundTripTest(elzma_file_format format) +{ + int rc; + unsigned char *compressed; + unsigned char *decompressed; + size_t sz; + + rc = simpleCompress(format, (unsigned char *)sampleData, strlen(sampleData), &compressed, + &sz); + + if (rc != ELZMA_E_OK) + return rc; + + /* gross assurance that compression is actually compressing */ + if (sz > strlen(sampleData)) + { + free(compressed); + return 1; + } + + rc = simpleDecompress(format, compressed, sz, &decompressed, &sz); + + free(compressed); + + if (rc != ELZMA_E_OK) + return rc; + + if (sz != strlen(sampleData) || 0 != memcmp(decompressed, sampleData, sz)) + { + free(decompressed); + return 1; + } + + return ELZMA_E_OK; +} + +/* "correct" lzip generated from the lzip program */ +/*|LZIP...3.?..????|*/ +/*|....?e2~........|*/ +static unsigned char correctLzip[] = { + 0x4c, 0x5a, 0x49, 0x50, 0x01, 0x0c, 0x00, 0x33, 0x1b, 0xec, 0x15, 0x07, 0xff, 0xff, + 0xff, 0xff, 0x80, 0x00, 0x00, 0x00, 0xa8, 0x65, 0x32, 0x7e, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + +/* "correct" lzip generated from lzma utils */ +static unsigned char correctLzma[] = {0x5d, 0x00, 0x00, 0x80, 0x00, 0x04, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x33, 0x1b, 0xec, 0x14, 0x00, 0x00, 0x00}; + +/* lzip with a bad CRC */ +static unsigned char corruptCRC[] = { + 0x4c, 0x5a, 0x49, 0x50, 
0x01, 0x0c, 0x00, 0x33, 0x1b, 0xec, 0x15, 0x07, 0xff, 0xff, + 0xff, 0xff, 0x80, 0x00, 0x00, 0x00, 0xa8, 0x65, 0x31, 0x7e, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + +/* lzip with a bad uncompressed size */ +static unsigned char corruptSize[] = { + 0x4c, 0x5a, 0x49, 0x50, 0x01, 0x0c, 0x00, 0x33, 0x1b, 0xec, 0x15, 0x07, 0xff, 0xff, + 0xff, 0xff, 0x80, 0x00, 0x00, 0x00, 0xa8, 0x65, 0x32, 0x7e, 0x04, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + +/* lzma with a bad uncompressed size */ +static unsigned char corruptSizeLzma[] = {0x5d, 0x00, 0x00, 0x80, 0x00, 0x04, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x33, 0x1b, 0xec, 0x14, 0x00, 0x00, 0x00}; + +/* lzma with a bad uncompressed size */ +static unsigned char corruptSizeLzma2[] = {0x5d, 0x00, 0x00, 0x80, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x33, 0x1b, 0xec, 0x14, 0x00, 0x00, 0x00}; + +/* tests */ +static struct +{ + const char *testName; /* the name of the test */ + int expectedCode; /* the expected output of the test */ + elzma_file_format format; + unsigned char *data; /* input data */ + unsigned int dataSize; +} tests[] = { + {"correct lzip", ELZMA_E_OK, ELZMA_lzip, correctLzip, sizeof(correctLzip)}, + {"lzip as lzma", ELZMA_E_DECOMPRESS_ERROR, ELZMA_lzma, correctLzip, sizeof(correctLzip)}, + {"correct lzma", ELZMA_E_OK, ELZMA_lzma, correctLzma, sizeof(correctLzma)}, + {"lzma as lzip", ELZMA_E_CORRUPT_HEADER, ELZMA_lzip, correctLzma, sizeof(correctLzma)}, + {"corrupt crc", ELZMA_E_CRC32_MISMATCH, ELZMA_lzip, corruptCRC, sizeof(corruptCRC)}, + {"bad lzip size", ELZMA_E_SIZE_MISMATCH, ELZMA_lzip, corruptSize, sizeof(corruptSize)}, + {"bad lzma size", ELZMA_E_INSUFFICIENT_INPUT, ELZMA_lzma, + corruptSizeLzma, sizeof(corruptSizeLzma)}, + {"bad lzma size 2", ELZMA_E_SIZE_MISMATCH, ELZMA_lzma, + corruptSizeLzma2, sizeof(corruptSizeLzma2)}}; + +int main(void) +{ + unsigned int i; 
+ unsigned int testsPassed = 0; + unsigned int testsRun = 0; + + int rc = 0; + + printf("round trip lzma test: "); + fflush(stdout); + testsRun++; + if (ELZMA_E_OK != (rc = roundTripTest(ELZMA_lzma))) + { + printf("fail! (%d)\n", rc); + } + else + { + testsPassed++; + printf("ok\n"); + } + + printf("round trip lzip test: "); + fflush(stdout); + testsRun++; + if (ELZMA_E_OK != (rc = roundTripTest(ELZMA_lzip))) + { + printf("fail (%d)!\n", rc); + } + else + { + testsPassed++; + printf("ok\n"); + } + + /* now run through the tests table */ + for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) + { + unsigned char *decompressed = NULL; + size_t sz = 0; + + printf("%s test: ", tests[i].testName); + rc = simpleDecompress(tests[i].format, tests[i].data, tests[i].dataSize, &decompressed, + &sz); + + testsRun++; + if (rc != tests[i].expectedCode) + { + printf("fail - got %d - expected %d\n", rc, tests[i].expectedCode); + } + else + { + testsPassed++; + printf("ok\n"); + free(decompressed); + } + } + + printf("\n%d/%d tests passed\n", testsPassed, testsRun); + + return (testsPassed == testsRun) ? 0 : 1; +} diff --git a/depends/lzma/elzma.c b/depends/lzma/elzma.c new file mode 100644 index 00000000..f715a7b2 --- /dev/null +++ b/depends/lzma/elzma.c @@ -0,0 +1,557 @@ +/* + * Written in 2009 by Lloyd Hilaiel + * + * License + * + * All the cruft you find here is public domain. You don't have to credit + * anyone to use this code, but my personal request is that you mention + * Igor Pavlov for his hard, high quality work. + * + * command line elzma tool for lzma compression + * + * At time of writing, the primary purpose of this tool is to test the + * easylzma library. 
+ * + * TODO: + * - stdin/stdout support + * - multiple file support + * - much more + */ + +#include "include/compress.h" +#include "include/decompress.h" + +#include +#include +#include + +#ifdef WIN32 +#include +#define unlink _unlink +#else +#include +#endif + +int deleteFile(const char *path) +{ + return unlink(path); +} + +/* a utility to open a pair of files */ +/* XXX: respect overwrite flag */ +static int openFiles(const char *ifname, FILE **inFile, const char *ofname, FILE **outFile, + int overwrite) +{ + *inFile = fopen(ifname, "rb"); + if (*inFile == NULL) + { + fprintf(stderr, "couldn't open '%s' for reading\n", ifname); + return 1; + } + + *outFile = fopen(ofname, "wb"); + if (*outFile == NULL) + { + fprintf(stderr, "couldn't open '%s' for writing\n", ofname); + return 1; + } + + return 0; +} + +#define ELZMA_COMPRESS_USAGE \ + "Compress files using the LZMA algorithm (in place by default).\n" \ + "\n" \ + "Usage: elzma [options] [file]\n" \ + " -1 .. -9 compression level, -1 is fast, -9 is best (default 5)\n" \ + " -f, --force overwrite output files if they exist\n" \ + " -h, --help output this message and exit\n" \ + " -k, --keep don't delete input files\n" \ + " --lzip compress to lzip disk format (.lz extension)\n" \ + " --lzma compress to LZMA-Alone disk format (.lzma extension)\n" \ + " -v, --verbose output verbose status information while compressing\n" \ + " -z, --compress compress files (default when invoking elzma program)\n" \ + " -d, --decompress decompress files (default when invoking unelzma program)\n" \ + "\n" \ + "Advanced Options:\n" \ + " -s --set-max-dict (advanced) specify maximum dictionary size in bytes\n" + +/* parse arguments populating output parameters, return nonzero on failure */ +static int parseCompressArgs(int argc, char **argv, unsigned char *level, char **fname, + unsigned int *maxDictSize, unsigned int *verbose, + unsigned int *keep, unsigned int *overwrite, + elzma_file_format *format) +{ + int i; + + if (argc < 2) 
+ return 1; + + for (i = 1; i < argc; i++) + { + if (argv[i][0] == '-') + { + char *val = NULL; + char *arg = &(argv[i][1]); + if (arg[0] == '-') + arg++; + + /* now see what argument this is */ + if (!strcmp(arg, "h") || !strcmp(arg, "help")) + { + return 1; + } + else if (!strcmp(arg, "s") || !strcmp(arg, "set-max-dict")) + { + unsigned int j = 0; + + /* the option needs a value; without this check a trailing -s + * would hand the NULL argv[argc] entry to strlen() below */ + if (i + 1 >= argc) + return 1; + val = argv[++i]; + + /* validate argument is numeric */ + for (j = 0; j < strlen(val); j++) + { + if (val[j] < '0' || val[j] > '9') + return 1; + } + + *maxDictSize = strtoul(val, (char **)NULL, 10); + + /* don't allow dictionary sizes less than 8k (1 << 13 bytes); + * this must be a shift, the comparison 1 < 13 is just 1 */ + if (*maxDictSize < (1 << 13)) + *maxDictSize = 1 << 13; + else + { + /* make sure dict size is compatible with lzip, + * this will effectively collapse it to a close power + * of 2 */ + *maxDictSize = elzma_get_dict_size(*maxDictSize); + } + } + else if (!strcmp(arg, "v") || !strcmp(arg, "verbose")) + { + *verbose = 1; + } + else if (!strcmp(arg, "f") || !strcmp(arg, "force")) + { + *overwrite = 1; + } + else if (!strcmp(arg, "k") || !strcmp(arg, "keep")) + { + *keep = 1; + } + else if (strlen(arg) == 1 && arg[0] >= '1' && arg[0] <= '9') + { + *level = arg[0] - '0'; + } + else if (!strcmp(arg, "lzma")) + { + *format = ELZMA_lzma; + } + else if (!strcmp(arg, "lzip")) + { + *format = ELZMA_lzip; + } + else if (!strcmp(arg, "z") || !strcmp(arg, "d") || !strcmp(arg, "compress") || + !strcmp(arg, "decompress")) + { + /* noop */ + } + else + { + return 1; + } + } + else + { + *fname = argv[i]; + break; + } + } + + /* proper number of arguments? 
*/ + if (i != argc - 1 || *fname == NULL) + return 1; + + return 0; +} + +/* callbacks for streamed input and output */ +static size_t elzmaWriteFunc(void *ctx, const void *buf, size_t size) +{ + size_t wt; + FILE *f = (FILE *)ctx; + assert(f != NULL); + + wt = fwrite(buf, 1, size, f); + + return wt; +} + +static int elzmaReadFunc(void *ctx, void *buf, size_t *size) +{ + FILE *f = (FILE *)ctx; + assert(f != NULL); + *size = fread(buf, 1, *size, f); + + return 0; +} + +static void printProgressHeader(void) +{ + printf("|0%% 50%% 100%%|\n"); +} + +static void endProgress(int pCtx) +{ + while (pCtx++ < 64) + { + printf("."); + } + printf("|\n"); +} + +static void elzmaProgressFunc(void *ctx, size_t complete, size_t total) +{ + int *dots = (int *)ctx; + int wantDots = (int)(64 * (double)complete / (double)total); + if (*dots == 0) + { + printf("|"); + (*dots)++; + } + while (wantDots > *dots) + { + printf("."); + (*dots)++; + } + fflush(stdout); +} + +static int doCompress(int argc, char **argv) +{ + /* default compression parameters, some of which may be overridded by + * command line arguments */ + unsigned char level = 5; + unsigned char lc = ELZMA_LC_DEFAULT; + unsigned char lp = ELZMA_LP_DEFAULT; + unsigned char pb = ELZMA_PB_DEFAULT; + unsigned int maxDictSize = ELZMA_DICT_SIZE_DEFAULT_MAX; + unsigned int dictSize = 0; + elzma_file_format format = ELZMA_lzma; + char *ext = ".lzma"; + char *ifname = NULL; + char *ofname = NULL; + unsigned int verbose = 0; + FILE *inFile = NULL; + FILE *outFile = NULL; + elzma_compress_handle hand = NULL; + /* XXX: large file support */ + unsigned int uncompressedSize = 0; + unsigned int keep = 0; + unsigned int overwrite = 0; + + if (0 != parseCompressArgs(argc, argv, &level, &ifname, &maxDictSize, &verbose, &keep, + &overwrite, &format)) + { + fprintf(stderr, ELZMA_COMPRESS_USAGE); + return 1; + } + + /* extension switching based on compression type*/ + if (format == ELZMA_lzip) + ext = ".lz"; + + /* generate output file name */ 
+ { + ofname = malloc(strlen(ifname) + strlen(ext) + 1); + ofname[0] = 0; + strcat(ofname, ifname); + strcat(ofname, ext); + } + + /* now attempt to open input and output files. NOTE(review): the ofname buffer built just before this is used without checking malloc's result, and the early returns in this function neither free it nor close the opened files - TODO confirm this is acceptable for a one-shot tool */ + /* XXX: stdin/stdout support */ + if (0 != openFiles(ifname, &inFile, ofname, &outFile, overwrite)) + { + return 1; + } + + /* set uncompressed size: seek to the end, read the offset, then rewind; a zero length is treated as an error. NOTE(review): ftell() reports failure as -1L, which stored into the unsigned uncompressedSize is nonzero and so is not caught by this check */ + if (0 != fseek(inFile, 0, SEEK_END) || 0 == (uncompressedSize = ftell(inFile)) || + 0 != fseek(inFile, 0, SEEK_SET)) + { + fprintf(stderr, "error seeking input file (%s) - zero length?\n", ifname); + deleteFile(ofname); + return 1; + } + + /* determine a reasonable dictionary size given input size, capped at maxDictSize */ + dictSize = elzma_get_dict_size(uncompressedSize); + if (dictSize > maxDictSize) + dictSize = maxDictSize; + + if (verbose) + { + printf("compressing '%s' to '%s'\n", ifname, ofname); + printf("lc/lp/pb = %u/%u/%u | dictionary size = %u bytes\n", lc, lp, pb, dictSize); + printf("input file is %u bytes\n", uncompressedSize); + } + + /* allocate a compression handle; on failure the partially created output file is removed */ + hand = elzma_compress_alloc(); + if (hand == NULL) + { + fprintf(stderr, "couldn't allocate compression object\n"); + deleteFile(ofname); + return 1; + } + + if (ELZMA_E_OK != + elzma_compress_config(hand, lc, lp, pb, level, dictSize, format, uncompressedSize)) + { + fprintf(stderr, "couldn't configure compression with " + "provided parameters\n"); + deleteFile(ofname); + return 1; + } + + { + int rv; + int pCtx = 0; + + if (verbose) + printProgressHeader(); + + rv = elzma_compress_run(hand, elzmaReadFunc, (void *)inFile, elzmaWriteFunc, + (void *)outFile, (verbose ? 
elzmaProgressFunc : NULL), &pCtx); + + if (verbose) + endProgress(pCtx); + + if (ELZMA_E_OK != rv) + { + fprintf(stderr, "error compressing\n"); + deleteFile(ofname); + return 1; + } + } + + /* clean up */ + elzma_compress_free(&hand); + fclose(inFile); + fclose(outFile); + free(ofname); + + if (!keep) + deleteFile(ifname); + + return 0; +} + +#define ELZMA_DECOMPRESS_USAGE \ + "Decompress files compressed using the LZMA algorithm (in place by default).\n" \ + "\n" \ + "Usage: unelzma [options] [file]\n" \ + " -f, --force overwrite output files if they exist\n" \ + " -h, --help output this message and exit\n" \ + " -k, --keep don't delete input files\n" \ + " -v, --verbose output verbose status information while decompressing\n" \ + " -z, --compress compress files (default when invoking elzma program)\n" \ + " -d, --decompress decompress files (default when invoking unelzma program)\n" \ + "\n" +/* parse arguments populating output parameters, return nonzero on failure */ +static int parseDecompressArgs(int argc, char **argv, char **fname, unsigned int *verbose, + unsigned int *keep, unsigned int *overwrite) +{ + int i; + + if (argc < 2) + return 1; + + for (i = 1; i < argc; i++) + { + if (argv[i][0] == '-') + { + char *arg = &(argv[i][1]); + if (arg[0] == '-') + arg++; + + /* now see what argument this is */ + if (!strcmp(arg, "h") || !strcmp(arg, "help")) + { + return 1; + } + else if (!strcmp(arg, "v") || !strcmp(arg, "verbose")) + { + *verbose = 1; + } + else if (!strcmp(arg, "k") || !strcmp(arg, "keep")) + { + *keep = 1; + } + else if (!strcmp(arg, "f") || !strcmp(arg, "force")) + { + *overwrite = 1; + } + else if (!strcmp(arg, "z") || !strcmp(arg, "d") || !strcmp(arg, "compress") || + !strcmp(arg, "decompress")) + { + /* noop */ + } + else + { + return 1; + } + } + else + { + *fname = argv[i]; + break; + } + } + + /* proper number of arguments? 
*/ + if (i != argc - 1 || *fname == NULL) + return 1; + + return 0; +} + +static int doDecompress(int argc, char **argv) +{ + char *ifname = NULL; + char *ofname = NULL; + unsigned int verbose = 0; + FILE *inFile = NULL; + FILE *outFile = NULL; + elzma_decompress_handle hand = NULL; + unsigned int overwrite = 0; + unsigned int keep = 0; + elzma_file_format format; + const char *lzmaExt = ".lzma"; + const char *lzipExt = ".lz"; + const char *ext = ".lz"; + + if (0 != parseDecompressArgs(argc, argv, &ifname, &verbose, &keep, &overwrite)) + { + fprintf(stderr, ELZMA_DECOMPRESS_USAGE); + return 1; + } + + /* generate output file name */ + if (strlen(ifname) > strlen(lzmaExt) && + 0 == strcmp(lzmaExt, ifname + strlen(ifname) - strlen(lzmaExt))) + { + format = ELZMA_lzma; + ext = lzmaExt; + } + else if (strlen(ifname) > strlen(lzipExt) && + 0 == strcmp(lzipExt, ifname + strlen(ifname) - strlen(lzipExt))) + { + format = ELZMA_lzip; + ext = lzipExt; + } + else + { + fprintf(stderr, "input file extension not recognized (expected either " + "%s or %s)", + lzmaExt, lzipExt); + return 1; + } + + ofname = malloc(strlen(ifname) - strlen(ext)); + ofname[0] = 0; + strncat(ofname, ifname, strlen(ifname) - strlen(ext)); + + /* now attempt to open input and ouput files */ + /* XXX: stdin/stdout support */ + if (0 != openFiles(ifname, &inFile, ofname, &outFile, overwrite)) + { + return 1; + } + + hand = elzma_decompress_alloc(); + if (hand == NULL) + { + fprintf(stderr, "couldn't allocate decompression object\n"); + deleteFile(ofname); + return 1; + } + + if (ELZMA_E_OK != elzma_decompress_run(hand, elzmaReadFunc, (void *)inFile, elzmaWriteFunc, + (void *)outFile, format)) + { + fprintf(stderr, "error decompressing\n"); + deleteFile(ofname); + return 1; + } + + elzma_decompress_free(&hand); + + if (!keep) + deleteFile(ifname); + + return 0; +} + +int main(int argc, char **argv) +{ + const char *unelzma = "unelzma"; + const char *unelzmaLose = "unelzma.exe"; + const char *elzma = 
"elzma"; + const char *elzmaLose = "elzma.exe"; + + enum + { + RM_NONE, + RM_COMPRESS, + RM_DECOMPRESS + } runmode = RM_NONE; + + /* first we'll determine the mode we're running in, indicated by + * the binary name (argv[0]) or by the presence of a flag: + * one of -z, -d, -compress, --decompress */ + if ((strlen(argv[0]) >= strlen(unelzma) && + !strcmp((argv[0] + strlen(argv[0]) - strlen(unelzma)), unelzma)) || + (strlen(argv[0]) >= strlen(unelzmaLose) && + !strcmp((argv[0] + strlen(argv[0]) - strlen(unelzmaLose)), unelzmaLose))) + { + runmode = RM_DECOMPRESS; + } + else if ((strlen(argv[0]) >= strlen(elzma) && + !strcmp((argv[0] + strlen(argv[0]) - strlen(elzma)), elzma)) || + (strlen(argv[0]) >= strlen(elzmaLose) && + !strcmp((argv[0] + strlen(argv[0]) - strlen(elzmaLose)), elzmaLose))) + { + runmode = RM_COMPRESS; + } + + /* allow runmode to be overridded by a command line flag, first flag + * wins */ + { + int i; + for (i = 1; i < argc; i++) + { + if (!strcmp(argv[i], "-d") || !strcmp(argv[i], "--decompress")) + { + runmode = RM_DECOMPRESS; + break; + } + else if (!strcmp(argv[i], "-z") || !strcmp(argv[i], "--compress")) + { + runmode = RM_COMPRESS; + break; + } + } + } + + if (runmode != RM_COMPRESS && runmode != RM_DECOMPRESS) + { + fprintf(stderr, "couldn't determine whether " + "you want to compress or decompress\n"); + return 1; + } + + if (runmode == RM_COMPRESS) + return doCompress(argc, argv); + return doDecompress(argc, argv); +} diff --git a/depends/lzma/include/common.h b/depends/lzma/include/common.h new file mode 100644 index 00000000..f02bdb4d --- /dev/null +++ b/depends/lzma/include/common.h @@ -0,0 +1,118 @@ +/* + * Written in 2009 by Lloyd Hilaiel + * + * License + * + * All the cruft you find here is public domain. You don't have to credit + * anyone to use this code, but my personal request is that you mention + * Igor Pavlov for his hard, high quality work. 
/* msft dll export gunk.  To build a DLL on windows, you
 * must define WIN32, EASYLZMA_SHARED, and EASYLZMA_BUILD.  To use a
 * DLL, you must define EASYLZMA_SHARED and WIN32 */
#if defined(WIN32) && defined(EASYLZMA_SHARED)
#ifdef EASYLZMA_BUILD
#define EASYLZMA_API __declspec(dllexport)
#else
#define EASYLZMA_API __declspec(dllimport)
#endif
#else
/* static builds and non-Windows targets: the decoration expands to nothing */
#define EASYLZMA_API
#endif

/** error codes (ELZMA_E_OK is 0, every failure code is >= 10) */

/** no error */
#define ELZMA_E_OK 0
/** bad parameters passed to an ELZMA function */
#define ELZMA_E_BAD_PARAMS 10
/** could not initialize the encoder with configured parameters. */
#define ELZMA_E_ENCODING_PROPERTIES_ERROR 11
/** an error occurred during compression (XXX: be more specific) */
#define ELZMA_E_COMPRESS_ERROR 12
/** currently unsupported lzma file format was specified */
#define ELZMA_E_UNSUPPORTED_FORMAT 13
/** an error occurred when reading input */
#define ELZMA_E_INPUT_ERROR 14
/** an error occurred when writing output */
#define ELZMA_E_OUTPUT_ERROR 15
/** LZMA header couldn't be parsed */
#define ELZMA_E_CORRUPT_HEADER 16
/** an error occurred during decompression (XXX: be more specific) */
#define ELZMA_E_DECOMPRESS_ERROR 17
/** the input stream returns EOF before the decompression could complete */
#define ELZMA_E_INSUFFICIENT_INPUT 18
/** for formats which have an embedded crc, this error would indicate that
 *  what came out was not what went in, i.e. data corruption */
#define ELZMA_E_CRC32_MISMATCH 19
/** for formats which have an embedded uncompressed content length,
 *  this error indicates that the amount we read was not what we expected */
#define ELZMA_E_SIZE_MISMATCH 20

/** Supported file formats */
typedef enum
{
    ELZMA_lzip, /**< the lzip format which includes a magic number and
                 *   CRC check */
    ELZMA_lzma  /**< the LZMA-Alone format, originally designed by
                 *   Igor Pavlov and in widespread use due to lzmautils,
                 *   lacking both aforementioned features of lzip */
    /* XXX: future, potentially   ,
       ELZMA_xz
    */
} elzma_file_format;

/**
 * A callback invoked during elzma_[de]compress_run when the [de]compression
 * process has generated [de]compressed output.
 *
 * the size parameter indicates how much data is in buf to be written.
 * it is required that the write callback consume all data, and a return
 * value not equal to input size indicates an error.
 */
typedef size_t (*elzma_write_callback)(void *ctx, const void *buf, size_t size);

/**
 * A callback invoked during elzma_[de]compress_run when the [de]compression
 * process requires more [un]compressed input.
 *
 * the size parameter is an in/out argument.  on input it indicates
 * the buffer size.  on output it indicates the amount of data read into
 * buf.  when *size is zero on output it indicates EOF.
 *
 * \returns the read callback should return nonzero on failure.
 */
typedef int (*elzma_read_callback)(void *ctx, void *buf, size_t *size);
+ */ +typedef void (*elzma_progress_callback)(void *ctx, size_t complete, size_t total); + +/** pointer to a malloc function, supporting client overriding memory + * allocation routines */ +typedef void *(*elzma_malloc)(void *ctx, unsigned int sz); + +/** pointer to a free function, supporting client overriding memory + * allocation routines */ +typedef void (*elzma_free)(void *ctx, void *ptr); + +#ifdef __cplusplus +} +; +#endif diff --git a/depends/lzma/include/compress.h b/depends/lzma/include/compress.h new file mode 100644 index 00000000..46c81d75 --- /dev/null +++ b/depends/lzma/include/compress.h @@ -0,0 +1,77 @@ +/* + * Written in 2009 by Lloyd Hilaiel + * + * License + * + * All the cruft you find here is public domain. You don't have to credit + * anyone to use this code, but my personal request is that you mention + * Igor Pavlov for his hard, high quality work. + * + * compress.h - the API for LZMA compression using easylzma + */ + +#pragma once + +#include "common.h" +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** suggested default values */ +#define ELZMA_LC_DEFAULT 3 +#define ELZMA_LP_DEFAULT 0 +#define ELZMA_PB_DEFAULT 2 +#define ELZMA_DICT_SIZE_DEFAULT_MAX (1 << 24) + +/** an opaque handle to an lzma compressor */ +typedef struct _elzma_compress_handle *elzma_compress_handle; + +/** + * Allocate a handle to an LZMA compressor object. + */ +elzma_compress_handle EASYLZMA_API elzma_compress_alloc(); + +/** + * set allocation routines (optional, if not called malloc & free will + * be used) + */ +void EASYLZMA_API +elzma_compress_set_allocation_callbacks(elzma_compress_handle hand, elzma_malloc mallocFunc, + void *mallocFuncContext, elzma_free freeFunc, + void *freeFuncContext); + +/** + * Free all data associated with an LZMA compressor object. + */ +void EASYLZMA_API elzma_compress_free(elzma_compress_handle *hand); + +/** + * Set configuration paramters for a compression run. If not called, + * reasonable defaults will be used. 
+ */ +int EASYLZMA_API elzma_compress_config(elzma_compress_handle hand, unsigned char lc, + unsigned char lp, unsigned char pb, unsigned char level, + unsigned int dictionarySize, elzma_file_format format, + unsigned long long uncompressedSize); + +/** + * Run compression + */ +int EASYLZMA_API +elzma_compress_run(elzma_compress_handle hand, elzma_read_callback inputStream, + void *inputContext, elzma_write_callback outputStream, void *outputContext, + elzma_progress_callback progressCallback, void *progressContext); + +/** + * a heuristic utility routine to guess a dictionary size that gets near + * optimal compression while reducing memory usage. + * accepts a size in bytes, returns a proposed dictionary size + */ +unsigned int EASYLZMA_API elzma_get_dict_size(unsigned long long size); + +#ifdef __cplusplus +} +; +#endif diff --git a/depends/lzma/include/decompress.h b/depends/lzma/include/decompress.h new file mode 100644 index 00000000..cb10b2ba --- /dev/null +++ b/depends/lzma/include/decompress.h @@ -0,0 +1,58 @@ +/* + * Written in 2009 by Lloyd Hilaiel + * + * License + * + * All the cruft you find here is public domain. You don't have to credit + * anyone to use this code, but my personal request is that you mention + * Igor Pavlov for his hard, high quality work. + * + * easylzma/decompress.h - The API for LZMA decompression using easylzma + */ + +#pragma once + +#include "include/common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** an opaque handle to an lzma decompressor */ +typedef struct _elzma_decompress_handle *elzma_decompress_handle; + +/** + * Allocate a handle to an LZMA decompressor object. 
+ */ +elzma_decompress_handle EASYLZMA_API elzma_decompress_alloc(); + +/** + * set allocation routines (optional, if not called malloc & free will + * be used) + */ +void EASYLZMA_API +elzma_decompress_set_allocation_callbacks(elzma_decompress_handle hand, elzma_malloc mallocFunc, + void *mallocFuncContext, elzma_free freeFunc, + void *freeFuncContext); + +/** + * Free all data associated with an LZMA decompressor object. + */ +void EASYLZMA_API elzma_decompress_free(elzma_decompress_handle *hand); + +/** + * Perform decompression + * + * XXX: should the library automatically detect format by reading stream? + * currently it's based on data external to stream (such as extension + * or convention) + */ +int EASYLZMA_API elzma_decompress_run(elzma_decompress_handle hand, + elzma_read_callback inputStream, void *inputContext, + elzma_write_callback outputStream, void *outputContext, + elzma_file_format format); + +#ifdef __cplusplus +} +; +#endif diff --git a/depends/lzma/include/simple.h b/depends/lzma/include/simple.h new file mode 100644 index 00000000..83f7b2d2 --- /dev/null +++ b/depends/lzma/include/simple.h @@ -0,0 +1,37 @@ +/* + * Written in 2009 by Lloyd Hilaiel + * + * License + * + * All the cruft you find here is public domain. You don't have to credit + * anyone to use this code, but my personal request is that you mention + * Igor Pavlov for his hard, high quality work. + * + * simple.h - a wrapper around easylzma to compress/decompress to memory + */ + +#pragma once + +#include "include/common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "include/compress.h" +#include "include/decompress.h" + +/* compress a chunk of memory and return a dynamically allocated buffer + * if successful. 
return value is an easylzma error code */ +int EASYLZMA_API simpleCompress(elzma_file_format format, const unsigned char *inData, + size_t inLen, unsigned char **outData, size_t *outLen); + +/* decompress a chunk of memory and return a dynamically allocated buffer + * if successful. return value is an easylzma error code */ +int EASYLZMA_API simpleDecompress(elzma_file_format format, const unsigned char *inData, + size_t inLen, unsigned char **outData, size_t *outLen); + +#ifdef __cplusplus +} +; +#endif \ No newline at end of file diff --git a/depends/lzma/pavlov/7zCrc.c b/depends/lzma/pavlov/7zCrc.c new file mode 100755 index 00000000..c1598ce2 --- /dev/null +++ b/depends/lzma/pavlov/7zCrc.c @@ -0,0 +1,35 @@ +/* 7zCrc.c -- CRC32 calculation +2008-08-05 +Igor Pavlov +Public domain */ + +#include "7zCrc.h" + +#define kCrcPoly 0xEDB88320 +uint32_t g_CrcTable[256]; + +void MY_FAST_CALL CrcGenerateTable(void) +{ + uint32_t i; + for (i = 0; i < 256; i++) + { + uint32_t r = i; + int j; + for (j = 0; j < 8; j++) + r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1)); + g_CrcTable[i] = r; + } +} + +uint32_t MY_FAST_CALL CrcUpdate(uint32_t v, const void *data, size_t size) +{ + const uint8_t *p = (const uint8_t *)data; + for (; size > 0; size--, p++) + v = CRC_UPDATE_BYTE(v, *p); + return v; +} + +uint32_t MY_FAST_CALL CrcCalc(const void *data, size_t size) +{ + return CrcUpdate(CRC_INIT_VAL, data, size) ^ 0xFFFFFFFF; +} diff --git a/depends/lzma/pavlov/7zCrc.h b/depends/lzma/pavlov/7zCrc.h new file mode 100755 index 00000000..0609cb87 --- /dev/null +++ b/depends/lzma/pavlov/7zCrc.h @@ -0,0 +1,24 @@ +/* 7zCrc.h -- CRC32 calculation +2008-03-13 +Igor Pavlov +Public domain */ + +#ifndef __7Z_CRC_H +#define __7Z_CRC_H + +#include + +#include "Types.h" + +extern uint32_t g_CrcTable[]; + +void MY_FAST_CALL CrcGenerateTable(void); + +#define CRC_INIT_VAL 0xFFFFFFFF +#define CRC_GET_DIGEST(crc) ((crc) ^ 0xFFFFFFFF) +#define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) 
>> 8)) + +uint32_t MY_FAST_CALL CrcUpdate(uint32_t crc, const void *data, size_t size); +uint32_t MY_FAST_CALL CrcCalc(const void *data, size_t size); + +#endif diff --git a/depends/lzma/pavlov/LzFind.c b/depends/lzma/pavlov/LzFind.c new file mode 100755 index 00000000..75003ac1 --- /dev/null +++ b/depends/lzma/pavlov/LzFind.c @@ -0,0 +1,779 @@ +/* LzFind.c -- Match finder for LZ algorithms +2008-10-04 : Igor Pavlov : Public domain */ + +#include +#include + +#include "LzFind.h" +#include "LzHash.h" + +#define kEmptyHashValue 0 +#define kMaxValForNormalize ((uint32_t)0xFFFFFFFF) +#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */ +#define kNormalizeMask (~(kNormalizeStepMin - 1)) +#define kMaxHistorySize ((uint32_t)3 << 30) + +#define kStartMaxLen 3 + +static void LzInWindow_Free(CMatchFinder *p) +{ + if (!p->directInput) + { + free(p->bufferBase); + p->bufferBase = 0; + } +} + +/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */ + +static int LzInWindow_Create(CMatchFinder *p, uint32_t keepSizeReserv) +{ + uint32_t blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv; + if (p->directInput) + { + p->blockSize = blockSize; + return 1; + } + if (p->bufferBase == 0 || p->blockSize != blockSize) + { + LzInWindow_Free(p); + p->blockSize = blockSize; + p->bufferBase = (uint8_t *)malloc((size_t)blockSize); + } + return (p->bufferBase != 0); +} + +uint8_t *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) +{ + return p->buffer; +} +uint8_t MatchFinder_GetIndexByte(CMatchFinder *p, int32_t index) +{ + return p->buffer[index]; +} + +uint32_t MatchFinder_GetNumAvailableBytes(CMatchFinder *p) +{ + return p->streamPos - p->pos; +} + +void MatchFinder_ReduceOffsets(CMatchFinder *p, uint32_t subValue) +{ + p->posLimit -= subValue; + p->pos -= subValue; + p->streamPos -= subValue; +} + +static void MatchFinder_ReadBlock(CMatchFinder *p) +{ + if (p->streamEndWasReached || p->result != SZ_OK) + return; + for (;;) + { + uint8_t *dest = 
p->buffer + (p->streamPos - p->pos); + size_t size = (p->bufferBase + p->blockSize - dest); + if (size == 0) + return; + p->result = p->stream->Read(p->stream, dest, &size); + if (p->result != SZ_OK) + return; + if (size == 0) + { + p->streamEndWasReached = 1; + return; + } + p->streamPos += (uint32_t)size; + if (p->streamPos - p->pos > p->keepSizeAfter) + return; + } +} + +void MatchFinder_MoveBlock(CMatchFinder *p) +{ + memmove(p->bufferBase, p->buffer - p->keepSizeBefore, + (size_t)(p->streamPos - p->pos + p->keepSizeBefore)); + p->buffer = p->bufferBase + p->keepSizeBefore; +} + +int MatchFinder_NeedMove(CMatchFinder *p) +{ + /* if (p->streamEndWasReached) return 0; */ + return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); +} + +void MatchFinder_ReadIfRequired(CMatchFinder *p) +{ + if (p->streamEndWasReached) + return; + if (p->keepSizeAfter >= p->streamPos - p->pos) + MatchFinder_ReadBlock(p); +} + +static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p) +{ + if (MatchFinder_NeedMove(p)) + MatchFinder_MoveBlock(p); + MatchFinder_ReadBlock(p); +} + +static void MatchFinder_SetDefaultSettings(CMatchFinder *p) +{ + p->cutValue = 32; + p->btMode = 1; + p->numHashBytes = 4; + /* p->skipModeBits = 0; */ + p->directInput = 0; + p->bigHash = 0; +} + +#define kCrcPoly 0xEDB88320 + +void MatchFinder_Construct(CMatchFinder *p) +{ + uint32_t i; + p->bufferBase = 0; + p->directInput = 0; + p->hash = 0; + MatchFinder_SetDefaultSettings(p); + + for (i = 0; i < 256; i++) + { + uint32_t r = i; + int j; + for (j = 0; j < 8; j++) + r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1)); + p->crc[i] = r; + } +} + +static void MatchFinder_FreeThisClassMemory(CMatchFinder *p) +{ + free(p->hash); + p->hash = 0; +} + +void MatchFinder_Free(CMatchFinder *p) +{ + MatchFinder_FreeThisClassMemory(p); + LzInWindow_Free(p); +} + +static CLzRef *AllocRefs(uint32_t num) +{ + size_t sizeInBytes = (size_t)num * sizeof(CLzRef); + if (sizeInBytes / sizeof(CLzRef) != num) + 
/*
 * Size and allocate everything the match finder needs for a given history
 * (dictionary) size: the input window and the combined hash + "son" array.
 *
 * p                    finder previously set up by MatchFinder_Construct()
 * historySize          dictionary size; values above kMaxHistorySize are rejected
 * keepAddBufferBefore  extra bytes to keep available before the current position
 * matchMaxLen          longest match length that will be searched for
 * keepAddBufferAfter   extra bytes to keep available after the current position
 *
 * Returns 1 on success.  On any failure all memory owned by the finder is
 * released via MatchFinder_Free() and 0 is returned.
 */
int MatchFinder_Create(CMatchFinder *p, uint32_t historySize, uint32_t keepAddBufferBefore,
                       uint32_t matchMaxLen, uint32_t keepAddBufferAfter)
{
    uint32_t sizeReserv;
    if (historySize > kMaxHistorySize)
    {
        MatchFinder_Free(p);
        return 0;
    }
    /* slack added to the window: half the history (a quarter above 2 GiB) ... */
    sizeReserv = historySize >> 1;
    if (historySize > ((uint32_t)2 << 30))
        sizeReserv = historySize >> 2;
    /* ... plus half of the keep-areas and a fixed 512 KiB */
    sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);

    p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
    p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
    /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary
     * using */
    if (LzInWindow_Create(p, sizeReserv))
    {
        uint32_t newCyclicBufferSize = (historySize /* >> p->skipModeBits */) + 1;
        uint32_t hs;
        p->matchMaxLen = matchMaxLen;
        {
            /* derive the main hash mask (hs) from numHashBytes and historySize */
            p->fixedHashSize = 0;
            if (p->numHashBytes == 2)
                hs = (1 << 16) - 1;
            else
            {
                /* smear the top set bit of (historySize - 1) downwards, halve the
                 * result and force the low 16 bits on */
                hs = historySize - 1;
                hs |= (hs >> 1);
                hs |= (hs >> 2);
                hs |= (hs >> 4);
                hs |= (hs >> 8);
                hs >>= 1;
                /* hs >>= p->skipModeBits; */
                hs |= 0xFFFF; /* don't change it! It's required for Deflate */
                if (hs > (1 << 24))
                {
                    /* cap very large masks: fixed 24-bit mask for 3-byte hashing,
                     * otherwise halve once more */
                    if (p->numHashBytes == 3)
                        hs = (1 << 24) - 1;
                    else
                        hs >>= 1;
                }
            }
            p->hashMask = hs;
            hs++; /* mask -> number of entries in the main table */
            /* the fixed-size 2/3/4-byte sub-tables sit in front of the main table */
            if (p->numHashBytes > 2)
                p->fixedHashSize += kHash2Size;
            if (p->numHashBytes > 3)
                p->fixedHashSize += kHash3Size;
            if (p->numHashBytes > 4)
                p->fixedHashSize += kHash4Size;
            hs += p->fixedHashSize;
        }

        {
            /* total entry count of the previous allocation, read before the
             * fields are overwritten below */
            uint32_t prevSize = p->hashSizeSum + p->numSons;
            uint32_t newSize;
            p->historySize = historySize;
            p->hashSizeSum = hs;
            p->cyclicBufferSize = newCyclicBufferSize;
            /* binary-tree mode stores two links per position, hash-chain mode one */
            p->numSons = (p->btMode ? newCyclicBufferSize * 2 : newCyclicBufferSize);
            newSize = p->hashSizeSum + p->numSons;
            /* keep the existing array when its total size is unchanged */
            if (p->hash != 0 && prevSize == newSize)
                return 1;
            MatchFinder_FreeThisClassMemory(p);
            p->hash = AllocRefs(newSize);
            if (p->hash != 0)
            {
                /* the son area lives in the same allocation, right after the hash tables */
                p->son = p->hash + p->hashSizeSum;
                return 1;
            }
        }
    }
    MatchFinder_Free(p);
    return 0;
}
(!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos) + MatchFinder_CheckAndMoveAndRead(p); + if (p->cyclicBufferPos == p->cyclicBufferSize) + p->cyclicBufferPos = 0; + MatchFinder_SetLimits(p); +} + +static uint32_t *Hc_GetMatchesSpec(uint32_t lenLimit, uint32_t curMatch, uint32_t pos, + const uint8_t *cur, CLzRef *son, uint32_t _cyclicBufferPos, + uint32_t _cyclicBufferSize, uint32_t cutValue, + uint32_t *distances, uint32_t maxLen) +{ + son[_cyclicBufferPos] = curMatch; + for (;;) + { + uint32_t delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + return distances; + { + const uint8_t *pb = cur - delta; + curMatch = son[_cyclicBufferPos - delta + + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; + if (pb[maxLen] == cur[maxLen] && *pb == *cur) + { + uint32_t len = 0; + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { + *distances++ = maxLen = len; + *distances++ = delta - 1; + if (len == lenLimit) + return distances; + } + } + } + } +} + +uint32_t *GetMatchesSpec1(uint32_t lenLimit, uint32_t curMatch, uint32_t pos, + const uint8_t *cur, CLzRef *son, uint32_t _cyclicBufferPos, + uint32_t _cyclicBufferSize, uint32_t cutValue, uint32_t *distances, + uint32_t maxLen) +{ + CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + (_cyclicBufferPos << 1); + uint32_t len0 = 0, len1 = 0; + for (;;) + { + uint32_t delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + { + *ptr0 = *ptr1 = kEmptyHashValue; + return distances; + } + { + CLzRef *pair = son + ((_cyclicBufferPos - delta + + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) + << 1); + const uint8_t *pb = cur - delta; + uint32_t len = (len0 < len1 ? 
/*
 * Insert the current position into the binary tree stored in `son` without
 * reporting any matches (same walk as GetMatchesSpec1, minus the distance
 * output).
 *
 * Every cyclic-buffer slot owns two consecutive entries in `son`
 * (index << 1 and (index << 1) + 1).  ptr0 / ptr1 point at the two link
 * entries that still have to be filled in; len0 / len1 are the match
 * lengths recorded the last time each side was advanced, and their minimum
 * is where the next comparison starts.
 */
static void SkipMatchesSpec(uint32_t lenLimit, uint32_t curMatch, uint32_t pos,
                            const uint8_t *cur, CLzRef *son, uint32_t _cyclicBufferPos,
                            uint32_t _cyclicBufferSize, uint32_t cutValue)
{
    CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
    CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
    uint32_t len0 = 0, len1 = 0;
    for (;;)
    {
        uint32_t delta = pos - curMatch;
        /* out of budget, or the candidate is outside the cyclic buffer:
         * terminate both pending links and stop */
        if (cutValue-- == 0 || delta >= _cyclicBufferSize)
        {
            *ptr0 = *ptr1 = kEmptyHashValue;
            return;
        }
        {
            /* the candidate's pair of links, with wrap-around inside the cyclic buffer */
            CLzRef *pair = son + ((_cyclicBufferPos - delta +
                                   ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0))
                                  << 1);
            const uint8_t *pb = cur - delta;
            uint32_t len = (len0 < len1 ? len0 : len1);
            if (pb[len] == cur[len])
            {
                while (++len != lenLimit)
                    if (pb[len] != cur[len])
                        break;
                {
                    /* candidate matches for the full lenLimit bytes: take over
                     * both of its links and finish */
                    if (len == lenLimit)
                    {
                        *ptr1 = pair[0];
                        *ptr0 = pair[1];
                        return;
                    }
                }
            }
            /* descend according to the first differing byte */
            if (pb[len] < cur[len])
            {
                *ptr1 = curMatch;
                ptr1 = pair + 1;
                curMatch = *ptr1;
                len1 = len;
            }
            else
            {
                *ptr0 = curMatch;
                ptr0 = pair;
                curMatch = *ptr0;
                len0 = len;
            }
        }
    }
}
/*
 * Binary-tree match finder with 4-byte hashing.
 *
 * Writes (length, distance - 1) pairs into `distances` and returns the
 * number of uint32_t values written.  The 2-byte and 3-byte hash tables are
 * probed first; the binary tree then looks for longer matches.
 *
 * Control flow is partly hidden in macros: GET_MATCHES_HEADER declares
 * lenLimit / hashValue / cur / curMatch and returns 0 (after advancing the
 * position) when fewer than 4 bytes are available; MOVE_POS_RET and
 * GET_MATCHES_FOOTER advance the position and return `offset`.
 */
static uint32_t Bt4_MatchFinder_GetMatches(CMatchFinder *p, uint32_t *distances)
{
    uint32_t hash2Value, hash3Value, delta2, delta3, maxLen, offset;
    GET_MATCHES_HEADER(4)

    HASH4_CALC;

    /* distances back to the previous positions with the same 2- and 3-byte hash */
    delta2 = p->pos - p->hash[hash2Value];
    delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];
    curMatch = p->hash[kFix4HashSize + hashValue];

    /* all three tables now point at the current position */
    p->hash[hash2Value] = p->hash[kFix3HashSize + hash3Value] =
        p->hash[kFix4HashSize + hashValue] = p->pos;

    maxLen = 1;
    offset = 0;
    if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
    {
        distances[0] = maxLen = 2;
        distances[1] = delta2 - 1;
        offset = 2;
    }
    if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur)
    {
        maxLen = 3;
        distances[offset + 1] = delta3 - 1;
        offset += 2;
        delta2 = delta3; /* the extension loop below continues with this candidate */
    }
    if (offset != 0)
    {
        /* extend the last recorded candidate as far as it goes */
        for (; maxLen != lenLimit; maxLen++)
            if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
                break;
        distances[offset - 2] = maxLen;
        if (maxLen == lenLimit)
        {
            /* already at the maximum length: only update the tree, no search */
            SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
            MOVE_POS_RET;
        }
    }
    /* the tree search is only asked for matches longer than 3 bytes */
    if (maxLen < 3)
        maxLen = 3;
    GET_MATCHES_FOOTER(offset, maxLen)
}
/*
 * Hash-chain match finder using the 16-bit HASH_ZIP_CALC hash of the next
 * three bytes.
 *
 * Writes (length, distance - 1) pairs into `distances` and returns the
 * number of uint32_t values written.  GET_MATCHES_HEADER returns 0 early
 * (after advancing the position) when fewer than 3 bytes are available;
 * MOVE_POS_RET advances the position and returns `offset`.
 */
uint32_t Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, uint32_t *distances)
{
    uint32_t offset;
    GET_MATCHES_HEADER(3)
    HASH_ZIP_CALC;
    /* head of the chain for this hash; the current position becomes the new head */
    curMatch = p->hash[hashValue];
    p->hash[hashValue] = p->pos;
    /* start with maxLen = 2, so only matches of 3 or more bytes are reported */
    offset = (uint32_t)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), distances, 2) -
                        (distances));
    MOVE_POS_RET
}
/*
 * Advance the binary-tree / 4-byte-hash finder by `num` positions without
 * reporting matches, while still updating the hash tables and the tree.
 *
 * NOTE: the loop is a do/while with a pre-decrement test, so `num` must be
 * non-zero; passing 0 would wrap the counter around.
 * SKIP_HEADER `continue`s (after advancing the position) when fewer than
 * 4 bytes are available; SKIP_FOOTER updates the tree and advances.
 */
static void Bt4_MatchFinder_Skip(CMatchFinder *p, uint32_t num)
{
    do
    {
        uint32_t hash2Value, hash3Value;
        SKIP_HEADER(4)
        HASH4_CALC;
        /* remember the old head of the 4-byte table before overwriting it */
        curMatch = p->hash[kFix4HashSize + hashValue];
        p->hash[hash2Value] = p->hash[kFix3HashSize + hash3Value] = p->pos;
        p->hash[kFix4HashSize + hashValue] = p->pos;
        SKIP_FOOTER
    } while (--num != 0);
}
(Mf_Skip_Func)Bt3_MatchFinder_Skip; + } + else + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; + } +} diff --git a/depends/lzma/pavlov/LzFind.h b/depends/lzma/pavlov/LzFind.h new file mode 100755 index 00000000..12d89aac --- /dev/null +++ b/depends/lzma/pavlov/LzFind.h @@ -0,0 +1,107 @@ +/* LzFind.h -- Match finder for LZ algorithms +2008-10-04 : Igor Pavlov : Public domain */ + +#ifndef __LZFIND_H +#define __LZFIND_H + +#include "Types.h" + +typedef uint32_t CLzRef; + +typedef struct _CMatchFinder +{ + uint8_t *buffer; + uint32_t pos; + uint32_t posLimit; + uint32_t streamPos; + uint32_t lenLimit; + + uint32_t cyclicBufferPos; + uint32_t cyclicBufferSize; /* it must be = (historySize + 1) */ + + uint32_t matchMaxLen; + CLzRef *hash; + CLzRef *son; + uint32_t hashMask; + uint32_t cutValue; + + uint8_t *bufferBase; + ISeqInStream *stream; + int streamEndWasReached; + + uint32_t blockSize; + uint32_t keepSizeBefore; + uint32_t keepSizeAfter; + + uint32_t numHashBytes; + int directInput; + int btMode; + /* int skipModeBits; */ + int bigHash; + uint32_t historySize; + uint32_t fixedHashSize; + uint32_t hashSizeSum; + uint32_t numSons; + SRes result; + uint32_t crc[256]; +} CMatchFinder; + +#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer) +#define Inline_MatchFinder_GetIndexByte(p, index) ((p)->buffer[(Int32)(index)]) + +#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos) + +int MatchFinder_NeedMove(CMatchFinder *p); +uint8_t *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); +void MatchFinder_MoveBlock(CMatchFinder *p); +void MatchFinder_ReadIfRequired(CMatchFinder *p); + +void MatchFinder_Construct(CMatchFinder *p); + +/* Conditions: + historySize <= 3 GB + keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB +*/ +int MatchFinder_Create(CMatchFinder *p, uint32_t historySize, uint32_t keepAddBufferBefore, + uint32_t matchMaxLen, 
/*
Conditions:
  Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func.
  Mf_GetPointerToCurrentPos_Func's result must be used only before any other function
*/
/* NOTE(review): no Mf_GetMatchLen_Func is declared in this header; the
 * condition above presumably refers to Mf_GetMatches_Func -- confirm. */

/* Function-pointer types of the match-finder interface.  `object` is the
 * finder instance, passed as void* (MatchFinder_CreateVTable() stores the
 * CMatchFinder implementations through casts to these types). */
typedef void (*Mf_Init_Func)(void *object);
typedef uint8_t (*Mf_GetIndexByte_Func)(void *object, int32_t index);
typedef uint32_t (*Mf_GetNumAvailableBytes_Func)(void *object);
typedef const uint8_t *(*Mf_GetPointerToCurrentPos_Func)(void *object);
/* fills `distances`; the CMatchFinder implementations return how many
 * uint32_t values they wrote */
typedef uint32_t (*Mf_GetMatches_Func)(void *object, uint32_t *distances);
/* second argument: number of positions to skip */
typedef void (*Mf_Skip_Func)(void *object, uint32_t);

/* Table of the six operations above; filled in by MatchFinder_CreateVTable(). */
typedef struct _IMatchFinder
{
    Mf_Init_Func Init;
    Mf_GetIndexByte_Func GetIndexByte;
    Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
    Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
    Mf_GetMatches_Func GetMatches;
    Mf_Skip_Func Skip;
} IMatchFinder;
+#pragma once + +#define kHash2Size (1 << 10) +#define kHash3Size (1 << 16) +#define kHash4Size (1 << 20) + +#define kFix3HashSize (kHash2Size) +#define kFix4HashSize (kHash2Size + kHash3Size) +#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) + +#define HASH2_CALC hashValue = cur[0] | ((uint32_t)cur[1] << 8); + +#define HASH3_CALC \ + { \ + uint32_t temp = p->crc[cur[0]] ^ cur[1]; \ + hash2Value = temp & (kHash2Size - 1); \ + hashValue = (temp ^ ((uint32_t)cur[2] << 8)) & p->hashMask; \ + } + +#define HASH4_CALC \ + { \ + uint32_t temp = p->crc[cur[0]] ^ cur[1]; \ + hash2Value = temp & (kHash2Size - 1); \ + hash3Value = (temp ^ ((uint32_t)cur[2] << 8)) & (kHash3Size - 1); \ + hashValue = (temp ^ ((uint32_t)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & p->hashMask; \ + } + +#define HASH5_CALC \ + { \ + uint32_t temp = p->crc[cur[0]] ^ cur[1]; \ + hash2Value = temp & (kHash2Size - 1); \ + hash3Value = (temp ^ ((uint32_t)cur[2] << 8)) & (kHash3Size - 1); \ + hash4Value = (temp ^ ((uint32_t)cur[2] << 8) ^ (p->crc[cur[3]] << 5)); \ + hashValue = (hash4Value ^ (p->crc[cur[4]] << 3)) & p->hashMask; \ + hash4Value &= (kHash4Size - 1); \ + } + +/* #define HASH_ZIP_CALC hashValue = ((cur[0] | ((uint32_t)cur[1] << 8)) ^ p->crc[cur[2]]) & + * 0xFFFF; */ +#define HASH_ZIP_CALC \ + hashValue = ((cur[2] | ((uint32_t)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; + +#define MT_HASH2_CALC hash2Value = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); + +#define MT_HASH3_CALC \ + { \ + uint32_t temp = p->crc[cur[0]] ^ cur[1]; \ + hash2Value = temp & (kHash2Size - 1); \ + hash3Value = (temp ^ ((uint32_t)cur[2] << 8)) & (kHash3Size - 1); \ + } + +#define MT_HASH4_CALC \ + { \ + uint32_t temp = p->crc[cur[0]] ^ cur[1]; \ + hash2Value = temp & (kHash2Size - 1); \ + hash3Value = (temp ^ ((uint32_t)cur[2] << 8)) & (kHash3Size - 1); \ + hash4Value = \ + (temp ^ ((uint32_t)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); \ + } diff --git a/depends/lzma/pavlov/LzmaDec.c 
/*
 * Range-decoder building blocks.
 *
 * All macros below are statement fragments that operate on locals of the
 * function they are expanded in:
 *   range, code - range coder state (uint32_t)
 *   buf         - input read pointer (advanced by NORMALIZE)
 *   ttt, bound  - scratch variables written by IF_BIT_0 and read by UPDATE_*
 */

/* The range is renormalised whenever it drops below 2^24. */
#define kNumTopBits 24
#define kTopValue ((uint32_t)1 << kNumTopBits)

/* Probabilities are 11-bit values; each update moves them by 1/32 (shift of 5). */
#define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumBitModelTotalBits)
#define kNumMoveBits 5

/* Number of input bytes consumed to initialise the range coder. */
#define RC_INIT_SIZE 5

/* Shift one input byte into `code` when `range` has become too small.
   Performs NO bounds check on `buf`; see NORMALIZE_CHECK for the checked form. */
#define NORMALIZE                                                                              \
    if (range < kTopValue)                                                                     \
    {                                                                                          \
        range <<= 8;                                                                           \
        code = (code << 8) | (*buf++);                                                         \
    }

/* Opens an `if` that is taken when the next decoded bit is 0. Must be followed
   by the bit-0 statement/block and may be followed by `else`. Leaves the
   probability in `ttt` and the split point in `bound` for UPDATE_0/UPDATE_1. */
#define IF_BIT_0(p)                                                                            \
    ttt = *(p);                                                                                \
    NORMALIZE;                                                                                 \
    bound = (range >> kNumBitModelTotalBits) * ttt;                                            \
    if (code < bound)
/* Commit a decoded 0: shrink the range and raise the probability stored at p. */
#define UPDATE_0(p)                                                                            \
    range = bound;                                                                             \
    *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
/* Commit a decoded 1: drop the lower part of the range and lower the probability. */
#define UPDATE_1(p)                                                                            \
    range -= bound;                                                                            \
    code -= bound;                                                                             \
    *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
/* Decode one bit with probability *p, append it to `i` (i = 2*i + bit) and run
   A0 or A1 depending on the bit value. */
#define GET_BIT2(p, i, A0, A1)                                                                 \
    IF_BIT_0(p)                                                                                \
    {                                                                                          \
        UPDATE_0(p);                                                                           \
        i = (i + i);                                                                           \
        A0;                                                                                    \
    }                                                                                          \
    else                                                                                       \
    {                                                                                          \
        UPDATE_1(p);                                                                           \
        i = (i + i) + 1;                                                                       \
        A1;                                                                                    \
    }
/* GET_BIT2 without extra per-bit actions. */
#define GET_BIT(p, i) GET_BIT2(p, i, ;, ;)

/* One step of a bit-tree walk: the probability used is probs[i]. */
#define TREE_GET_BIT(probs, i)                                                                 \
    {                                                                                          \
        GET_BIT((probs + i), i);                                                               \
    }
/* Walk a bit tree starting at node 1 until `i` reaches `limit`, then subtract
   `limit` so that `i` holds the decoded symbol in [0, limit). */
#define TREE_DECODE(probs, limit, i)                                                           \
    {                                                                                          \
        i = 1;                                                                                 \
        do                                                                                     \
        {                                                                                      \
            TREE_GET_BIT(probs, i);                                                            \
        } while (i < limit);                                                                   \
        i -= limit;                                                                            \
    }

/* #define _LZMA_SIZE_OPT */

/* 6-bit tree decode: the generic loop when optimising for size, otherwise the
   same six steps unrolled. */
#ifdef _LZMA_SIZE_OPT
#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
#else
#define TREE_6_DECODE(probs, i)                                                                \
    {                                                                                          \
        i = 1;                                                                                 \
        TREE_GET_BIT(probs, i);                                                                \
        TREE_GET_BIT(probs, i);                                                                \
        TREE_GET_BIT(probs, i);                                                                \
        TREE_GET_BIT(probs, i);                                                                \
        TREE_GET_BIT(probs, i);                                                                \
        TREE_GET_BIT(probs, i);                                                                \
        i -= 0x40;                                                                             \
    }
#endif

/* Bounds-checked NORMALIZE: returns DUMMY_ERROR from the enclosing function
   when another input byte is needed but `buf` has reached `bufLimit`. */
#define NORMALIZE_CHECK                                                                        \
    if (range < kTopValue)                                                                     \
    {                                                                                          \
        if (buf >= bufLimit)                                                                   \
            return DUMMY_ERROR;                                                                \
        range <<= 8;                                                                           \
        code = (code << 8) | (*buf++);                                                         \
    }
(range >> kNumBitModelTotalBits) * ttt; \ + if (code < bound) +#define UPDATE_0_CHECK range = bound; +#define UPDATE_1_CHECK \ + range -= bound; \ + code -= bound; +#define GET_BIT2_CHECK(p, i, A0, A1) \ + IF_BIT_0_CHECK(p) \ + { \ + UPDATE_0_CHECK; \ + i = (i + i); \ + A0; \ + } \ + else \ + { \ + UPDATE_1_CHECK; \ + i = (i + i) + 1; \ + A1; \ + } +#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ;, ;) +#define TREE_DECODE_CHECK(probs, limit, i) \ + { \ + i = 1; \ + do \ + { \ + GET_BIT_CHECK(probs + i, i) \ + } while (i < limit); \ + i -= limit; \ + } + +#define kNumPosBitsMax 4 +#define kNumPosStatesMax (1 << kNumPosBitsMax) + +#define kLenNumLowBits 3 +#define kLenNumLowSymbols (1 << kLenNumLowBits) +#define kLenNumMidBits 3 +#define kLenNumMidSymbols (1 << kLenNumMidBits) +#define kLenNumHighBits 8 +#define kLenNumHighSymbols (1 << kLenNumHighBits) + +#define LenChoice 0 +#define LenChoice2 (LenChoice + 1) +#define LenLow (LenChoice2 + 1) +#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) +#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) +#define kNumLenProbs (LenHigh + kLenNumHighSymbols) + +#define kNumStates 12 +#define kNumLitStates 7 + +#define kStartPosModelIndex 4 +#define kEndPosModelIndex 14 +#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + +#define kNumPosSlotBits 6 +#define kNumLenToPosStates 4 + +#define kNumAlignBits 4 +#define kAlignTableSize (1 << kNumAlignBits) + +#define kMatchMinLen 2 +#define kMatchSpecLenStart \ + (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) + +#define IsMatch 0 +#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) +#define IsRepG0 (IsRep + kNumStates) +#define IsRepG1 (IsRepG0 + kNumStates) +#define IsRepG2 (IsRepG1 + kNumStates) +#define IsRep0Long (IsRepG2 + kNumStates) +#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) +#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) +#define Align (SpecPos + kNumFullDistances - 
kEndPosModelIndex) +#define LenCoder (Align + kAlignTableSize) +#define RepLenCoder (LenCoder + kNumLenProbs) +#define Literal (RepLenCoder + kNumLenProbs) + +#define LZMA_BASE_SIZE 1846 +#define LZMA_LIT_SIZE 768 + +#define LzmaProps_GetNumProbs(p) \ + ((uint32_t)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) + +#if Literal != LZMA_BASE_SIZE +StopCompilingDueBUG +#endif + static const uint8_t kLiteralNextStates[kNumStates * 2] = { + 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5, 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; + +#define LZMA_DIC_MIN (1 << 12) + +/* First LZMA-symbol is always decoded. +And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization +Out: + Result: + SZ_OK - OK + SZ_ERROR_DATA - Error + p->remainLen: + < kMatchSpecLenStart : normal remain + = kMatchSpecLenStart : finished + = kMatchSpecLenStart + 1 : Flush marker + = kMatchSpecLenStart + 2 : State Init Marker +*/ + +static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, size_t limit, const uint8_t *bufLimit) +{ + CLzmaProb *probs = p->probs; + + unsigned state = p->state; + uint32_t rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; + unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; + unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1; + unsigned lc = p->prop.lc; + + uint8_t *dic = p->dic; + size_t dicBufSize = p->dicBufSize; + size_t dicPos = p->dicPos; + + uint32_t processedPos = p->processedPos; + uint32_t checkDicSize = p->checkDicSize; + unsigned len = 0; + + const uint8_t *buf = p->buf; + uint32_t range = p->range; + uint32_t code = p->code; + + do + { + CLzmaProb *prob; + uint32_t bound; + unsigned ttt; + unsigned posState = processedPos & pbMask; + + prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; + IF_BIT_0(prob) + { + unsigned symbol; + UPDATE_0(prob); + prob = probs + Literal; + if (checkDicSize != 0 || processedPos != 0) + prob += (LZMA_LIT_SIZE * + (((processedPos & lpMask) << lc) + + (dic[(dicPos 
== 0 ? dicBufSize : dicPos) - 1] >> (8 - lc)))); + + if (state < kNumLitStates) + { + symbol = 1; + do + { + GET_BIT(prob + symbol, symbol) + } while (symbol < 0x100); + } + else + { + unsigned matchByte = + p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; + unsigned offs = 0x100; + symbol = 1; + do + { + unsigned bit; + CLzmaProb *probLit; + matchByte <<= 1; + bit = (matchByte & offs); + probLit = prob + offs + bit + symbol; + GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit) + } while (symbol < 0x100); + } + dic[dicPos++] = (uint8_t)symbol; + processedPos++; + + state = kLiteralNextStates[state]; + /* if (state < 4) state = 0; else if (state < 10) state -= 3; else state -= 6; */ + continue; + } + else + { + UPDATE_1(prob); + prob = probs + IsRep + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + state += kNumStates; + prob = probs + LenCoder; + } + else + { + UPDATE_1(prob); + if (checkDicSize == 0 && processedPos == 0) + return SZ_ERROR_DATA; + prob = probs + IsRepG0 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; + IF_BIT_0(prob) + { + UPDATE_0(prob); + dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; + dicPos++; + processedPos++; + state = state < kNumLitStates ? 9 : 11; + continue; + } + UPDATE_1(prob); + } + else + { + uint32_t distance; + UPDATE_1(prob); + prob = probs + IsRepG1 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + distance = rep1; + } + else + { + UPDATE_1(prob); + prob = probs + IsRepG2 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + distance = rep2; + } + else + { + UPDATE_1(prob); + distance = rep3; + rep3 = rep2; + } + rep2 = rep1; + } + rep1 = rep0; + rep0 = distance; + } + state = state < kNumLitStates ? 
8 : 11; + prob = probs + RepLenCoder; + } + { + unsigned limit, offset; + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenLow + (posState << kLenNumLowBits); + offset = 0; + limit = (1 << kLenNumLowBits); + } + else + { + UPDATE_1(probLen); + probLen = prob + LenChoice2; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenMid + (posState << kLenNumMidBits); + offset = kLenNumLowSymbols; + limit = (1 << kLenNumMidBits); + } + else + { + UPDATE_1(probLen); + probLen = prob + LenHigh; + offset = kLenNumLowSymbols + kLenNumMidSymbols; + limit = (1 << kLenNumHighBits); + } + } + TREE_DECODE(probLen, limit, len); + len += offset; + } + + if (state >= kNumStates) + { + uint32_t distance; + prob = + probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) + << kNumPosSlotBits); + TREE_6_DECODE(prob, distance); + if (distance >= kStartPosModelIndex) + { + unsigned posSlot = (unsigned)distance; + int numDirectBits = (int)(((distance >> 1) - 1)); + distance = (2 | (distance & 1)); + if (posSlot < kEndPosModelIndex) + { + distance <<= numDirectBits; + prob = probs + SpecPos + distance - posSlot - 1; + { + uint32_t mask = 1; + unsigned i = 1; + do + { + GET_BIT2(prob + i, i, ;, distance |= mask); + mask <<= 1; + } while (--numDirectBits != 0); + } + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE + range >>= 1; + + { + uint32_t t; + code -= range; + t = (0 - + ((uint32_t)code >> 31)); /* (UInt32)((Int32)code >> 31) */ + distance = (distance << 1) + (t + 1); + code += range & t; + } + /* + distance <<= 1; + if (code >= range) + { + code -= range; + distance |= 1; + } + */ + } while (--numDirectBits != 0); + prob = probs + Align; + distance <<= kNumAlignBits; + { + unsigned i = 1; + GET_BIT2(prob + i, i, ;, distance |= 1); + GET_BIT2(prob + i, i, ;, distance |= 2); + GET_BIT2(prob + i, i, ;, distance |= 4); + GET_BIT2(prob + i, i, ;, distance |= 8); + } + if (distance 
== (uint32_t)0xFFFFFFFF) + { + len += kMatchSpecLenStart; + state -= kNumStates; + break; + } + } + } + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance + 1; + if (checkDicSize == 0) + { + if (distance >= processedPos) + return SZ_ERROR_DATA; + } + else if (distance >= checkDicSize) + return SZ_ERROR_DATA; + state = + (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; + /* state = kLiteralNextStates[state]; */ + } + + len += kMatchMinLen; + + if (limit == dicPos) + return SZ_ERROR_DATA; + { + size_t rem = limit - dicPos; + unsigned curLen = ((rem < len) ? (unsigned)rem : len); + size_t pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0); + + processedPos += curLen; + + len -= curLen; + if (pos + curLen <= dicBufSize) + { + uint8_t *dest = dic + dicPos; + ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; + const uint8_t *lim = dest + curLen; + dicPos += curLen; + do + *(dest) = (uint8_t) * (dest + src); + while (++dest != lim); + } + else + { + do + { + dic[dicPos++] = dic[pos]; + if (++pos == dicBufSize) + pos = 0; + } while (--curLen != 0); + } + } + } + } while (dicPos < limit && buf < bufLimit); + NORMALIZE; + p->buf = buf; + p->range = range; + p->code = code; + p->remainLen = len; + p->dicPos = dicPos; + p->processedPos = processedPos; + p->reps[0] = rep0; + p->reps[1] = rep1; + p->reps[2] = rep2; + p->reps[3] = rep3; + p->state = state; + + return SZ_OK; +} + +static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, size_t limit) +{ + if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) + { + uint8_t *dic = p->dic; + size_t dicPos = p->dicPos; + size_t dicBufSize = p->dicBufSize; + unsigned len = p->remainLen; + uint32_t rep0 = p->reps[0]; + if (limit - dicPos < len) + len = (unsigned)(limit - dicPos); + + if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) + p->checkDicSize = p->prop.dicSize; + + p->processedPos += len; + p->remainLen -= len; + while (len-- != 0) + { + dic[dicPos] = 
dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; + dicPos++; + } + p->dicPos = dicPos; + } +} + +static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, size_t limit, const uint8_t *bufLimit) +{ + do + { + size_t limit2 = limit; + if (p->checkDicSize == 0) + { + uint32_t rem = p->prop.dicSize - p->processedPos; + if (limit - p->dicPos > rem) + limit2 = p->dicPos + rem; + } + RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit)); + if (p->processedPos >= p->prop.dicSize) + p->checkDicSize = p->prop.dicSize; + LzmaDec_WriteRem(p, limit); + } while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); + + if (p->remainLen > kMatchSpecLenStart) + { + p->remainLen = kMatchSpecLenStart; + } + return 0; +} + +typedef enum +{ + DUMMY_ERROR, /* unexpected end of input stream */ + DUMMY_LIT, + DUMMY_MATCH, + DUMMY_REP +} ELzmaDummy; + +static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const uint8_t *buf, size_t inSize) +{ + uint32_t range = p->range; + uint32_t code = p->code; + const uint8_t *bufLimit = buf + inSize; + CLzmaProb *probs = p->probs; + unsigned state = p->state; + ELzmaDummy res; + + { + CLzmaProb *prob; + uint32_t bound; + unsigned ttt; + unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1); + + prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + + /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ + + prob = probs + Literal; + if (p->checkDicSize != 0 || p->processedPos != 0) + prob += (LZMA_LIT_SIZE * + ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + + (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> + (8 - p->prop.lc)))); + + if (state < kNumLitStates) + { + unsigned symbol = 1; + do + { + GET_BIT_CHECK(prob + symbol, symbol) + } while (symbol < 0x100); + } + else + { + unsigned matchByte = p->dic[p->dicPos - p->reps[0] + + ((p->dicPos < p->reps[0]) ? 
p->dicBufSize : 0)]; + unsigned offs = 0x100; + unsigned symbol = 1; + do + { + unsigned bit; + CLzmaProb *probLit; + matchByte <<= 1; + bit = (matchByte & offs); + probLit = prob + offs + bit + symbol; + GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit) + } while (symbol < 0x100); + } + res = DUMMY_LIT; + } + else + { + unsigned len; + UPDATE_1_CHECK; + + prob = probs + IsRep + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + state = 0; + prob = probs + LenCoder; + res = DUMMY_MATCH; + } + else + { + UPDATE_1_CHECK; + res = DUMMY_REP; + prob = probs + IsRepG0 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + NORMALIZE_CHECK; + return DUMMY_REP; + } + else + { + UPDATE_1_CHECK; + } + } + else + { + UPDATE_1_CHECK; + prob = probs + IsRepG1 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + } + else + { + UPDATE_1_CHECK; + prob = probs + IsRepG2 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + } + else + { + UPDATE_1_CHECK; + } + } + } + state = kNumStates; + prob = probs + RepLenCoder; + } + { + unsigned limit, offset; + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK; + probLen = prob + LenLow + (posState << kLenNumLowBits); + offset = 0; + limit = 1 << kLenNumLowBits; + } + else + { + UPDATE_1_CHECK; + probLen = prob + LenChoice2; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK; + probLen = prob + LenMid + (posState << kLenNumMidBits); + offset = kLenNumLowSymbols; + limit = 1 << kLenNumMidBits; + } + else + { + UPDATE_1_CHECK; + probLen = prob + LenHigh; + offset = kLenNumLowSymbols + kLenNumMidSymbols; + limit = 1 << kLenNumHighBits; + } + } + TREE_DECODE_CHECK(probLen, limit, len); + len += offset; + } + + if (state < 4) + { + unsigned posSlot; + prob = + probs + PosSlot + ((len < kNumLenToPosStates ? 
len : kNumLenToPosStates - 1) + << kNumPosSlotBits); + TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); + if (posSlot >= kStartPosModelIndex) + { + int numDirectBits = ((posSlot >> 1) - 1); + + /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ + + if (posSlot < kEndPosModelIndex) + { + prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - + posSlot - 1; + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE_CHECK + range >>= 1; + code -= range & (((code - range) >> 31) - 1); + /* if (code >= range) code -= range; */ + } while (--numDirectBits != 0); + prob = probs + Align; + numDirectBits = kNumAlignBits; + } + { + unsigned i = 1; + do + { + GET_BIT_CHECK(prob + i, i); + } while (--numDirectBits != 0); + } + } + } + } + } + NORMALIZE_CHECK; + return res; +} + +static void LzmaDec_InitRc(CLzmaDec *p, const uint8_t *data) +{ + p->code = ((uint32_t)data[1] << 24) | ((uint32_t)data[2] << 16) | ((uint32_t)data[3] << 8) | + ((uint32_t)data[4]); + p->range = 0xFFFFFFFF; + p->needFlush = 0; +} + +void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState) +{ + p->needFlush = 1; + p->remainLen = 0; + p->tempBufSize = 0; + + if (initDic) + { + p->processedPos = 0; + p->checkDicSize = 0; + p->needInitState = 1; + } + if (initState) + p->needInitState = 1; +} + +void LzmaDec_Init(CLzmaDec *p) +{ + p->dicPos = 0; + LzmaDec_InitDicAndState(p, True, True); +} + +static void LzmaDec_InitStateReal(CLzmaDec *p) +{ + uint32_t numProbs = Literal + ((uint32_t)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp)); + uint32_t i; + CLzmaProb *probs = p->probs; + for (i = 0; i < numProbs; i++) + probs[i] = kBitModelTotal >> 1; + p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; + p->state = 0; + p->needInitState = 0; +} + +SRes LzmaDec_DecodeToDic(CLzmaDec *p, size_t dicLimit, const uint8_t *src, size_t *srcLen, + ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + size_t inSize = *srcLen; + (*srcLen) = 0; + LzmaDec_WriteRem(p, dicLimit); + + 
*status = LZMA_STATUS_NOT_SPECIFIED; + + while (p->remainLen != kMatchSpecLenStart) + { + int checkEndMarkNow; + + if (p->needFlush != 0) + { + for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) + p->tempBuf[p->tempBufSize++] = *src++; + if (p->tempBufSize < RC_INIT_SIZE) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + if (p->tempBuf[0] != 0) + return SZ_ERROR_DATA; + + LzmaDec_InitRc(p, p->tempBuf); + p->tempBufSize = 0; + } + + checkEndMarkNow = 0; + if (p->dicPos >= dicLimit) + { + if (p->remainLen == 0 && p->code == 0) + { + *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; + return SZ_OK; + } + if (finishMode == LZMA_FINISH_ANY) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_OK; + } + if (p->remainLen != 0) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_ERROR_DATA; + } + checkEndMarkNow = 1; + } + + if (p->needInitState) + LzmaDec_InitStateReal(p); + + if (p->tempBufSize == 0) + { + size_t processed; + const uint8_t *bufLimit; + if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + int dummyRes = LzmaDec_TryDummy(p, src, inSize); + if (dummyRes == DUMMY_ERROR) + { + memcpy(p->tempBuf, src, inSize); + p->tempBufSize = (unsigned)inSize; + (*srcLen) += inSize; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_ERROR_DATA; + } + bufLimit = src; + } + else + bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; + p->buf = src; + if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) + return SZ_ERROR_DATA; + processed = (size_t)(p->buf - src); + (*srcLen) += processed; + src += processed; + inSize -= processed; + } + else + { + unsigned rem = p->tempBufSize, lookAhead = 0; + while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) + p->tempBuf[rem++] = src[lookAhead++]; + p->tempBufSize = rem; + if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + int dummyRes = 
LzmaDec_TryDummy(p, p->tempBuf, rem); + if (dummyRes == DUMMY_ERROR) + { + (*srcLen) += lookAhead; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_ERROR_DATA; + } + } + p->buf = p->tempBuf; + if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) + return SZ_ERROR_DATA; + lookAhead -= (rem - (unsigned)(p->buf - p->tempBuf)); + (*srcLen) += lookAhead; + src += lookAhead; + inSize -= lookAhead; + p->tempBufSize = 0; + } + } + if (p->code == 0) + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA; +} + +SRes LzmaDec_DecodeToBuf(CLzmaDec *p, uint8_t *dest, size_t *destLen, const uint8_t *src, + size_t *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + size_t outSize = *destLen; + size_t inSize = *srcLen; + *srcLen = *destLen = 0; + for (;;) + { + size_t inSizeCur = inSize, outSizeCur, dicPos; + ELzmaFinishMode curFinishMode; + SRes res; + if (p->dicPos == p->dicBufSize) + p->dicPos = 0; + dicPos = p->dicPos; + if (outSize > p->dicBufSize - dicPos) + { + outSizeCur = p->dicBufSize; + curFinishMode = LZMA_FINISH_ANY; + } + else + { + outSizeCur = dicPos + outSize; + curFinishMode = finishMode; + } + + res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); + src += inSizeCur; + inSize -= inSizeCur; + *srcLen += inSizeCur; + outSizeCur = p->dicPos - dicPos; + memcpy(dest, p->dic + dicPos, outSizeCur); + dest += outSizeCur; + outSize -= outSizeCur; + *destLen += outSizeCur; + if (res != 0) + return res; + if (outSizeCur == 0 || outSize == 0) + return SZ_OK; + } +} + +void LzmaDec_FreeProbs(CLzmaDec *p) +{ + free(p->probs); + p->probs = 0; +} + +static void LzmaDec_FreeDict(CLzmaDec *p) +{ + free(p->dic); + p->dic = 0; +} + +void LzmaDec_Free(CLzmaDec *p) +{ + LzmaDec_FreeProbs(p); + LzmaDec_FreeDict(p); +} + +SRes LzmaProps_Decode(CLzmaProps *p, const uint8_t *data, unsigned size) +{ + 
uint32_t dicSize; + uint8_t d; + + if (size < LZMA_PROPS_SIZE) + return SZ_ERROR_UNSUPPORTED; + else + dicSize = data[1] | ((uint32_t)data[2] << 8) | ((uint32_t)data[3] << 16) | + ((uint32_t)data[4] << 24); + + if (dicSize < LZMA_DIC_MIN) + dicSize = LZMA_DIC_MIN; + p->dicSize = dicSize; + + d = data[0]; + if (d >= (9 * 5 * 5)) + return SZ_ERROR_UNSUPPORTED; + + p->lc = d % 9; + d /= 9; + p->pb = d / 5; + p->lp = d % 5; + + return SZ_OK; +} + +static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew) +{ + uint32_t numProbs = LzmaProps_GetNumProbs(propNew); + if (p->probs == 0 || numProbs != p->numProbs) + { + LzmaDec_FreeProbs(p); + p->probs = (CLzmaProb *)malloc(numProbs * sizeof(CLzmaProb)); + p->numProbs = numProbs; + if (p->probs == 0) + return SZ_ERROR_MEM; + } + return SZ_OK; +} + +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const uint8_t *props, unsigned propsSize) +{ + CLzmaProps propNew; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)); + RINOK(LzmaDec_AllocateProbs2(p, &propNew)); + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDec_Allocate(CLzmaDec *p, const uint8_t *props, unsigned propsSize) +{ + CLzmaProps propNew; + size_t dicBufSize; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)); + RINOK(LzmaDec_AllocateProbs2(p, &propNew)); + dicBufSize = propNew.dicSize; + if (p->dic == 0 || dicBufSize != p->dicBufSize) + { + LzmaDec_FreeDict(p); + p->dic = (uint8_t *)malloc(dicBufSize); + if (p->dic == 0) + { + LzmaDec_FreeProbs(p); + return SZ_ERROR_MEM; + } + } + p->dicBufSize = dicBufSize; + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDecode(uint8_t *dest, size_t *destLen, const uint8_t *src, size_t *srcLen, + const uint8_t *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status) +{ + CLzmaDec p; + SRes res; + size_t inSize = *srcLen; + size_t outSize = *destLen; + *srcLen = *destLen = 0; + if (inSize < RC_INIT_SIZE) + return SZ_ERROR_INPUT_EOF; + + LzmaDec_Construct(&p); + res = 
LzmaDec_AllocateProbs(&p, propData, propSize); + if (res != 0) + return res; + p.dic = dest; + p.dicBufSize = outSize; + + LzmaDec_Init(&p); + + *srcLen = inSize; + res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); + + if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) + res = SZ_ERROR_INPUT_EOF; + + (*destLen) = p.dicPos; + LzmaDec_FreeProbs(&p); + return res; +} diff --git a/depends/lzma/pavlov/LzmaDec.h b/depends/lzma/pavlov/LzmaDec.h new file mode 100755 index 00000000..25cb7e94 --- /dev/null +++ b/depends/lzma/pavlov/LzmaDec.h @@ -0,0 +1,220 @@ +/* LzmaDec.h -- LZMA Decoder +2008-10-04 : Igor Pavlov : Public domain */ + +#pragma once + +#include "Types.h" + +/* #define _LZMA_PROB32 */ +/* _LZMA_PROB32 can increase the speed on some CPUs, + but memory usage for CLzmaDec::probs will be doubled in that case */ + +#ifdef _LZMA_PROB32 +#define CLzmaProb UInt32 +#else +#define CLzmaProb uint16_t +#endif + +/* ---------- LZMA Properties ---------- */ + +#define LZMA_PROPS_SIZE 5 + +typedef struct _CLzmaProps +{ + unsigned lc, lp, pb; + uint32_t dicSize; +} CLzmaProps; + +/* LzmaProps_Decode - decodes properties +Returns: + SZ_OK + SZ_ERROR_UNSUPPORTED - Unsupported properties +*/ + +SRes LzmaProps_Decode(CLzmaProps *p, const uint8_t *data, unsigned size); + +/* ---------- LZMA Decoder state ---------- */ + +/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. 
/*
 * Complete decoder state. LzmaDec_Construct() only clears `dic` and `probs`;
 * the remaining fields are set up by LzmaDec_Allocate*/LzmaDec_Init and
 * updated by the decode functions.
 */
typedef struct
{
    CLzmaProps prop;       /* decoded stream properties (lc, lp, pb, dicSize) */
    CLzmaProb *probs;      /* adaptive probability array, numProbs entries */
    uint8_t *dic;          /* dictionary / output window; allocated by
                              LzmaDec_Allocate or supplied by the caller */
    const uint8_t *buf;    /* current read position in the input */
    uint32_t range, code;  /* range coder state */
    size_t dicPos;         /* next write position inside dic */
    size_t dicBufSize;     /* size of dic in bytes */
    uint32_t processedPos; /* number of bytes decoded since the dictionary was reset */
    uint32_t checkDicSize; /* 0 until processedPos reaches prop.dicSize, then
                              prop.dicSize; selects which bound match distances
                              are validated against */
    unsigned state;        /* LZMA state machine index */
    uint32_t reps[4];      /* four most recent match distances (stored as distance + 1) */
    unsigned remainLen;    /* match bytes still to be written, or a marker value
                              >= kMatchSpecLenStart (see LzmaDec.c) */
    int needFlush;         /* non-zero: range coder must be initialised from the
                              next RC_INIT_SIZE input bytes */
    int needInitState;     /* non-zero: probabilities/reps/state must be reset
                              before decoding continues */
    uint32_t numProbs;     /* number of entries allocated in probs */
    unsigned tempBufSize;  /* number of valid bytes in tempBuf */
    uint8_t tempBuf[LZMA_REQUIRED_INPUT_MAX]; /* input bytes carried over between
                              calls when too few were available to decode a symbol */
} CLzmaDec;
/* ---------- Dictionary Interface ---------- */

/* You can use it, if you want to eliminate the overhead for data copying from
   dictionary to some other external buffer.
   You must work with CLzmaDec variables directly in this interface.

   STEPS:
     LzmaDec_Construct()
     LzmaDec_Allocate()
     for (each new stream)
     {
       LzmaDec_Init()
       while (it needs more decompression)
       {
         LzmaDec_DecodeToDic()
         use data from CLzmaDec::dic and update CLzmaDec::dicPos
       }
     }
     LzmaDec_Free()
*/

/* LzmaDec_DecodeToDic

   The decoding to internal dictionary buffer (CLzmaDec::dic).
   You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!

p:
  Decoder state; must have been allocated and initialised (see STEPS above).

dicLimit:
  Position inside CLzmaDec::dic at which decoding stops (the output limit).

src, srcLen:
  Input bytes. On entry *srcLen is the number of bytes available at src;
  on return it is the number of bytes that were consumed.

finishMode:
  It has meaning only if the decoding reaches output limit (dicLimit).
  LZMA_FINISH_ANY - Decode just dicLimit bytes.
  LZMA_FINISH_END - Stream must be finished after dicLimit.

status:
  Output only; tells how decoding stopped (see list below).

Returns:
  SZ_OK
    status:
      LZMA_STATUS_FINISHED_WITH_MARK
      LZMA_STATUS_NOT_FINISHED
      LZMA_STATUS_NEEDS_MORE_INPUT
      LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
  SZ_ERROR_DATA - Data error
*/

SRes LzmaDec_DecodeToDic(CLzmaDec *p, size_t dicLimit, const uint8_t *src, size_t *srcLen,
                         ELzmaFinishMode finishMode, ELzmaStatus *status);
+*/ + +SRes LzmaDecode(uint8_t *dest, size_t *destLen, const uint8_t *src, size_t *srcLen, + const uint8_t *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status); diff --git a/depends/lzma/pavlov/LzmaEnc.c b/depends/lzma/pavlov/LzmaEnc.c new file mode 100755 index 00000000..ac34eb45 --- /dev/null +++ b/depends/lzma/pavlov/LzmaEnc.c @@ -0,0 +1,2349 @@ +/* LzmaEnc.c -- LZMA Encoder +2008-10-04 : Igor Pavlov : Public domain */ + +#include +#include + +/* #define SHOW_STAT */ +/* #define SHOW_STAT2 */ + +#if defined(SHOW_STAT) || defined(SHOW_STAT2) +#include +#endif + +#include "LzmaEnc.h" + +#include "LzFind.h" +#ifdef COMPRESS_MF_MT +#include "LzFindMt.h" +#endif + +#ifdef SHOW_STAT +static int ttt = 0; +#endif + +#define kBlockSizeMax ((1 << LZMA_NUM_BLOCK_SIZE_BITS) - 1) + +#define kBlockSize (9 << 10) +#define kUnpackBlockSize (1 << 18) +#define kMatchArraySize (1 << 21) +#define kMatchRecordMaxSize ((LZMA_MATCH_LEN_MAX * 2 + 3) * LZMA_MATCH_LEN_MAX) + +#define kNumMaxDirectBits (31) + +#define kNumTopBits 24 +#define kTopValue ((uint32_t)1 << kNumTopBits) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 +#define kProbInitValue (kBitModelTotal >> 1) + +#define kNumMoveReducingBits 4 +#define kNumBitPriceShiftBits 4 +#define kBitPrice (1 << kNumBitPriceShiftBits) + +void LzmaEncProps_Init(CLzmaEncProps *p) +{ + p->level = 5; + p->dictSize = p->mc = 0; + p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; + p->writeEndMark = 0; +} + +void LzmaEncProps_Normalize(CLzmaEncProps *p) +{ + int level = p->level; + if (level < 0) + level = 5; + p->level = level; + if (p->dictSize == 0) + p->dictSize = + (level <= 5 ? (1 << (level * 2 + 14)) : (level == 6 ? (1 << 25) : (1 << 26))); + if (p->lc < 0) + p->lc = 3; + if (p->lp < 0) + p->lp = 0; + if (p->pb < 0) + p->pb = 2; + if (p->algo < 0) + p->algo = (level < 5 ? 
0 : 1);
+    if (p->fb < 0)
+        p->fb = (level < 7 ? 32 : 64);
+    if (p->btMode < 0)
+        p->btMode = (p->algo == 0 ? 0 : 1);
+    if (p->numHashBytes < 0)
+        p->numHashBytes = 4;
+    if (p->mc == 0)
+        p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
+    if (p->numThreads < 0)
+        p->numThreads = ((p->btMode && p->algo) ? 2 : 1);
+}
+/* Return the dictionary size *props2 would have after LzmaEncProps_Normalize.
+ * Normalization runs on a local copy, so the caller's struct is not modified. */
+uint32_t LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
+{
+    CLzmaEncProps props = *props2;
+    LzmaEncProps_Normalize(&props);
+    return props.dictSize;
+}
+
+/* #define LZMA_LOG_BSR */
+/* Define it for Intel's CPU */
+
+#ifdef LZMA_LOG_BSR
+
+#define kDicLogSizeMaxCompress 30
+/* BSR2_RET: res = 2 * i + (bit i-1 of pos), where i is the bit index that
+ * _BitScanReverse(&i, pos) writes. */
+#define BSR2_RET(pos, res) \
+    { \
+        unsigned long i; \
+        _BitScanReverse(&i, (pos)); \
+        res = (i + i) + ((pos >> (i - 1)) & 1); \
+    }
+
+uint32_t GetPosSlot1(uint32_t pos)
+{
+    uint32_t res;
+    BSR2_RET(pos, res);
+    return res;
+}
+#define GetPosSlot2(pos, res) \
+    { \
+        BSR2_RET(pos, res); \
+    }
+#define GetPosSlot(pos, res) \
+    { \
+        if (pos < 2) \
+            res = pos; \
+        else \
+            BSR2_RET(pos, res); \
+    }
+
+#else
+
+#define kNumLogBits (9 + (int)sizeof(size_t) / 2) /* 11 if sizeof(size_t) == 4, 13 if 8 */
+#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7) /* 27 resp. 31 */
+/* Fill the slot lookup table g_FastPos. Entries 0 and 1 hold 0 and 1; then, for each
+ * slot value s in [2, 2 * kNumLogBits), the next 1 << ((s >> 1) - 1) entries are set
+ * to s. In total (1 << kNumLogBits) entries are written, so g_FastPos must be at
+ * least that large (CLzmaEnc::g_FastPos has exactly that size). */
+void LzmaEnc_FastPosInit(uint8_t *g_FastPos)
+{
+    int c = 2, slotFast;
+    g_FastPos[0] = 0;
+    g_FastPos[1] = 1;
+
+    for (slotFast = 2; slotFast < kNumLogBits * 2; slotFast++)
+    {
+        uint32_t k = (1 << ((slotFast >> 1) - 1));
+        uint32_t j;
+        for (j = 0; j < k; j++, c++)
+            g_FastPos[c] = (uint8_t)slotFast;
+    }
+}
+/* BSR2_RET (table version): needs a pointer named p with a g_FastPos member in scope.
+ * The branch-free expression makes i equal to 6 when pos < (1 << (kNumLogBits + 6))
+ * and 6 + (kNumLogBits - 1) otherwise; the result is g_FastPos[pos >> i] + 2 * i.
+ * The commented-out macro below spells out the same selection with a conditional. */
+#define BSR2_RET(pos, res) \
+    { \
+        uint32_t i = 6 + ((kNumLogBits - 1) & \
+                          (0 - (((((uint32_t)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \
+        res = p->g_FastPos[pos >> i] + (i * 2); \
+    }
+/*
+#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ?
\
+  p->g_FastPos[pos >> 6] + 12 : \
+  p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; }
+*/
+/* Slot lookup helpers for the table-based build. They expand to code that reads
+ * p->g_FastPos, so a pointer named p with that member must be in scope where used. */
+#define GetPosSlot1(pos) p->g_FastPos[pos]
+#define GetPosSlot2(pos, res) \
+    { \
+        BSR2_RET(pos, res); \
+    }
+#define GetPosSlot(pos, res) \
+    { \
+        if (pos < kNumFullDistances) \
+            res = p->g_FastPos[pos]; \
+        else \
+            BSR2_RET(pos, res); \
+    }
+
+#endif
+
+#define LZMA_NUM_REPS 4
+
+typedef unsigned CState;
+/* One entry of the optimal-parse table CLzmaEnc::opt (filled by GetOptimum and
+ * re-linked by Backward). */
+typedef struct _COptimal
+{
+    uint32_t price; /* lowest price found so far for reaching this entry */
+
+    CState state;
+    int prev1IsChar; /* nonzero: a literal immediately precedes the final step */
+    int prev2;       /* nonzero: posPrev2/backPrev2 describe a step before that literal */
+
+    uint32_t posPrev2;
+    uint32_t backPrev2;
+
+    uint32_t posPrev;  /* index of the entry this one was reached from */
+    uint32_t backPrev; /* (uint32_t)-1 literal; < LZMA_NUM_REPS rep index; else distance + LZMA_NUM_REPS */
+    uint32_t backs[LZMA_NUM_REPS]; /* rep distances in effect at this entry */
+} COptimal;
+
+#define kNumOpts (1 << 12)
+
+#define kNumLenToPosStates 4
+#define kNumPosSlotBits 6
+#define kDicLogSizeMin 0
+#define kDicLogSizeMax 32
+#define kDistTableSizeMax (kDicLogSizeMax * 2)
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+#define kAlignMask (kAlignTableSize - 1)
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex)
+
+#define kNumFullDistances (1 << (kEndPosModelIndex / 2)) /* 128 */
+/* Storage type of one adaptive bit probability. */
+#ifdef _LZMA_PROB32
+#define CLzmaProb uint32_t
+#else
+#define CLzmaProb uint16_t
+#endif
+
+#define LZMA_PB_MAX 4
+#define LZMA_LC_MAX 8
+#define LZMA_LP_MAX 4
+
+#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX)
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumMidBits 3
+#define kLenNumMidSymbols (1 << kLenNumMidBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+
+#define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) /* 272 */
+
+#define LZMA_MATCH_LEN_MIN 2
+#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1) /* 273 */
+
+#define kNumStates 12
+/* Probability model of the length coder: two choice bits select the low, mid or high
+ * bit tree; low and mid have one tree per posState, high is shared (see LenEnc_Encode). */
+typedef struct
+{
+    CLzmaProb choice;
+    CLzmaProb choice2;
+    CLzmaProb low[LZMA_NUM_PB_STATES_MAX << kLenNumLowBits];
+    CLzmaProb mid[LZMA_NUM_PB_STATES_MAX << kLenNumMidBits];
+    CLzmaProb high[kLenNumHighSymbols];
+} CLenEnc;
+/* Length coder plus a per-posState cache of symbol prices. */
+typedef struct
+{
+    CLenEnc p;
+    uint32_t prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal];
+    uint32_t tableSize; /* number of symbols priced by LenPriceEnc_UpdateTable */
+    uint32_t counters[LZMA_NUM_PB_STATES_MAX]; /* counted down by LenEnc_Encode2; refresh at 0 */
+} CLenPriceEnc;
+/* Range encoder state and its output buffer. */
+typedef struct _CRangeEnc
+{
+    uint32_t range;
+    uint8_t cache;      /* byte held back until a possible carry is known */
+    uint64_t low;       /* bit 32 carries into the bytes already held back */
+    uint64_t cacheSize; /* number of held-back bytes (cache followed by 0xFF bytes) */
+    uint8_t *buf;       /* next write position in the buffer */
+    uint8_t *bufLim;    /* one past the end of the buffer */
+    uint8_t *bufBase;   /* start of the buffer; 0 while unallocated */
+    ISeqOutStream *outStream;
+    uint64_t processed; /* bytes handed to outStream so far */
+    SRes res;           /* SZ_OK, or SZ_ERROR_WRITE after a short write */
+} CRangeEnc;
+/* Input stream over a caller-supplied memory block; MyRead consumes data/rem. */
+typedef struct _CSeqInStreamBuf
+{
+    ISeqInStream funcTable;
+    const uint8_t *data;
+    size_t rem;
+} CSeqInStreamBuf;
+/* Read callback for CSeqInStreamBuf (pp is cast to CSeqInStreamBuf *). Copies
+ * min(*size, remaining) bytes into data, advances the source, stores the number of
+ * bytes copied in *size and always returns SZ_OK; at end of data *size becomes 0.
+ * Presumably installed as funcTable.Read elsewhere in the file - not shown here. */
+static SRes MyRead(void *pp, void *data, size_t *size)
+{
+    size_t curSize = *size;
+    CSeqInStreamBuf *p = (CSeqInStreamBuf *)pp;
+    if (p->rem < curSize)
+        curSize = p->rem;
+    memcpy(data, p->data, curSize);
+    p->rem -= curSize;
+    p->data += curSize;
+    *size = curSize;
+    return SZ_OK;
+}
+/* Snapshot of the adaptive model, written by LzmaEnc_SaveState and read back by
+ * LzmaEnc_RestoreState. The fields mirror the same-named members of CLzmaEnc. */
+typedef struct
+{
+    CLzmaProb *litProbs; /* separate buffer; its allocation is not shown in this part */
+
+    CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
+    CLzmaProb isRep[kNumStates];
+    CLzmaProb isRepG0[kNumStates];
+    CLzmaProb isRepG1[kNumStates];
+    CLzmaProb isRepG2[kNumStates];
+    CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
+
+    CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
+    CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex];
+    CLzmaProb posAlignEncoder[1 << kNumAlignBits];
+
+    CLenPriceEnc lenEnc;
+    CLenPriceEnc repLenEnc;
+
+    uint32_t reps[LZMA_NUM_REPS];
+    uint32_t state;
+} CSaveState;
+/* Complete encoder object; CLzmaEncHandle values are cast to a pointer to this struct. */
+typedef struct _CLzmaEnc
+{
+    IMatchFinder matchFinder; /* function table, called with matchFinderObj */
+    void *matchFinderObj;
+
+#ifdef COMPRESS_MF_MT
+    Bool mtMode;
+    CMatchFinderMt matchFinderMt;
+#endif
+
+    CMatchFinder matchFinderBase;
+
+#ifdef COMPRESS_MF_MT
+    Byte pad[128];
+#endif
+
+    uint32_t optimumEndIndex;     /* end of the path prepared by Backward */
+    uint32_t optimumCurrentIndex; /* how much of that path GetOptimum has handed out */
+
+    uint32_t longestMatchLength;
+    uint32_t numPairs;
+    uint32_t numAvail; /* refreshed by ReadMatchDistances */
+    COptimal opt[kNumOpts];
+
+#ifndef LZMA_LOG_BSR
+    uint8_t g_FastPos[1 << kNumLogBits]; /* filled by LzmaEnc_FastPosInit */
+#endif
+
+    uint32_t ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; /* filled by LzmaEnc_InitPriceTables */
+    uint32_t matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1]; /* (length, distance) pairs from GetMatches */
+    uint32_t numFastBytes;
+    uint32_t additionalOffset; /* advanced by ReadMatchDistances and MovePos */
+    uint32_t reps[LZMA_NUM_REPS];
+    uint32_t state;
+
+    uint32_t posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
+    uint32_t distancesPrices[kNumLenToPosStates][kNumFullDistances];
+    uint32_t alignPrices[kAlignTableSize];
+    uint32_t alignPriceCount;
+
+    uint32_t distTableSize;
+
+    unsigned lc, lp, pb;
+    unsigned lpMask, pbMask;
+
+    CLzmaProb *litProbs; /* (0x300 << lclp) entries are copied by Save/RestoreState */
+
+    CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
+    CLzmaProb isRep[kNumStates];
+    CLzmaProb isRepG0[kNumStates];
+    CLzmaProb isRepG1[kNumStates];
+    CLzmaProb isRepG2[kNumStates];
+    CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
+
+    CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
+    CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex];
+    CLzmaProb posAlignEncoder[1 << kNumAlignBits];
+
+    CLenPriceEnc lenEnc;
+    CLenPriceEnc repLenEnc;
+
+    unsigned lclp;
+
+    Bool fastMode; /* set by LzmaEnc_SetProps when props.algo == 0 */
+
+    CRangeEnc rc;
+
+    Bool writeEndMark;
+    uint64_t nowPos64;
+    uint32_t matchPriceCount;
+    Bool finished;
+    Bool multiThread;
+
+    SRes result;
+    uint32_t dictSize;
+    uint32_t matchFinderCycles;
+
+    ISeqInStream *inStream;
+    CSeqInStreamBuf seqBufInStream;
+
+    CSaveState saveState;
+} CLzmaEnc;
+/* Copy the encoder's adaptive model (bit probabilities, both length coders, rep
+ * distances and state) into its own saveState member. The literal probabilities are
+ * copied into saveState.litProbs, which must already point to a buffer of at least
+ * (0x300 << lclp) CLzmaProb entries. */
+void LzmaEnc_SaveState(CLzmaEncHandle pp)
+{
+    CLzmaEnc *p = (CLzmaEnc *)pp;
+    CSaveState *dest = &p->saveState;
+    int i;
+    dest->lenEnc = p->lenEnc;
+    dest->repLenEnc = p->repLenEnc;
+    dest->state = p->state;
+
+    for (i = 0; i < kNumStates; i++)
+    {
+        memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i]));
+        memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i]));
+    }
+    for (i = 0; i < kNumLenToPosStates; i++)
+        memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i]));
+    memcpy(dest->isRep, p->isRep, sizeof(p->isRep));
+    memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0));
+    memcpy(dest->isRepG1,
p->isRepG1, sizeof(p->isRepG1));
+    memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2));
+    memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders));
+    memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder));
+    memcpy(dest->reps, p->reps, sizeof(p->reps));
+    memcpy(dest->litProbs, p->litProbs, (0x300 << p->lclp) * sizeof(CLzmaProb));
+}
+/* Inverse of LzmaEnc_SaveState: copy the model stored in the encoder's saveState
+ * member back into the live encoder fields. Note the swapped roles of the locals:
+ * here dest is the encoder and p is the snapshot. */
+void LzmaEnc_RestoreState(CLzmaEncHandle pp)
+{
+    CLzmaEnc *dest = (CLzmaEnc *)pp;
+    const CSaveState *p = &dest->saveState;
+    int i;
+    dest->lenEnc = p->lenEnc;
+    dest->repLenEnc = p->repLenEnc;
+    dest->state = p->state;
+
+    for (i = 0; i < kNumStates; i++)
+    {
+        memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i]));
+        memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i]));
+    }
+    for (i = 0; i < kNumLenToPosStates; i++)
+        memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i]));
+    memcpy(dest->isRep, p->isRep, sizeof(p->isRep));
+    memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0));
+    memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1));
+    memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2));
+    memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders));
+    memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder));
+    memcpy(dest->reps, p->reps, sizeof(p->reps));
+    memcpy(dest->litProbs, p->litProbs, (0x300 << dest->lclp) * sizeof(CLzmaProb));
+}
+/* Validate *props2 and apply it to the encoder.
+ * The properties are normalized on a local copy first, so unset fields take their
+ * defaults and the caller's struct is not modified.
+ * Returns SZ_ERROR_PARAM, leaving the encoder untouched, when lc, lp or pb exceed
+ * their maxima or dictSize exceeds 2^kDicLogSizeMaxCompress or 2^30; otherwise the
+ * settings are stored and SZ_OK is returned at the end of the function.
+ * The limits are built from (uint32_t)1: kDicLogSizeMaxCompress is 31 when
+ * sizeof(size_t) == 8, and shifting a signed int 1 left by 31 is undefined in C. */
+SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
+{
+    CLzmaEnc *p = (CLzmaEnc *)pp;
+    CLzmaEncProps props = *props2;
+    LzmaEncProps_Normalize(&props);
+
+    if (props.lc > LZMA_LC_MAX || props.lp > LZMA_LP_MAX || props.pb > LZMA_PB_MAX ||
+        props.dictSize > ((uint32_t)1 << kDicLogSizeMaxCompress) ||
+        props.dictSize > ((uint32_t)1 << 30))
+        return SZ_ERROR_PARAM;
+    p->dictSize = props.dictSize;
+    p->matchFinderCycles = props.mc;
+    {
+        unsigned fb = props.fb; /* clamped to [5, LZMA_MATCH_LEN_MAX] */
+        if (fb < 5)
+            fb = 5;
+        if (fb > LZMA_MATCH_LEN_MAX)
+            fb = LZMA_MATCH_LEN_MAX;
+        p->numFastBytes = fb;
+    }
+    p->lc =
props.lc;
+    p->lp = props.lp;
+    p->pb = props.pb;
+    p->fastMode = (props.algo == 0);
+    p->matchFinderBase.btMode = props.btMode;
+    {
+        uint32_t numHashBytes = 4; /* always 4 unless btMode is on and 2 or 3 is requested */
+        if (props.btMode)
+        {
+            if (props.numHashBytes < 2)
+                numHashBytes = 2;
+            else if (props.numHashBytes < 4)
+                numHashBytes = props.numHashBytes;
+        }
+        p->matchFinderBase.numHashBytes = numHashBytes;
+    }
+
+    p->matchFinderBase.cutValue = props.mc;
+
+    p->writeEndMark = props.writeEndMark;
+
+#ifdef COMPRESS_MF_MT
+    /*
+    if (newMultiThread != _multiThread)
+    {
+      ReleaseMatchFinder();
+      _multiThread = newMultiThread;
+    }
+    */
+    p->multiThread = (props.numThreads > 1);
+#endif
+
+    return SZ_OK;
+}
+/* State transition tables, indexed by the current state (0 .. kNumStates - 1): the
+ * state that follows a literal, a match, a rep match and a short rep respectively. */
+static const int kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5};
+static const int kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10};
+static const int kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11};
+static const int kShortRepNextStates[kNumStates] = {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11};
+/* True for states 0..6; GetOptimum prices a literal with LitEnc_GetPrice in these
+ * states and with LitEnc_GetPriceMatched in the others. */
+#define IsCharState(s) ((s) < 7)
+/* Map a match length to its row in the distance-slot tables: len - 2 for lengths up
+ * to kNumLenToPosStates, and the last row (kNumLenToPosStates - 1) for longer ones. */
+#define GetLenToPosState(len) \
+    (((len) < kNumLenToPosStates + 1) ?
(len) - 2 : kNumLenToPosStates - 1)
+
+#define kInfinityPrice (1 << 30)
+/* Put the range encoder into the "no stream, no buffer" state. */
+static void RangeEnc_Construct(CRangeEnc *p)
+{
+    p->outStream = 0;
+    p->bufBase = 0;
+}
+/* Bytes produced so far: already flushed + currently buffered + held-back bytes. */
+#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize)
+/* RangeEnc_Alloc: allocate the RC_BUF_SIZE-byte output buffer unless one already
+ * exists. Returns 1 on success (or if already allocated) and 0 if malloc fails. */
+#define RC_BUF_SIZE (1 << 16)
+static int RangeEnc_Alloc(CRangeEnc *p)
+{
+    if (p->bufBase == 0)
+    {
+        p->bufBase = malloc(RC_BUF_SIZE);
+        if (p->bufBase == 0)
+            return 0;
+        p->bufLim = p->bufBase + RC_BUF_SIZE;
+    }
+    return 1;
+}
+/* Release the output buffer; bufBase is reset so RangeEnc_Alloc can be called again. */
+static void RangeEnc_Free(CRangeEnc *p)
+{
+    free(p->bufBase);
+    p->bufBase = 0;
+}
+/* Reset the coder for a new stream: full range, empty low, one held-back zero byte,
+ * write position at the start of the buffer, no bytes processed, no error. */
+static void RangeEnc_Init(CRangeEnc *p)
+{
+    /* Stream.Init(); */
+    p->low = 0;
+    p->range = 0xFFFFFFFF;
+    p->cacheSize = 1;
+    p->cache = 0;
+
+    p->buf = p->bufBase;
+
+    p->processed = 0;
+    p->res = SZ_OK;
+}
+/* Hand the buffered bytes to outStream->Write and rewind the buffer. A write that
+ * accepts fewer bytes than offered sets res to SZ_ERROR_WRITE. Once res is not SZ_OK
+ * the function returns immediately.
+ * NOTE(review): on that early return buf is not rewound; callers presumably stop
+ * encoding once res is set - confirm. */
+static void RangeEnc_FlushStream(CRangeEnc *p)
+{
+    size_t num;
+    if (p->res != SZ_OK)
+        return;
+    num = p->buf - p->bufBase;
+    if (num != p->outStream->Write(p->outStream, p->bufBase, num))
+        p->res = SZ_ERROR_WRITE;
+    p->processed += num;
+    p->buf = p->bufBase;
+}
+/* Move the top byte of the 32-bit part of low towards the output. If that part is
+ * below 0xFF000000 or bit 32 (the carry) is set, the held-back bytes are final: cache
+ * plus carry is written, followed by cacheSize - 1 bytes of 0xFF plus carry, and the
+ * current top byte becomes the new cache. Otherwise the top byte is 0xFF with no
+ * carry yet, so it is only counted in cacheSize. Finally low is shifted left by 8. */
+static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p)
+{
+    if ((uint32_t)p->low < (uint32_t)0xFF000000 || (int)(p->low >> 32) != 0)
+    {
+        uint8_t temp = p->cache;
+        do
+        {
+            uint8_t *buf = p->buf;
+            *buf++ = (uint8_t)(temp + (uint8_t)(p->low >> 32));
+            p->buf = buf;
+            if (buf == p->bufLim)
+                RangeEnc_FlushStream(p);
+            temp = 0xFF;
+        } while (--p->cacheSize != 0);
+        p->cache = (uint8_t)((uint32_t)p->low >> 24);
+    }
+    p->cacheSize++;
+    p->low = (uint32_t)p->low << 8;
+}
+/* Finish the stream by shifting low out five times. */
+static void RangeEnc_FlushData(CRangeEnc *p)
+{
+    int i;
+    for (i = 0; i < 5; i++)
+        RangeEnc_ShiftLow(p);
+}
+/* Encode the low numBits bits of value, most significant first, without a
+ * probability model (each bit halves the range). numBits must be at least 1: the
+ * loop body runs before the count is tested. */
+static void RangeEnc_EncodeDirectBits(CRangeEnc *p, uint32_t value, int numBits)
+{
+    do
+    {
+        p->range >>= 1;
+        p->low += p->range & (0 - ((value >> --numBits) & 1)); /* add range only for a 1 bit */
+        if (p->range < kTopValue)
+        {
+            p->range <<= 8;
+            RangeEnc_ShiftLow(p);
+        }
+    } while (numBits != 0);
+}
+/* Encode one bit (symbol is 0 or nonzero) with the adaptive probability *prob and
+ * then adapt *prob towards the bit that was coded. */
+static void RangeEnc_EncodeBit(CRangeEnc *p, CLzmaProb *prob,
uint32_t symbol)
+{
+    uint32_t ttt = *prob;
+    uint32_t newBound = (p->range >> kNumBitModelTotalBits) * ttt; /* size of the "0" part */
+    if (symbol == 0)
+    {
+        p->range = newBound;
+        ttt += (kBitModelTotal - ttt) >> kNumMoveBits; /* raise the probability value */
+    }
+    else
+    {
+        p->low += newBound;
+        p->range -= newBound;
+        ttt -= ttt >> kNumMoveBits; /* lower the probability value */
+    }
+    *prob = (CLzmaProb)ttt;
+    if (p->range < kTopValue)
+    {
+        p->range <<= 8;
+        RangeEnc_ShiftLow(p);
+    }
+}
+/* Encode the 8 bits of symbol, most significant first, through a bit tree rooted at
+ * probs[1]. The 0x100 bit ORed into symbol is a marker: symbol >> 8 is the tree node
+ * for the next bit, and the loop ends once the marker reaches bit 16. */
+static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, uint32_t symbol)
+{
+    symbol |= 0x100;
+    do
+    {
+        RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1);
+        symbol <<= 1;
+    } while (symbol < 0x10000);
+}
+/* Like LitEnc_Encode, but the probability for each bit is also selected by the
+ * corresponding bit of matchByte: offs stays 0x100 while the bits coded so far equal
+ * those of matchByte and becomes 0 after the first difference. probs is indexed up
+ * to 0x2FF here. */
+static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, uint32_t symbol,
+                                 uint32_t matchByte)
+{
+    uint32_t offs = 0x100;
+    symbol |= 0x100;
+    do
+    {
+        matchByte <<= 1;
+        RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)),
+                           (symbol >> 7) & 1);
+        symbol <<= 1;
+        offs &= ~(matchByte ^ symbol);
+    } while (symbol < 0x10000);
+}
+/* Fill ProbPrices with one entry per group of (1 << kNumMoveReducingBits)
+ * probability values, i.e. kBitModelTotal >> kNumMoveReducingBits entries. For the
+ * midpoint i of each group the loop squares w kCyclesBits times while counting the
+ * shifts needed to keep it below 2^16, and stores
+ * (kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount.
+ * Presumably this approximates the cost of coding a bit with probability
+ * i / kBitModelTotal in units of 1 / kBitPrice bit - confirm. */
+void LzmaEnc_InitPriceTables(uint32_t *ProbPrices)
+{
+    uint32_t i;
+    for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal;
+         i += (1 << kNumMoveReducingBits))
+    {
+        const int kCyclesBits = kNumBitPriceShiftBits;
+        uint32_t w = i;
+        uint32_t bitCount = 0;
+        int j;
+        for (j = 0; j < kCyclesBits; j++)
+        {
+            w = w * w;
+            bitCount <<= 1;
+            while (w >= ((uint32_t)1 << 16))
+            {
+                w >>= 1;
+                bitCount++;
+            }
+        }
+        ProbPrices[i >> kNumMoveReducingBits] =
+            ((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
+    }
+}
+/* Price lookups. GET_PRICE(prob, symbol) is the price of coding symbol (0 or 1) with
+ * probability prob: for symbol 1 the probability is complemented before the lookup.
+ * The plain forms read p->ProbPrices, the "a" forms a local named ProbPrices.
+ * NOTE(review): GET_PRICE and GET_PRICEa end in ';', so they can only be used as the
+ * last operand of a statement. */
+#define GET_PRICE(prob, symbol) \
+    p->ProbPrices \
+        [((prob) ^ (((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
+
+#define GET_PRICEa(prob, symbol) \
+    ProbPrices[((prob) ^ ((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
+
+#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]
+#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
+
+#define GET_PRICE_0a(prob)
ProbPrices[(prob) >> kNumMoveReducingBits]
+#define GET_PRICE_1a(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
+/* Price of what LitEnc_Encode would emit for symbol with the current probabilities;
+ * walks the same tree nodes but only sums table prices, nothing is encoded. */
+static uint32_t LitEnc_GetPrice(const CLzmaProb *probs, uint32_t symbol, uint32_t *ProbPrices)
+{
+    uint32_t price = 0;
+    symbol |= 0x100;
+    do
+    {
+        price += GET_PRICEa(probs[symbol >> 8], (symbol >> 7) & 1);
+        symbol <<= 1;
+    } while (symbol < 0x10000);
+    return price;
+}
+/* Price counterpart of LitEnc_EncodeMatched: same node selection via matchByte and
+ * offs, summing prices instead of encoding. */
+static uint32_t LitEnc_GetPriceMatched(const CLzmaProb *probs, uint32_t symbol,
+                                       uint32_t matchByte, uint32_t *ProbPrices)
+{
+    uint32_t price = 0;
+    uint32_t offs = 0x100;
+    symbol |= 0x100;
+    do
+    {
+        matchByte <<= 1;
+        price +=
+            GET_PRICEa(probs[offs + (matchByte & offs) + (symbol >> 8)], (symbol >> 7) & 1);
+        symbol <<= 1;
+        offs &= ~(matchByte ^ symbol);
+    } while (symbol < 0x10000);
+    return price;
+}
+/* Encode the low numBitLevels bits of symbol, most significant first, through a bit
+ * tree: m starts at node 1 and becomes (m << 1) | bit after each bit. */
+static void RcTree_Encode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, uint32_t symbol)
+{
+    uint32_t m = 1;
+    int i;
+    for (i = numBitLevels; i != 0;)
+    {
+        uint32_t bit;
+        i--;
+        bit = (symbol >> i) & 1;
+        RangeEnc_EncodeBit(rc, probs + m, bit);
+        m = (m << 1) | bit;
+    }
+}
+/* Same tree walk as RcTree_Encode, but the bits of symbol are consumed least
+ * significant first. */
+static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels,
+                                 uint32_t symbol)
+{
+    uint32_t m = 1;
+    int i;
+    for (i = 0; i < numBitLevels; i++)
+    {
+        uint32_t bit = symbol & 1;
+        RangeEnc_EncodeBit(rc, probs + m, bit);
+        m = (m << 1) | bit;
+        symbol >>= 1;
+    }
+}
+/* Price of RcTree_Encode(probs, numBitLevels, symbol). With the marker bit set,
+ * symbol is the leaf index; the loop climbs to the root, pricing at each parent node
+ * (symbol >> 1) the bit (symbol & 1) that leads to the child. */
+static uint32_t RcTree_GetPrice(const CLzmaProb *probs, int numBitLevels, uint32_t symbol,
+                                uint32_t *ProbPrices)
+{
+    uint32_t price = 0;
+    symbol |= (1 << numBitLevels);
+    while (symbol != 1)
+    {
+        price += GET_PRICEa(probs[symbol >> 1], symbol & 1);
+        symbol >>= 1;
+    }
+    return price;
+}
+/* Price of RcTree_ReverseEncode(probs, numBitLevels, symbol): walks down from node 1
+ * taking the bits of symbol least significant first. */
+static uint32_t RcTree_ReverseGetPrice(const CLzmaProb *probs, int numBitLevels,
+                                       uint32_t symbol, uint32_t *ProbPrices)
+{
+    uint32_t price = 0;
+    uint32_t m = 1;
+    int i;
+    for (i = numBitLevels; i != 0; i--)
+    {
+        uint32_t bit = symbol & 1;
+        symbol >>= 1;
+        price +=
GET_PRICEa(probs[m], bit);
+        m = (m << 1) | bit;
+    }
+    return price;
+}
+/* Reset every probability of the length coder to kProbInitValue. */
+static void LenEnc_Init(CLenEnc *p)
+{
+    unsigned i;
+    p->choice = p->choice2 = kProbInitValue;
+    for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumLowBits); i++)
+        p->low[i] = kProbInitValue;
+    for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumMidBits); i++)
+        p->mid[i] = kProbInitValue;
+    for (i = 0; i < kLenNumHighSymbols; i++)
+        p->high[i] = kProbInitValue;
+}
+/* Encode a length symbol (callers pass length - LZMA_MATCH_LEN_MIN, compare
+ * GetRepPrice). Symbols below kLenNumLowSymbols: choice = 0, then the low tree of
+ * this posState. The next kLenNumMidSymbols symbols: choice = 1, choice2 = 0, then
+ * the mid tree of this posState. All others: choice = 1, choice2 = 1, then the
+ * shared high tree. */
+static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, uint32_t symbol, uint32_t posState)
+{
+    if (symbol < kLenNumLowSymbols)
+    {
+        RangeEnc_EncodeBit(rc, &p->choice, 0);
+        RcTree_Encode(rc, p->low + (posState << kLenNumLowBits), kLenNumLowBits, symbol);
+    }
+    else
+    {
+        RangeEnc_EncodeBit(rc, &p->choice, 1);
+        if (symbol < kLenNumLowSymbols + kLenNumMidSymbols)
+        {
+            RangeEnc_EncodeBit(rc, &p->choice2, 0);
+            RcTree_Encode(rc, p->mid + (posState << kLenNumMidBits), kLenNumMidBits,
+                          symbol - kLenNumLowSymbols);
+        }
+        else
+        {
+            RangeEnc_EncodeBit(rc, &p->choice2, 1);
+            RcTree_Encode(rc, p->high, kLenNumHighBits,
+                          symbol - kLenNumLowSymbols - kLenNumMidSymbols);
+        }
+    }
+}
+/* Write into prices[0 .. numSymbols - 1] the price of each length symbol for the
+ * given posState, mirroring the three cases of LenEnc_Encode. a0 is the price of
+ * choice = 0; b0 and b1 are choice = 1 followed by choice2 = 0 and choice2 = 1. */
+static void LenEnc_SetPrices(CLenEnc *p, uint32_t posState, uint32_t numSymbols,
+                             uint32_t *prices, uint32_t *ProbPrices)
+{
+    uint32_t a0 = GET_PRICE_0a(p->choice);
+    uint32_t a1 = GET_PRICE_1a(p->choice);
+    uint32_t b0 = a1 + GET_PRICE_0a(p->choice2);
+    uint32_t b1 = a1 + GET_PRICE_1a(p->choice2);
+    uint32_t i = 0;
+    for (i = 0; i < kLenNumLowSymbols; i++)
+    {
+        if (i >= numSymbols)
+            return;
+        prices[i] = a0 + RcTree_GetPrice(p->low + (posState << kLenNumLowBits), kLenNumLowBits,
+                                         i, ProbPrices);
+    }
+    for (; i < kLenNumLowSymbols + kLenNumMidSymbols; i++)
+    {
+        if (i >= numSymbols)
+            return;
+        prices[i] = b0 + RcTree_GetPrice(p->mid + (posState << kLenNumMidBits), kLenNumMidBits,
+                                         i - kLenNumLowSymbols, ProbPrices);
+    }
+    for (; i < numSymbols; i++)
+        prices[i] = b1 + RcTree_GetPrice(p->high, kLenNumHighBits,
+                                         i - kLenNumLowSymbols -
kLenNumMidSymbols, ProbPrices);
+}
+/* Recompute the cached prices of one posState (tableSize symbols) and restart its
+ * refresh countdown at tableSize. */
+static void MY_FAST_CALL
+LenPriceEnc_UpdateTable(CLenPriceEnc *p, uint32_t posState, uint32_t *ProbPrices)
+{
+    LenEnc_SetPrices(&p->p, posState, p->tableSize, p->prices[posState], ProbPrices);
+    p->counters[posState] = p->tableSize;
+}
+/* Recompute the cached prices for posStates 0 .. numPosStates - 1. */
+static void LenPriceEnc_UpdateTables(CLenPriceEnc *p, uint32_t numPosStates,
+                                     uint32_t *ProbPrices)
+{
+    uint32_t posState;
+    for (posState = 0; posState < numPosStates; posState++)
+        LenPriceEnc_UpdateTable(p, posState, ProbPrices);
+}
+/* Encode a length symbol and, when updatePrice is nonzero, count down this
+ * posState's counter; the price row is recomputed when the counter reaches 0. */
+static void LenEnc_Encode2(CLenPriceEnc *p, CRangeEnc *rc, uint32_t symbol, uint32_t posState,
+                           Bool updatePrice, uint32_t *ProbPrices)
+{
+    LenEnc_Encode(&p->p, rc, symbol, posState);
+    if (updatePrice)
+        if (--p->counters[posState] == 0)
+            LenPriceEnc_UpdateTable(p, posState, ProbPrices);
+}
+/* Skip num positions in the match finder and account for them in additionalOffset;
+ * does nothing when num is 0. */
+static void MovePos(CLzmaEnc *p, uint32_t num)
+{
+#ifdef SHOW_STAT
+    ttt += num;
+    printf("\n MovePos %d", num);
+#endif
+    if (num != 0)
+    {
+        p->additionalOffset += num;
+        p->matchFinder.Skip(p->matchFinderObj, num);
+    }
+}
+/* Query the match finder at the current position. p->matches receives
+ * (length, distance) pairs, p->numAvail is refreshed and the number of array elements
+ * used (twice the number of pairs) is stored in *numDistancePairsRes. Returns the
+ * length of the last pair, or 0 if there is none. If that length equals numFastBytes
+ * the match is extended by direct byte comparison, up to
+ * min(numAvail, LZMA_MATCH_LEN_MAX). Also increments additionalOffset. */
+static uint32_t ReadMatchDistances(CLzmaEnc *p, uint32_t *numDistancePairsRes)
+{
+    uint32_t lenRes = 0, numPairs;
+    p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+    numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
+#ifdef SHOW_STAT
+    printf("\n i = %d numPairs = %d ", ttt, numPairs / 2);
+    ttt++;
+    {
+        uint32_t i;
+        for (i = 0; i < numPairs; i += 2)
+            printf("%2d %6d | ", p->matches[i], p->matches[i + 1]);
+    }
+#endif
+    if (numPairs > 0)
+    {
+        lenRes = p->matches[numPairs - 2];
+        if (lenRes == p->numFastBytes)
+        {
+            const uint8_t *pby = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+            uint32_t distance = p->matches[numPairs - 1] + 1;
+            uint32_t numAvail = p->numAvail;
+            if (numAvail > LZMA_MATCH_LEN_MAX)
+                numAvail = LZMA_MATCH_LEN_MAX;
+            {
+                const uint8_t *pby2 = pby - distance;
+                for (; lenRes < numAvail && pby[lenRes] == pby2[lenRes]; lenRes++)
+                    ;
+            }
+        }
+    }
+
p->additionalOffset++;
+    *numDistancePairsRes = numPairs;
+    return lenRes;
+}
+/* Helpers that tag a COptimal entry. MakeAsChar marks it as a literal
+ * (backPrev = (uint32_t)-1), MakeAsShortRep as a short rep (backPrev = 0); both
+ * clear prev1IsChar.
+ * NOTE(review): both expand to two statements that already end in ';', so they must
+ * not be used as the body of an unbraced if/else. */
+#define MakeAsChar(p) \
+    (p)->backPrev = (uint32_t)(-1); \
+    (p)->prev1IsChar = False;
+#define MakeAsShortRep(p) \
+    (p)->backPrev = 0; \
+    (p)->prev1IsChar = False;
+#define IsShortRep(p) ((p)->backPrev == 0)
+/* Price of the two bits that select a short rep: isRepG0 = 0 and isRep0Long = 0. */
+static uint32_t GetRepLen1Price(CLzmaEnc *p, uint32_t state, uint32_t posState)
+{
+    return GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState]);
+}
+/* Price of the bits that select which rep distance is used, without the length:
+ *   repIndex 0: isRepG0 = 0, isRep0Long = 1
+ *   repIndex 1: isRepG0 = 1, isRepG1 = 0
+ *   repIndex 2: isRepG0 = 1, isRepG1 = 1, isRepG2 = 0
+ *   repIndex 3: isRepG0 = 1, isRepG1 = 1, isRepG2 = 1 */
+static uint32_t GetPureRepPrice(CLzmaEnc *p, uint32_t repIndex, uint32_t state,
+                                uint32_t posState)
+{
+    uint32_t price;
+    if (repIndex == 0)
+    {
+        price = GET_PRICE_0(p->isRepG0[state]);
+        price += GET_PRICE_1(p->isRep0Long[state][posState]);
+    }
+    else
+    {
+        price = GET_PRICE_1(p->isRepG0[state]);
+        if (repIndex == 1)
+            price += GET_PRICE_0(p->isRepG1[state]);
+        else
+        {
+            price += GET_PRICE_1(p->isRepG1[state]);
+            price += GET_PRICE(p->isRepG2[state], repIndex - 2);
+        }
+    }
+    return price;
+}
+/* Price of a rep match of length len: the cached rep-length price for this posState
+ * plus the rep-selection price. len must be at least LZMA_MATCH_LEN_MIN. */
+static uint32_t GetRepPrice(CLzmaEnc *p, uint32_t repIndex, uint32_t len, uint32_t state,
+                            uint32_t posState)
+{
+    return p->repLenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN] +
+           GetPureRepPrice(p, repIndex, state, posState);
+}
+/* Walk the posPrev/backPrev links from entry cur back to entry 0 and reverse them in
+ * place, so that afterwards each entry on the chosen path stores the index and
+ * back value of the step that follows it. Entries flagged prev1IsChar (and prev2)
+ * are first expanded into their literal (and preceding match) steps. Sets
+ * optimumEndIndex to cur. */
+static uint32_t Backward(CLzmaEnc *p, uint32_t *backRes, uint32_t cur)
+{
+    uint32_t posMem = p->opt[cur].posPrev;
+    uint32_t backMem = p->opt[cur].backPrev;
+    p->optimumEndIndex = cur;
+    do
+    {
+        if (p->opt[cur].prev1IsChar)
+        {
+            MakeAsChar(&p->opt[posMem])
+            p->opt[posMem].posPrev = posMem - 1;
+            if (p->opt[cur].prev2)
+            {
+                p->opt[posMem - 1].prev1IsChar = False;
+                p->opt[posMem - 1].posPrev = p->opt[cur].posPrev2;
+                p->opt[posMem - 1].backPrev = p->opt[cur].backPrev2;
+            }
+        }
+        {
+            uint32_t posPrev = posMem;
+            uint32_t backCur = backMem;
+
+            backMem = p->opt[posPrev].backPrev;
+            posMem = p->opt[posPrev].posPrev;
+
+            p->opt[posPrev].backPrev = backCur; /* link now points forward */
+            p->opt[posPrev].posPrev = cur;
+            cur = posPrev;
+        }
+    } while (cur != 0);
+    *backRes = p->opt[0].backPrev;
+
p->optimumCurrentIndex = p->opt[0].posPrev; + return p->optimumCurrentIndex; +} + +#define LIT_PROBS(pos, prevByte) \ + (p->litProbs + ((((pos) & p->lpMask) << p->lc) + ((prevByte) >> (8 - p->lc))) * 0x300) + +static uint32_t GetOptimum(CLzmaEnc *p, uint32_t position, uint32_t *backRes) +{ + uint32_t numAvail, mainLen, numPairs, repMaxIndex, i, posState, lenEnd, len, cur; + uint32_t matchPrice, repMatchPrice, normalMatchPrice; + uint32_t reps[LZMA_NUM_REPS], repLens[LZMA_NUM_REPS]; + uint32_t *matches; + const uint8_t *data; + uint8_t curByte, matchByte; + if (p->optimumEndIndex != p->optimumCurrentIndex) + { + const COptimal *opt = &p->opt[p->optimumCurrentIndex]; + uint32_t lenRes = opt->posPrev - p->optimumCurrentIndex; + *backRes = opt->backPrev; + p->optimumCurrentIndex = opt->posPrev; + return lenRes; + } + p->optimumCurrentIndex = p->optimumEndIndex = 0; + + if (p->additionalOffset == 0) + mainLen = ReadMatchDistances(p, &numPairs); + else + { + mainLen = p->longestMatchLength; + numPairs = p->numPairs; + } + + numAvail = p->numAvail; + if (numAvail < 2) + { + *backRes = (uint32_t)(-1); + return 1; + } + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + repMaxIndex = 0; + for (i = 0; i < LZMA_NUM_REPS; i++) + { + uint32_t lenTest; + const uint8_t *data2; + reps[i] = p->reps[i]; + data2 = data - (reps[i] + 1); + if (data[0] != data2[0] || data[1] != data2[1]) + { + repLens[i] = 0; + continue; + } + for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++) + ; + repLens[i] = lenTest; + if (lenTest > repLens[repMaxIndex]) + repMaxIndex = i; + } + if (repLens[repMaxIndex] >= p->numFastBytes) + { + uint32_t lenRes; + *backRes = repMaxIndex; + lenRes = repLens[repMaxIndex]; + MovePos(p, lenRes - 1); + return lenRes; + } + + matches = p->matches; + if (mainLen >= p->numFastBytes) + { + *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; + MovePos(p, 
mainLen - 1); + return mainLen; + } + curByte = *data; + matchByte = *(data - (reps[0] + 1)); + + if (mainLen < 2 && curByte != matchByte && repLens[repMaxIndex] < 2) + { + *backRes = (uint32_t) - 1; + return 1; + } + + p->opt[0].state = (CState)p->state; + + posState = (position & p->pbMask); + + { + const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); + p->opt[1].price = + GET_PRICE_0(p->isMatch[p->state][posState]) + + (!IsCharState(p->state) + ? LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) + : LitEnc_GetPrice(probs, curByte, p->ProbPrices)); + } + + MakeAsChar(&p->opt[1]); + + matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); + repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); + + if (matchByte == curByte) + { + uint32_t shortRepPrice = repMatchPrice + GetRepLen1Price(p, p->state, posState); + if (shortRepPrice < p->opt[1].price) + { + p->opt[1].price = shortRepPrice; + MakeAsShortRep(&p->opt[1]); + } + } + lenEnd = ((mainLen >= repLens[repMaxIndex]) ? mainLen : repLens[repMaxIndex]); + + if (lenEnd < 2) + { + *backRes = p->opt[1].backPrev; + return 1; + } + + p->opt[1].posPrev = 0; + for (i = 0; i < LZMA_NUM_REPS; i++) + p->opt[0].backs[i] = reps[i]; + + len = lenEnd; + do + p->opt[len--].price = kInfinityPrice; + while (len >= 2); + + for (i = 0; i < LZMA_NUM_REPS; i++) + { + uint32_t repLen = repLens[i]; + uint32_t price; + if (repLen < 2) + continue; + price = repMatchPrice + GetPureRepPrice(p, i, p->state, posState); + do + { + uint32_t curAndLenPrice = price + p->repLenEnc.prices[posState][repLen - 2]; + COptimal *opt = &p->opt[repLen]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = 0; + opt->backPrev = i; + opt->prev1IsChar = False; + } + } while (--repLen >= 2); + } + + normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]); + + len = ((repLens[0] >= 2) ? 
repLens[0] + 1 : 2); + if (len <= mainLen) + { + uint32_t offs = 0; + while (len > matches[offs]) + offs += 2; + for (;; len++) + { + COptimal *opt; + uint32_t distance = matches[offs + 1]; + + uint32_t curAndLenPrice = + normalMatchPrice + p->lenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN]; + uint32_t lenToPosState = GetLenToPosState(len); + if (distance < kNumFullDistances) + curAndLenPrice += p->distancesPrices[lenToPosState][distance]; + else + { + uint32_t slot; + GetPosSlot2(distance, slot); + curAndLenPrice += p->alignPrices[distance & kAlignMask] + + p->posSlotPrices[lenToPosState][slot]; + } + opt = &p->opt[len]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = 0; + opt->backPrev = distance + LZMA_NUM_REPS; + opt->prev1IsChar = False; + } + if (len == matches[offs]) + { + offs += 2; + if (offs == numPairs) + break; + } + } + } + + cur = 0; + +#ifdef SHOW_STAT2 + if (position >= 0) + { + unsigned i; + printf("\n pos = %4X", position); + for (i = cur; i <= lenEnd; i++) + printf("\nprice[%4X] = %d", position - cur + i, p->opt[i].price); + } +#endif + + for (;;) + { + uint32_t numAvailFull, newLen, numPairs, posPrev, state, posState, startLen; + uint32_t curPrice, curAnd1Price, matchPrice, repMatchPrice; + Bool nextIsChar; + uint8_t curByte, matchByte; + const uint8_t *data; + COptimal *curOpt; + COptimal *nextOpt; + + cur++; + if (cur == lenEnd) + return Backward(p, backRes, cur); + + newLen = ReadMatchDistances(p, &numPairs); + if (newLen >= p->numFastBytes) + { + p->numPairs = numPairs; + p->longestMatchLength = newLen; + return Backward(p, backRes, cur); + } + position++; + curOpt = &p->opt[cur]; + posPrev = curOpt->posPrev; + if (curOpt->prev1IsChar) + { + posPrev--; + if (curOpt->prev2) + { + state = p->opt[curOpt->posPrev2].state; + if (curOpt->backPrev2 < LZMA_NUM_REPS) + state = kRepNextStates[state]; + else + state = kMatchNextStates[state]; + } + else + state = p->opt[posPrev].state; + state = 
kLiteralNextStates[state]; + } + else + state = p->opt[posPrev].state; + if (posPrev == cur - 1) + { + if (IsShortRep(curOpt)) + state = kShortRepNextStates[state]; + else + state = kLiteralNextStates[state]; + } + else + { + uint32_t pos; + const COptimal *prevOpt; + if (curOpt->prev1IsChar && curOpt->prev2) + { + posPrev = curOpt->posPrev2; + pos = curOpt->backPrev2; + state = kRepNextStates[state]; + } + else + { + pos = curOpt->backPrev; + if (pos < LZMA_NUM_REPS) + state = kRepNextStates[state]; + else + state = kMatchNextStates[state]; + } + prevOpt = &p->opt[posPrev]; + if (pos < LZMA_NUM_REPS) + { + uint32_t i; + reps[0] = prevOpt->backs[pos]; + for (i = 1; i <= pos; i++) + reps[i] = prevOpt->backs[i - 1]; + for (; i < LZMA_NUM_REPS; i++) + reps[i] = prevOpt->backs[i]; + } + else + { + uint32_t i; + reps[0] = (pos - LZMA_NUM_REPS); + for (i = 1; i < LZMA_NUM_REPS; i++) + reps[i] = prevOpt->backs[i - 1]; + } + } + curOpt->state = (CState)state; + + curOpt->backs[0] = reps[0]; + curOpt->backs[1] = reps[1]; + curOpt->backs[2] = reps[2]; + curOpt->backs[3] = reps[3]; + + curPrice = curOpt->price; + nextIsChar = False; + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + curByte = *data; + matchByte = *(data - (reps[0] + 1)); + + posState = (position & p->pbMask); + + curAnd1Price = curPrice + GET_PRICE_0(p->isMatch[state][posState]); + { + const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); + curAnd1Price += + (!IsCharState(state) + ? 
LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) + : LitEnc_GetPrice(probs, curByte, p->ProbPrices)); + } + + nextOpt = &p->opt[cur + 1]; + + if (curAnd1Price < nextOpt->price) + { + nextOpt->price = curAnd1Price; + nextOpt->posPrev = cur; + MakeAsChar(nextOpt); + nextIsChar = True; + } + + matchPrice = curPrice + GET_PRICE_1(p->isMatch[state][posState]); + repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]); + + if (matchByte == curByte && !(nextOpt->posPrev < cur && nextOpt->backPrev == 0)) + { + uint32_t shortRepPrice = repMatchPrice + GetRepLen1Price(p, state, posState); + if (shortRepPrice <= nextOpt->price) + { + nextOpt->price = shortRepPrice; + nextOpt->posPrev = cur; + MakeAsShortRep(nextOpt); + nextIsChar = True; + } + } + numAvailFull = p->numAvail; + { + uint32_t temp = kNumOpts - 1 - cur; + if (temp < numAvailFull) + numAvailFull = temp; + } + + if (numAvailFull < 2) + continue; + numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes); + + if (!nextIsChar && matchByte != curByte) /* speed optimization */ + { + /* try Literal + rep0 */ + uint32_t temp; + uint32_t lenTest2; + const uint8_t *data2 = data - (reps[0] + 1); + uint32_t limit = p->numFastBytes + 1; + if (limit > numAvailFull) + limit = numAvailFull; + + for (temp = 1; temp < limit && data[temp] == data2[temp]; temp++) + ; + lenTest2 = temp - 1; + if (lenTest2 >= 2) + { + uint32_t state2 = kLiteralNextStates[state]; + uint32_t posStateNext = (position + 1) & p->pbMask; + uint32_t nextRepMatchPrice = curAnd1Price + + GET_PRICE_1(p->isMatch[state2][posStateNext]) + + GET_PRICE_1(p->isRep[state2]); + /* for (; lenTest2 >= 2; lenTest2--) */ + { + uint32_t curAndLenPrice; + COptimal *opt; + uint32_t offset = cur + 1 + lenTest2; + while (lenEnd < offset) + p->opt[++lenEnd].price = kInfinityPrice; + curAndLenPrice = + nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); + opt = &p->opt[offset]; + if (curAndLenPrice < opt->price) + { + 
opt->price = curAndLenPrice; + opt->posPrev = cur + 1; + opt->backPrev = 0; + opt->prev1IsChar = True; + opt->prev2 = False; + } + } + } + } + + startLen = 2; /* speed optimization */ + { + uint32_t repIndex; + for (repIndex = 0; repIndex < LZMA_NUM_REPS; repIndex++) + { + uint32_t lenTest; + uint32_t lenTestTemp; + uint32_t price; + const uint8_t *data2 = data - (reps[repIndex] + 1); + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; + lenTest++) + ; + while (lenEnd < cur + lenTest) + p->opt[++lenEnd].price = kInfinityPrice; + lenTestTemp = lenTest; + price = repMatchPrice + GetPureRepPrice(p, repIndex, state, posState); + do + { + uint32_t curAndLenPrice = + price + p->repLenEnc.prices[posState][lenTest - 2]; + COptimal *opt = &p->opt[cur + lenTest]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = cur; + opt->backPrev = repIndex; + opt->prev1IsChar = False; + } + } while (--lenTest >= 2); + lenTest = lenTestTemp; + + if (repIndex == 0) + startLen = lenTest + 1; + + /* if (_maxMode) */ + { + uint32_t lenTest2 = lenTest + 1; + uint32_t limit = lenTest2 + p->numFastBytes; + uint32_t nextRepMatchPrice; + if (limit > numAvailFull) + limit = numAvailFull; + for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++) + ; + lenTest2 -= lenTest + 1; + if (lenTest2 >= 2) + { + uint32_t state2 = kRepNextStates[state]; + uint32_t posStateNext = (position + lenTest) & p->pbMask; + uint32_t curAndLenCharPrice = + price + p->repLenEnc.prices[posState][lenTest - 2] + + GET_PRICE_0(p->isMatch[state2][posStateNext]) + + LitEnc_GetPriceMatched( + LIT_PROBS(position + lenTest, data[lenTest - 1]), data[lenTest], + data2[lenTest], p->ProbPrices); + state2 = kLiteralNextStates[state2]; + posStateNext = (position + lenTest + 1) & p->pbMask; + nextRepMatchPrice = curAndLenCharPrice + + GET_PRICE_1(p->isMatch[state2][posStateNext]) + + 
GET_PRICE_1(p->isRep[state2]); + + /* for (; lenTest2 >= 2; lenTest2--) */ + { + uint32_t curAndLenPrice; + COptimal *opt; + uint32_t offset = cur + lenTest + 1 + lenTest2; + while (lenEnd < offset) + p->opt[++lenEnd].price = kInfinityPrice; + curAndLenPrice = nextRepMatchPrice + + GetRepPrice(p, 0, lenTest2, state2, posStateNext); + opt = &p->opt[offset]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = cur + lenTest + 1; + opt->backPrev = 0; + opt->prev1IsChar = True; + opt->prev2 = True; + opt->posPrev2 = cur; + opt->backPrev2 = repIndex; + } + } + } + } + } + } + /* for (uint32_t lenTest = 2; lenTest <= newLen; lenTest++) */ + if (newLen > numAvail) + { + newLen = numAvail; + for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2) + ; + matches[numPairs] = newLen; + numPairs += 2; + } + if (newLen >= startLen) + { + uint32_t normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]); + uint32_t offs, curBack, posSlot; + uint32_t lenTest; + while (lenEnd < cur + newLen) + p->opt[++lenEnd].price = kInfinityPrice; + + offs = 0; + while (startLen > matches[offs]) + offs += 2; + curBack = matches[offs + 1]; + GetPosSlot2(curBack, posSlot); + for (lenTest = /*2*/ startLen;; lenTest++) + { + uint32_t curAndLenPrice = + normalMatchPrice + p->lenEnc.prices[posState][lenTest - LZMA_MATCH_LEN_MIN]; + uint32_t lenToPosState = GetLenToPosState(lenTest); + COptimal *opt; + if (curBack < kNumFullDistances) + curAndLenPrice += p->distancesPrices[lenToPosState][curBack]; + else + curAndLenPrice += p->posSlotPrices[lenToPosState][posSlot] + + p->alignPrices[curBack & kAlignMask]; + + opt = &p->opt[cur + lenTest]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = cur; + opt->backPrev = curBack + LZMA_NUM_REPS; + opt->prev1IsChar = False; + } + + if (/*_maxMode && */ lenTest == matches[offs]) + { + /* Try Match + Literal + Rep0 */ + const uint8_t *data2 = data - (curBack + 1); + uint32_t lenTest2 = 
lenTest + 1; + uint32_t limit = lenTest2 + p->numFastBytes; + uint32_t nextRepMatchPrice; + if (limit > numAvailFull) + limit = numAvailFull; + for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++) + ; + lenTest2 -= lenTest + 1; + if (lenTest2 >= 2) + { + uint32_t state2 = kMatchNextStates[state]; + uint32_t posStateNext = (position + lenTest) & p->pbMask; + uint32_t curAndLenCharPrice = + curAndLenPrice + GET_PRICE_0(p->isMatch[state2][posStateNext]) + + LitEnc_GetPriceMatched( + LIT_PROBS(position + lenTest, data[lenTest - 1]), data[lenTest], + data2[lenTest], p->ProbPrices); + state2 = kLiteralNextStates[state2]; + posStateNext = (posStateNext + 1) & p->pbMask; + nextRepMatchPrice = curAndLenCharPrice + + GET_PRICE_1(p->isMatch[state2][posStateNext]) + + GET_PRICE_1(p->isRep[state2]); + + /* for (; lenTest2 >= 2; lenTest2--) */ + { + uint32_t offset = cur + lenTest + 1 + lenTest2; + uint32_t curAndLenPrice; + COptimal *opt; + while (lenEnd < offset) + p->opt[++lenEnd].price = kInfinityPrice; + curAndLenPrice = nextRepMatchPrice + + GetRepPrice(p, 0, lenTest2, state2, posStateNext); + opt = &p->opt[offset]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = cur + lenTest + 1; + opt->backPrev = 0; + opt->prev1IsChar = True; + opt->prev2 = True; + opt->posPrev2 = cur; + opt->backPrev2 = curBack + LZMA_NUM_REPS; + } + } + } + offs += 2; + if (offs == numPairs) + break; + curBack = matches[offs + 1]; + if (curBack >= kNumFullDistances) + GetPosSlot2(curBack, posSlot); + } + } + } + } +} + +#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist)) + +static uint32_t GetOptimumFast(CLzmaEnc *p, uint32_t *backRes) +{ + uint32_t numAvail, mainLen, mainDist, numPairs, repIndex, repLen, i; + const uint8_t *data; + const uint32_t *matches; + + if (p->additionalOffset == 0) + mainLen = ReadMatchDistances(p, &numPairs); + else + { + mainLen = p->longestMatchLength; + numPairs = p->numPairs; + } + + numAvail 
= p->numAvail; + *backRes = (uint32_t) - 1; + if (numAvail < 2) + return 1; + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + + repLen = repIndex = 0; + for (i = 0; i < LZMA_NUM_REPS; i++) + { + uint32_t len; + const uint8_t *data2 = data - (p->reps[i] + 1); + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + for (len = 2; len < numAvail && data[len] == data2[len]; len++) + ; + if (len >= p->numFastBytes) + { + *backRes = i; + MovePos(p, len - 1); + return len; + } + if (len > repLen) + { + repIndex = i; + repLen = len; + } + } + + matches = p->matches; + if (mainLen >= p->numFastBytes) + { + *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; + MovePos(p, mainLen - 1); + return mainLen; + } + + mainDist = 0; /* for GCC */ + if (mainLen >= 2) + { + mainDist = matches[numPairs - 1]; + while (numPairs > 2 && mainLen == matches[numPairs - 4] + 1) + { + if (!ChangePair(matches[numPairs - 3], mainDist)) + break; + numPairs -= 2; + mainLen = matches[numPairs - 2]; + mainDist = matches[numPairs - 1]; + } + if (mainLen == 2 && mainDist >= 0x80) + mainLen = 1; + } + + if (repLen >= 2 && + ((repLen + 1 >= mainLen) || (repLen + 2 >= mainLen && mainDist >= (1 << 9)) || + (repLen + 3 >= mainLen && mainDist >= (1 << 15)))) + { + *backRes = repIndex; + MovePos(p, repLen - 1); + return repLen; + } + + if (mainLen < 2 || numAvail <= 2) + return 1; + + p->longestMatchLength = ReadMatchDistances(p, &p->numPairs); + if (p->longestMatchLength >= 2) + { + uint32_t newDistance = matches[p->numPairs - 1]; + if ((p->longestMatchLength >= mainLen && newDistance < mainDist) || + (p->longestMatchLength == mainLen + 1 && !ChangePair(mainDist, newDistance)) || + (p->longestMatchLength > mainLen + 1) || + (p->longestMatchLength + 1 >= mainLen && mainLen >= 3 && + ChangePair(newDistance, mainDist))) + return 1; + } + + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + for (i 
= 0; i < LZMA_NUM_REPS; i++) + { + uint32_t len, limit; + const uint8_t *data2 = data - (p->reps[i] + 1); + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + limit = mainLen - 1; + for (len = 2; len < limit && data[len] == data2[len]; len++) + ; + if (len >= limit) + return 1; + } + *backRes = mainDist + LZMA_NUM_REPS; + MovePos(p, mainLen - 2); + return mainLen; +} + +static void WriteEndMarker(CLzmaEnc *p, uint32_t posState) +{ + uint32_t len; + RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); + RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); + p->state = kMatchNextStates[p->state]; + len = LZMA_MATCH_LEN_MIN; + LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, + p->ProbPrices); + RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, + (1 << kNumPosSlotBits) - 1); + RangeEnc_EncodeDirectBits(&p->rc, (((uint32_t)1 << 30) - 1) >> kNumAlignBits, + 30 - kNumAlignBits); + RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); +} + +static SRes CheckErrors(CLzmaEnc *p) +{ + if (p->result != SZ_OK) + return p->result; + if (p->rc.res != SZ_OK) + p->result = SZ_ERROR_WRITE; + if (p->matchFinderBase.result != SZ_OK) + p->result = SZ_ERROR_READ; + if (p->result != SZ_OK) + p->finished = True; + return p->result; +} + +static SRes Flush(CLzmaEnc *p, uint32_t nowPos) +{ + /* ReleaseMFStream(); */ + p->finished = True; + if (p->writeEndMark) + WriteEndMarker(p, nowPos & p->pbMask); + RangeEnc_FlushData(&p->rc); + RangeEnc_FlushStream(&p->rc); + return CheckErrors(p); +} + +static void FillAlignPrices(CLzmaEnc *p) +{ + uint32_t i; + for (i = 0; i < kAlignTableSize; i++) + p->alignPrices[i] = + RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); + p->alignPriceCount = 0; +} + +static void FillDistancesPrices(CLzmaEnc *p) +{ + uint32_t tempPrices[kNumFullDistances]; + uint32_t i, lenToPosState; + for (i = kStartPosModelIndex; i < 
kNumFullDistances; i++) + { + uint32_t posSlot = GetPosSlot1(i); + uint32_t footerBits = ((posSlot >> 1) - 1); + uint32_t base = ((2 | (posSlot & 1)) << footerBits); + tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base - posSlot - 1, footerBits, + i - base, p->ProbPrices); + } + + for (lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++) + { + uint32_t posSlot; + const CLzmaProb *encoder = p->posSlotEncoder[lenToPosState]; + uint32_t *posSlotPrices = p->posSlotPrices[lenToPosState]; + for (posSlot = 0; posSlot < p->distTableSize; posSlot++) + posSlotPrices[posSlot] = + RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices); + for (posSlot = kEndPosModelIndex; posSlot < p->distTableSize; posSlot++) + posSlotPrices[posSlot] += + ((((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits); + + { + uint32_t *distancesPrices = p->distancesPrices[lenToPosState]; + uint32_t i; + for (i = 0; i < kStartPosModelIndex; i++) + distancesPrices[i] = posSlotPrices[i]; + for (; i < kNumFullDistances; i++) + distancesPrices[i] = posSlotPrices[GetPosSlot1(i)] + tempPrices[i]; + } + } + p->matchPriceCount = 0; +} + +void LzmaEnc_Construct(CLzmaEnc *p) +{ + RangeEnc_Construct(&p->rc); + MatchFinder_Construct(&p->matchFinderBase); +#ifdef COMPRESS_MF_MT + MatchFinderMt_Construct(&p->matchFinderMt); + p->matchFinderMt.MatchFinder = &p->matchFinderBase; +#endif + + { + CLzmaEncProps props; + LzmaEncProps_Init(&props); + LzmaEnc_SetProps(p, &props); + } + +#ifndef LZMA_LOG_BSR + LzmaEnc_FastPosInit(p->g_FastPos); +#endif + + LzmaEnc_InitPriceTables(p->ProbPrices); + p->litProbs = 0; + p->saveState.litProbs = 0; +} + +CLzmaEncHandle LzmaEnc_Create() +{ + void *p; + p = malloc(sizeof(CLzmaEnc)); + if (p != 0) + LzmaEnc_Construct((CLzmaEnc *)p); + return p; +} + +void LzmaEnc_FreeLits(CLzmaEnc *p) +{ + free(p->litProbs); + free(p->saveState.litProbs); + p->litProbs = 0; + p->saveState.litProbs = 0; +} + +void LzmaEnc_Destruct(CLzmaEnc *p) +{ 
+#ifdef COMPRESS_MF_MT + MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); +#endif + MatchFinder_Free(&p->matchFinderBase); + LzmaEnc_FreeLits(p); + RangeEnc_Free(&p->rc); +} + +void LzmaEnc_Destroy(CLzmaEncHandle p) +{ + LzmaEnc_Destruct((CLzmaEnc *)p); + free(p); +} + +static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, uint32_t maxPackSize, + uint32_t maxUnpackSize) +{ + uint32_t nowPos32, startPos32; + if (p->inStream != 0) + { + p->matchFinderBase.stream = p->inStream; + p->matchFinder.Init(p->matchFinderObj); + p->inStream = 0; + } + + if (p->finished) + return p->result; + RINOK(CheckErrors(p)); + + nowPos32 = (uint32_t)p->nowPos64; + startPos32 = nowPos32; + + if (p->nowPos64 == 0) + { + uint32_t numPairs; + uint8_t curByte; + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) + return Flush(p, nowPos32); + ReadMatchDistances(p, &numPairs); + RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][0], 0); + p->state = kLiteralNextStates[p->state]; + curByte = p->matchFinder.GetIndexByte(p->matchFinderObj, 0 - p->additionalOffset); + LitEnc_Encode(&p->rc, p->litProbs, curByte); + p->additionalOffset--; + nowPos32++; + } + + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0) + for (;;) + { + uint32_t pos, len, posState; + + if (p->fastMode) + len = GetOptimumFast(p, &pos); + else + len = GetOptimum(p, nowPos32, &pos); + +#ifdef SHOW_STAT2 + printf("\n pos = %4X, len = %d pos = %d", nowPos32, len, pos); +#endif + + posState = nowPos32 & p->pbMask; + if (len == 1 && pos == (uint32_t) - 1) + { + uint8_t curByte; + CLzmaProb *probs; + const uint8_t *data; + + RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 0); + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - + p->additionalOffset; + curByte = *data; + probs = LIT_PROBS(nowPos32, *(data - 1)); + if (IsCharState(p->state)) + LitEnc_Encode(&p->rc, probs, curByte); + else + LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0] - 1)); + 
p->state = kLiteralNextStates[p->state]; + } + else + { + RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); + if (pos < LZMA_NUM_REPS) + { + RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 1); + if (pos == 0) + { + RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 0); + RangeEnc_EncodeBit(&p->rc, &p->isRep0Long[p->state][posState], + ((len == 1) ? 0 : 1)); + } + else + { + uint32_t distance = p->reps[pos]; + RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 1); + if (pos == 1) + RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 0); + else + { + RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 1); + RangeEnc_EncodeBit(&p->rc, &p->isRepG2[p->state], pos - 2); + if (pos == 3) + p->reps[3] = p->reps[2]; + p->reps[2] = p->reps[1]; + } + p->reps[1] = p->reps[0]; + p->reps[0] = distance; + } + if (len == 1) + p->state = kShortRepNextStates[p->state]; + else + { + LenEnc_Encode2(&p->repLenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, + posState, !p->fastMode, p->ProbPrices); + p->state = kRepNextStates[p->state]; + } + } + else + { + uint32_t posSlot; + RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); + p->state = kMatchNextStates[p->state]; + LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, + !p->fastMode, p->ProbPrices); + pos -= LZMA_NUM_REPS; + GetPosSlot(pos, posSlot); + RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], + kNumPosSlotBits, posSlot); + + if (posSlot >= kStartPosModelIndex) + { + uint32_t footerBits = ((posSlot >> 1) - 1); + uint32_t base = ((2 | (posSlot & 1)) << footerBits); + uint32_t posReduced = pos - base; + + if (posSlot < kEndPosModelIndex) + RcTree_ReverseEncode(&p->rc, p->posEncoders + base - posSlot - 1, + footerBits, posReduced); + else + { + RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, + footerBits - kNumAlignBits); + RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, + posReduced & kAlignMask); + p->alignPriceCount++; + } + } + p->reps[3] = p->reps[2]; + 
p->reps[2] = p->reps[1]; + p->reps[1] = p->reps[0]; + p->reps[0] = pos; + p->matchPriceCount++; + } + } + p->additionalOffset -= len; + nowPos32 += len; + if (p->additionalOffset == 0) + { + uint32_t processed; + if (!p->fastMode) + { + if (p->matchPriceCount >= (1 << 7)) + FillDistancesPrices(p); + if (p->alignPriceCount >= kAlignTableSize) + FillAlignPrices(p); + } + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) + break; + processed = nowPos32 - startPos32; + if (useLimits) + { + if (processed + kNumOpts + 300 >= maxUnpackSize || + RangeEnc_GetProcessed(&p->rc) + kNumOpts * 2 >= maxPackSize) + break; + } + else if (processed >= (1 << 15)) + { + p->nowPos64 += nowPos32 - startPos32; + return CheckErrors(p); + } + } + } + p->nowPos64 += nowPos32 - startPos32; + return Flush(p, nowPos32); +} + +#define kBigHashDicLimit ((uint32_t)1 << 24) + +static SRes LzmaEnc_Alloc(CLzmaEnc *p, uint32_t keepWindowSize) +{ + uint32_t beforeSize = kNumOpts; + Bool btMode; + if (!RangeEnc_Alloc(&p->rc)) + return SZ_ERROR_MEM; + btMode = (p->matchFinderBase.btMode != 0); +#ifdef COMPRESS_MF_MT + p->mtMode = (p->multiThread && !p->fastMode && btMode); +#endif + + { + unsigned lclp = p->lc + p->lp; + if (p->litProbs == 0 || p->saveState.litProbs == 0 || p->lclp != lclp) + { + LzmaEnc_FreeLits(p); + p->litProbs = (CLzmaProb *)malloc((0x300 << lclp) * sizeof(CLzmaProb)); + p->saveState.litProbs = (CLzmaProb *)malloc((0x300 << lclp) * sizeof(CLzmaProb)); + if (p->litProbs == 0 || p->saveState.litProbs == 0) + { + LzmaEnc_FreeLits(p); + return SZ_ERROR_MEM; + } + p->lclp = lclp; + } + } + + p->matchFinderBase.bigHash = (p->dictSize > kBigHashDicLimit); + + if (beforeSize + p->dictSize < keepWindowSize) + beforeSize = keepWindowSize - p->dictSize; + +#ifdef COMPRESS_MF_MT + if (p->mtMode) + { + RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, + LZMA_MATCH_LEN_MAX)); + p->matchFinderObj = &p->matchFinderMt; + 
MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); + } + else +#endif + { + if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, + LZMA_MATCH_LEN_MAX)) + return SZ_ERROR_MEM; + p->matchFinderObj = &p->matchFinderBase; + MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder); + } + return SZ_OK; +} + +void LzmaEnc_Init(CLzmaEnc *p) +{ + uint32_t i; + p->state = 0; + for (i = 0; i < LZMA_NUM_REPS; i++) + p->reps[i] = 0; + + RangeEnc_Init(&p->rc); + + for (i = 0; i < kNumStates; i++) + { + uint32_t j; + for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++) + { + p->isMatch[i][j] = kProbInitValue; + p->isRep0Long[i][j] = kProbInitValue; + } + p->isRep[i] = kProbInitValue; + p->isRepG0[i] = kProbInitValue; + p->isRepG1[i] = kProbInitValue; + p->isRepG2[i] = kProbInitValue; + } + + { + uint32_t num = 0x300 << (p->lp + p->lc); + for (i = 0; i < num; i++) + p->litProbs[i] = kProbInitValue; + } + + { + for (i = 0; i < kNumLenToPosStates; i++) + { + CLzmaProb *probs = p->posSlotEncoder[i]; + uint32_t j; + for (j = 0; j < (1 << kNumPosSlotBits); j++) + probs[j] = kProbInitValue; + } + } + { + for (i = 0; i < kNumFullDistances - kEndPosModelIndex; i++) + p->posEncoders[i] = kProbInitValue; + } + + LenEnc_Init(&p->lenEnc.p); + LenEnc_Init(&p->repLenEnc.p); + + for (i = 0; i < (1 << kNumAlignBits); i++) + p->posAlignEncoder[i] = kProbInitValue; + + p->optimumEndIndex = 0; + p->optimumCurrentIndex = 0; + p->additionalOffset = 0; + + p->pbMask = (1 << p->pb) - 1; + p->lpMask = (1 << p->lp) - 1; +} + +void LzmaEnc_InitPrices(CLzmaEnc *p) +{ + if (!p->fastMode) + { + FillDistancesPrices(p); + FillAlignPrices(p); + } + + p->lenEnc.tableSize = p->repLenEnc.tableSize = p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN; + LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, p->ProbPrices); +} + +static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, uint32_t keepWindowSize) +{ + uint32_t i; + 
for (i = 0; i < (uint32_t)kDicLogSizeMaxCompress; i++) + if (p->dictSize <= ((uint32_t)1 << i)) + break; + p->distTableSize = i * 2; + + p->finished = False; + p->result = SZ_OK; + RINOK(LzmaEnc_Alloc(p, keepWindowSize)); + LzmaEnc_Init(p); + LzmaEnc_InitPrices(p); + p->nowPos64 = 0; + return SZ_OK; +} + +static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqInStream *inStream, ISeqOutStream *outStream) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + p->inStream = inStream; + p->rc.outStream = outStream; + return LzmaEnc_AllocAndInit(p, 0); +} + +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, uint32_t keepWindowSize) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + p->inStream = inStream; + return LzmaEnc_AllocAndInit(p, keepWindowSize); +} + +static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const uint8_t *src, size_t srcLen) +{ + p->seqBufInStream.funcTable.Read = MyRead; + p->seqBufInStream.data = src; + p->seqBufInStream.rem = srcLen; +} + +SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const uint8_t *src, size_t srcLen, + uint32_t keepWindowSize) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + LzmaEnc_SetInputBuf(p, src, srcLen); + p->inStream = &p->seqBufInStream.funcTable; + return LzmaEnc_AllocAndInit(p, keepWindowSize); +} + +void LzmaEnc_Finish(CLzmaEncHandle pp) +{ +#ifdef COMPRESS_MF_MT + CLzmaEnc *p = (CLzmaEnc *)pp; + if (p->mtMode) + MatchFinderMt_ReleaseStream(&p->matchFinderMt); +#else + pp = pp; +#endif +} + +typedef struct _CSeqOutStreamBuf +{ + ISeqOutStream funcTable; + uint8_t *data; + size_t rem; + Bool overflow; +} CSeqOutStreamBuf; + +static size_t MyWrite(void *pp, const void *data, size_t size) +{ + CSeqOutStreamBuf *p = (CSeqOutStreamBuf *)pp; + if (p->rem < size) + { + size = p->rem; + p->overflow = True; + } + memcpy(p->data, data, size); + p->rem -= size; + p->data += size; + return size; +} + +uint32_t LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) +{ + const CLzmaEnc *p = (CLzmaEnc *)pp; + return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); 
+} + +const uint8_t *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) +{ + const CLzmaEnc *p = (CLzmaEnc *)pp; + return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; +} + +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit, uint8_t *dest, size_t *destLen, + uint32_t desiredPackSize, uint32_t *unpackSize) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + uint64_t nowPos64; + SRes res; + CSeqOutStreamBuf outStream; + + outStream.funcTable.Write = MyWrite; + outStream.data = dest; + outStream.rem = *destLen; + outStream.overflow = False; + + p->writeEndMark = False; + p->finished = False; + p->result = SZ_OK; + + if (reInit) + LzmaEnc_Init(p); + LzmaEnc_InitPrices(p); + nowPos64 = p->nowPos64; + RangeEnc_Init(&p->rc); + p->rc.outStream = &outStream.funcTable; + + res = LzmaEnc_CodeOneBlock(p, True, desiredPackSize, *unpackSize); + + *unpackSize = (uint32_t)(p->nowPos64 - nowPos64); + *destLen -= outStream.rem; + if (outStream.overflow) + return SZ_ERROR_OUTPUT_EOF; + + return res; +} + +SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, + ICompressProgress *progress) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + SRes res = SZ_OK; + +#ifdef COMPRESS_MF_MT + Byte allocaDummy[0x300]; + int i = 0; + for (i = 0; i < 16; i++) + allocaDummy[i] = (Byte)i; +#endif + + RINOK(LzmaEnc_Prepare(pp, inStream, outStream)); + + for (;;) + { + res = LzmaEnc_CodeOneBlock(p, False, 0, 0); + if (res != SZ_OK || p->finished != 0) + break; + if (progress != 0) + { + res = progress->Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc)); + if (res != SZ_OK) + { + res = SZ_ERROR_PROGRESS; + break; + } + } + } + LzmaEnc_Finish(pp); + return res; +} + +SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, uint8_t *props, size_t *size) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + int i; + uint32_t dictSize = p->dictSize; + if (*size < LZMA_PROPS_SIZE) + return SZ_ERROR_PARAM; + *size = LZMA_PROPS_SIZE; + props[0] = (uint8_t)((p->pb * 5 + p->lp) * 9 + 
p->lc); + + for (i = 11; i <= 30; i++) + { + if (dictSize <= ((uint32_t)2 << i)) + { + dictSize = (2 << i); + break; + } + if (dictSize <= ((uint32_t)3 << i)) + { + dictSize = (3 << i); + break; + } + } + + for (i = 0; i < 4; i++) + props[1 + i] = (uint8_t)(dictSize >> (8 * i)); + return SZ_OK; +} + +SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, uint8_t *dest, size_t *destLen, const uint8_t *src, + size_t srcLen, int writeEndMark, ICompressProgress *progress) +{ + SRes res; + CLzmaEnc *p = (CLzmaEnc *)pp; + + CSeqOutStreamBuf outStream; + + LzmaEnc_SetInputBuf(p, src, srcLen); + + outStream.funcTable.Write = MyWrite; + outStream.data = dest; + outStream.rem = *destLen; + outStream.overflow = False; + + p->writeEndMark = writeEndMark; + res = LzmaEnc_Encode(pp, &outStream.funcTable, &p->seqBufInStream.funcTable, progress); + + *destLen -= outStream.rem; + if (outStream.overflow) + return SZ_ERROR_OUTPUT_EOF; + return res; +} + +SRes LzmaEncode(uint8_t *dest, size_t *destLen, const uint8_t *src, size_t srcLen, + const CLzmaEncProps *props, uint8_t *propsEncoded, size_t *propsSize, + int writeEndMark, ICompressProgress *progress) +{ + CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(); + SRes res; + if (p == 0) + return SZ_ERROR_MEM; + + res = LzmaEnc_SetProps(p, props); + if (res == SZ_OK) + { + res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize); + if (res == SZ_OK) + res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen, writeEndMark, progress); + } + + LzmaEnc_Destroy(p); + return res; +} diff --git a/depends/lzma/pavlov/LzmaEnc.h b/depends/lzma/pavlov/LzmaEnc.h new file mode 100755 index 00000000..961436e4 --- /dev/null +++ b/depends/lzma/pavlov/LzmaEnc.h @@ -0,0 +1,71 @@ +/* LzmaEnc.h -- LZMA Encoder +2008-10-04 : Igor Pavlov : Public domain */ + +#ifndef __LZMAENC_H +#define __LZMAENC_H + +#include "Types.h" + +#define LZMA_PROPS_SIZE 5 + +typedef struct _CLzmaEncProps +{ + int level; /* 0 <= level <= 9 */ + uint32_t dictSize; /* (1 << 12) <= dictSize <= (1 << 27) 
for 32-bit version + (1 << 12) <= dictSize <= (1 << 30) for 64-bit version + default = (1 << 24) */ + int lc; /* 0 <= lc <= 8, default = 3 */ + int lp; /* 0 <= lp <= 4, default = 0 */ + int pb; /* 0 <= pb <= 4, default = 2 */ + int algo; /* 0 - fast, 1 - normal, default = 1 */ + int fb; /* 5 <= fb <= 273, default = 32 */ + int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ + int numHashBytes; /* 2, 3 or 4, default = 4 */ + uint32_t mc; /* 1 <= mc <= (1 << 30), default = 32 */ + unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ + int numThreads; /* 1 or 2, default = 2 */ +} CLzmaEncProps; + +void LzmaEncProps_Init(CLzmaEncProps *p); +void LzmaEncProps_Normalize(CLzmaEncProps *p); +uint32_t LzmaEncProps_GetDictSize(const CLzmaEncProps *props2); + +/* ---------- CLzmaEncHandle Interface ---------- */ + +/* LzmaEnc_* functions can return the following exit codes: +Returns: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect parameter in props + SZ_ERROR_WRITE - Write callback error. 
+ SZ_ERROR_PROGRESS - some break from progress callback + SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) +*/ + +typedef void *CLzmaEncHandle; + +CLzmaEncHandle LzmaEnc_Create(); +void LzmaEnc_Destroy(CLzmaEncHandle p); +SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props); +SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, uint8_t *properties, size_t *size); +SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, + ICompressProgress *progress); +SRes LzmaEnc_MemEncode(CLzmaEncHandle p, uint8_t *dest, size_t *destLen, const uint8_t *src, + size_t srcLen, int writeEndMark, ICompressProgress *progress); + +/* ---------- One Call Interface ---------- */ + +/* LzmaEncode +Return code: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect parameter + SZ_ERROR_OUTPUT_EOF - output buffer overflow + SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) +*/ + +SRes LzmaEncode(uint8_t *dest, size_t *destLen, const uint8_t *src, size_t srcLen, + const CLzmaEncProps *props, uint8_t *propsEncoded, size_t *propsSize, + int writeEndMark, ICompressProgress *progress); + +#endif diff --git a/depends/lzma/pavlov/LzmaLib.c b/depends/lzma/pavlov/LzmaLib.c new file mode 100755 index 00000000..6759d69b --- /dev/null +++ b/depends/lzma/pavlov/LzmaLib.c @@ -0,0 +1,41 @@ +/* LzmaLib.c -- LZMA library wrapper +2008-08-05 +Igor Pavlov +Public domain */ + +#include "LzmaEnc.h" +#include "LzmaDec.h" +#include "LzmaLib.h" + +MY_STDAPI +LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, + unsigned char *outProps, size_t *outPropsSize, + int level, /* 0 <= level <= 9, default = 5 */ + unsigned dictSize, /* use (1 << N) or (3 << N). 
4 KB < dictSize <= 128 MB */ + int lc, /* 0 <= lc <= 8, default = 3 */ + int lp, /* 0 <= lp <= 4, default = 0 */ + int pb, /* 0 <= pb <= 4, default = 2 */ + int fb, /* 5 <= fb <= 273, default = 32 */ + int numThreads /* 1 or 2, default = 2 */ + ) +{ + CLzmaEncProps props; + LzmaEncProps_Init(&props); + props.level = level; + props.dictSize = dictSize; + props.lc = lc; + props.lp = lp; + props.pb = pb; + props.fb = fb; + props.numThreads = numThreads; + + return LzmaEncode(dest, destLen, src, srcLen, &props, outProps, outPropsSize, 0, NULL); +} + +MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, + size_t *srcLen, const unsigned char *props, size_t propsSize) +{ + ELzmaStatus status; + return LzmaDecode(dest, destLen, src, srcLen, props, (unsigned)propsSize, LZMA_FINISH_ANY, + &status); +} diff --git a/depends/lzma/pavlov/LzmaLib.h b/depends/lzma/pavlov/LzmaLib.h new file mode 100755 index 00000000..804329d1 --- /dev/null +++ b/depends/lzma/pavlov/LzmaLib.h @@ -0,0 +1,137 @@ +/* LzmaLib.h -- LZMA library interface +2008-08-05 +Igor Pavlov +Public domain */ + +#ifndef __LZMALIB_H +#define __LZMALIB_H + +#include "Types.h" + +#ifdef __cplusplus +#define MY_EXTERN_C extern "C" +#else +#define MY_EXTERN_C extern +#endif + +#define MY_STDAPI MY_EXTERN_C int MY_STD_CALL + +#define LZMA_PROPS_SIZE 5 + +/* +RAM requirements for LZMA: + for compression: (dictSize * 11.5 + 6 MB) + state_size + for decompression: dictSize + state_size + state_size = (4 + (1.5 << (lc + lp))) KB + by default (lc=3, lp=0), state_size = 16 KB. + +LZMA properties (5 bytes) format + Offset Size Description + 0 1 lc, lp and pb in encoded form. + 1 4 dictSize (little endian). +*/ + +/* +LzmaCompress +------------ + +outPropsSize - + In: the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5. + Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = +LZMA_PROPS_SIZE = 5. 
+ + LZMA Encoder will use default values for any parameter, if it is + -1 for any from: level, lc, lp, pb, fb, numThreads + 0 for dictSize + +level - compression level: 0 <= level <= 9; + + level dictSize algo fb + 0: 16 KB 0 32 + 1: 64 KB 0 32 + 2: 256 KB 0 32 + 3: 1 MB 0 32 + 4: 4 MB 0 32 + 5: 16 MB 1 32 + 6: 32 MB 1 32 + 7+: 64 MB 1 64 + + The default value for "level" is 5. + + algo = 0 means fast method + algo = 1 means normal method + +dictSize - The dictionary size in bytes. The maximum value is + 128 MB = (1 << 27) bytes for 32-bit version + 1 GB = (1 << 30) bytes for 64-bit version + The default value is 16 MB = (1 << 24) bytes. + It's recommended to use the dictionary that is larger than 4 KB and + that can be calculated as (1 << N) or (3 << N) sizes. + +lc - The number of literal context bits (high bits of previous literal). + It can be in the range from 0 to 8. The default value is 3. + Sometimes lc=4 gives the gain for big files. + +lp - The number of literal pos bits (low bits of current position for literals). + It can be in the range from 0 to 4. The default value is 0. + The lp switch is intended for periodical data when the period is equal to 2^lp. + For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's + better to set lc=0, if you change lp switch. + +pb - The number of pos bits (low bits of current position). + It can be in the range from 0 to 4. The default value is 2. + The pb switch is intended for periodical data when the period is equal to 2^pb. + +fb - Word size (the number of fast bytes). + It can be in the range from 5 to 273. The default value is 32. + Usually, a big number gives a little bit better compression ratio and + slower compression process. + +numThreads - The number of threads. 1 or 2. The default value is 2. + Fast mode (algo = 0) can use only 1 thread. 
+ +Out: + destLen - processed output size +Returns: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect parameter + SZ_ERROR_OUTPUT_EOF - output buffer overflow + SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) +*/ + +MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, + size_t srcLen, unsigned char *outProps, + size_t *outPropsSize, /* *outPropsSize must be = 5 */ + int level, /* 0 <= level <= 9, default = 5 */ + unsigned dictSize, /* default = (1 << 24) */ + int lc, /* 0 <= lc <= 8, default = 3 */ + int lp, /* 0 <= lp <= 4, default = 0 */ + int pb, /* 0 <= pb <= 4, default = 2 */ + int fb, /* 5 <= fb <= 273, default = 32 */ + int numThreads /* 1 or 2, default = 2 */ + ); + +/* +LzmaUncompress +-------------- +In: + dest - output data + destLen - output data size + src - input data + srcLen - input data size +Out: + destLen - processed output size + srcLen - processed input size +Returns: + SZ_OK - OK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src) +*/ + +MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, + size_t *srcLen, const unsigned char *props, size_t propsSize); + +#endif diff --git a/depends/lzma/pavlov/Types.h b/depends/lzma/pavlov/Types.h new file mode 100755 index 00000000..e75bcb4a --- /dev/null +++ b/depends/lzma/pavlov/Types.h @@ -0,0 +1,87 @@ +/* Types.h -- Basic types +2008-11-23 : Igor Pavlov : Public domain */ + +#pragma once + +#include +#include + +#ifdef _WIN32 +#include +#endif + +#define SZ_OK 0 + +#define SZ_ERROR_DATA 1 +#define SZ_ERROR_MEM 2 +#define SZ_ERROR_CRC 3 +#define SZ_ERROR_UNSUPPORTED 4 +#define SZ_ERROR_PARAM 5 +#define SZ_ERROR_INPUT_EOF 6 +#define SZ_ERROR_OUTPUT_EOF 7 +#define SZ_ERROR_READ 8 +#define SZ_ERROR_WRITE 9 +#define SZ_ERROR_PROGRESS 10 +#define
SZ_ERROR_FAIL 11 +#define SZ_ERROR_THREAD 12 + +#define SZ_ERROR_ARCHIVE 16 +#define SZ_ERROR_NO_ARCHIVE 17 + +typedef int SRes; + +#ifndef RINOK +#define RINOK(x) \ + { \ + int __result__ = (x); \ + if (__result__ != 0) \ + return __result__; \ + } +#endif + +typedef int Bool; +#define True 1 +#define False 0 + +#ifdef _MSC_VER + +#if _MSC_VER >= 1300 +#define MY_NO_INLINE __declspec(noinline) +#else +#define MY_NO_INLINE +#endif + +#define MY_CDECL __cdecl +#define MY_STD_CALL __stdcall +#define MY_FAST_CALL MY_NO_INLINE __fastcall + +#else + +#define MY_CDECL +#define MY_STD_CALL +#define MY_FAST_CALL + +#endif + +/* The following interfaces use first parameter as pointer to structure */ + +typedef struct +{ + SRes (*Read)(void *p, void *buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) < input(*size)) is allowed */ +} ISeqInStream; + +typedef struct +{ + size_t (*Write)(void *p, const void *buf, size_t size); + /* Returns: result - the number of actually written bytes. + (result < size) means error */ +} ISeqOutStream; + +typedef struct +{ + SRes (*Progress)(void *p, uint64_t inSize, uint64_t outSize); + /* Returns: result. (result != SZ_OK) means break. + Value (uint64_t)(int64_t)-1 for size means unknown value. */ +} ICompressProgress; diff --git a/depends/lzma/wrapper/common_internal.c b/depends/lzma/wrapper/common_internal.c new file mode 100644 index 00000000..c9213ef4 --- /dev/null +++ b/depends/lzma/wrapper/common_internal.c @@ -0,0 +1,46 @@ +/* + * Written in 2009 by Lloyd Hilaiel + * + * License + * + * All the cruft you find here is public domain. You don't have to credit + * anyone to use this code, but my personal request is that you mention + * Igor Pavlov for his hard, high quality work. 
+ */ + +#include "common_internal.h" + +/* Allocation hook for Igor's code: use the client's malloc callback (with its + * context) when one was registered, otherwise fall back to malloc(). */ +static void *elzmaAlloc(void *p, size_t size) +{ + struct elzma_alloc_struct *as = (struct elzma_alloc_struct *)p; + if (as->clientMallocFunc) + { + return as->clientMallocFunc(as->clientMallocContext, size); + } + return malloc(size); +} + +/* Release hook for Igor's code: use the client's free callback when one was + * registered, otherwise fall back to free(). */ +static void elzmaFree(void *p, void *address) +{ + struct elzma_alloc_struct *as = (struct elzma_alloc_struct *)p; + if (as->clientFreeFunc) + { + /* the free callback gets the context registered for it, not the malloc one */ + as->clientFreeFunc(as->clientFreeContext, address); + } + else + { + free(address); + } +} + +void init_alloc_struct(struct elzma_alloc_struct *as, elzma_malloc clientMallocFunc, + void *clientMallocContext, elzma_free clientFreeFunc, + void *clientFreeContext) +{ + as->Alloc = elzmaAlloc; + as->Free = elzmaFree; + as->clientMallocFunc = clientMallocFunc; + as->clientMallocContext = clientMallocContext; + as->clientFreeFunc = clientFreeFunc; + as->clientFreeContext = clientFreeContext; +} diff --git a/depends/lzma/wrapper/common_internal.h b/depends/lzma/wrapper/common_internal.h new file mode 100644 index 00000000..2c46fadf --- /dev/null +++ b/depends/lzma/wrapper/common_internal.h @@ -0,0 +1,60 @@ +#ifndef __ELZMA_COMMON_INTERNAL_H__ +#define __ELZMA_COMMON_INTERNAL_H__ + +#include "common.h" + +/** a structure which may be cast and passed into Igor's allocate + * routines */ +struct elzma_alloc_struct +{ + void *(*Alloc)(void *p, size_t size); + void (*Free)(void *p, void *address); /* address can be 0 */ + + elzma_malloc clientMallocFunc; + void *clientMallocContext; + + elzma_free clientFreeFunc; + void *clientFreeContext; +}; + +/* initialize an allocation structure, may be called safely multiple + * times */ +void init_alloc_struct(struct elzma_alloc_struct *allocStruct, elzma_malloc clientMallocFunc, + void *clientMallocContext, elzma_free clientFreeFunc, + void *clientFreeContext); + +/** superset representation of a compressed file header */ +struct elzma_file_header +{ + unsigned char pb; + unsigned char lp; + unsigned char lc; +
unsigned char isStreamed; + long long unsigned int uncompressedSize; + unsigned int dictSize; +}; + +/** superset representation of a compressed file footer */ +struct elzma_file_footer +{ + unsigned int crc32; + long long unsigned int uncompressedSize; +}; + +/** a structure which encapsulates information about the particular + * file header and footer in use (lzip vs lzma vs (eventually) xz. + * The intention of this structure is to simplify compression and + * decompression logic by abstracting the file format details a bit. */ +struct elzma_format_handler +{ + unsigned int header_size; + void (*init_header)(struct elzma_file_header *hdr); + int (*parse_header)(const unsigned char *hdrBuf, struct elzma_file_header *hdr); + int (*serialize_header)(unsigned char *hdrBuf, const struct elzma_file_header *hdr); + + unsigned int footer_size; + int (*serialize_footer)(struct elzma_file_footer *ftr, unsigned char *ftrBuf); + int (*parse_footer)(const unsigned char *ftrBuf, struct elzma_file_footer *ftr); +}; + +#endif diff --git a/depends/lzma/wrapper/compress.c b/depends/lzma/wrapper/compress.c new file mode 100644 index 00000000..38ca0a68 --- /dev/null +++ b/depends/lzma/wrapper/compress.c @@ -0,0 +1,297 @@ +/* + * Written in 2009 by Lloyd Hilaiel + * + * License + * + * All the cruft you find here is public domain. You don't have to credit + * anyone to use this code, but my personal request is that you mention + * Igor Pavlov for his hard, high quality work. 
+ */ + +#include "compress.h" +#include "lzma_header.h" +#include "lzip_header.h" +#include "common_internal.h" + +#include "pavlov/Types.h" +#include "pavlov/LzmaEnc.h" +#include "pavlov/7zCrc.h" + +#include + +struct _elzma_compress_handle +{ + CLzmaEncProps props; + CLzmaEncHandle encHand; + unsigned long long uncompressedSize; + elzma_file_format format; + struct elzma_alloc_struct allocStruct; + struct elzma_format_handler formatHandler; +}; + +/* Allocate a zeroed compression handle with default encoder properties and the + * LZMA-Alone format handler. Returns NULL when the allocation fails. */ +elzma_compress_handle elzma_compress_alloc() +{ + elzma_compress_handle hand = malloc(sizeof(struct _elzma_compress_handle)); + if (hand == NULL) + return NULL; + memset((void *)hand, 0, sizeof(struct _elzma_compress_handle)); + + /* "reasonable" defaults for props */ + LzmaEncProps_Init(&(hand->props)); + hand->props.lc = 3; + hand->props.lp = 0; + hand->props.pb = 2; + hand->props.level = 5; + hand->props.algo = 1; + hand->props.fb = 32; + hand->props.dictSize = 1 << 24; + hand->props.btMode = 1; + hand->props.numHashBytes = 4; + hand->props.mc = 32; + hand->props.numThreads = 1; + hand->props.writeEndMark = 1; + + init_alloc_struct(&(hand->allocStruct), NULL, NULL, NULL, NULL); + + /* default format is LZMA-Alone */ + initializeLZMAFormatHandler(&(hand->formatHandler)); + + return hand; +} + +/* Destroy the encoder (if one was created), release the handle and reset the + * caller's pointer to NULL. A NULL hand or an already released handle is a no-op. */ +void elzma_compress_free(elzma_compress_handle *hand) +{ + if (hand == NULL || *hand == NULL) + return; + if ((*hand)->encHand) + { + LzmaEnc_Destroy((*hand)->encHand); + } + /* the handle itself was obtained from malloc() in elzma_compress_alloc() */ + free(*hand); + *hand = NULL; +} + +int elzma_compress_config(elzma_compress_handle hand, unsigned char lc, unsigned char lp, + unsigned char pb, unsigned char level, unsigned int dictionarySize, + elzma_file_format format, unsigned long long uncompressedSize) +{ + /* XXX: validate arguments are in valid ranges */ + + hand->props.lc = lc; + hand->props.lp = lp; + hand->props.pb = pb; + hand->props.level = level; + hand->props.dictSize = dictionarySize; + hand->uncompressedSize = uncompressedSize; + hand->format = format; + + /* default of LZMA-Alone is set at alloc time, and there are only + * two possible formats */ + if
(format == ELZMA_lzip) + { + initializeLZIPFormatHandler(&(hand->formatHandler)); + } + + return ELZMA_E_OK; +} + +/* use Igor's stream hooks for compression. */ +struct elzmaInStream +{ + SRes (*ReadPtr)(void *p, void *buf, size_t *size); + elzma_read_callback inputStream; + void *inputContext; + unsigned int crc32; + unsigned int crc32a; + unsigned int crc32b; + unsigned int crc32c; + int calculateCRC; +}; + +static SRes elzmaReadFunc(void *p, void *buf, size_t *size) +{ + int rv; + struct elzmaInStream *is = (struct elzmaInStream *)p; + rv = is->inputStream(is->inputContext, buf, size); + if (rv == 0 && *size > 0 && is->calculateCRC) + { + is->crc32 = CrcUpdate(is->crc32, buf, *size); + } + return rv; +} + +struct elzmaOutStream +{ + size_t (*WritePtr)(void *p, const void *buf, size_t size); + elzma_write_callback outputStream; + void *outputContext; +}; + +static size_t elzmaWriteFunc(void *p, const void *buf, size_t size) +{ + struct elzmaOutStream *os = (struct elzmaOutStream *)p; + return os->outputStream(os->outputContext, buf, size); +} + +/* use Igor's stream hooks for compression. 
*/ +struct elzmaProgressStruct +{ + SRes (*Progress)(void *p, uint64_t inSize, uint64_t outSize); + long long unsigned int uncompressedSize; + elzma_progress_callback progressCallback; + void *progressContext; +}; + +#include +static SRes elzmaProgress(void *p, uint64_t inSize, uint64_t outSize) +{ + struct elzmaProgressStruct *ps = (struct elzmaProgressStruct *)p; + if (ps->progressCallback) + { + ps->progressCallback(ps->progressContext, inSize, ps->uncompressedSize); + } + return SZ_OK; +} + +void elzma_compress_set_allocation_callbacks(elzma_compress_handle hand, + elzma_malloc mallocFunc, void *mallocFuncContext, + elzma_free freeFunc, void *freeFuncContext) +{ + if (hand) + { + init_alloc_struct(&(hand->allocStruct), mallocFunc, mallocFuncContext, freeFunc, + freeFuncContext); + } +} + +int elzma_compress_run(elzma_compress_handle hand, elzma_read_callback inputStream, + void *inputContext, elzma_write_callback outputStream, + void *outputContext, elzma_progress_callback progressCallback, + void *progressContext) +{ + struct elzmaInStream inStreamStruct; + struct elzmaOutStream outStreamStruct; + struct elzmaProgressStruct progressStruct; + SRes r; + + CrcGenerateTable(); + + if (hand == NULL || inputStream == NULL) + return ELZMA_E_BAD_PARAMS; + + /* initialize stream structrures */ + inStreamStruct.ReadPtr = elzmaReadFunc; + inStreamStruct.inputStream = inputStream; + inStreamStruct.inputContext = inputContext; + inStreamStruct.crc32 = CRC_INIT_VAL; + inStreamStruct.calculateCRC = (hand->formatHandler.serialize_footer != NULL); + + outStreamStruct.WritePtr = elzmaWriteFunc; + outStreamStruct.outputStream = outputStream; + outStreamStruct.outputContext = outputContext; + + progressStruct.Progress = elzmaProgress; + progressStruct.uncompressedSize = hand->uncompressedSize; + progressStruct.progressCallback = progressCallback; + progressStruct.progressContext = progressContext; + + /* create an encoding object */ + hand->encHand = LzmaEnc_Create(); + + if 
(hand->encHand == NULL) + { + return ELZMA_E_COMPRESS_ERROR; + } + + /* inintialize with compression parameters */ + if (SZ_OK != LzmaEnc_SetProps(hand->encHand, &(hand->props))) + { + return ELZMA_E_BAD_PARAMS; + } + + /* verify format is sane */ + if (ELZMA_lzma != hand->format && ELZMA_lzip != hand->format) + { + return ELZMA_E_UNSUPPORTED_FORMAT; + } + + /* now write the compression header header */ + { + unsigned char *hdr = + hand->allocStruct.Alloc(&(hand->allocStruct), hand->formatHandler.header_size); + + struct elzma_file_header h; + size_t wt; + + hand->formatHandler.init_header(&h); + h.pb = (unsigned char)hand->props.pb; + h.lp = (unsigned char)hand->props.lp; + h.lc = (unsigned char)hand->props.lc; + h.dictSize = hand->props.dictSize; + h.isStreamed = (unsigned char)(hand->uncompressedSize == 0); + h.uncompressedSize = hand->uncompressedSize; + + hand->formatHandler.serialize_header(hdr, &h); + + wt = outputStream(outputContext, (void *)hdr, hand->formatHandler.header_size); + + hand->allocStruct.Free(&(hand->allocStruct), hdr); + + if (wt != hand->formatHandler.header_size) + { + return ELZMA_E_OUTPUT_ERROR; + } + } + + /* begin LZMA encoding */ + /* XXX: expose encoding progress */ + r = LzmaEnc_Encode(hand->encHand, (ISeqOutStream *)&outStreamStruct, + (ISeqInStream *)&inStreamStruct, (ICompressProgress *)&progressStruct); + + if (r != SZ_OK) + return ELZMA_E_COMPRESS_ERROR; + + /* support a footer! 
(lzip) */ + if (hand->formatHandler.serialize_footer != NULL && hand->formatHandler.footer_size > 0) + { + size_t wt; + unsigned char *ftrBuf = + hand->allocStruct.Alloc(&(hand->allocStruct), hand->formatHandler.footer_size); + struct elzma_file_footer ftr; + ftr.crc32 = inStreamStruct.crc32 ^ 0xFFFFFFFF; + ftr.uncompressedSize = hand->uncompressedSize; + + hand->formatHandler.serialize_footer(&ftr, ftrBuf); + + wt = outputStream(outputContext, (void *)ftrBuf, hand->formatHandler.footer_size); + + hand->allocStruct.Free(&(hand->allocStruct), ftrBuf); + + if (wt != hand->formatHandler.footer_size) + { + return ELZMA_E_OUTPUT_ERROR; + } + } + + return ELZMA_E_OK; +} + +/* Suggest a dictionary size for an input of the given size: a power of two + * near the (possibly halved) size, never more than 1 << 23. */ +unsigned int elzma_get_dict_size(unsigned long long size) +{ + int i = 13; /* 16k dict is minimum */ + + /* now we'll find the closest power of two with a max at 1 << 23; + * if the size is greater than 8m, we'll divide by two, all of this + * is based on a quick set of empirical tests on hopefully + * representative sample data */ + if (size > (1 << 23)) + size >>= 1; + + while (size >> i) + i++; + + if (i > 23) + return 1 << 23; + + /* now 1 << i is greater than size, let's return either 1 << i or + * 1 << (i - 1), whichever is closer to size */ + return 1 << ((((1 << i) - size) > (size - (1 << (i - 1)))) ? i - 1 : i); +} diff --git a/depends/lzma/wrapper/decompress.c b/depends/lzma/wrapper/decompress.c new file mode 100644 index 00000000..65ff9119 --- /dev/null +++ b/depends/lzma/wrapper/decompress.c @@ -0,0 +1,263 @@ +/* + * Written in 2009 by Lloyd Hilaiel + * + * License + * + * All the cruft you find here is public domain. You don't have to credit + * anyone to use this code, but my personal request is that you mention + * Igor Pavlov for his hard, high quality work.
+ */ + +#include "include/decompress.h" +#include "pavlov/LzmaDec.h" +#include "pavlov/7zCrc.h" +#include "common_internal.h" +#include "lzma_header.h" +#include "lzip_header.h" + +#include +#include + +#define ELZMA_DECOMPRESS_INPUT_BUFSIZE (1024 * 64) +#define ELZMA_DECOMPRESS_OUTPUT_BUFSIZE (1024 * 256) + +/** an opaque handle to an lzma decompressor */ +struct _elzma_decompress_handle +{ + char inbuf[ELZMA_DECOMPRESS_INPUT_BUFSIZE]; + char outbuf[ELZMA_DECOMPRESS_OUTPUT_BUFSIZE]; + struct elzma_alloc_struct allocStruct; +}; + +/* Allocate a zeroed decompression handle using the default allocator hooks. + * Returns NULL when the allocation fails. */ +elzma_decompress_handle elzma_decompress_alloc() +{ + elzma_decompress_handle hand = malloc(sizeof(struct _elzma_decompress_handle)); + if (hand == NULL) + return NULL; + memset((void *)hand, 0, sizeof(struct _elzma_decompress_handle)); + init_alloc_struct(&(hand->allocStruct), NULL, NULL, NULL, NULL); + return hand; +} + +void elzma_decompress_set_allocation_callbacks(elzma_decompress_handle hand, + elzma_malloc mallocFunc, void *mallocFuncContext, + elzma_free freeFunc, void *freeFuncContext) +{ + if (hand) + { + init_alloc_struct(&(hand->allocStruct), mallocFunc, mallocFuncContext, freeFunc, + freeFuncContext); + } +} + +/* Release a handle from elzma_decompress_alloc() and reset the caller's + * pointer to NULL. A NULL pointer-to-handle is tolerated. */ +void elzma_decompress_free(elzma_decompress_handle *hand) +{ + if (hand == NULL) + return; + free(*hand); /* free(NULL) is a no-op */ + *hand = NULL; +} + +int elzma_decompress_run(elzma_decompress_handle hand, elzma_read_callback inputStream, + void *inputContext, elzma_write_callback outputStream, + void *outputContext, elzma_file_format format) +{ + unsigned long long int totalRead = 0; /* total amount read from stream */ + unsigned int crc32 = CRC_INIT_VAL; /* running crc32 (lzip case) */ + CLzmaDec dec; + unsigned int errorCode = ELZMA_E_OK; + struct elzma_format_handler formatHandler; + struct elzma_file_header h; + struct elzma_file_footer f; + + /* switch between supported formats */ + if (format == ELZMA_lzma) + { + initializeLZMAFormatHandler(&formatHandler); + } + else if (format == ELZMA_lzip) + { + CrcGenerateTable(); + initializeLZIPFormatHandler(&formatHandler); + } + else + { +
return ELZMA_E_BAD_PARAMS; + } + + /* initialize footer */ + f.crc32 = 0; + f.uncompressedSize = 0; + + /* initialize decoder memory */ + memset((void *)&dec, 0, sizeof(dec)); + LzmaDec_Init(&dec); + + /* decode the header. */ + { + unsigned char *hdr = + hand->allocStruct.Alloc(&(hand->allocStruct), formatHandler.header_size); + + size_t sz = formatHandler.header_size; + + formatHandler.init_header(&h); + + if (inputStream(inputContext, hdr, &sz) != 0 || sz != formatHandler.header_size) + { + hand->allocStruct.Free(&(hand->allocStruct), hdr); + return ELZMA_E_INPUT_ERROR; + } + + if (0 != formatHandler.parse_header(hdr, &h)) + { + hand->allocStruct.Free(&(hand->allocStruct), hdr); + return ELZMA_E_CORRUPT_HEADER; + } + + /* the LzmaDec_Allocate call requires 5 bytes which have + * compression properties encoded in them. In the case of + * lzip, the header format does not already contain what + * LzmaDec_Allocate expects, so we must craft it, silly */ + { + unsigned char propsBuf[13]; + const unsigned char *propsPtr = hdr; + + if (format == ELZMA_lzip) + { + struct elzma_format_handler lzmaHand; + initializeLZMAFormatHandler(&lzmaHand); + lzmaHand.serialize_header(propsBuf, &h); + propsPtr = propsBuf; + } + + /* now we're ready to allocate the decoder */ + LzmaDec_Allocate(&dec, propsPtr, 5); + } + + hand->allocStruct.Free(&(hand->allocStruct), hdr); + } + + /* perform the decoding */ + for (;;) + { + size_t dstLen = ELZMA_DECOMPRESS_OUTPUT_BUFSIZE; + size_t srcLen = ELZMA_DECOMPRESS_INPUT_BUFSIZE; + size_t amt = 0; + size_t bufOff = 0; + ELzmaStatus stat; + + if (0 != inputStream(inputContext, hand->inbuf, &srcLen)) + { + errorCode = ELZMA_E_INPUT_ERROR; + goto decompressEnd; + } + + /* handle the case where the input prematurely finishes */ + if (srcLen == 0) + { + errorCode = ELZMA_E_INSUFFICIENT_INPUT; + goto decompressEnd; + } + + amt = srcLen; + + /* handle the case where a single read buffer of compressed bytes + * will translate into multiple buffers of 
uncompressed bytes, + * with this inner loop */ + stat = LZMA_STATUS_NOT_SPECIFIED; + + while (bufOff < srcLen) + { + SRes r = LzmaDec_DecodeToBuf(&dec, (uint8_t *)hand->outbuf, &dstLen, + ((uint8_t *)hand->inbuf + bufOff), &amt, + LZMA_FINISH_ANY, &stat); + + /* XXX deal with result code more granularly*/ + if (r != SZ_OK) + { + errorCode = ELZMA_E_DECOMPRESS_ERROR; + goto decompressEnd; + } + + /* write what we've read */ + { + size_t wt; + + /* if decoding lzip, update our crc32 value */ + if (format == ELZMA_lzip && dstLen > 0) + { + crc32 = CrcUpdate(crc32, hand->outbuf, dstLen); + } + totalRead += dstLen; + + wt = outputStream(outputContext, hand->outbuf, dstLen); + if (wt != dstLen) + { + errorCode = ELZMA_E_OUTPUT_ERROR; + goto decompressEnd; + } + } + + /* do we have more data on the input buffer? */ + bufOff += amt; + assert(bufOff <= srcLen); + if (bufOff >= srcLen) + break; + amt = srcLen - bufOff; + + /* with lzip, we will have the footer left on the buffer! */ + if (stat == LZMA_STATUS_FINISHED_WITH_MARK) + { + break; + } + } + + /* now check status */ + if (stat == LZMA_STATUS_FINISHED_WITH_MARK) + { + /* read a footer if one is expected and + * present */ + if (formatHandler.footer_size > 0 && amt >= formatHandler.footer_size && + formatHandler.parse_footer != NULL) + { + formatHandler.parse_footer((unsigned char *)hand->inbuf + bufOff, &f); + } + + break; + } + /* for LZMA utils, we don't always have a finished mark */ + if (!h.isStreamed && totalRead >= h.uncompressedSize) + { + break; + } + } + + /* finish the calculated crc32 */ + crc32 ^= 0xFFFFFFFF; + + /* if we have a footer, check that the calculated crc32 matches + * the encoded crc32, and that the sizes match */ + if (formatHandler.footer_size) + { + if (f.crc32 != crc32) + { + errorCode = ELZMA_E_CRC32_MISMATCH; + } + else if (f.uncompressedSize != totalRead) + { + errorCode = ELZMA_E_SIZE_MISMATCH; + } + } + else if (!h.isStreamed) + { + /* if the format does not support a footer and has 
an uncompressed + * size in the header, let's compare that with how much we actually + * read */ + if (h.uncompressedSize != totalRead) + { + errorCode = ELZMA_E_SIZE_MISMATCH; + } + } + +decompressEnd: + LzmaDec_Free(&dec); + + return errorCode; +} diff --git a/depends/lzma/wrapper/lzip_header.c b/depends/lzma/wrapper/lzip_header.c new file mode 100644 index 00000000..39872813 --- /dev/null +++ b/depends/lzma/wrapper/lzip_header.c @@ -0,0 +1,96 @@ +#include "lzip_header.h" + +#include + +#define ELZMA_LZIP_HEADER_SIZE 6 +#define ELZMA_LZIP_FOOTER_SIZE 12 + +static void initLzipHeader(struct elzma_file_header *hdr) +{ + memset((void *)hdr, 0, sizeof(struct elzma_file_header)); +} + +static int parseLzipHeader(const unsigned char *hdrBuf, struct elzma_file_header *hdr) +{ + if (0 != strncmp("LZIP", (char *)hdrBuf, 4)) + return 1; + /* XXX: ignore version for now */ + hdr->pb = 2; + hdr->lp = 0; + hdr->lc = 3; + /* unknown at this point */ + hdr->isStreamed = 1; + hdr->uncompressedSize = 0; + hdr->dictSize = 1 << (hdrBuf[5] & 0x1F); + return 0; +} + +static int serializeLzipHeader(unsigned char *hdrBuf, const struct elzma_file_header *hdr) +{ + hdrBuf[0] = 'L'; + hdrBuf[1] = 'Z'; + hdrBuf[2] = 'I'; + hdrBuf[3] = 'P'; + hdrBuf[4] = 0; + { + int r = 0; + while ((hdr->dictSize >> r) != 0) + r++; + hdrBuf[5] = (unsigned char)(r - 1) & 0x1F; + } + return 0; +} + +static int serializeLzipFooter(struct elzma_file_footer *ftr, unsigned char *ftrBuf) +{ + unsigned int i = 0; + + /* first crc32 */ + for (i = 0; i < 4; i++) + { + *(ftrBuf++) = (unsigned char)(ftr->crc32 >> (i * 8)); + } + + /* next data size */ + for (i = 0; i < 8; i++) + { + *(ftrBuf++) = (unsigned char)(ftr->uncompressedSize >> (i * 8)); + } + + /* write version 0 files, omit member length for now*/ + + return 0; +} + +static int parseLzipFooter(const unsigned char *ftrBuf, struct elzma_file_footer *ftr) +{ + unsigned int i = 0; + ftr->crc32 = 0; + ftr->uncompressedSize = 0; + + /* first crc32 */ + for (i = 
0; i < 4; i++) + { + ftr->crc32 += ((unsigned int)*(ftrBuf++) << (i * 8)); + } + + /* next data size */ + for (i = 0; i < 8; i++) + { + ftr->uncompressedSize += (unsigned long long)*(ftrBuf++) << (i * 8); + } + /* read version 0 files, omit member length for now*/ + + return 0; +} + +void initializeLZIPFormatHandler(struct elzma_format_handler *hand) +{ + hand->header_size = ELZMA_LZIP_HEADER_SIZE; + hand->init_header = initLzipHeader; + hand->parse_header = parseLzipHeader; + hand->serialize_header = serializeLzipHeader; + hand->footer_size = ELZMA_LZIP_FOOTER_SIZE; + hand->serialize_footer = serializeLzipFooter; + hand->parse_footer = parseLzipFooter; +} diff --git a/depends/lzma/wrapper/lzip_header.h b/depends/lzma/wrapper/lzip_header.h new file mode 100644 index 00000000..138afa60 --- /dev/null +++ b/depends/lzma/wrapper/lzip_header.h @@ -0,0 +1,11 @@ +#ifndef __EASYLZMA_LZIP_HEADER__ +#define __EASYLZMA_LZIP_HEADER__ + +#include "common_internal.h" + +/* lzip file format documented here: + * http://download.savannah.gnu.org/releases-noredirect/lzip/manual/ */ + +void initializeLZIPFormatHandler(struct elzma_format_handler *hand); + +#endif diff --git a/depends/lzma/wrapper/lzma_header.c b/depends/lzma/wrapper/lzma_header.c new file mode 100644 index 00000000..ab32549f --- /dev/null +++ b/depends/lzma/wrapper/lzma_header.c @@ -0,0 +1,134 @@ +/* + * Written in 2009 by Lloyd Hilaiel + * + * License + * + * All the cruft you find here is public domain. You don't have to credit + * anyone to use this code, but my personal request is that you mention + * Igor Pavlov for his hard, high quality work. 
+ */ + +/* XXX: clean this up, it's mostly lifted from pavel */ + +#include "lzma_header.h" + +#include +#include + +#define ELZMA_LZMA_HEADER_SIZE 13 +#define ELZMA_LZMA_PROPSBUF_SIZE 5 + +/**************** + Header parsing + ****************/ + +#ifndef UINT64_MAX +#define UINT64_MAX ((unsigned long long)-1) +#endif + +/* Parse the properties byte: c = (pb * 5 + lp) * 9 + lc. + * Returns 0 on success, -1 when c is outside the valid range. */ +static char lzmadec_header_properties(unsigned char *pb, unsigned char *lp, unsigned char *lc, + const unsigned char c) +{ + /* pb, lp and lc are encoded into a single byte; the largest valid value is + * 9 * 5 * 5 - 1, because 9 * 5 * 5 itself would decode to pb == 5. */ + if (c >= (9 * 5 * 5)) + return -1; + *pb = c / (9 * 5); /* 0 <= pb <= 4 */ + *lp = (c % (9 * 5)) / 9; /* 0 <= lp <= 4 */ + *lc = c % 9; /* 0 <= lc <= 8 */ + + assert(*pb < 5 && *lp < 5 && *lc < 9); + return 0; +} + +/* Parse the dictionary size (4 bytes, little endian) */ +static char lzmadec_header_dictionary(unsigned int *size, const unsigned char *buffer) +{ + unsigned int i; + *size = 0; + for (i = 0; i < 4; i++) + *size += (unsigned int)(*buffer++) << (i * 8); + /* The dictionary size is limited to 256 MiB (checked from + * LZMA SDK 4.30) */ + if (*size > (1 << 28)) + return -1; + return 0; +} + +/* Parse the uncompressed size field (8 bytes, little endian) */ +static void lzmadec_header_uncompressed(unsigned long long *size, unsigned char *is_streamed, + const unsigned char *buffer) +{ + unsigned int i; + + /* Streamed files have all 64 bits set in the size field. + * We don't know the uncompressed size beforehand. */ + *is_streamed = 1; /* Assume streamed.
*/ + *size = 0; + for (i = 0; i < 8; i++) + { + *size += (unsigned long long)buffer[i] << (i * 8); + if (buffer[i] != 255) + *is_streamed = 0; + } + assert((*is_streamed == 1 && *size == UINT64_MAX) || + (*is_streamed == 0 && *size < UINT64_MAX)); +} + +static void initLzmaHeader(struct elzma_file_header *hdr) +{ + memset((void *)hdr, 0, sizeof(struct elzma_file_header)); +} + +static int parseLzmaHeader(const unsigned char *hdrBuf, struct elzma_file_header *hdr) +{ + if (lzmadec_header_properties(&(hdr->pb), &(hdr->lp), &(hdr->lc), *hdrBuf) || + lzmadec_header_dictionary(&(hdr->dictSize), hdrBuf + 1)) + { + return 1; + } + lzmadec_header_uncompressed(&(hdr->uncompressedSize), &(hdr->isStreamed), hdrBuf + 5); + + return 0; +} + +/* Write the 13-byte LZMA-Alone header for hdr into hdrBuf: properties byte, + * dictionary size (4 bytes LE), uncompressed size (8 bytes LE, all 0xff when + * streamed). Always returns 0. */ +static int serializeLzmaHeader(unsigned char *hdrBuf, const struct elzma_file_header *hdr) +{ + unsigned int i; + + memset((void *)hdrBuf, 0, ELZMA_LZMA_HEADER_SIZE); + + /* encode lc, lp and pb as (pb * 5 + lp) * 9 + lc, the exact inverse of + * what lzmadec_header_properties() decodes */ + *hdrBuf++ = (unsigned char)(hdr->lc + (hdr->lp * 9) + (hdr->pb * 45)); + + /* encode dictionary size */ + for (i = 0; i < 4; i++) + { + *(hdrBuf++) = (unsigned char)(hdr->dictSize >> (i * 8)); + } + + /* encode uncompressed size */ + for (i = 0; i < 8; i++) + { + if (hdr->isStreamed) + { + *(hdrBuf++) = 0xff; + } + else + { + *(hdrBuf++) = (unsigned char)(hdr->uncompressedSize >> (i * 8)); + } + } + + return 0; +} + +void initializeLZMAFormatHandler(struct elzma_format_handler *hand) +{ + hand->header_size = ELZMA_LZMA_HEADER_SIZE; + hand->init_header = initLzmaHeader; + hand->parse_header = parseLzmaHeader; + hand->serialize_header = serializeLzmaHeader; + /* LZMA-Alone has no footer; clear both hooks so the handler is fully initialised */ + hand->footer_size = 0; + hand->serialize_footer = NULL; + hand->parse_footer = NULL; +} diff --git a/depends/lzma/wrapper/lzma_header.h b/depends/lzma/wrapper/lzma_header.h new file mode 100644 index 00000000..6a5d7a9c --- /dev/null +++ b/depends/lzma/wrapper/lzma_header.h @@ -0,0 +1,10 @@ +#ifndef __EASYLZMA_LZMA_HEADER__ +#define __EASYLZMA_LZMA_HEADER__ + +#include "common_internal.h" + +/* LZMA-Alone header format gleaned from
reading Igor's code */ + +void initializeLZMAFormatHandler(struct elzma_format_handler *hand); + +#endif diff --git a/depends/lzma/wrapper/simple.c b/depends/lzma/wrapper/simple.c new file mode 100644 index 00000000..98d3c285 --- /dev/null +++ b/depends/lzma/wrapper/simple.c @@ -0,0 +1,139 @@ +/* + * Written in 2009 by Lloyd Hilaiel + * + * License + * + * All the cruft you find here is public domain. You don't have to credit + * anyone to use this code, but my personal request is that you mention + * Igor Pavlov for his hard, high quality work. + * + * simple.c - a wrapper around easylzma to compress/decompress to memory + */ + +#include "simple.h" + +#include +#include + +struct dataStream +{ + const unsigned char *inData; + size_t inLen; + + unsigned char *outData; + size_t outLen; +}; + +static int inputCallback(void *ctx, void *buf, size_t *size) +{ + size_t rd = 0; + struct dataStream *ds = (struct dataStream *)ctx; + assert(ds != NULL); + + rd = (ds->inLen < *size) ? ds->inLen : *size; + + if (rd > 0) + { + memcpy(buf, (void *)ds->inData, rd); + ds->inData += rd; + ds->inLen -= rd; + } + + *size = rd; + + return 0; +} + +/* Append size bytes from buf to the growing ds->outData buffer. + * Returns size on success, or 0 (a short write) when the buffer cannot grow. */ +static size_t outputCallback(void *ctx, const void *buf, size_t size) +{ + struct dataStream *ds = (struct dataStream *)ctx; + assert(ds != NULL); + + if (size > 0) + { + /* keep the old pointer until realloc succeeds, so it can still be freed */ + unsigned char *grown = realloc(ds->outData, ds->outLen + size); + if (grown == NULL) + return 0; + ds->outData = grown; + memcpy((void *)(ds->outData + ds->outLen), buf, size); + ds->outLen += size; + } + + return size; +} + +int simpleCompress(elzma_file_format format, const unsigned char *inData, size_t inLen, + unsigned char **outData, size_t *outLen) +{ + int rc; + elzma_compress_handle hand; + + /* allocate compression handle */ + hand = elzma_compress_alloc(); + assert(hand != NULL); + + rc = elzma_compress_config(hand, ELZMA_LC_DEFAULT, ELZMA_LP_DEFAULT, ELZMA_PB_DEFAULT, 5, + (1 << 20) /* 1mb */, format, inLen); + + if (rc != ELZMA_E_OK) + { + elzma_compress_free(&hand); + return rc; + } + + /* now run the compression */ + { +
struct dataStream ds; + ds.inData = inData; + ds.inLen = inLen; + ds.outData = NULL; + ds.outLen = 0; + + rc = elzma_compress_run(hand, inputCallback, (void *)&ds, outputCallback, (void *)&ds, + NULL, NULL); + + if (rc != ELZMA_E_OK) + { + if (ds.outData != NULL) + free(ds.outData); + elzma_compress_free(&hand); + return rc; + } + + *outData = ds.outData; + *outLen = ds.outLen; + } + + return rc; +} + +int simpleDecompress(elzma_file_format format, const unsigned char *inData, size_t inLen, + unsigned char **outData, size_t *outLen) +{ + int rc; + elzma_decompress_handle hand; + + hand = elzma_decompress_alloc(); + + /* now run the compression */ + { + struct dataStream ds; + ds.inData = inData; + ds.inLen = inLen; + ds.outData = NULL; + ds.outLen = 0; + + rc = elzma_decompress_run(hand, inputCallback, (void *)&ds, outputCallback, (void *)&ds, + format); + + if (rc != ELZMA_E_OK) + { + if (ds.outData != NULL) + free(ds.outData); + elzma_decompress_free(&hand); + return rc; + } + + *outData = ds.outData; + *outLen = ds.outLen; + } + + return rc; +} diff --git a/depends/pack200/CMakeLists.txt b/depends/pack200/CMakeLists.txt new file mode 100644 index 00000000..1befa352 --- /dev/null +++ b/depends/pack200/CMakeLists.txt @@ -0,0 +1,43 @@ +CMAKE_MINIMUM_REQUIRED(VERSION 2.6) + +project(unpack200) + +# Find ZLIB for quazip +# Use system zlib on unix and Qt ZLIB on Windows +IF(UNIX) + find_package(ZLIB REQUIRED) +ELSE(UNIX) + get_filename_component (ZLIB_FOUND_DIR "${Qt5Core_DIR}/../../../include/QtZlib" ABSOLUTE) + SET(ZLIB_INCLUDE_DIRS ${ZLIB_FOUND_DIR} CACHE PATH "Path to ZLIB headers of Qt") + SET(ZLIB_LIBRARIES "") + IF(NOT EXISTS "${ZLIB_INCLUDE_DIRS}/zlib.h") + MESSAGE("Please specify a valid zlib include dir") + ENDIF(NOT EXISTS "${ZLIB_INCLUDE_DIRS}/zlib.h") +ENDIF(UNIX) + +SET(PACK200_SRC +src/bands.cpp +src/bands.h +src/bytes.cpp +src/bytes.h +src/coding.cpp +src/coding.h +src/constants.h +src/defines.h +src/main.cpp +src/unpack.cpp +src/unpack.h 
+src/utils.cpp +src/utils.h +src/zip.cpp +src/zip.h +) + +add_executable(unpack200 ${PACK200_SRC}) + +IF(UNIX) + target_link_libraries(unpack200 ${ZLIB_LIBRARIES}) +ELSE() + # zlib is part of Qt on windows. use it. + QT5_USE_MODULES(unpack200 Core) +ENDIF() diff --git a/depends/pack200/include/unpack200.h b/depends/pack200/include/unpack200.h new file mode 100644 index 00000000..8d1c8b69 --- /dev/null +++ b/depends/pack200/include/unpack200.h @@ -0,0 +1 @@ + diff --git a/depends/pack200/src/bands.cpp b/depends/pack200/src/bands.cpp new file mode 100644 index 00000000..41547ad1 --- /dev/null +++ b/depends/pack200/src/bands.cpp @@ -0,0 +1,451 @@ +/* + * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +// -*- C++ -*- +// Small program for unpacking specially compressed Java packages. +// John R. 
Rose + +#include + +#include +#include +#include +#include +#include + +#include "defines.h" +#include "bytes.h" +#include "utils.h" +#include "coding.h" +#include "bands.h" + +#include "constants.h" +#include "unpack.h" + +inline void band::abort(const char *msg) +{ + u->abort(msg); +} +inline bool band::aborting() +{ + return u->aborting(); +} + +// Reads this band's meta-coding (if any) from the unpacker's input position, initializes +// the coding method for `length` values, records the end of the band in rplimit, and +// rewinds the value stream. A non-zero expectedLength sets the band length first. +void band::readData(int expectedLength) +{ + CHECK; + assert(expectedLength >= 0); + assert(vs[0].cmk == cmk_ERROR); + if (expectedLength != 0) + { + assert(length == 0); + length = expectedLength; + } + if (length == 0) + { + // Empty band: record its zero-length extent outside the assert so the + // assignment is not compiled out together with the check under NDEBUG. + rplimit = cm.vs0.rp = u->rp; + assert(rplimit != nullptr); + return; + } + assert(length > 0); + + bool is_BYTE1 = (defc->spec == BYTE1_spec); + + if (is_BYTE1) + { + // No possibility of coding change. Sizing is exact. + u->ensure_input(length); + } + else + { + // Make a conservatively generous estimate of band size in bytes. + // Assume B == 5 everywhere. + // Assume awkward pop with all {U} values (2*5 per value) + jlong generous = (jlong)length * (B_MAX * 3 + 1) + C_SLOP; + u->ensure_input(generous); + } + + // Read one value to see what it might be. 
+ int XB = _meta_default; + int cp1 = 0, cp2 = 0; + if (!is_BYTE1) + { + // must be a variable-length coding + assert(defc->B() > 1 && defc->L() > 0); + // must have already read from previous band: + assert(bn >= BAND_LIMIT || bn <= 0 || bn == e_cp_Utf8_big_chars || + endsWith(name, "_lo") // preceded by _hi conditional band + || + bn == e_file_options // preceded by conditional band + || + u->rp == u->all_bands[bn - 1].maxRP() || u->all_bands[bn - 1].defc == nullptr); + + value_stream xvs; + coding *valc = defc; + if (valc->D() != 0) + { + valc = coding::findBySpec(defc->B(), defc->H(), defc->S()); + assert(!valc->isMalloc); + } + xvs.init(u->rp, u->rplimit, valc); + CHECK; + int X = xvs.getInt(); + if (valc->S() != 0) + { + assert(valc->min <= -256); + XB = -1 - X; + } + else + { + int L = valc->L(); + assert(valc->max >= L + 255); + XB = X - L; + } + if (0 <= XB && XB < 256) + { + // Skip over the escape value. + u->rp = xvs.rp; + cp1 = 1; + } + else + { + // No, it's still default. + XB = _meta_default; + } + } + + if (XB <= _meta_canon_max) + { + byte XB_byte = (byte)XB; + byte *XB_ptr = &XB_byte; + cm.init(u->rp, u->rplimit, XB_ptr, 0, defc, length, nullptr); + CHECK; + } + else + { + assert(u->meta_rp != nullptr); + // Scribble the initial byte onto the band. 
+ byte *save_meta_rp = --u->meta_rp; + byte save_meta_xb = (*save_meta_rp); + (*save_meta_rp) = (byte)XB; + cm.init(u->rp, u->rplimit, u->meta_rp, 0, defc, length, nullptr); + (*save_meta_rp) = save_meta_xb; // put it back, just to be tidy + } + rplimit = u->rp; + + rewind(); +} + +void band::setIndex(cpindex *ix_) +{ + assert(ix_ == nullptr || ixTag == ix_->ixTag); + ix = ix_; +} +void band::setIndexByTag(byte tag) +{ + setIndex(u->cp.getIndex(tag)); +} + +// Reads the next value, subtracts nullOK, and looks it up in ix_. Aborts unless an entry +// was found or the caller accepts null and the adjusted value is -1. +entry *band::getRefCommon(cpindex *ix_, bool nullOKwithCaller) +{ + CHECK_0; + assert(ix_->ixTag == ixTag || + (ixTag == CONSTANT_Literal && ix_->ixTag >= CONSTANT_Integer && + ix_->ixTag <= CONSTANT_String)); + int n = vs[0].getInt() - nullOK; + // Note: band-local nullOK means nullptr encodes as 0. + // But nullOKwithCaller means caller is willing to tolerate a nullptr. + entry *ref = ix_->get(n); + if (ref == nullptr && !(nullOKwithCaller && n == -1)) + abort(n == -1 ? "nullptr ref" : "bad ref"); + return ref; +} + +jlong band::getLong(band &lo_band, bool have_hi) +{ + band &hi_band = (*this); + assert(lo_band.bn == hi_band.bn + 1); + uint lo = lo_band.getInt(); + if (!have_hi) + { + assert(hi_band.length == 0); + return makeLong(0, lo); + } + uint hi = hi_band.getInt(); + return makeLong(hi, lo); +} + +// Returns the sum of all values in the band (memoized in total_memo as total + 1) and +// rewinds the band. Aborts and returns 0 if any addend is negative or the sum overflows. +int band::getIntTotal() +{ + CHECK_0; + if (length == 0) + return 0; + if (total_memo > 0) + return total_memo - 1; + int total = getInt(); + // overflow checks require that none of the addends are <0, + // and that the partial sums never overflow (wrap negative) + if (total < 0) + { + abort("overflow detected"); + return 0; + } + for (int k = length - 1; k > 0; k--) + { + int addend = vs[0].getInt(); + // Sum in 64 bits: testing a wrapped 32-bit sum would rely on signed + // overflow, which is undefined behaviour and may be optimized away. + jlong wide_total = (jlong)total + addend; + if (addend < 0 || wide_total > 0x7FFFFFFF) + { + abort("overflow detected"); + return 0; + } + total = (int)wide_total; + } + rewind(); + total_memo = total + 1; + return total; +} + +// Counts how many values in the band equal tag and rewinds the band. Tags within +// [HIST0_MIN, HIST0_MAX] are answered from a lazily built histogram. +int band::getIntCount(int tag) +{ + CHECK_0; + if (length == 0) + return 0; + if (tag >= HIST0_MIN && tag <= HIST0_MAX) + { + if (hist0 == nullptr) + { 
+ // Lazily calculate an approximate histogram. + hist0 = U_NEW(int, (HIST0_MAX - HIST0_MIN) + 1); + CHECK_0; + for (int k = length; k > 0; k--) + { + int x = vs[0].getInt(); + if (x >= HIST0_MIN && x <= HIST0_MAX) + hist0[x - HIST0_MIN] += 1; + } + rewind(); + } + return hist0[tag - HIST0_MIN]; + } + int total = 0; + for (int k = length; k > 0; k--) + { + total += (vs[0].getInt() == tag) ? 1 : 0; + } + rewind(); + return total; +} + +#define INDEX_INIT(tag, nullOK, subindex) ((tag) + (subindex) * SUBINDEX_BIT + (nullOK) * 256) + +#define INDEX(tag) INDEX_INIT(tag, 0, 0) +#define NULL_OR_INDEX(tag) INDEX_INIT(tag, 1, 0) +#define SUB_INDEX(tag) INDEX_INIT(tag, 0, 1) +#define NO_INDEX 0 + +struct band_init +{ + int defc; + int index; +}; + +#define BAND_INIT(name, cspec, ix) \ + { \ + cspec, ix \ + } + +const band_init all_band_inits[] = + { + // BAND_INIT(archive_magic, BYTE1_spec, 0), + // BAND_INIT(archive_header, UNSIGNED5_spec, 0), + // BAND_INIT(band_headers, BYTE1_spec, 0), + BAND_INIT(cp_Utf8_prefix, DELTA5_spec, 0), BAND_INIT(cp_Utf8_suffix, UNSIGNED5_spec, 0), + BAND_INIT(cp_Utf8_chars, CHAR3_spec, 0), BAND_INIT(cp_Utf8_big_suffix, DELTA5_spec, 0), + BAND_INIT(cp_Utf8_big_chars, DELTA5_spec, 0), BAND_INIT(cp_Int, UDELTA5_spec, 0), + BAND_INIT(cp_Float, UDELTA5_spec, 0), BAND_INIT(cp_Long_hi, UDELTA5_spec, 0), + BAND_INIT(cp_Long_lo, DELTA5_spec, 0), BAND_INIT(cp_Double_hi, UDELTA5_spec, 0), + BAND_INIT(cp_Double_lo, DELTA5_spec, 0), + BAND_INIT(cp_String, UDELTA5_spec, INDEX(CONSTANT_Utf8)), + BAND_INIT(cp_Class, UDELTA5_spec, INDEX(CONSTANT_Utf8)), + BAND_INIT(cp_Signature_form, DELTA5_spec, INDEX(CONSTANT_Utf8)), + BAND_INIT(cp_Signature_classes, UDELTA5_spec, INDEX(CONSTANT_Class)), + BAND_INIT(cp_Descr_name, DELTA5_spec, INDEX(CONSTANT_Utf8)), + BAND_INIT(cp_Descr_type, UDELTA5_spec, INDEX(CONSTANT_Signature)), + BAND_INIT(cp_Field_class, DELTA5_spec, INDEX(CONSTANT_Class)), + BAND_INIT(cp_Field_desc, UDELTA5_spec, INDEX(CONSTANT_NameandType)), + 
BAND_INIT(cp_Method_class, DELTA5_spec, INDEX(CONSTANT_Class)), + BAND_INIT(cp_Method_desc, UDELTA5_spec, INDEX(CONSTANT_NameandType)), + BAND_INIT(cp_Imethod_class, DELTA5_spec, INDEX(CONSTANT_Class)), + BAND_INIT(cp_Imethod_desc, UDELTA5_spec, INDEX(CONSTANT_NameandType)), + BAND_INIT(attr_definition_headers, BYTE1_spec, 0), + BAND_INIT(attr_definition_name, UNSIGNED5_spec, INDEX(CONSTANT_Utf8)), + BAND_INIT(attr_definition_layout, UNSIGNED5_spec, INDEX(CONSTANT_Utf8)), + BAND_INIT(ic_this_class, UDELTA5_spec, INDEX(CONSTANT_Class)), + BAND_INIT(ic_flags, UNSIGNED5_spec, 0), + BAND_INIT(ic_outer_class, DELTA5_spec, NULL_OR_INDEX(CONSTANT_Class)), + BAND_INIT(ic_name, DELTA5_spec, NULL_OR_INDEX(CONSTANT_Utf8)), + BAND_INIT(class_this, DELTA5_spec, INDEX(CONSTANT_Class)), + BAND_INIT(class_super, DELTA5_spec, INDEX(CONSTANT_Class)), + BAND_INIT(class_interface_count, DELTA5_spec, 0), + BAND_INIT(class_interface, DELTA5_spec, INDEX(CONSTANT_Class)), + BAND_INIT(class_field_count, DELTA5_spec, 0), + BAND_INIT(class_method_count, DELTA5_spec, 0), + BAND_INIT(field_descr, DELTA5_spec, INDEX(CONSTANT_NameandType)), + BAND_INIT(field_flags_hi, UNSIGNED5_spec, 0), + BAND_INIT(field_flags_lo, UNSIGNED5_spec, 0), + BAND_INIT(field_attr_count, UNSIGNED5_spec, 0), + BAND_INIT(field_attr_indexes, UNSIGNED5_spec, 0), + BAND_INIT(field_attr_calls, UNSIGNED5_spec, 0), + BAND_INIT(field_ConstantValue_KQ, UNSIGNED5_spec, INDEX(CONSTANT_Literal)), + BAND_INIT(field_Signature_RS, UNSIGNED5_spec, INDEX(CONSTANT_Signature)), + // Rows with (-1, -1) carry no default coding and no index: makeBands() asserts that + // findBySpec() yields nullptr exactly when the spec is -1, and only applies index > 0. + BAND_INIT(field_metadata_bands, -1, -1), BAND_INIT(field_attr_bands, -1, -1), + BAND_INIT(method_descr, MDELTA5_spec, INDEX(CONSTANT_NameandType)), + BAND_INIT(method_flags_hi, UNSIGNED5_spec, 0), + BAND_INIT(method_flags_lo, UNSIGNED5_spec, 0), + BAND_INIT(method_attr_count, UNSIGNED5_spec, 0), + BAND_INIT(method_attr_indexes, UNSIGNED5_spec, 0), + BAND_INIT(method_attr_calls, UNSIGNED5_spec, 0), + BAND_INIT(method_Exceptions_N, UNSIGNED5_spec, 0), + 
BAND_INIT(method_Exceptions_RC, UNSIGNED5_spec, INDEX(CONSTANT_Class)), + BAND_INIT(method_Signature_RS, UNSIGNED5_spec, INDEX(CONSTANT_Signature)), + BAND_INIT(method_metadata_bands, -1, -1), BAND_INIT(method_attr_bands, -1, -1), + BAND_INIT(class_flags_hi, UNSIGNED5_spec, 0), + BAND_INIT(class_flags_lo, UNSIGNED5_spec, 0), + BAND_INIT(class_attr_count, UNSIGNED5_spec, 0), + BAND_INIT(class_attr_indexes, UNSIGNED5_spec, 0), + BAND_INIT(class_attr_calls, UNSIGNED5_spec, 0), + BAND_INIT(class_SourceFile_RUN, UNSIGNED5_spec, NULL_OR_INDEX(CONSTANT_Utf8)), + BAND_INIT(class_EnclosingMethod_RC, UNSIGNED5_spec, INDEX(CONSTANT_Class)), + BAND_INIT(class_EnclosingMethod_RDN, UNSIGNED5_spec, + NULL_OR_INDEX(CONSTANT_NameandType)), + BAND_INIT(class_Signature_RS, UNSIGNED5_spec, INDEX(CONSTANT_Signature)), + BAND_INIT(class_metadata_bands, -1, -1), + BAND_INIT(class_InnerClasses_N, UNSIGNED5_spec, 0), + BAND_INIT(class_InnerClasses_RC, UNSIGNED5_spec, INDEX(CONSTANT_Class)), + BAND_INIT(class_InnerClasses_F, UNSIGNED5_spec, 0), + BAND_INIT(class_InnerClasses_outer_RCN, UNSIGNED5_spec, NULL_OR_INDEX(CONSTANT_Class)), + BAND_INIT(class_InnerClasses_name_RUN, UNSIGNED5_spec, NULL_OR_INDEX(CONSTANT_Utf8)), + BAND_INIT(class_ClassFile_version_minor_H, UNSIGNED5_spec, 0), + BAND_INIT(class_ClassFile_version_major_H, UNSIGNED5_spec, 0), + BAND_INIT(class_attr_bands, -1, -1), BAND_INIT(code_headers, BYTE1_spec, 0), + BAND_INIT(code_max_stack, UNSIGNED5_spec, 0), + BAND_INIT(code_max_na_locals, UNSIGNED5_spec, 0), + BAND_INIT(code_handler_count, UNSIGNED5_spec, 0), + BAND_INIT(code_handler_start_P, BCI5_spec, 0), + BAND_INIT(code_handler_end_PO, BRANCH5_spec, 0), + BAND_INIT(code_handler_catch_PO, BRANCH5_spec, 0), + BAND_INIT(code_handler_class_RCN, UNSIGNED5_spec, NULL_OR_INDEX(CONSTANT_Class)), + BAND_INIT(code_flags_hi, UNSIGNED5_spec, 0), + BAND_INIT(code_flags_lo, UNSIGNED5_spec, 0), + BAND_INIT(code_attr_count, UNSIGNED5_spec, 0), + BAND_INIT(code_attr_indexes, 
UNSIGNED5_spec, 0), + BAND_INIT(code_attr_calls, UNSIGNED5_spec, 0), + BAND_INIT(code_StackMapTable_N, UNSIGNED5_spec, 0), + BAND_INIT(code_StackMapTable_frame_T, BYTE1_spec, 0), + BAND_INIT(code_StackMapTable_local_N, UNSIGNED5_spec, 0), + BAND_INIT(code_StackMapTable_stack_N, UNSIGNED5_spec, 0), + BAND_INIT(code_StackMapTable_offset, UNSIGNED5_spec, 0), + BAND_INIT(code_StackMapTable_T, BYTE1_spec, 0), + BAND_INIT(code_StackMapTable_RC, UNSIGNED5_spec, INDEX(CONSTANT_Class)), + BAND_INIT(code_StackMapTable_P, BCI5_spec, 0), + BAND_INIT(code_LineNumberTable_N, UNSIGNED5_spec, 0), + BAND_INIT(code_LineNumberTable_bci_P, BCI5_spec, 0), + BAND_INIT(code_LineNumberTable_line, UNSIGNED5_spec, 0), + BAND_INIT(code_LocalVariableTable_N, UNSIGNED5_spec, 0), + BAND_INIT(code_LocalVariableTable_bci_P, BCI5_spec, 0), + BAND_INIT(code_LocalVariableTable_span_O, BRANCH5_spec, 0), + BAND_INIT(code_LocalVariableTable_name_RU, UNSIGNED5_spec, INDEX(CONSTANT_Utf8)), + BAND_INIT(code_LocalVariableTable_type_RS, UNSIGNED5_spec, INDEX(CONSTANT_Signature)), + BAND_INIT(code_LocalVariableTable_slot, UNSIGNED5_spec, 0), + BAND_INIT(code_LocalVariableTypeTable_N, UNSIGNED5_spec, 0), + BAND_INIT(code_LocalVariableTypeTable_bci_P, BCI5_spec, 0), + BAND_INIT(code_LocalVariableTypeTable_span_O, BRANCH5_spec, 0), + BAND_INIT(code_LocalVariableTypeTable_name_RU, UNSIGNED5_spec, INDEX(CONSTANT_Utf8)), + BAND_INIT(code_LocalVariableTypeTable_type_RS, UNSIGNED5_spec, + INDEX(CONSTANT_Signature)), + BAND_INIT(code_LocalVariableTypeTable_slot, UNSIGNED5_spec, 0), + BAND_INIT(code_attr_bands, -1, -1), BAND_INIT(bc_codes, BYTE1_spec, 0), + BAND_INIT(bc_case_count, UNSIGNED5_spec, 0), BAND_INIT(bc_case_value, DELTA5_spec, 0), + BAND_INIT(bc_byte, BYTE1_spec, 0), BAND_INIT(bc_short, DELTA5_spec, 0), + BAND_INIT(bc_local, UNSIGNED5_spec, 0), BAND_INIT(bc_label, BRANCH5_spec, 0), + BAND_INIT(bc_intref, DELTA5_spec, INDEX(CONSTANT_Integer)), + BAND_INIT(bc_floatref, DELTA5_spec, INDEX(CONSTANT_Float)), + 
BAND_INIT(bc_longref, DELTA5_spec, INDEX(CONSTANT_Long)), + BAND_INIT(bc_doubleref, DELTA5_spec, INDEX(CONSTANT_Double)), + BAND_INIT(bc_stringref, DELTA5_spec, INDEX(CONSTANT_String)), + BAND_INIT(bc_classref, UNSIGNED5_spec, NULL_OR_INDEX(CONSTANT_Class)), + BAND_INIT(bc_fieldref, DELTA5_spec, INDEX(CONSTANT_Fieldref)), + BAND_INIT(bc_methodref, UNSIGNED5_spec, INDEX(CONSTANT_Methodref)), + BAND_INIT(bc_imethodref, DELTA5_spec, INDEX(CONSTANT_InterfaceMethodref)), + BAND_INIT(bc_thisfield, UNSIGNED5_spec, SUB_INDEX(CONSTANT_Fieldref)), + BAND_INIT(bc_superfield, UNSIGNED5_spec, SUB_INDEX(CONSTANT_Fieldref)), + BAND_INIT(bc_thismethod, UNSIGNED5_spec, SUB_INDEX(CONSTANT_Methodref)), + BAND_INIT(bc_supermethod, UNSIGNED5_spec, SUB_INDEX(CONSTANT_Methodref)), + BAND_INIT(bc_initref, UNSIGNED5_spec, SUB_INDEX(CONSTANT_Methodref)), + BAND_INIT(bc_escref, UNSIGNED5_spec, INDEX(CONSTANT_All)), + BAND_INIT(bc_escrefsize, UNSIGNED5_spec, 0), BAND_INIT(bc_escsize, UNSIGNED5_spec, 0), + BAND_INIT(bc_escbyte, BYTE1_spec, 0), + BAND_INIT(file_name, UNSIGNED5_spec, INDEX(CONSTANT_Utf8)), + BAND_INIT(file_size_hi, UNSIGNED5_spec, 0), BAND_INIT(file_size_lo, UNSIGNED5_spec, 0), + BAND_INIT(file_modtime, DELTA5_spec, 0), BAND_INIT(file_options, UNSIGNED5_spec, 0), + // BAND_INIT(file_bits, BYTE1_spec, 0), + {0, 0}}; +#define NUM_BAND_INITS (sizeof(all_band_inits) / sizeof(all_band_inits[0])) + +// Allocates BAND_LIMIT bands and initializes band i from row i of all_band_inits: +// its default coding and, for rows with a positive index value, nullOK and ixTag. +band *band::makeBands(unpacker *u) +{ + band *tmp_all_bands = U_NEW(band, BAND_LIMIT); + for (int i = 0; i < BAND_LIMIT; i++) + { + assert((byte *)&all_band_inits[i + 1] < + (byte *)all_band_inits + sizeof(all_band_inits)); + const band_init &bi = all_band_inits[i]; + band &b = tmp_all_bands[i]; + coding *defc = coding::findBySpec(bi.defc); + assert((defc == nullptr) == (bi.defc == -1)); // no garbage, please + assert(defc == nullptr || !defc->isMalloc); + // band_init carries no band number (only defc and index), so the table cannot + // be cross-checked against the band enum per row; rows are matched by position. + b.init(u, i, defc); + if (bi.index > 0) + { + b.nullOK = ((bi.index >> 8) & 
1); + b.ixTag = (bi.index & 0xFF); + } + } + return tmp_all_bands; +} + +// Attaches a constant-pool index to every band whose ixTag is non-zero, is not +// CONSTANT_Literal, and has no SUBINDEX_BIT set. +void band::initIndexes(unpacker *u) +{ + band *tmp_all_bands = u->all_bands; + for (int i = 0; i < BAND_LIMIT; i++) + { + band *scan = &tmp_all_bands[i]; + uint tag = scan->ixTag; // Cf. #define INDEX(tag) above + if (tag != 0 && tag != CONSTANT_Literal && (tag & SUBINDEX_BIT) == 0) + { + scan->setIndex(u->cp.getIndex(tag)); + } + } +} diff --git a/depends/pack200/src/bands.h b/depends/pack200/src/bands.h new file mode 100644 index 00000000..3f944481 --- /dev/null +++ b/depends/pack200/src/bands.h @@ -0,0 +1,492 @@ +/* + * Copyright (c) 2002, 2005, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +// -*- C++ -*- +struct entry; +struct cpindex; +struct unpacker; + +// A single band: a sequence of `length` coded values plus the state needed to read them. +struct band +{ + int bn; // band_number of this band + coding *defc; // default coding method + cpindex *ix; // CP entry mapping, if CPRefBand + byte ixTag; // 0, or the constant-pool tag whose index is attached by setIndexByTag + byte nullOK; // 0 or 1; nullptr is coded as (nullOK?0:-1) + int length; // expected # values + unpacker *u; // back pointer + + value_stream vs[2]; // source of values + coding_method cm; // method used for initial state of vs[0] + byte *rplimit; // end of band (encoded, transmitted) + + int total_memo; // getIntTotal() result plus 1 once cached; values <= 0 mean "not cached" + int *hist0; // approximate. histogram + enum + { + HIST0_MIN = 0, + HIST0_MAX = 255 + }; // catches the usual cases + + // properties for attribute layout elements: + byte le_kind; // EK_XXX + byte le_bci; // 0,EK_BCI,EK_BCD,EK_BCO + byte le_back; // ==EF_BACK + byte le_len; // 0,1,2,4 (size in classfile), or call addr + band **le_body; // body of repl, union, call (nullptr-terminated) +// Note: EK_CASE elements use hist0 to record union tags. +#define le_casetags hist0 + + // Neighbouring bands; only meaningful when this band is an element of a band array. + band &nextBand() + { + return this[1]; + } + band &prevBand() + { + return this[-1]; + } + + void init(unpacker *u_, int bn_, coding *defc_) + { + u = u_; + cm.u = u_; + bn = bn_; + defc = defc_; + } + void init(unpacker *u_, int bn_, int defcSpec) + { + init(u_, bn_, coding::findBySpec(defcSpec)); + } + void initRef(int ixTag_ = 0, bool nullOK_ = false) + { + ixTag = ixTag_; + nullOK = nullOK_; + setIndexByTag(ixTag); + } + + // Adds l to the expected number of values; asserts it is called before readData. + void expectMoreLength(int l) + { + assert(length >= 0); // able to accept a length + assert((int)l >= 0); // no overflow + assert(rplimit == nullptr); // readData not yet called + length += l; + assert(length >= l); // no overflow + } + + void setIndex(cpindex *ix_); + void setIndexByTag(byte tag); + + // Parse the band and its meta-coding header. + void readData(int expectedLength = 0); + + // Reset the band for another pass (Cf. 
Java Band.resetForSecondPass.) + void rewind() + { + cm.reset(&vs[0]); + } + + byte *&curRP() + { + return vs[0].rp; + } + byte *minRP() + { + return cm.vs0.rp; + } + byte *maxRP() + { + return rplimit; + } + size_t size() + { + return maxRP() - minRP(); + } + + // Plain value readers; they assert that no constant-pool index is attached. + int getByte() + { + assert(ix == nullptr); + return vs[0].getByte(); + } + int getInt() + { + assert(ix == nullptr); + return vs[0].getInt(); + } + // Reference readers: getRefN tolerates a null reference, getRef does not, and + // getRefUsing looks up in a caller-supplied index (tolerating null). + entry *getRefN() + { + assert(ix != nullptr); + return getRefCommon(ix, true); + } + entry *getRef() + { + assert(ix != nullptr); + return getRefCommon(ix, false); + } + entry *getRefUsing(cpindex *ix2) + { + assert(ix == nullptr); + return getRefCommon(ix2, true); + } + entry *getRefCommon(cpindex *ix, bool nullOK); + jlong getLong(band &lo_band, bool have_hi); + + // Places hi in the upper 32 bits and lo in the lower 32 bits of the result. + static jlong makeLong(uint hi, uint lo) + { + return ((julong)hi << 32) + (((julong)lo << 32) >> 32); + } + + int getIntTotal(); + int getIntCount(int tag); + + static band *makeBands(unpacker *u); + static void initIndexes(unpacker *u); + + void abort(const char *msg = nullptr); //{ u->abort(msg); } + bool aborting(); //{ return u->aborting(); } +}; + +extern band all_bands[]; + +#define BAND_LOCAL /* \ + band* band_temp = all_bands; \ + band* all_bands = band_temp */ + +// Band schema: +enum band_number +{ + // e_archive_magic, + // e_archive_header, + // e_band_headers, + + // constant pool contents + e_cp_Utf8_prefix, + e_cp_Utf8_suffix, + e_cp_Utf8_chars, + e_cp_Utf8_big_suffix, + e_cp_Utf8_big_chars, + e_cp_Int, + e_cp_Float, + e_cp_Long_hi, + e_cp_Long_lo, + e_cp_Double_hi, + e_cp_Double_lo, + e_cp_String, + e_cp_Class, + e_cp_Signature_form, + e_cp_Signature_classes, + e_cp_Descr_name, + e_cp_Descr_type, + e_cp_Field_class, + e_cp_Field_desc, + e_cp_Method_class, + e_cp_Method_desc, + e_cp_Imethod_class, + e_cp_Imethod_desc, + + // bands which define transmission of attributes + e_attr_definition_headers, + e_attr_definition_name, + e_attr_definition_layout, + + // band for hardwired 
InnerClasses attribute (shared across the package) + e_ic_this_class, + e_ic_flags, + // These bands contain data only where flags sets ACC_IC_LONG_FORM: + e_ic_outer_class, + e_ic_name, + + // bands for carrying class schema information: + e_class_this, + e_class_super, + e_class_interface_count, + e_class_interface, + + // bands for class members + e_class_field_count, + e_class_method_count, + e_field_descr, + e_field_flags_hi, + e_field_flags_lo, + e_field_attr_count, + e_field_attr_indexes, + e_field_attr_calls, + e_field_ConstantValue_KQ, + e_field_Signature_RS, + e_field_metadata_bands, + e_field_attr_bands, + e_method_descr, + e_method_flags_hi, + e_method_flags_lo, + e_method_attr_count, + e_method_attr_indexes, + e_method_attr_calls, + e_method_Exceptions_N, + e_method_Exceptions_RC, + e_method_Signature_RS, + e_method_metadata_bands, + e_method_attr_bands, + e_class_flags_hi, + e_class_flags_lo, + e_class_attr_count, + e_class_attr_indexes, + e_class_attr_calls, + e_class_SourceFile_RUN, + e_class_EnclosingMethod_RC, + e_class_EnclosingMethod_RDN, + e_class_Signature_RS, + e_class_metadata_bands, + e_class_InnerClasses_N, + e_class_InnerClasses_RC, + e_class_InnerClasses_F, + e_class_InnerClasses_outer_RCN, + e_class_InnerClasses_name_RUN, + e_class_ClassFile_version_minor_H, + e_class_ClassFile_version_major_H, + e_class_attr_bands, + e_code_headers, + e_code_max_stack, + e_code_max_na_locals, + e_code_handler_count, + e_code_handler_start_P, + e_code_handler_end_PO, + e_code_handler_catch_PO, + e_code_handler_class_RCN, + + // code attributes + e_code_flags_hi, + e_code_flags_lo, + e_code_attr_count, + e_code_attr_indexes, + e_code_attr_calls, + e_code_StackMapTable_N, + e_code_StackMapTable_frame_T, + e_code_StackMapTable_local_N, + e_code_StackMapTable_stack_N, + e_code_StackMapTable_offset, + e_code_StackMapTable_T, + e_code_StackMapTable_RC, + e_code_StackMapTable_P, + e_code_LineNumberTable_N, + e_code_LineNumberTable_bci_P, + 
e_code_LineNumberTable_line, + e_code_LocalVariableTable_N, + e_code_LocalVariableTable_bci_P, + e_code_LocalVariableTable_span_O, + e_code_LocalVariableTable_name_RU, + e_code_LocalVariableTable_type_RS, + e_code_LocalVariableTable_slot, + e_code_LocalVariableTypeTable_N, + e_code_LocalVariableTypeTable_bci_P, + e_code_LocalVariableTypeTable_span_O, + e_code_LocalVariableTypeTable_name_RU, + e_code_LocalVariableTypeTable_type_RS, + e_code_LocalVariableTypeTable_slot, + e_code_attr_bands, + + // bands for bytecodes + e_bc_codes, + // remaining bands provide typed opcode fields required by the bc_codes + e_bc_case_count, + e_bc_case_value, + e_bc_byte, + e_bc_short, + e_bc_local, + e_bc_label, + + // ldc* operands: + e_bc_intref, + e_bc_floatref, + e_bc_longref, + e_bc_doubleref, + e_bc_stringref, + e_bc_classref, + e_bc_fieldref, + e_bc_methodref, + e_bc_imethodref, + + // _self_linker_op family + e_bc_thisfield, + e_bc_superfield, + e_bc_thismethod, + e_bc_supermethod, + + // bc_invokeinit family: + e_bc_initref, + + // bytecode escape sequences + e_bc_escref, + e_bc_escrefsize, + e_bc_escsize, + e_bc_escbyte, + + // file attributes and contents + e_file_name, + e_file_size_hi, + e_file_size_lo, + e_file_modtime, + e_file_options, + // e_file_bits, // handled specially as an appendix + // BAND_LIMIT is the number of bands above; band::makeBands() allocates and + // initializes exactly that many, taking row i of all_band_inits for band i. + BAND_LIMIT +}; + +// Symbolic names for bands, as if in a giant global struct: +// Each alias expands to an element of whatever `all_bands` names at the point of use. +// Not every band_number has an alias (e.g. e_field_attr_calls and the *_metadata_bands +// entries have none). +//#define archive_magic all_bands[e_archive_magic] +//#define archive_header all_bands[e_archive_header] +//#define band_headers all_bands[e_band_headers] +#define cp_Utf8_prefix all_bands[e_cp_Utf8_prefix] +#define cp_Utf8_suffix all_bands[e_cp_Utf8_suffix] +#define cp_Utf8_chars all_bands[e_cp_Utf8_chars] +#define cp_Utf8_big_suffix all_bands[e_cp_Utf8_big_suffix] +#define cp_Utf8_big_chars all_bands[e_cp_Utf8_big_chars] +#define cp_Int all_bands[e_cp_Int] +#define cp_Float all_bands[e_cp_Float] +#define cp_Long_hi all_bands[e_cp_Long_hi] +#define cp_Long_lo all_bands[e_cp_Long_lo] +#define 
cp_Double_hi all_bands[e_cp_Double_hi] +#define cp_Double_lo all_bands[e_cp_Double_lo] +#define cp_String all_bands[e_cp_String] +#define cp_Class all_bands[e_cp_Class] +#define cp_Signature_form all_bands[e_cp_Signature_form] +#define cp_Signature_classes all_bands[e_cp_Signature_classes] +#define cp_Descr_name all_bands[e_cp_Descr_name] +#define cp_Descr_type all_bands[e_cp_Descr_type] +#define cp_Field_class all_bands[e_cp_Field_class] +#define cp_Field_desc all_bands[e_cp_Field_desc] +#define cp_Method_class all_bands[e_cp_Method_class] +#define cp_Method_desc all_bands[e_cp_Method_desc] +#define cp_Imethod_class all_bands[e_cp_Imethod_class] +#define cp_Imethod_desc all_bands[e_cp_Imethod_desc] +#define attr_definition_headers all_bands[e_attr_definition_headers] +#define attr_definition_name all_bands[e_attr_definition_name] +#define attr_definition_layout all_bands[e_attr_definition_layout] +#define ic_this_class all_bands[e_ic_this_class] +#define ic_flags all_bands[e_ic_flags] +#define ic_outer_class all_bands[e_ic_outer_class] +#define ic_name all_bands[e_ic_name] +#define class_this all_bands[e_class_this] +#define class_super all_bands[e_class_super] +#define class_interface_count all_bands[e_class_interface_count] +#define class_interface all_bands[e_class_interface] +#define class_field_count all_bands[e_class_field_count] +#define class_method_count all_bands[e_class_method_count] +#define field_descr all_bands[e_field_descr] +#define field_flags_hi all_bands[e_field_flags_hi] +#define field_flags_lo all_bands[e_field_flags_lo] +#define field_attr_count all_bands[e_field_attr_count] +#define field_attr_indexes all_bands[e_field_attr_indexes] +#define field_ConstantValue_KQ all_bands[e_field_ConstantValue_KQ] +#define field_Signature_RS all_bands[e_field_Signature_RS] +#define field_attr_bands all_bands[e_field_attr_bands] +#define method_descr all_bands[e_method_descr] +#define method_flags_hi all_bands[e_method_flags_hi] +#define method_flags_lo 
all_bands[e_method_flags_lo] +#define method_attr_count all_bands[e_method_attr_count] +#define method_attr_indexes all_bands[e_method_attr_indexes] +#define method_Exceptions_N all_bands[e_method_Exceptions_N] +#define method_Exceptions_RC all_bands[e_method_Exceptions_RC] +#define method_Signature_RS all_bands[e_method_Signature_RS] +#define method_attr_bands all_bands[e_method_attr_bands] +#define class_flags_hi all_bands[e_class_flags_hi] +#define class_flags_lo all_bands[e_class_flags_lo] +#define class_attr_count all_bands[e_class_attr_count] +#define class_attr_indexes all_bands[e_class_attr_indexes] +#define class_SourceFile_RUN all_bands[e_class_SourceFile_RUN] +#define class_EnclosingMethod_RC all_bands[e_class_EnclosingMethod_RC] +#define class_EnclosingMethod_RDN all_bands[e_class_EnclosingMethod_RDN] +#define class_Signature_RS all_bands[e_class_Signature_RS] +#define class_InnerClasses_N all_bands[e_class_InnerClasses_N] +#define class_InnerClasses_RC all_bands[e_class_InnerClasses_RC] +#define class_InnerClasses_F all_bands[e_class_InnerClasses_F] +#define class_InnerClasses_outer_RCN all_bands[e_class_InnerClasses_outer_RCN] +#define class_InnerClasses_name_RUN all_bands[e_class_InnerClasses_name_RUN] +#define class_ClassFile_version_minor_H all_bands[e_class_ClassFile_version_minor_H] +#define class_ClassFile_version_major_H all_bands[e_class_ClassFile_version_major_H] +#define class_attr_bands all_bands[e_class_attr_bands] +#define code_headers all_bands[e_code_headers] +#define code_max_stack all_bands[e_code_max_stack] +#define code_max_na_locals all_bands[e_code_max_na_locals] +#define code_handler_count all_bands[e_code_handler_count] +#define code_handler_start_P all_bands[e_code_handler_start_P] +#define code_handler_end_PO all_bands[e_code_handler_end_PO] +#define code_handler_catch_PO all_bands[e_code_handler_catch_PO] +#define code_handler_class_RCN all_bands[e_code_handler_class_RCN] +#define code_flags_hi all_bands[e_code_flags_hi] 
+#define code_flags_lo all_bands[e_code_flags_lo] +#define code_attr_count all_bands[e_code_attr_count] +#define code_attr_indexes all_bands[e_code_attr_indexes] +#define code_StackMapTable_N all_bands[e_code_StackMapTable_N] +#define code_StackMapTable_frame_T all_bands[e_code_StackMapTable_frame_T] +#define code_StackMapTable_local_N all_bands[e_code_StackMapTable_local_N] +#define code_StackMapTable_stack_N all_bands[e_code_StackMapTable_stack_N] +#define code_StackMapTable_offset all_bands[e_code_StackMapTable_offset] +#define code_StackMapTable_T all_bands[e_code_StackMapTable_T] +#define code_StackMapTable_RC all_bands[e_code_StackMapTable_RC] +#define code_StackMapTable_P all_bands[e_code_StackMapTable_P] +#define code_LineNumberTable_N all_bands[e_code_LineNumberTable_N] +#define code_LineNumberTable_bci_P all_bands[e_code_LineNumberTable_bci_P] +#define code_LineNumberTable_line all_bands[e_code_LineNumberTable_line] +#define code_LocalVariableTable_N all_bands[e_code_LocalVariableTable_N] +#define code_LocalVariableTable_bci_P all_bands[e_code_LocalVariableTable_bci_P] +#define code_LocalVariableTable_span_O all_bands[e_code_LocalVariableTable_span_O] +#define code_LocalVariableTable_name_RU all_bands[e_code_LocalVariableTable_name_RU] +#define code_LocalVariableTable_type_RS all_bands[e_code_LocalVariableTable_type_RS] +#define code_LocalVariableTable_slot all_bands[e_code_LocalVariableTable_slot] +#define code_LocalVariableTypeTable_N all_bands[e_code_LocalVariableTypeTable_N] +#define code_LocalVariableTypeTable_bci_P all_bands[e_code_LocalVariableTypeTable_bci_P] +#define code_LocalVariableTypeTable_span_O all_bands[e_code_LocalVariableTypeTable_span_O] +#define code_LocalVariableTypeTable_name_RU all_bands[e_code_LocalVariableTypeTable_name_RU] +#define code_LocalVariableTypeTable_type_RS all_bands[e_code_LocalVariableTypeTable_type_RS] +#define code_LocalVariableTypeTable_slot all_bands[e_code_LocalVariableTypeTable_slot] +#define code_attr_bands 
all_bands[e_code_attr_bands] +#define bc_codes all_bands[e_bc_codes] +#define bc_case_count all_bands[e_bc_case_count] +#define bc_case_value all_bands[e_bc_case_value] +#define bc_byte all_bands[e_bc_byte] +#define bc_short all_bands[e_bc_short] +#define bc_local all_bands[e_bc_local] +#define bc_label all_bands[e_bc_label] +#define bc_intref all_bands[e_bc_intref] +#define bc_floatref all_bands[e_bc_floatref] +#define bc_longref all_bands[e_bc_longref] +#define bc_doubleref all_bands[e_bc_doubleref] +#define bc_stringref all_bands[e_bc_stringref] +#define bc_classref all_bands[e_bc_classref] +#define bc_fieldref all_bands[e_bc_fieldref] +#define bc_methodref all_bands[e_bc_methodref] +#define bc_imethodref all_bands[e_bc_imethodref] +#define bc_thisfield all_bands[e_bc_thisfield] +#define bc_superfield all_bands[e_bc_superfield] +#define bc_thismethod all_bands[e_bc_thismethod] +#define bc_supermethod all_bands[e_bc_supermethod] +#define bc_initref all_bands[e_bc_initref] +#define bc_escref all_bands[e_bc_escref] +#define bc_escrefsize all_bands[e_bc_escrefsize] +#define bc_escsize all_bands[e_bc_escsize] +#define bc_escbyte all_bands[e_bc_escbyte] +#define file_name all_bands[e_file_name] +#define file_size_hi all_bands[e_file_size_hi] +#define file_size_lo all_bands[e_file_size_lo] +#define file_modtime all_bands[e_file_modtime] +#define file_options all_bands[e_file_options] diff --git a/depends/pack200/src/bytes.cpp b/depends/pack200/src/bytes.cpp new file mode 100644 index 00000000..b82a987a --- /dev/null +++ b/depends/pack200/src/bytes.cpp @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation.
Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include +#include +#include +#include +#include "defines.h" +#include "bytes.h" +#include "utils.h" + +static byte dummy[1 << 10]; + +bool bytes::inBounds(const void *p) +{ + return p >= ptr && p < limit(); +} + +void bytes::malloc(size_t len_) +{ + len = len_; + ptr = NEW(byte, add_size(len_, 1)); // add trailing zero byte always + if (ptr == nullptr) + { + // set ptr to some victim memory, to ease escape + set(dummy, sizeof(dummy) - 1); + unpack_abort(ERROR_ENOMEM); + } +} + +void bytes::realloc(size_t len_) +{ + if (len == len_) + return; // nothing to do + if (ptr == dummy) + return; // escaping from an error + if (ptr == nullptr) + { + malloc(len_); + return; + } + byte *oldptr = ptr; + ptr = (len_ >= PSIZE_MAX) ? 
nullptr : (byte *)::realloc(ptr, add_size(len_, 1)); + if (ptr != nullptr) + { + if (len < len_) + memset(ptr + len, 0, len_ - len); + ptr[len_] = 0; + len = len_; + } + else + { + ptr = oldptr; // ease our escape + unpack_abort(ERROR_ENOMEM); + } +} + +void bytes::free() +{ + if (ptr == dummy) + return; // escaping from an error + if (ptr != nullptr) + { + ::free(ptr); + } + len = 0; + ptr = 0; +} + +int bytes::indexOf(byte c) +{ + byte *p = (byte *)memchr(ptr, c, len); + return (p == 0) ? -1 : (int)(p - ptr); +} + +byte *bytes::writeTo(byte *bp) +{ + memcpy(bp, ptr, len); + return bp + len; +} + +int bytes::compareTo(bytes &other) +{ + size_t l1 = len; + size_t l2 = other.len; + int cmp = memcmp(ptr, other.ptr, (l1 < l2) ? l1 : l2); + if (cmp != 0) + return cmp; + return (l1 < l2) ? -1 : (l1 > l2) ? 1 : 0; +} + +void bytes::saveFrom(const void *ptr_, size_t len_) +{ + malloc(len_); + // Save as much as possible. (Helps unpacker::abort.) + if (len_ > len) + { + assert(ptr == dummy); // error recovery + len_ = len; + } + copyFrom(ptr_, len_); +} + +//#TODO: Need to fix for exception handling +void bytes::copyFrom(const void *ptr_, size_t len_, size_t offset) +{ + assert(len_ == 0 || inBounds(ptr + offset)); + assert(len_ == 0 || inBounds(ptr + offset + len_ - 1)); + memcpy(ptr + offset, ptr_, len_); +} + +// Make sure there are 'o' bytes beyond the fill pointer, +// advance the fill pointer, and return the old fill pointer. +byte *fillbytes::grow(size_t s) +{ + size_t nlen = add_size(b.len, s); + if (nlen <= allocated) + { + b.len = nlen; + return limit() - s; + } + size_t maxlen = nlen; + if (maxlen < 128) + maxlen = 128; + if (maxlen < allocated * 2) + maxlen = allocated * 2; + if (allocated == 0) + { + // Initial buffer was not malloced. Do not reallocate it. 
+ bytes old = b; + b.malloc(maxlen); + if (b.len == maxlen) + old.writeTo(b.ptr); + } + else + { + b.realloc(maxlen); + } + allocated = b.len; + if (allocated != maxlen) + { + assert(unpack_aborting()); + b.len = nlen - s; // back up + return dummy; // scribble during error recov. + } + // after realloc, recompute pointers + b.len = nlen; + assert(b.len <= allocated); + return limit() - s; +} + +void fillbytes::ensureSize(size_t s) +{ + if (allocated >= s) + return; + size_t len0 = b.len; + grow(s - size()); + b.len = len0; // put it back +} + +int ptrlist::indexOf(const void *x) +{ + int len = length(); + for (int i = 0; i < len; i++) + { + if (get(i) == x) + return i; + } + return -1; +} + +void ptrlist::freeAll() +{ + int len = length(); + for (int i = 0; i < len; i++) + { + void *p = (void *)get(i); + if (p != nullptr) + { + ::free(p); + } + } + free(); +} + +int intlist::indexOf(int x) +{ + int len = length(); + for (int i = 0; i < len; i++) + { + if (get(i) == x) + return i; + } + return -1; +} diff --git a/depends/pack200/src/bytes.h b/depends/pack200/src/bytes.h new file mode 100644 index 00000000..3926f9f2 --- /dev/null +++ b/depends/pack200/src/bytes.h @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2001, 2008, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +struct bytes +{ + int8_t *ptr; + size_t len; + int8_t *limit() + { + return ptr + len; + } + + void set(int8_t *ptr_, size_t len_) + { + ptr = ptr_; + len = len_; + } + void set(const char *str) + { + ptr = (int8_t *)str; + len = strlen(str); + } + bool inBounds(const void *p); // p in [ptr, limit) + void malloc(size_t len_); + void realloc(size_t len_); + void free(); + void copyFrom(const void *ptr_, size_t len_, size_t offset = 0); + void saveFrom(const void *ptr_, size_t len_); + void saveFrom(const char *str) + { + saveFrom(str, strlen(str)); + } + void copyFrom(bytes &other, size_t offset = 0) + { + copyFrom(other.ptr, other.len, offset); + } + void saveFrom(bytes &other) + { + saveFrom(other.ptr, other.len); + } + void clear(int fill_byte = 0) + { + memset(ptr, fill_byte, len); + } + int8_t *writeTo(int8_t *bp); + bool equals(bytes &other) + { + return 0 == compareTo(other); + } + int compareTo(bytes &other); + bool contains(int8_t c) + { + return indexOf(c) >= 0; + } + int indexOf(int8_t c); + // substrings: + static bytes of(int8_t *ptr, size_t len) + { + bytes res; + res.set(ptr, len); + return res; + } + bytes slice(size_t beg, size_t end) + { + bytes res; + res.ptr = ptr + beg; + res.len = end - beg; + assert(res.len == 0 || inBounds(res.ptr) && inBounds(res.limit() - 1)); + return res; + } + // building C strings inside byte buffers: + bytes &strcat(const char *str) + { + ::strcat((char *)ptr, str); + return *this; + } + 
bytes &strcat(bytes &other) + { + ::strncat((char *)ptr, (char *)other.ptr, other.len); + return *this; + } + char *strval() + { + assert(strlen((char *)ptr) == len); + return (char *)ptr; + } +}; +#define BYTES_OF(var) (bytes::of((int8_t *)&(var), sizeof(var))) + +struct fillbytes +{ + bytes b; + size_t allocated; + + int8_t *base() + { + return b.ptr; + } + size_t size() + { + return b.len; + } + int8_t *limit() + { + return b.limit(); + } // logical limit + void setLimit(int8_t *lp) + { + assert(isAllocated(lp)); + b.len = lp - b.ptr; + } + int8_t *end() + { + return b.ptr + allocated; + } // physical limit + int8_t *loc(size_t o) + { + assert(o < b.len); + return b.ptr + o; + } + void init() + { + allocated = 0; + b.set(nullptr, 0); + } + void init(size_t s) + { + init(); + ensureSize(s); + } + void free() + { + if (allocated != 0) + b.free(); + allocated = 0; + } + void empty() + { + b.len = 0; + } + int8_t *grow(size_t s); // grow so that limit() += s + int getByte(uint i) + { + return *loc(i) & 0xFF; + } + void addByte(int8_t x) + { + *grow(1) = x; + } + void ensureSize(size_t s); // make sure allocated >= s + void trimToSize() + { + if (allocated > size()) + b.realloc(allocated = size()); + } + bool canAppend(size_t s) + { + return allocated > b.len + s; + } + bool isAllocated(int8_t *p) + { + return p >= base() && p <= end(); + } // asserts + void set(bytes &src) + { + set(src.ptr, src.len); + } + + void set(int8_t *ptr, size_t len) + { + b.set(ptr, len); + allocated = 0; // mark as not reallocatable + } + + // block operations on resizing byte buffer: + fillbytes &append(const void *ptr_, size_t len_) + { + memcpy(grow(len_), ptr_, len_); + return (*this); + } + fillbytes &append(bytes &other) + { + return append(other.ptr, other.len); + } + fillbytes &append(const char *str) + { + return append(str, strlen(str)); + } +}; + +struct ptrlist : fillbytes +{ + typedef const void *cvptr; + int length() + { + return (int)(size() / sizeof(cvptr)); + } + cvptr 
*base() + { + return (cvptr *)fillbytes::base(); + } + cvptr &get(int i) + { + return *(cvptr *)loc(i * sizeof(cvptr)); + } + cvptr *limit() + { + return (cvptr *)fillbytes::limit(); + } + void add(cvptr x) + { + *(cvptr *)grow(sizeof(x)) = x; + } + void popTo(int l) + { + assert(l <= length()); + b.len = l * sizeof(cvptr); + } + int indexOf(cvptr x); + bool contains(cvptr x) + { + return indexOf(x) >= 0; + } + void freeAll(); // frees every ptr on the list, plus the list itself +}; +// Use a macro rather than mess with subtle mismatches +// between member and non-member function pointers. +#define PTRLIST_QSORT(ptrls, fn) ::qsort((ptrls).base(), (ptrls).length(), sizeof(void *), fn) + +struct intlist : fillbytes +{ + int length() + { + return (int)(size() / sizeof(int)); + } + int *base() + { + return (int *)fillbytes::base(); + } + int &get(int i) + { + return *(int *)loc(i * sizeof(int)); + } + int *limit() + { + return (int *)fillbytes::limit(); + } + void add(int x) + { + *(int *)grow(sizeof(x)) = x; + } + void popTo(int l) + { + assert(l <= length()); + b.len = l * sizeof(int); + } + int indexOf(int x); + bool contains(int x) + { + return indexOf(x) >= 0; + } +}; diff --git a/depends/pack200/src/coding.cpp b/depends/pack200/src/coding.cpp new file mode 100644 index 00000000..32977e05 --- /dev/null +++ b/depends/pack200/src/coding.cpp @@ -0,0 +1,1049 @@ +/* + * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +// -*- C++ -*- +// Small program for unpacking specially compressed Java packages. +// John R. Rose + +#include +#include +#include +#include +#include + +#include "defines.h" +#include "bytes.h" +#include "utils.h" +#include "coding.h" + +#include "constants.h" +#include "unpack.h" + +extern coding basic_codings[]; + +#define CODING_PRIVATE(spec) \ + int spec_ = spec; \ + int B = CODING_B(spec_); \ + int H = CODING_H(spec_); \ + int L = 256 - H; \ + int S = CODING_S(spec_); \ + int D = CODING_D(spec_) + +#define IS_NEG_CODE(S, codeVal) ((((int)(codeVal) + 1) & ((1 << S) - 1)) == 0) + +#define DECODE_SIGN_S1(ux) (((uint)(ux) >> 1) ^ -((int)(ux) & 1)) + +static int decode_sign(int S, uint ux) +{ // == Coding.decodeSign32 + assert(S > 0); + uint sigbits = (ux >> S); + if (IS_NEG_CODE(S, ux)) + return (int)(~sigbits); + else + return (int)(ux - sigbits); + // Note that (int)(ux-sigbits) can be negative, if ux is large enough. 
+} + +coding *coding::init() +{ + if (umax > 0) + return this; // already done + assert(spec != 0); // sanity + + // fill in derived fields + CODING_PRIVATE(spec); + + // Return nullptr if 'arb(BHSD)' parameter constraints are not met: + if (B < 1 || B > B_MAX) + return nullptr; + if (H < 1 || H > 256) + return nullptr; + if (S < 0 || S > 2) + return nullptr; + if (D < 0 || D > 1) + return nullptr; + if (B == 1 && H != 256) + return nullptr; // 1-byte coding must be fixed-size + if (B >= 5 && H == 256) + return nullptr; // no 5-byte fixed-size coding + + // first compute the range of the coding, in 64 bits + jlong range = 0; + { + jlong H_i = 1; + for (int i = 0; i < B; i++) + { + range += H_i; + H_i *= H; + } + range *= L; + range += H_i; + } + assert(range > 0); // no useless codings, please + + int this_umax; + + // now, compute min and max + if (range >= ((jlong)1 << 32)) + { + this_umax = INT_MAX_VALUE; + this->umin = INT_MIN_VALUE; + this->max = INT_MAX_VALUE; + this->min = INT_MIN_VALUE; + } + else + { + this_umax = (range > INT_MAX_VALUE) ? INT_MAX_VALUE : (int)range - 1; + this->max = this_umax; + this->min = this->umin = 0; + if (S != 0 && range != 0) + { + int Smask = (1 << S) - 1; + jlong maxPosCode = range - 1; + jlong maxNegCode = range - 1; + while (IS_NEG_CODE(S, maxPosCode)) + --maxPosCode; + while (!IS_NEG_CODE(S, maxNegCode)) + --maxNegCode; + int maxPos = decode_sign(S, (uint)maxPosCode); + if (maxPos < 0) + this->max = INT_MAX_VALUE; // 32-bit wraparound + else + this->max = maxPos; + if (maxNegCode < 0) + this->min = 0; // No negative codings at all. 
+ else + this->min = decode_sign(S, (uint)maxNegCode); + } + } + + assert(!(isFullRange | isSigned | isSubrange)); // init + if (min < 0) + this->isSigned = true; + if (max < INT_MAX_VALUE && range <= INT_MAX_VALUE) + this->isSubrange = true; + if (max == INT_MAX_VALUE && min == INT_MIN_VALUE) + this->isFullRange = true; + + // do this last, to reduce MT exposure (should have a membar too) + this->umax = this_umax; + + return this; +} + +coding *coding::findBySpec(int spec) +{ + for (coding *scan = &basic_codings[0];; scan++) + { + if (scan->spec == spec) + return scan->init(); + if (scan->spec == 0) + break; + } + coding *ptr = NEW(coding, 1); + CHECK_NULL_0(ptr); + coding *c = ptr->initFrom(spec); + if (c == nullptr) + { + ::free(ptr); + } + else + // else caller should free it... + c->isMalloc = true; + return c; +} + +coding *coding::findBySpec(int B, int H, int S, int D) +{ + if (B < 1 || B > B_MAX) + return nullptr; + if (H < 1 || H > 256) + return nullptr; + if (S < 0 || S > 2) + return nullptr; + if (D < 0 || D > 1) + return nullptr; + return findBySpec(CODING_SPEC(B, H, S, D)); +} + +void coding::free() +{ + if (isMalloc) + { + ::free(this); + } +} + +void coding_method::reset(value_stream *state) +{ + assert(state->rp == state->rplimit); // not in mid-stream, please + // assert(this == vs0.cm); + state[0] = vs0; + if (uValues != nullptr) + { + uValues->reset(state->helper()); + } +} + +uint coding::parse(byte *&rp, int B, int H) +{ + int L = 256 - H; + byte *ptr = rp; + // hand peel the i==0 part of the loop: + uint b_i = *ptr++ & 0xFF; + if (B == 1 || b_i < (uint)L) + { + rp = ptr; + return b_i; + } + uint sum = b_i; + uint H_i = H; + assert(B <= B_MAX); + for (int i = 2; i <= B_MAX; i++) + { // easy for compilers to unroll if desired + b_i = *ptr++ & 0xFF; + sum += b_i * H_i; + if (i == B || b_i < (uint)L) + { + rp = ptr; + return sum; + } + H_i *= H; + } + assert(false); + return 0; +} + +uint coding::parse_lgH(byte *&rp, int B, int H, int lgH) +{ + 
assert(H == (1 << lgH)); + int L = 256 - (1 << lgH); + byte *ptr = rp; + // hand peel the i==0 part of the loop: + uint b_i = *ptr++ & 0xFF; + if (B == 1 || b_i < (uint)L) + { + rp = ptr; + return b_i; + } + uint sum = b_i; + uint lg_H_i = lgH; + assert(B <= B_MAX); + for (int i = 2; i <= B_MAX; i++) + { // easy for compilers to unroll if desired + b_i = *ptr++ & 0xFF; + sum += b_i << lg_H_i; + if (i == B || b_i < (uint)L) + { + rp = ptr; + return sum; + } + lg_H_i += lgH; + } + assert(false); + return 0; +} + +static const char ERB[] = "EOF reading band"; + +void coding::parseMultiple(byte *&rp, int N, byte *limit, int B, int H) +{ + if (N < 0) + { + abort("bad value count"); + return; + } + byte *ptr = rp; + if (B == 1 || H == 256) + { + size_t len = (size_t)N * B; + if (len / B != (size_t)N || ptr + len > limit) + { + abort(ERB); + return; + } + rp = ptr + len; + return; + } + // Note: We assume rp has enough zero-padding. + int L = 256 - H; + int n = B; + while (N > 0) + { + ptr += 1; + if (--n == 0) + { + // end of encoding at B bytes, regardless of byte value + } + else + { + int b = (ptr[-1] & 0xFF); + if (b >= L) + { + // keep going, unless we find a byte < L + continue; + } + } + // found the last byte + N -= 1; + n = B; // reset length counter + // do an error check here + if (ptr > limit) + { + abort(ERB); + return; + } + } + rp = ptr; + return; +} + +bool value_stream::hasHelper() +{ + // If my coding method is a pop-style method, + // then I need a second value stream to transmit + // unfavored values. + // This can be determined by examining fValues. 
+ return cm->fValues != nullptr; +} + +void value_stream::init(byte *rp_, byte *rplimit_, coding *defc) +{ + rp = rp_; + rplimit = rplimit_; + sum = 0; + cm = nullptr; // no need in the simple case + setCoding(defc); +} + +void value_stream::setCoding(coding *defc) +{ + if (defc == nullptr) + { + unpack_abort("bad coding"); + defc = coding::findByIndex(_meta_canon_min); // random pick for recovery + } + + c = (*defc); + + // choose cmk + cmk = cmk_ERROR; + switch (c.spec) + { + case BYTE1_spec: + cmk = cmk_BYTE1; + break; + case CHAR3_spec: + cmk = cmk_CHAR3; + break; + case UNSIGNED5_spec: + cmk = cmk_UNSIGNED5; + break; + case DELTA5_spec: + cmk = cmk_DELTA5; + break; + case BCI5_spec: + cmk = cmk_BCI5; + break; + case BRANCH5_spec: + cmk = cmk_BRANCH5; + break; + default: + if (c.D() == 0) + { + switch (c.S()) + { + case 0: + cmk = cmk_BHS0; + break; + case 1: + cmk = cmk_BHS1; + break; + default: + cmk = cmk_BHS; + break; + } + } + else + { + if (c.S() == 1) + { + if (c.isFullRange) + cmk = cmk_BHS1D1full; + if (c.isSubrange) + cmk = cmk_BHS1D1sub; + } + if (cmk == cmk_ERROR) + cmk = cmk_BHSD1; + } + } +} + +static int getPopValue(value_stream *self, uint uval) +{ + if (uval > 0) + { + // note that the initial parse performed a range check + assert(uval <= (uint)self->cm->fVlength); + return self->cm->fValues[uval - 1]; + } + else + { + // take an unfavored value + return self->helper()->getInt(); + } +} + +int coding::sumInUnsignedRange(int x, int y) +{ + assert(isSubrange); + int range = (int)(umax + 1); + assert(range > 0); + x += y; + if (x != (int)((jlong)(x - y) + (jlong)y)) + { + // 32-bit overflow interferes with range reduction. 
+ // Back off from the overflow by adding a multiple of range: + if (x < 0) + { + x -= range; + assert(x >= 0); + } + else + { + x += range; + assert(x < 0); + } + } + if (x < 0) + { + x += range; + if (x >= 0) + return x; + } + else if (x >= range) + { + x -= range; + if (x < range) + return x; + } + else + { + // in range + return x; + } + // do it the hard way + x %= range; + if (x < 0) + x += range; + return x; +} + +static int getDeltaValue(value_stream *self, uint uval, bool isSubrange) +{ + assert((uint)(self->c.isSubrange) == (uint)isSubrange); + assert(self->c.isSubrange | self->c.isFullRange); + if (isSubrange) + return self->sum = self->c.sumInUnsignedRange(self->sum, (int)uval); + else + return self->sum += (int)uval; +} + +bool value_stream::hasValue() +{ + if (rp < rplimit) + return true; + if (cm == nullptr) + return false; + if (cm->next == nullptr) + return false; + cm->next->reset(this); + return hasValue(); +} + +int value_stream::getInt() +{ + if (rp >= rplimit) + { + // Advance to next coding segment. + if (rp > rplimit || cm == nullptr || cm->next == nullptr) + { + // Must perform this check and throw an exception on bad input. 
+ unpack_abort(ERB); + return 0; + } + cm->next->reset(this); + return getInt(); + } + + CODING_PRIVATE(c.spec); + uint uval; + enum + { + B5 = 5, + B3 = 3, + H128 = 128, + H64 = 64, + H4 = 4 + }; + switch (cmk) + { + case cmk_BHS: + assert(D == 0); + uval = coding::parse(rp, B, H); + if (S == 0) + return (int)uval; + return decode_sign(S, uval); + + case cmk_BHS0: + assert(S == 0 && D == 0); + uval = coding::parse(rp, B, H); + return (int)uval; + + case cmk_BHS1: + assert(S == 1 && D == 0); + uval = coding::parse(rp, B, H); + return DECODE_SIGN_S1(uval); + + case cmk_BYTE1: + assert(c.spec == BYTE1_spec); + assert(B == 1 && H == 256 && S == 0 && D == 0); + return *rp++ & 0xFF; + + case cmk_CHAR3: + assert(c.spec == CHAR3_spec); + assert(B == B3 && H == H128 && S == 0 && D == 0); + return coding::parse_lgH(rp, B3, H128, 7); + + case cmk_UNSIGNED5: + assert(c.spec == UNSIGNED5_spec); + assert(B == B5 && H == H64 && S == 0 && D == 0); + return coding::parse_lgH(rp, B5, H64, 6); + + case cmk_BHSD1: + assert(D == 1); + uval = coding::parse(rp, B, H); + if (S != 0) + uval = (uint)decode_sign(S, uval); + return getDeltaValue(this, uval, (bool)c.isSubrange); + + case cmk_BHS1D1full: + assert(S == 1 && D == 1 && c.isFullRange); + uval = coding::parse(rp, B, H); + uval = (uint)DECODE_SIGN_S1(uval); + return getDeltaValue(this, uval, false); + + case cmk_BHS1D1sub: + assert(S == 1 && D == 1 && c.isSubrange); + uval = coding::parse(rp, B, H); + uval = (uint)DECODE_SIGN_S1(uval); + return getDeltaValue(this, uval, true); + + case cmk_DELTA5: + assert(c.spec == DELTA5_spec); + assert(B == B5 && H == H64 && S == 1 && D == 1 && c.isFullRange); + uval = coding::parse_lgH(rp, B5, H64, 6); + sum += DECODE_SIGN_S1(uval); + return sum; + + case cmk_BCI5: + assert(c.spec == BCI5_spec); + assert(B == B5 && H == H4 && S == 0 && D == 0); + return coding::parse_lgH(rp, B5, H4, 2); + + case cmk_BRANCH5: + assert(c.spec == BRANCH5_spec); + assert(B == B5 && H == H4 && S == 2 && D == 0); + 
uval = coding::parse_lgH(rp, B5, H4, 2); + return decode_sign(S, uval); + + case cmk_pop: + uval = coding::parse(rp, B, H); + if (S != 0) + { + uval = (uint)decode_sign(S, uval); + } + if (D != 0) + { + assert(c.isSubrange | c.isFullRange); + if (c.isSubrange) + sum = c.sumInUnsignedRange(sum, (int)uval); + else + sum += (int)uval; + uval = (uint)sum; + } + return getPopValue(this, uval); + + case cmk_pop_BHS0: + assert(S == 0 && D == 0); + uval = coding::parse(rp, B, H); + return getPopValue(this, uval); + + case cmk_pop_BYTE1: + assert(c.spec == BYTE1_spec); + assert(B == 1 && H == 256 && S == 0 && D == 0); + return getPopValue(this, *rp++ & 0xFF); + + default: + break; + } + assert(false); + return 0; +} + +static int moreCentral(int x, int y) +{ // used to find end of Pop.{F} + // Suggested implementation from the Pack200 specification: + uint kx = (x >> 31) ^ (x << 1); + uint ky = (y >> 31) ^ (y << 1); + return (kx < ky ? x : y); +} +// static maybe_inline +// int moreCentral2(int x, int y, int min) { +// // Strict implementation of buggy 150.7 specification. +// // The bug is that the spec. says absolute-value ties are broken +// // in favor of positive numbers, but the suggested implementation +// // (also mentioned in the spec.) breaks ties in favor of negative numbers. +// if ((x + y) != 0) +// return min; +// else +// // return the other value, which breaks a tie in the positive direction +// return (x > y)? x: y; +//} + +static const byte *no_meta[] = {nullptr}; +#define NO_META (*(byte **)no_meta) +enum +{ + POP_FAVORED_N = -2 +}; + +// mode bits +#define DISABLE_RUN 1 // used immediately inside ACodee +#define DISABLE_POP 2 // used recursively in all pop sub-bands + +// This function knows all about meta-coding. 
+void coding_method::init(byte *&band_rp, byte *band_limit, byte *&meta_rp, int mode, + coding *defc, int N, intlist *valueSink) +{ + assert(N != 0); + + assert(u != nullptr); // must be pre-initialized + // if (u == nullptr) u = unpacker::current(); // expensive + + int op = (meta_rp == nullptr) ? _meta_default : (*meta_rp++ & 0xFF); + coding *foundc = nullptr; + coding *to_free = nullptr; + + if (op == _meta_default) + { + foundc = defc; + // and fall through + } + else if (op >= _meta_canon_min && op <= _meta_canon_max) + { + foundc = coding::findByIndex(op); + // and fall through + } + else if (op == _meta_arb) + { + int args = (*meta_rp++ & 0xFF); + // args = (D:[0..1] + 2*S[0..2] + 8*(B:[1..5]-1)) + int D = ((args >> 0) & 1); + int S = ((args >> 1) & 3); + int B = ((args >> 3) & -1) + 1; + // & (H[1..256]-1) + int H = (*meta_rp++ & 0xFF) + 1; + foundc = coding::findBySpec(B, H, S, D); + to_free = foundc; // findBySpec may dynamically allocate + if (foundc == nullptr) + { + abort("illegal arb. coding"); + return; + } + // and fall through + } + else if (op >= _meta_run && op < _meta_pop) + { + int args = (op - _meta_run); + // args: KX:[0..3] + 4*(KBFlag:[0..1]) + 8*(ABDef:[0..2]) + int KX = ((args >> 0) & 3); + int KBFlag = ((args >> 2) & 1); + int ABDef = ((args >> 3) & -1); + assert(ABDef <= 2); + // & KB: one of [0..255] if KBFlag=1 + int KB = (!KBFlag ? 3 : (*meta_rp++ & 0xFF)); + int K = (KB + 1) << (KX * 4); + int N2 = (N >= 0) ? N - K : N; + if (N == 0 || (N2 <= 0 && N2 != N)) + { + abort("illegal run encoding"); + return; + } + if ((mode & DISABLE_RUN) != 0) + { + abort("illegal nested run encoding"); + return; + } + + // & Enc{ ACode } if ADef=0 (ABDef != 1) + // No direct nesting of 'run' in ACode, but in BCode it's OK. 
+ int disRun = mode | DISABLE_RUN; + if (ABDef == 1) + { + this->init(band_rp, band_limit, NO_META, disRun, defc, K, valueSink); + } + else + { + this->init(band_rp, band_limit, meta_rp, disRun, defc, K, valueSink); + } + CHECK; + + // & Enc{ BCode } if BDef=0 (ABDef != 2) + coding_method *tail = U_NEW(coding_method, 1); + CHECK_NULL(tail); + tail->u = u; + + // The 'run' codings may be nested indirectly via 'pop' codings. + // This means that this->next may already be filled in, if + // ACode was of type 'pop' with a 'run' token coding. + // No problem: Just chain the upcoming BCode onto the end. + for (coding_method *self = this;; self = self->next) + { + if (self->next == nullptr) + { + self->next = tail; + break; + } + } + + if (ABDef == 2) + { + tail->init(band_rp, band_limit, NO_META, mode, defc, N2, valueSink); + } + else + { + tail->init(band_rp, band_limit, meta_rp, mode, defc, N2, valueSink); + } + // Note: The preceding calls to init should be tail-recursive. + + return; // done; no falling through + } + else if (op >= _meta_pop && op < _meta_limit) + { + int args = (op - _meta_pop); + // args: (FDef:[0..1]) + 2*UDef:[0..1] + 4*(TDefL:[0..11]) + int FDef = ((args >> 0) & 1); + int UDef = ((args >> 1) & 1); + int TDefL = ((args >> 2) & -1); + assert(TDefL <= 11); + int TDef = (TDefL > 0); + int TL = (TDefL <= 6) ? (2 << TDefL) : (256 - (4 << (11 - TDefL))); + int TH = (256 - TL); + if (N <= 0) + { + abort("illegal pop encoding"); + return; + } + if ((mode & DISABLE_POP) != 0) + { + abort("illegal nested pop encoding"); + return; + } + + // No indirect nesting of 'pop', but 'run' is OK. 
+ int disPop = DISABLE_POP; + + // & Enc{ FCode } if FDef=0 + int FN = POP_FAVORED_N; + assert(valueSink == nullptr); + intlist fValueSink; + fValueSink.init(); + coding_method fval; + BYTES_OF(fval).clear(); + fval.u = u; + if (FDef != 0) + { + fval.init(band_rp, band_limit, NO_META, disPop, defc, FN, &fValueSink); + } + else + { + fval.init(band_rp, band_limit, meta_rp, disPop, defc, FN, &fValueSink); + } + bytes fvbuf; + fValues = (u->saveTo(fvbuf, fValueSink.b), (int *)fvbuf.ptr); + fVlength = fValueSink.length(); // i.e., the parameter K + fValueSink.free(); + CHECK; + + // Skip the first {F} run in all subsequent passes. + // The next call to this->init(...) will set vs0.rp to point after the {F}. + + // & Enc{ TCode } if TDef=0 (TDefL==0) + if (TDef != 0) + { + coding *tcode = coding::findBySpec(1, 256); // BYTE1 + // find the most narrowly sufficient code: + for (int B = 2; B <= B_MAX; B++) + { + if (fVlength <= tcode->umax) + break; // found it + tcode->free(); + tcode = coding::findBySpec(B, TH); + CHECK_NULL(tcode); + } + if (!(fVlength <= tcode->umax)) + { + abort("pop.L value too small"); + return; + } + this->init(band_rp, band_limit, NO_META, disPop, tcode, N, nullptr); + tcode->free(); + } + else + { + this->init(band_rp, band_limit, meta_rp, disPop, defc, N, nullptr); + } + CHECK; + + // Count the number of zero tokens right now. + // Also verify that they are in bounds. 
+ int UN = 0; // one {U} for each zero in {T} + value_stream vs = vs0; + for (int i = 0; i < N; i++) + { + uint val = vs.getInt(); + if (val == 0) + UN += 1; + if (!(val <= (uint)fVlength)) + { + abort("pop token out of range"); + return; + } + } + vs.done(); + + // & Enc{ UCode } if UDef=0 + if (UN != 0) + { + uValues = U_NEW(coding_method, 1); + CHECK_NULL(uValues); + uValues->u = u; + if (UDef != 0) + { + uValues->init(band_rp, band_limit, NO_META, disPop, defc, UN, nullptr); + } + else + { + uValues->init(band_rp, band_limit, meta_rp, disPop, defc, UN, nullptr); + } + } + else + { + if (UDef == 0) + { + int uop = (*meta_rp++ & 0xFF); + if (uop > _meta_canon_max) + // %%% Spec. requires the more strict (uop != _meta_default). + abort("bad meta-coding for empty pop/U"); + } + } + + // Bug fix for 6259542 + // Last of all, adjust vs0.cmk to the 'pop' flavor + for (coding_method *self = this; self != nullptr; self = self->next) + { + coding_method_kind cmk2 = cmk_pop; + switch (self->vs0.cmk) + { + case cmk_BHS0: + cmk2 = cmk_pop_BHS0; + break; + case cmk_BYTE1: + cmk2 = cmk_pop_BYTE1; + break; + default: + break; + } + self->vs0.cmk = cmk2; + if (self != this) + { + assert(self->fValues == nullptr); // no double init + self->fValues = this->fValues; + self->fVlength = this->fVlength; + assert(self->uValues == nullptr); // must stay nullptr + } + } + + return; // done; no falling through + } + else + { + abort("bad meta-coding"); + return; + } + + // Common code here skips a series of values with one coding. + assert(foundc != nullptr); + + assert(vs0.cmk == cmk_ERROR); // no garbage, please + assert(vs0.rp == nullptr); // no garbage, please + assert(vs0.rplimit == nullptr); // no garbage, please + assert(vs0.sum == 0); // no garbage, please + + vs0.init(band_rp, band_limit, foundc); + + // Done with foundc. Free if necessary. 
+ if (to_free != nullptr) + { + to_free->free(); + to_free = nullptr; + } + foundc = nullptr; + + coding &c = vs0.c; + CODING_PRIVATE(c.spec); + // assert sane N + assert((uint)N < INT_MAX_VALUE || N == POP_FAVORED_N); + + // Look at the values, or at least skip over them quickly. + if (valueSink == nullptr) + { + // Skip and ignore values in the first pass. + c.parseMultiple(band_rp, N, band_limit, B, H); + } + else if (N >= 0) + { + // Pop coding, {F} sequence, initial run of values... + assert((mode & DISABLE_POP) != 0); + value_stream vs = vs0; + for (int n = 0; n < N; n++) + { + int val = vs.getInt(); + valueSink->add(val); + } + band_rp = vs.rp; + } + else + { + // Pop coding, {F} sequence, final run of values... + assert((mode & DISABLE_POP) != 0); + assert(N == POP_FAVORED_N); + int min = INT_MIN_VALUE; // farthest from the center + // min2 is based on the buggy specification of centrality in version 150.7 + // no known implementations transmit this value, but just in case... + // int min2 = INT_MIN_VALUE; + int last = 0; + // if there were initial runs, find the potential sentinels in them: + for (int i = 0; i < valueSink->length(); i++) + { + last = valueSink->get(i); + min = moreCentral(min, last); + // min2 = moreCentral2(min2, last, min); + } + value_stream vs = vs0; + for (;;) + { + int val = vs.getInt(); + if (valueSink->length() > 0 && (val == last || val == min)) //|| val == min2 + break; + valueSink->add(val); + CHECK; + last = val; + min = moreCentral(min, last); + // min2 = moreCentral2(min2, last, min); + } + band_rp = vs.rp; + } + CHECK; + + // Get an accurate upper limit now. + vs0.rplimit = band_rp; + vs0.cm = this; + + return; // success +} + +coding basic_codings[] = { + // This one is not a usable irregular coding, but is used by cp_Utf8_chars. 
+ CODING_INIT(3, 128, 0, 0), + + // Fixed-length codings: + CODING_INIT(1, 256, 0, 0), CODING_INIT(1, 256, 1, 0), CODING_INIT(1, 256, 0, 1), + CODING_INIT(1, 256, 1, 1), CODING_INIT(2, 256, 0, 0), CODING_INIT(2, 256, 1, 0), + CODING_INIT(2, 256, 0, 1), CODING_INIT(2, 256, 1, 1), CODING_INIT(3, 256, 0, 0), + CODING_INIT(3, 256, 1, 0), CODING_INIT(3, 256, 0, 1), CODING_INIT(3, 256, 1, 1), + CODING_INIT(4, 256, 0, 0), CODING_INIT(4, 256, 1, 0), CODING_INIT(4, 256, 0, 1), + CODING_INIT(4, 256, 1, 1), + + // Full-range variable-length codings: + CODING_INIT(5, 4, 0, 0), CODING_INIT(5, 4, 1, 0), CODING_INIT(5, 4, 2, 0), + CODING_INIT(5, 16, 0, 0), CODING_INIT(5, 16, 1, 0), CODING_INIT(5, 16, 2, 0), + CODING_INIT(5, 32, 0, 0), CODING_INIT(5, 32, 1, 0), CODING_INIT(5, 32, 2, 0), + CODING_INIT(5, 64, 0, 0), CODING_INIT(5, 64, 1, 0), CODING_INIT(5, 64, 2, 0), + CODING_INIT(5, 128, 0, 0), CODING_INIT(5, 128, 1, 0), CODING_INIT(5, 128, 2, 0), + CODING_INIT(5, 4, 0, 1), CODING_INIT(5, 4, 1, 1), CODING_INIT(5, 4, 2, 1), + CODING_INIT(5, 16, 0, 1), CODING_INIT(5, 16, 1, 1), CODING_INIT(5, 16, 2, 1), + CODING_INIT(5, 32, 0, 1), CODING_INIT(5, 32, 1, 1), CODING_INIT(5, 32, 2, 1), + CODING_INIT(5, 64, 0, 1), CODING_INIT(5, 64, 1, 1), CODING_INIT(5, 64, 2, 1), + CODING_INIT(5, 128, 0, 1), CODING_INIT(5, 128, 1, 1), CODING_INIT(5, 128, 2, 1), + + // Variable length subrange codings: + CODING_INIT(2, 192, 0, 0), CODING_INIT(2, 224, 0, 0), CODING_INIT(2, 240, 0, 0), + CODING_INIT(2, 248, 0, 0), CODING_INIT(2, 252, 0, 0), CODING_INIT(2, 8, 0, 1), + CODING_INIT(2, 8, 1, 1), CODING_INIT(2, 16, 0, 1), CODING_INIT(2, 16, 1, 1), + CODING_INIT(2, 32, 0, 1), CODING_INIT(2, 32, 1, 1), CODING_INIT(2, 64, 0, 1), + CODING_INIT(2, 64, 1, 1), CODING_INIT(2, 128, 0, 1), CODING_INIT(2, 128, 1, 1), + CODING_INIT(2, 192, 0, 1), CODING_INIT(2, 192, 1, 1), CODING_INIT(2, 224, 0, 1), + CODING_INIT(2, 224, 1, 1), CODING_INIT(2, 240, 0, 1), CODING_INIT(2, 240, 1, 1), + CODING_INIT(2, 248, 0, 1), 
CODING_INIT(2, 248, 1, 1), CODING_INIT(3, 192, 0, 0), + CODING_INIT(3, 224, 0, 0), CODING_INIT(3, 240, 0, 0), CODING_INIT(3, 248, 0, 0), + CODING_INIT(3, 252, 0, 0), CODING_INIT(3, 8, 0, 1), CODING_INIT(3, 8, 1, 1), + CODING_INIT(3, 16, 0, 1), CODING_INIT(3, 16, 1, 1), CODING_INIT(3, 32, 0, 1), + CODING_INIT(3, 32, 1, 1), CODING_INIT(3, 64, 0, 1), CODING_INIT(3, 64, 1, 1), + CODING_INIT(3, 128, 0, 1), CODING_INIT(3, 128, 1, 1), CODING_INIT(3, 192, 0, 1), + CODING_INIT(3, 192, 1, 1), CODING_INIT(3, 224, 0, 1), CODING_INIT(3, 224, 1, 1), + CODING_INIT(3, 240, 0, 1), CODING_INIT(3, 240, 1, 1), CODING_INIT(3, 248, 0, 1), + CODING_INIT(3, 248, 1, 1), CODING_INIT(4, 192, 0, 0), CODING_INIT(4, 224, 0, 0), + CODING_INIT(4, 240, 0, 0), CODING_INIT(4, 248, 0, 0), CODING_INIT(4, 252, 0, 0), + CODING_INIT(4, 8, 0, 1), CODING_INIT(4, 8, 1, 1), CODING_INIT(4, 16, 0, 1), + CODING_INIT(4, 16, 1, 1), CODING_INIT(4, 32, 0, 1), CODING_INIT(4, 32, 1, 1), + CODING_INIT(4, 64, 0, 1), CODING_INIT(4, 64, 1, 1), CODING_INIT(4, 128, 0, 1), + CODING_INIT(4, 128, 1, 1), CODING_INIT(4, 192, 0, 1), CODING_INIT(4, 192, 1, 1), + CODING_INIT(4, 224, 0, 1), CODING_INIT(4, 224, 1, 1), CODING_INIT(4, 240, 0, 1), + CODING_INIT(4, 240, 1, 1), CODING_INIT(4, 248, 0, 1), CODING_INIT(4, 248, 1, 1), + CODING_INIT(0, 0, 0, 0)}; +/* Element count of basic_codings minus one, i.e. the index of the final CODING_INIT(0, 0, 0, 0) entry. */ +#define BASIC_INDEX_LIMIT (int)(sizeof(basic_codings) / sizeof(basic_codings[0]) - 1) + +/* Look up a canonical coding by meta-coding index. For idx in [_meta_canon_min, _meta_canon_max] this returns the result of basic_codings[idx].init(); any other idx yields nullptr. index_limit exists only to feed the assert that the table size matches the canonical index range. */ +coding *coding::findByIndex(int idx) +{ + int index_limit = BASIC_INDEX_LIMIT; + assert(_meta_canon_min == 1 && _meta_canon_max + 1 == index_limit); + + if (idx >= _meta_canon_min && idx <= _meta_canon_max) + return basic_codings[idx].init(); + else + return nullptr; +} diff --git a/depends/pack200/src/coding.h b/depends/pack200/src/coding.h new file mode 100644 index 00000000..5f017b9e --- /dev/null +++ b/depends/pack200/src/coding.h @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2002, 2008, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +struct unpacker; + +#define INT_MAX_VALUE ((int)0x7FFFFFFF) +#define INT_MIN_VALUE ((int)0x80000000) + +#define CODING_SPEC(B, H, S, D) ((B) << 20 | (H) << 8 | (S) << 4 | (D) << 0) +#define CODING_B(x) ((x) >> 20 & 0xF) +#define CODING_H(x) ((x) >> 8 & 0xFFF) +#define CODING_S(x) ((x) >> 4 & 0xF) +#define CODING_D(x) ((x) >> 0 & 0xF) + +#define CODING_INIT(B, H, S, D) \ + { \ + CODING_SPEC(B, H, S, D), 0, 0, 0, 0, 0, 0, 0, 0 \ + } + +// For debugging purposes, some compilers do not like this and will complain. +// #define long do_not_use_C_long_types_use_jlong_or_int +// Use of the type "long" is problematic, do not use it. 
+ +/* Descriptor of one (B,H,S,D) byte coding. `spec` packs the four parameters as laid out by CODING_SPEC; the B()/H()/S()/D()/L() accessors unpack them again. NOTE(review): min/max/umin/umax and the is* flags start at 0 in CODING_INIT and are presumably filled in by init() -- confirm in coding.cpp. */ +struct coding +{ + int spec; // B,H,S,D + + // Handy values derived from the spec: + int B() + { + return CODING_B(spec); + } + int H() + { + return CODING_H(spec); + } + int S() + { + return CODING_S(spec); + } + int D() + { + return CODING_D(spec); + } + int L() + { + return 256 - CODING_H(spec); + } + int min, max; + int umin, umax; + char isSigned, isSubrange, isFullRange, isMalloc; + + coding *init(); // returns self or nullptr if error + coding *initFrom(int spec_) + { + assert(this->spec == 0); + this->spec = spec_; + return init(); + } + + static coding *findBySpec(int spec); + static coding *findBySpec(int B, int H, int S = 0, int D = 0); + static coding *findByIndex(int irregularCodingIndex); + + static uint parse(byte *&rp, int B, int H); + static uint parse_lgH(byte *&rp, int B, int H, int lgH); + static void parseMultiple(byte *&rp, int N, byte *limit, int B, int H); + + uint parse(byte *&rp) + { + return parse(rp, CODING_B(spec), CODING_H(spec)); + } + void parseMultiple(byte *&rp, int N, byte *limit) + { + parseMultiple(rp, N, limit, CODING_B(spec), CODING_H(spec)); + } + + bool canRepresent(int x) + { + return (x >= min && x <= max); + } + bool canRepresentUnsigned(int x) + { + return (x >= umin && x <= umax); + } + + int sumInUnsignedRange(int x, int y); + + int readFrom(byte *&rpVar, int *dbase); + void readArrayFrom(byte *&rpVar, int *dbase, int length, int *values); + /* Advance rpVar past `length` values by calling readArrayFrom with null dbase and values pointers. */ + void skipArrayFrom(byte *&rpVar, int length) + { + readArrayFrom(rpVar, nullptr, length, nullptr); + } + + void free(); // free self if isMalloc + + // error handling + static void abort(const char *msg = nullptr) + { + unpack_abort(msg); + } +}; + +enum coding_method_kind +{ + cmk_ERROR, + cmk_BHS, + cmk_BHS0, + cmk_BHS1, + cmk_BHSD1, + cmk_BHS1D1full, // isFullRange + cmk_BHS1D1sub, // isSubrange + + // special cases hand-optimized (~50% of all decoded values) + cmk_BYTE1, //(1,256) 6% + cmk_CHAR3, //(3,128) 7% + cmk_UNSIGNED5, //(5,64) 13% + cmk_DELTA5, //(5,64,1,1) 
5% + cmk_BCI5, //(5,4) 18% + cmk_BRANCH5, //(5,4,2) 4% + // cmk_UNSIGNED5H16, //(5,16) 5% + // cmk_UNSIGNED2H4, //(2,4) 6% + // cmk_DELTA4H8, //(4,8,1,1) 10% + // cmk_DELTA3H16, //(3,16,1,1) 9% + cmk_BHS_LIMIT, + cmk_pop, + cmk_pop_BHS0, + cmk_pop_BYTE1, + cmk_pop_LIMIT, + cmk_LIMIT +}; + +enum +{ + BYTE1_spec = CODING_SPEC(1, 256, 0, 0), + CHAR3_spec = CODING_SPEC(3, 128, 0, 0), + UNSIGNED4_spec = CODING_SPEC(4, 256, 0, 0), + UNSIGNED5_spec = CODING_SPEC(5, 64, 0, 0), + SIGNED5_spec = CODING_SPEC(5, 64, 1, 0), + DELTA5_spec = CODING_SPEC(5, 64, 1, 1), + UDELTA5_spec = CODING_SPEC(5, 64, 0, 1), + MDELTA5_spec = CODING_SPEC(5, 64, 2, 1), + BCI5_spec = CODING_SPEC(5, 4, 0, 0), + BRANCH5_spec = CODING_SPEC(5, 4, 2, 0) +}; + +enum +{ + B_MAX = 5, + C_SLOP = B_MAX * 10 +}; + +struct coding_method; + +// iterator under the control of a meta-coding +struct value_stream +{ + // current coding of values or values + coding c; // B,H,S,D,etc. + coding_method_kind cmk; // type of decoding needed + byte *rp; // read pointer + byte *rplimit; // final value of read pointer + int sum; // partial sum of all values so far (D=1 only) + coding_method *cm; // coding method that defines this stream + + void init(byte *band_rp, byte *band_limit, coding *defc); + void init(byte *band_rp, byte *band_limit, int spec) + { + init(band_rp, band_limit, coding::findBySpec(spec)); + } + + void setCoding(coding *c); + void setCoding(int spec) + { + setCoding(coding::findBySpec(spec)); + } + + // Parse and decode a single value. + int getInt(); + + // Parse and decode a single byte, with no error checks. + int getByte() + { + assert(cmk == cmk_BYTE1); + assert(rp < rplimit); + return *rp++ & 0xFF; + } + + // Used only for asserts. + bool hasValue(); + + void done() + { + assert(!hasValue()); + } + + // Sometimes a value stream has an auxiliary (but there are never two). 
+ value_stream *helper() + { + assert(hasHelper()); + return this + 1; + } + bool hasHelper(); + + // error handling + // inline void abort(const char* msg); + // inline void aborting(); +}; + +struct coding_method +{ + value_stream vs0; // initial state snapshot (vs0.cm == this once init() has succeeded) + + coding_method *next; // what to do when we run out of bytes + + // these fields are used for pop codes only: + int *fValues; // favored value array + int fVlength; // maximum favored value token + coding_method *uValues; // unfavored value stream + + // pointer to outer unpacker, for error checks etc. + unpacker *u; + + // Initialize a value stream. + void reset(value_stream *state); + + // Parse a band header, size a band, and initialize for further action. + // band_rp advances (but not past band_limit), and meta_rp advances. + // The mode gives context, such as "inside a pop". + // The defc and N are the incoming parameters to a meta-coding. + // The value sink is used to collect output values, when desired. + void init(byte *&band_rp, byte *band_limit, byte *&meta_rp, int mode, coding *defc, int N, + intlist *valueSink); + + // error handling + void abort(const char *msg) + { + unpack_abort(msg, u); + } + bool aborting() + { + return unpack_aborting(u); + } +}; + +// inline void value_stream::abort(const char* msg) { cm->abort(msg); } +// inline void value_stream::aborting() { cm->aborting(); } diff --git a/depends/pack200/src/constants.h b/depends/pack200/src/constants.h new file mode 100644 index 00000000..aeb3335d --- /dev/null +++ b/depends/pack200/src/constants.h @@ -0,0 +1,442 @@ +/* + * Copyright (c) 2001, 2005, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + Java Class Version numbers history + 1.0 to 1.3.X 45,3 + 1.4 to 1.4.X 46,0 + 1.5 to 1.5.X 49,0 + 1.6 to 1.6.X 50,0 NOTE Assumed for now +*/ + +// classfile constants +#define JAVA_MAGIC 0xCAFEBABE +#define JAVA_MIN_MAJOR_VERSION 45 +#define JAVA_MIN_MINOR_VERSION 3 +#define JAVA5_MAX_MAJOR_VERSION 49 +#define JAVA5_MAX_MINOR_VERSION 0 +// NOTE: Assume for now +#define JAVA6_MAX_MAJOR_VERSION 50 +#define JAVA6_MAX_MINOR_VERSION 0 + +// package file constants +#define JAVA_PACKAGE_MAGIC 0xCAFED00D +#define JAVA5_PACKAGE_MAJOR_VERSION 150 +#define JAVA5_PACKAGE_MINOR_VERSION 7 + +#define JAVA6_PACKAGE_MAJOR_VERSION 160 +#define JAVA6_PACKAGE_MINOR_VERSION 1 + +// magic number for gzip streams (for processing pack200-gzip data) +#define GZIP_MAGIC 0x1F8B0800 +#define GZIP_MAGIC_MASK 0xFFFFFF00 // last byte is variable "flg" field + +enum +{ + CONSTANT_None, + CONSTANT_Utf8, + CONSTANT_unused2, /* unused, was Unicode */ + CONSTANT_Integer, + CONSTANT_Float, + CONSTANT_Long, + CONSTANT_Double, + CONSTANT_Class, + CONSTANT_String, + CONSTANT_Fieldref, + CONSTANT_Methodref, + 
CONSTANT_InterfaceMethodref, + CONSTANT_NameandType, + CONSTANT_Signature = 13, + CONSTANT_All = 14, + CONSTANT_Limit = 15, + CONSTANT_NONE = 0, + CONSTANT_Literal = 20, // pseudo-tag for debugging + CONSTANT_Member = 21, // pseudo-tag for debugging + SUBINDEX_BIT = 64, // combined with CONSTANT_xxx for ixTag + ACC_STATIC = 0x0008, + ACC_IC_LONG_FORM = (1 << 16), // for ic_flags + CLASS_ATTR_SourceFile = 17, + CLASS_ATTR_EnclosingMethod = 18, + CLASS_ATTR_InnerClasses = 23, + CLASS_ATTR_ClassFile_version = 24, + FIELD_ATTR_ConstantValue = 17, + METHOD_ATTR_Code = 17, + METHOD_ATTR_Exceptions = 18, + METHOD_ATTR_RuntimeVisibleParameterAnnotations = 23, + METHOD_ATTR_RuntimeInvisibleParameterAnnotations = 24, + METHOD_ATTR_AnnotationDefault = 25, + CODE_ATTR_StackMapTable = 0, + CODE_ATTR_LineNumberTable = 1, + CODE_ATTR_LocalVariableTable = 2, + CODE_ATTR_LocalVariableTypeTable = 3, + // X_ATTR_Synthetic = 12, // ACC_SYNTHETIC; not predefined + X_ATTR_Signature = 19, + X_ATTR_Deprecated = 20, + X_ATTR_RuntimeVisibleAnnotations = 21, + X_ATTR_RuntimeInvisibleAnnotations = 22, + X_ATTR_OVERFLOW = 16, + X_ATTR_LIMIT_NO_FLAGS_HI = 32, + X_ATTR_LIMIT_FLAGS_HI = 63, + +#define O_ATTR_DO(F) \ + F(X_ATTR_OVERFLOW, 01) \ + /*(end)*/ +#define X_ATTR_DO(F) \ + O_ATTR_DO(F) F(X_ATTR_Signature, Signature) F(X_ATTR_Deprecated, Deprecated) \ + F(X_ATTR_RuntimeVisibleAnnotations, RuntimeVisibleAnnotations) \ + F(X_ATTR_RuntimeInvisibleAnnotations, RuntimeInvisibleAnnotations) \ + /*F(X_ATTR_Synthetic,Synthetic)*/ \ + /*(end)*/ +#define CLASS_ATTR_DO(F) \ + F(CLASS_ATTR_SourceFile, SourceFile) F(CLASS_ATTR_InnerClasses, InnerClasses) \ + F(CLASS_ATTR_EnclosingMethod, EnclosingMethod) F(CLASS_ATTR_ClassFile_version, 02) \ + /*(end)*/ +#define FIELD_ATTR_DO(F) \ + F(FIELD_ATTR_ConstantValue, ConstantValue) \ + /*(end)*/ +#define METHOD_ATTR_DO(F) \ + F(METHOD_ATTR_Code, Code) F(METHOD_ATTR_Exceptions, Exceptions) \ + F(METHOD_ATTR_RuntimeVisibleParameterAnnotations, 
RuntimeVisibleParameterAnnotations) \ + F(METHOD_ATTR_RuntimeInvisibleParameterAnnotations, \ + RuntimeInvisibleParameterAnnotations) \ + F(METHOD_ATTR_AnnotationDefault, AnnotationDefault) \ + /*(end)*/ +#define CODE_ATTR_DO(F) \ + F(CODE_ATTR_StackMapTable, StackMapTable) F(CODE_ATTR_LineNumberTable, LineNumberTable) \ + F(CODE_ATTR_LocalVariableTable, LocalVariableTable) \ + F(CODE_ATTR_LocalVariableTypeTable, LocalVariableTypeTable) \ + /*(end)*/ +#define ALL_ATTR_DO(F) \ + X_ATTR_DO(F) CLASS_ATTR_DO(F) FIELD_ATTR_DO(F) METHOD_ATTR_DO(F) CODE_ATTR_DO(F) \ + /*(end)*/ + + // attribute "context types" + ATTR_CONTEXT_CLASS = 0, + ATTR_CONTEXT_FIELD = 1, + ATTR_CONTEXT_METHOD = 2, + ATTR_CONTEXT_CODE = 3, + ATTR_CONTEXT_LIMIT = 4, + + // constants for parsed layouts (stored in band::le_kind) + EK_NONE = 0, // not a layout element + EK_INT = 'I', // B H I SH etc., also FH etc. + EK_BCI = 'P', // PH etc. + EK_BCID = 'Q', // POH etc. + EK_BCO = 'O', // OH etc. + EK_REPL = 'N', // NH[...] etc. + EK_REF = 'R', // RUH, RUNH, KQH, etc. + EK_UN = 'T', // TB(...)[...] etc. + EK_CASE = 'K', // (...)[...] etc. + EK_CALL = '(', // (0), (1), etc. + EK_CBLE = '[', // [...][...] etc. + NO_BAND_INDEX = -1, + + // File option bits, from LSB in ascending bit position. 
+ FO_DEFLATE_HINT = 1 << 0, + FO_IS_CLASS_STUB = 1 << 1, + + // Archive option bits, from LSB in ascending bit position: + AO_HAVE_SPECIAL_FORMATS = 1 << 0, + AO_HAVE_CP_NUMBERS = 1 << 1, + AO_HAVE_ALL_CODE_FLAGS = 1 << 2, + AO_3_UNUSED_MBZ = 1 << 3, + AO_HAVE_FILE_HEADERS = 1 << 4, + AO_DEFLATE_HINT = 1 << 5, + AO_HAVE_FILE_MODTIME = 1 << 6, + AO_HAVE_FILE_OPTIONS = 1 << 7, + AO_HAVE_FILE_SIZE_HI = 1 << 8, + AO_HAVE_CLASS_FLAGS_HI = 1 << 9, + AO_HAVE_FIELD_FLAGS_HI = 1 << 10, + AO_HAVE_METHOD_FLAGS_HI = 1 << 11, + AO_HAVE_CODE_FLAGS_HI = 1 << 12, +#define ARCHIVE_BIT_DO(F) \ + F(AO_HAVE_SPECIAL_FORMATS) F(AO_HAVE_CP_NUMBERS) F(AO_HAVE_ALL_CODE_FLAGS) \ + /*F(AO_3_UNUSED_MBZ)*/ \ + F(AO_HAVE_FILE_HEADERS) F(AO_DEFLATE_HINT) F(AO_HAVE_FILE_MODTIME) \ + F(AO_HAVE_FILE_OPTIONS) F(AO_HAVE_FILE_SIZE_HI) F(AO_HAVE_CLASS_FLAGS_HI) \ + F(AO_HAVE_FIELD_FLAGS_HI) F(AO_HAVE_METHOD_FLAGS_HI) F(AO_HAVE_CODE_FLAGS_HI) \ + /*(end)*/ + + // Constants for decoding attribute definition header bytes. + ADH_CONTEXT_MASK = 0x3, // (hdr & ADH_CONTEXT_MASK) + ADH_BIT_SHIFT = 0x2, // (hdr >> ADH_BIT_SHIFT) + ADH_BIT_IS_LSB = 1, // (hdr >> ADH_BIT_SHIFT) - ADH_BIT_IS_LSB +#define ADH_BYTE(context, index) ((((index) + ADH_BIT_IS_LSB) << ADH_BIT_SHIFT) + (context)) +#define ADH_BYTE_CONTEXT(adhb) ((adhb) & ADH_CONTEXT_MASK) +#define ADH_BYTE_INDEX(adhb) (((adhb) >> ADH_BIT_SHIFT) - ADH_BIT_IS_LSB) + NO_MODTIME = 0, // null modtime value (an integer 0, not a pointer) + + // meta-coding + _meta_default = 0, + _meta_canon_min = 1, + _meta_canon_max = 115, + _meta_arb = 116, + _meta_run = 117, + _meta_pop = 141, + _meta_limit = 189, + _meta_error = 255, + _xxx_1_end +}; + +// Bytecodes. 
+ +enum +{ + bc_nop = 0, // 0x00 + bc_aconst_null = 1, // 0x01 + bc_iconst_m1 = 2, // 0x02 + bc_iconst_0 = 3, // 0x03 + bc_iconst_1 = 4, // 0x04 + bc_iconst_2 = 5, // 0x05 + bc_iconst_3 = 6, // 0x06 + bc_iconst_4 = 7, // 0x07 + bc_iconst_5 = 8, // 0x08 + bc_lconst_0 = 9, // 0x09 + bc_lconst_1 = 10, // 0x0a + bc_fconst_0 = 11, // 0x0b + bc_fconst_1 = 12, // 0x0c + bc_fconst_2 = 13, // 0x0d + bc_dconst_0 = 14, // 0x0e + bc_dconst_1 = 15, // 0x0f + bc_bipush = 16, // 0x10 + bc_sipush = 17, // 0x11 + bc_ldc = 18, // 0x12 + bc_ldc_w = 19, // 0x13 + bc_ldc2_w = 20, // 0x14 + bc_iload = 21, // 0x15 + bc_lload = 22, // 0x16 + bc_fload = 23, // 0x17 + bc_dload = 24, // 0x18 + bc_aload = 25, // 0x19 + bc_iload_0 = 26, // 0x1a + bc_iload_1 = 27, // 0x1b + bc_iload_2 = 28, // 0x1c + bc_iload_3 = 29, // 0x1d + bc_lload_0 = 30, // 0x1e + bc_lload_1 = 31, // 0x1f + bc_lload_2 = 32, // 0x20 + bc_lload_3 = 33, // 0x21 + bc_fload_0 = 34, // 0x22 + bc_fload_1 = 35, // 0x23 + bc_fload_2 = 36, // 0x24 + bc_fload_3 = 37, // 0x25 + bc_dload_0 = 38, // 0x26 + bc_dload_1 = 39, // 0x27 + bc_dload_2 = 40, // 0x28 + bc_dload_3 = 41, // 0x29 + bc_aload_0 = 42, // 0x2a + bc_aload_1 = 43, // 0x2b + bc_aload_2 = 44, // 0x2c + bc_aload_3 = 45, // 0x2d + bc_iaload = 46, // 0x2e + bc_laload = 47, // 0x2f + bc_faload = 48, // 0x30 + bc_daload = 49, // 0x31 + bc_aaload = 50, // 0x32 + bc_baload = 51, // 0x33 + bc_caload = 52, // 0x34 + bc_saload = 53, // 0x35 + bc_istore = 54, // 0x36 + bc_lstore = 55, // 0x37 + bc_fstore = 56, // 0x38 + bc_dstore = 57, // 0x39 + bc_astore = 58, // 0x3a + bc_istore_0 = 59, // 0x3b + bc_istore_1 = 60, // 0x3c + bc_istore_2 = 61, // 0x3d + bc_istore_3 = 62, // 0x3e + bc_lstore_0 = 63, // 0x3f + bc_lstore_1 = 64, // 0x40 + bc_lstore_2 = 65, // 0x41 + bc_lstore_3 = 66, // 0x42 + bc_fstore_0 = 67, // 0x43 + bc_fstore_1 = 68, // 0x44 + bc_fstore_2 = 69, // 0x45 + bc_fstore_3 = 70, // 0x46 + bc_dstore_0 = 71, // 0x47 + bc_dstore_1 = 72, // 0x48 + bc_dstore_2 = 73, // 0x49 + 
bc_dstore_3 = 74, // 0x4a + bc_astore_0 = 75, // 0x4b + bc_astore_1 = 76, // 0x4c + bc_astore_2 = 77, // 0x4d + bc_astore_3 = 78, // 0x4e + bc_iastore = 79, // 0x4f + bc_lastore = 80, // 0x50 + bc_fastore = 81, // 0x51 + bc_dastore = 82, // 0x52 + bc_aastore = 83, // 0x53 + bc_bastore = 84, // 0x54 + bc_castore = 85, // 0x55 + bc_sastore = 86, // 0x56 + bc_pop = 87, // 0x57 + bc_pop2 = 88, // 0x58 + bc_dup = 89, // 0x59 + bc_dup_x1 = 90, // 0x5a + bc_dup_x2 = 91, // 0x5b + bc_dup2 = 92, // 0x5c + bc_dup2_x1 = 93, // 0x5d + bc_dup2_x2 = 94, // 0x5e + bc_swap = 95, // 0x5f + bc_iadd = 96, // 0x60 + bc_ladd = 97, // 0x61 + bc_fadd = 98, // 0x62 + bc_dadd = 99, // 0x63 + bc_isub = 100, // 0x64 + bc_lsub = 101, // 0x65 + bc_fsub = 102, // 0x66 + bc_dsub = 103, // 0x67 + bc_imul = 104, // 0x68 + bc_lmul = 105, // 0x69 + bc_fmul = 106, // 0x6a + bc_dmul = 107, // 0x6b + bc_idiv = 108, // 0x6c + bc_ldiv = 109, // 0x6d + bc_fdiv = 110, // 0x6e + bc_ddiv = 111, // 0x6f + bc_irem = 112, // 0x70 + bc_lrem = 113, // 0x71 + bc_frem = 114, // 0x72 + bc_drem = 115, // 0x73 + bc_ineg = 116, // 0x74 + bc_lneg = 117, // 0x75 + bc_fneg = 118, // 0x76 + bc_dneg = 119, // 0x77 + bc_ishl = 120, // 0x78 + bc_lshl = 121, // 0x79 + bc_ishr = 122, // 0x7a + bc_lshr = 123, // 0x7b + bc_iushr = 124, // 0x7c + bc_lushr = 125, // 0x7d + bc_iand = 126, // 0x7e + bc_land = 127, // 0x7f + bc_ior = 128, // 0x80 + bc_lor = 129, // 0x81 + bc_ixor = 130, // 0x82 + bc_lxor = 131, // 0x83 + bc_iinc = 132, // 0x84 + bc_i2l = 133, // 0x85 + bc_i2f = 134, // 0x86 + bc_i2d = 135, // 0x87 + bc_l2i = 136, // 0x88 + bc_l2f = 137, // 0x89 + bc_l2d = 138, // 0x8a + bc_f2i = 139, // 0x8b + bc_f2l = 140, // 0x8c + bc_f2d = 141, // 0x8d + bc_d2i = 142, // 0x8e + bc_d2l = 143, // 0x8f + bc_d2f = 144, // 0x90 + bc_i2b = 145, // 0x91 + bc_i2c = 146, // 0x92 + bc_i2s = 147, // 0x93 + bc_lcmp = 148, // 0x94 + bc_fcmpl = 149, // 0x95 + bc_fcmpg = 150, // 0x96 + bc_dcmpl = 151, // 0x97 + bc_dcmpg = 152, // 0x98 + bc_ifeq = 
153, // 0x99 + bc_ifne = 154, // 0x9a + bc_iflt = 155, // 0x9b + bc_ifge = 156, // 0x9c + bc_ifgt = 157, // 0x9d + bc_ifle = 158, // 0x9e + bc_if_icmpeq = 159, // 0x9f + bc_if_icmpne = 160, // 0xa0 + bc_if_icmplt = 161, // 0xa1 + bc_if_icmpge = 162, // 0xa2 + bc_if_icmpgt = 163, // 0xa3 + bc_if_icmple = 164, // 0xa4 + bc_if_acmpeq = 165, // 0xa5 + bc_if_acmpne = 166, // 0xa6 + bc_goto = 167, // 0xa7 + bc_jsr = 168, // 0xa8 + bc_ret = 169, // 0xa9 + bc_tableswitch = 170, // 0xaa + bc_lookupswitch = 171, // 0xab + bc_ireturn = 172, // 0xac + bc_lreturn = 173, // 0xad + bc_freturn = 174, // 0xae + bc_dreturn = 175, // 0xaf + bc_areturn = 176, // 0xb0 + bc_return = 177, // 0xb1 + bc_getstatic = 178, // 0xb2 + bc_putstatic = 179, // 0xb3 + bc_getfield = 180, // 0xb4 + bc_putfield = 181, // 0xb5 + bc_invokevirtual = 182, // 0xb6 + bc_invokespecial = 183, // 0xb7 + bc_invokestatic = 184, // 0xb8 + bc_invokeinterface = 185, // 0xb9 + bc_xxxunusedxxx = 186, // 0xba + bc_new = 187, // 0xbb + bc_newarray = 188, // 0xbc + bc_anewarray = 189, // 0xbd + bc_arraylength = 190, // 0xbe + bc_athrow = 191, // 0xbf + bc_checkcast = 192, // 0xc0 + bc_instanceof = 193, // 0xc1 + bc_monitorenter = 194, // 0xc2 + bc_monitorexit = 195, // 0xc3 + bc_wide = 196, // 0xc4 + bc_multianewarray = 197, // 0xc5 + bc_ifnull = 198, // 0xc6 + bc_ifnonnull = 199, // 0xc7 + bc_goto_w = 200, // 0xc8 + bc_jsr_w = 201, // 0xc9 + bc_bytecode_limit = 202 // 0xca +}; + +enum +{ + bc_end_marker = 255, + bc_byte_escape = 254, + bc_ref_escape = 253, + _first_linker_op = bc_getstatic, + _last_linker_op = bc_invokestatic, + _num_linker_ops = (_last_linker_op - _first_linker_op) + 1, + _self_linker_op = bc_bytecode_limit, + _self_linker_aload_flag = 1 * _num_linker_ops, + _self_linker_super_flag = 2 * _num_linker_ops, + _self_linker_limit = _self_linker_op + 4 * _num_linker_ops, + _invokeinit_op = _self_linker_limit, + _invokeinit_self_option = 0, + _invokeinit_super_option = 1, + _invokeinit_new_option = 2, + 
_invokeinit_limit = _invokeinit_op + 3, + _xldc_op = _invokeinit_limit, + bc_aldc = bc_ldc, + bc_cldc = _xldc_op + 0, + bc_ildc = _xldc_op + 1, + bc_fldc = _xldc_op + 2, + bc_aldc_w = bc_ldc_w, + bc_cldc_w = _xldc_op + 3, + bc_ildc_w = _xldc_op + 4, + bc_fldc_w = _xldc_op + 5, + bc_lldc2_w = bc_ldc2_w, + bc_dldc2_w = _xldc_op + 6, + _xldc_limit = _xldc_op + 7, + _xxx_3_end +}; diff --git a/depends/pack200/src/defines.h b/depends/pack200/src/defines.h new file mode 100644 index 00000000..63abae0a --- /dev/null +++ b/depends/pack200/src/defines.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +// random definitions + +#ifdef _MSC_VER +#include +#include +#else +#include +#endif + +#ifndef FULL +#define FULL 1 /* Adds <500 bytes to the zipped final product. 
*/ +#endif + +#if FULL // define this if you want debugging and/or compile-time attributes +#define IF_FULL(x) x +#else +#define IF_FULL(x) /*x*/ +#endif + +// Error messages that we have +#define ERROR_ENOMEM "Native allocation failed" +#define ERROR_FORMAT "Corrupted pack file" +#define ERROR_RESOURCE "Cannot extract resource file" +#define ERROR_OVERFLOW "Internal buffer overflow" +#define ERROR_INTERNAL "Internal error" + +#define LOGFILE_STDOUT "-" +#define LOGFILE_STDERR "" + +#define lengthof(array) (sizeof(array) / sizeof(array[0])) + +#define NEW(T, n) (T *) must_malloc((int)(scale_size(n, sizeof(T)))) +#define U_NEW(T, n) (T *) u->alloc(scale_size(n, sizeof(T))) +#define T_NEW(T, n) (T *) u->temp_alloc(scale_size(n, sizeof(T))) + +// bytes and byte arrays + +typedef unsigned int uint; + +#ifdef _MSC_VER +typedef LONGLONG jlong; +typedef DWORDLONG julong; +#define MKDIR(dir) mkdir(dir) +#define getpid() _getpid() +#define PATH_MAX MAX_PATH +#define dup2(a, b) _dup2(a, b) +#define strcasecmp(s1, s2) _stricmp(s1, s2) +#define tempname _tempname +#define sleep Sleep +#else +typedef signed char byte; +#ifdef _LP64 +typedef long jlong; +typedef long unsigned julong; +#else +typedef long long jlong; +typedef long long unsigned julong; +#endif +/* MKDIR(dir) creates dir with mode 0777 here. It expands to a plain expression with no trailing ';', matching the _MSC_VER variant above, so it can be used inside conditions and the caller supplies the ';'. */ +#define MKDIR(dir) mkdir(dir, 0777) +#endif + +/* Must cast to void *, then size_t, then int. */ +#define ptrlowbits(x) ((int)(size_t)(void *)(x)) + +/* Back and forth from jlong to pointer */ +#define ptr2jlong(x) ((jlong)(size_t)(void *)(x)) +#define jlong2ptr(x) ((void *)(size_t)(x)) + +// Keys used by Java: +#define UNPACK_DEFLATE_HINT "unpack.deflate.hint" + +#define COM_PREFIX "com.sun.java.util.jar.pack." 
+#define UNPACK_MODIFICATION_TIME COM_PREFIX "unpack.modification.time" +#define DEBUG_VERBOSE COM_PREFIX "verbose" + +#define ZIP_ARCHIVE_MARKER_COMMENT "PACK200" + +// The following are not known to the Java classes: +#define UNPACK_REMOVE_PACKFILE COM_PREFIX "unpack.remove.packfile" + +// Called from unpacker layers +#define _CHECK_DO(t, x) \ + { \ + if (t) \ + { \ + x; \ + } \ + } + +#define CHECK _CHECK_DO(aborting(), return) +#define CHECK_(y) _CHECK_DO(aborting(), return y) +#define CHECK_0 _CHECK_DO(aborting(), return 0) + +#define CHECK_NULL(p) _CHECK_DO((p) == nullptr, return) +#define CHECK_NULL_(y, p) _CHECK_DO((p) == nullptr, return y) +#define CHECK_NULL_0(p) _CHECK_DO((p) == nullptr, return 0) + +#define CHECK_COUNT(t) \ + if (t < 0) \ + { \ + abort("bad value count"); \ + } \ + CHECK + +#define STR_TRUE "true" +#define STR_FALSE "false" + +#define STR_TF(x) ((x) ? STR_TRUE : STR_FALSE) +#define BOOL_TF(x) (((x) != nullptr &&strcmp((x), STR_TRUE) == 0) ? true : false) + +#define DEFAULT_ARCHIVE_MODTIME 1060000000 // Aug 04, 2003 5:26 PM PDT diff --git a/depends/pack200/src/main.cpp b/depends/pack200/src/main.cpp new file mode 100644 index 00000000..ad46a2a2 --- /dev/null +++ b/depends/pack200/src/main.cpp @@ -0,0 +1,489 @@ +/* + * Copyright (c) 2003, 2008, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include "defines.h"
+#include "bytes.h"
+#include "utils.h"
+#include "coding.h"
+#include "bands.h"
+
+#include "constants.h"
+
+#include "zip.h"
+
+#include "unpack.h"
+
+// Program entry point: forwards the command line to unpacker::run() and
+// uses its return value as the process exit status.
+int main(int argc, char **argv)
+{
+    return unpacker::run(argc, argv);
+}
+
+// The unpacker registered through set_current_unpacker(); null when none is.
+unpacker *unpacker::non_mt_current = nullptr;
+
+// Returns the registered unpacker (or null).
+unpacker *unpacker::current()
+{
+    return non_mt_current;
+}
+
+// Registers u as the unpacker returned by unpacker::current(); pass nullptr
+// to clear the registration.
+static void set_current_unpacker(unpacker *u)
+{
+    unpacker::non_mt_current = u;
+}
+
+// Callback for fetching data, Unix style.
+// Input callback installed by unpacker::run().
+// Reads into buf until at least minlen bytes have arrived, never more than
+// maxlen, and returns the number of bytes actually read. The source is
+// u->infileptr when it is set, otherwise u->infileno (stdin on WIN32).
+// A short count is returned on EOF or on any error other than EINTR.
+static jlong read_input_via_stdio(unpacker *u, void *buf, jlong minlen, jlong maxlen)
+{
+    assert(minlen <= maxlen); // don't talk nonsense
+    jlong numread = 0;
+    char *bufptr = (char *)buf;
+    while (numread < minlen)
+    {
+        // read available input, up to buf.length or maxlen
+        int readlen = (1 << 16);
+        if (readlen > (maxlen - numread))
+            readlen = (int)(maxlen - numread);
+        int nr = 0;
+        // Clear errno first: fread()/read() leave it untouched on EOF, so a
+        // stale EINTR from an earlier call would otherwise make the retry
+        // test below spin forever at end of input.
+        errno = 0;
+        if (u->infileptr != nullptr)
+        {
+            nr = (int)fread(bufptr, 1, readlen, u->infileptr);
+        }
+        else
+        {
+#ifndef WIN32
+            // we prefer unbuffered inputs
+            nr = (int)read(u->infileno, bufptr, readlen);
+#else
+            nr = (int)fread(bufptr, 1, readlen, stdin);
+#endif
+        }
+        if (nr <= 0)
+        {
+            // Only an interrupted call is retried; EOF and real errors stop.
+            if (errno != EINTR)
+                break;
+            nr = 0;
+        }
+        numread += nr;
+        bufptr += nr;
+        assert(numread <= maxlen);
+    }
+    // fprintf(u->errstrm, "readInputFn(%d,%d) => %d\n",
+    // (int)minlen, (int)maxlen, (int)numread);
+    return numread;
+}
+
+// Special results of read_magic().
+enum
+{
+    EOF_MAGIC = 0,
+    BAD_MAGIC = -1
+};
+
+// Reads peeklen (always 4) bytes through u->read_input_fn into peek and
+// returns them combined big-endian into an int. Returns EOF_MAGIC when
+// nothing could be read and BAD_MAGIC on a partial read.
+static int read_magic(unpacker *u, char peek[], int peeklen)
+{
+    assert(peeklen == 4); // magic numbers are always 4 bytes
+    jlong nr = (u->read_input_fn)(u, peek, peeklen, peeklen);
+    if (nr != peeklen)
+    {
+        return (nr == 0) ? EOF_MAGIC : BAD_MAGIC;
+    }
+    int magic = 0;
+    for (int i = 0; i < peeklen; i++)
+    {
+        magic <<= 8;
+        magic += peek[i] & 0xFF;
+    }
+    return magic;
+}
+
+// Allocates a gunzip filter and initializes it with u. The pointer is not
+// kept here; run() reaches the filter through u->gzin afterwards.
+static void setup_gzin(unpacker *u)
+{
+    gunzip *gzin = NEW(gunzip, 1);
+    gzin->init(u);
+}
+
+// Returns the part of progname after the last '/', or progname itself when
+// it contains no '/'.
+static const char *nbasename(const char *progname)
+{
+    const char *slash = strrchr(progname, '/');
+    if (slash != nullptr)
+        progname = ++slash;
+    return progname;
+}
+
+// Help text, one fprintf format per entry, terminated by nullptr.
+// Every entry is printed with the program name as its only argument.
+static const char *usage_lines[] = {
+    "Usage: %s [-opt... | --option=value]... x.pack[.gz] y.jar\n", "\n", "Unpacking Options\n",
+    " -H{h}, --deflate-hint={h} override transmitted deflate hint: true, false, or keep "
+    "(default)\n",
+    " -r, --remove-pack-file remove input file after unpacking\n",
+    " -v, --verbose increase program verbosity\n",
+    " -q, --quiet set verbosity to lowest level\n",
+    " -l{F}, --log-file={F} output to the given log file, or '-' for standard output "
+    "(default)\n",
+    " -?, -h, --help print this message\n",
+    " -J{X} Java VM argument (ignored)\n", nullptr};
+
+// Prints usage to stderr: every line of usage_lines when full is true,
+// otherwise only the first line followed by a pointer to --help.
+static void usage(unpacker *u, const char *progname, bool full = false)
+{
+    // WinMain does not set argv[0] to the program name
+    progname = (progname != nullptr) ? nbasename(progname) : "unpack200";
+    for (int i = 0; usage_lines[i] != nullptr; i++)
+    {
+        fprintf(stderr, usage_lines[i], progname);
+        if (!full)
+        {
+            fprintf(stderr, "(For more information, run %s --help .)\n", progname);
+            break;
+        }
+    }
+}
+
+// argument parsing
+// Builds one nullptr-terminated argument vector: first the whitespace
+// separated words of $UNPACK200_FLAGS, then copies of argv[1..argc-1].
+// envargc receives the number of entries that came from the environment.
+// The strdup'ed storage is intentionally never released: the returned
+// vector points into it for the rest of the run.
+static char **init_args(int argc, char **argv, int &envargc)
+{
+    const char *env = getenv("UNPACK200_FLAGS");
+    ptrlist envargs;
+    envargs.init();
+    if (env != nullptr)
+    {
+        char *buf = (char *)strdup(env);
+        const char *delim = "\n\t ";
+        for (char *p = strtok(buf, delim); p != nullptr; p = strtok(nullptr, delim))
+        {
+            envargs.add(p);
+        }
+    }
+    // allocate extra margin at both head and tail
+    char **argp = NEW(char *, envargs.length() + argc + 1);
+    char **argp0 = argp;
+    int i;
+    for (i = 0; i < envargs.length(); i++)
+    {
+        *argp++ = (char *)envargs.get(i);
+    }
+    for (i = 1; i < argc; i++)
+    {
+        // note: skip argv[0] (program name)
+        *argp++ = (char *)strdup(argv[i]); // make a scratch copy
+    }
+    *argp = nullptr; // sentinel
+    envargc = envargs.length(); // report this count to next_arg
+    envargs.free();
+    return argp0;
+}
+
+// Prefix compare: returns 0 exactly when str starts with pfx.
+static int strpcmp(const char *str, const char *pfx)
+{
+    return strncmp(str, pfx, strlen(pfx));
+}
+
+// Single-letter options without (flag_opts) and with (string_opts) a value.
+static const char flag_opts[] = "vqrVh?";
+static const char string_opts[] = "HlJ";
+
+// Decodes the option at *argp and advances argp past what it consumed.
+// Returns the option letter, 0 at the end of the option list (non-option
+// word, bare "-", "--", or the nullptr sentinel), or -1 for an unknown
+// option or a value-taking option that has no value.
+// For value-taking options *argp is left pointing at the value.
+static int next_arg(char **&argp)
+{
+    char *arg = *argp;
+    if (arg == nullptr || arg[0] != '-')
+    { // end of option list
+        return 0;
+    }
+    // printf("opt: %s\n", arg);
+    char ach = arg[1];
+    if (ach == '\0')
+    {
+        // ++argp; // do not pop this arg
+        return 0; // bare "-" is stdin/stdout
+    }
+    else if (arg[1] == '-')
+    { // --foo option
+        // Each key is the short option letter followed by the long name;
+        // a trailing '=' marks an option that carries a value.
+        static const char *keys[] = {"Hdeflate-hint=", "vverbose", "qquiet",
+                                     "rremove-pack-file", "llog-file=", "Vversion",
+                                     "hhelp", nullptr};
+        if (arg[2] == '\0')
+        { // end of option list
+            ++argp; // pop the "--"
+            return 0;
+        }
+        for (int i = 0; keys[i] != nullptr; i++)
+        {
+            const char *key = keys[i];
+            char kch = *key++;
+            if (strchr(key, '=') == nullptr)
+            {
+                if (!strcmp(arg + 2, key))
+                {
+                    ++argp; // pop option arg
+                    return kch;
+                }
+            }
+            else
+            {
+                if (!strpcmp(arg + 2, key))
+                {
+                    *argp += 2 + strlen(key); // remove "--"+key from arg
+                    return kch;
+                }
+            }
+        }
+    }
+    else if (strchr(flag_opts, ach) != nullptr)
+    { // plain option
+        if (arg[2] == '\0')
+        {
+            ++argp;
+        }
+        else
+        {
+            // in-place edit of "-vxyz" to "-xyz"
+            arg += 1; // skip original '-'
+            arg[0] = '-';
+            *argp = arg;
+        }
+        // printf(" key => %c\n", ach);
+        return ach;
+    }
+    else if (strchr(string_opts, ach) != nullptr)
+    { // argument-bearing option
+        if (arg[2] == '\0')
+        {
+            if (argp[1] == nullptr)
+                return -1; // no next arg
+            ++argp; // leave the argument in place
+        }
+        else
+        {
+            // in-place edit of "-Hxyz" to "xyz"
+            arg += 2; // skip original '-H'
+            *argp = arg;
+        }
+        // printf(" key => %c\n", ach);
+        return ach;
+    }
+    return -1; // bad argument
+}
+
+static const char sccsver[] = "1.30, 07/05/05";
+
+// Usage: unpackage input.pack output.jar
+// Command-line driver: parses options (environment first, then argv), opens
+// the input ("-" selects stdin) and the output jar ("-" selects stdout),
+// unpacks every segment found in the input and writes its files to the jar.
+// Returns 0 on success and 1 when the unpacker aborted; usage errors and an
+// unreadable input file end the process directly through exit().
+int unpacker::run(int argc, char **argv)
+{
+    unpacker u;
+    u.init(read_input_via_stdio);
+    set_current_unpacker(&u);
+
+    jar jarout;
+    jarout.init(&u);
+
+    int envargc = 0;
+    char **argbuf = init_args(argc, argv, envargc);
+    char **arg0 = argbuf + envargc; // first argument that came from argv
+    char **argp = argbuf;
+
+    int verbose = 0;
+    char *logfile = nullptr;
+
+    for (;;)
+    {
+        // Keep a copy for diagnostics: next_arg() edits the argument in place.
+        const char *arg = (*argp == nullptr) ? "" : u.saveStr(*argp);
+        bool isenvarg = (argp < arg0);
+        int ach = next_arg(argp);
+        bool hasoptarg = (ach != 0 && strchr(string_opts, ach) != nullptr);
+        if (ach == 0 && argp >= arg0)
+            break;
+        if (isenvarg && argp == arg0 && hasoptarg)
+            ach = 0; // don't pull from cmdline
+        switch (ach)
+        {
+        case 'H':
+            u.set_option(UNPACK_DEFLATE_HINT, *argp++);
+            break;
+        case 'v':
+            ++verbose;
+            break;
+        case 'q':
+            verbose = 0;
+            break;
+        case 'r':
+            u.set_option(UNPACK_REMOVE_PACKFILE, "1");
+            break;
+        case 'l':
+            logfile = *argp++;
+            break;
+        case 'J':
+            argp += 1;
+            break; // skip ignored -Jxxx parameter
+
+        case 'h':
+        case '?':
+            usage(&u, argv[0], true);
+            exit(1);
+
+        default:
+            const char *inenv = isenvarg ? " in ${UNPACK200_FLAGS}" : "";
+            if (hasoptarg)
+                fprintf(stderr, "Missing option string%s: %s\n", inenv, arg);
+            else
+                fprintf(stderr, "Unrecognized argument%s: %s\n", inenv, arg);
+            usage(&u, argv[0]);
+            exit(2);
+        }
+    }
+
+    if (verbose != 0)
+    {
+        u.set_option(DEBUG_VERBOSE, u.saveIntStr(verbose));
+    }
+
+    // Exactly two positional arguments must remain: input and output.
+    const char *source_file = *argp++;
+    const char *destination_file = *argp++;
+
+    if (source_file == nullptr || destination_file == nullptr || *argp != nullptr)
+    {
+        usage(&u, argv[0]);
+        exit(2);
+    }
+
+    if (verbose != 0)
+    {
+        fprintf(stderr, "Unpacking from %s to %s\n", source_file, destination_file);
+    }
+    bool &remove_source = u.remove_packfile;
+
+    if (strcmp(source_file, "-") == 0)
+    {
+        remove_source = false;
+        u.infileno = fileno(stdin);
+    }
+    else
+    {
+        u.infileptr = fopen(source_file, "rb");
+        if (u.infileptr == nullptr)
+        {
+            fprintf(stderr, "Error: Could not open input file: %s\n", source_file);
+            exit(3); // Called only from the native standalone unpacker
+        }
+    }
+
+    if (strcmp(destination_file, "-") == 0)
+    {
+        jarout.jarfp = stdout;
+    }
+    else
+    {
+        jarout.openJarFile(destination_file);
+        assert(jarout.jarfp != nullptr);
+    }
+
+    if (verbose != 0)
+        u.dump_options();
+
+    char peek[4];
+    int magic;
+
+    // check for GZIP input
+    magic = read_magic(&u, peek, (int)sizeof(peek));
+    if ((magic & GZIP_MAGIC_MASK) == GZIP_MAGIC)
+    {
+        // Oops; must slap an input filter on this data.
+        setup_gzin(&u);
+        u.gzin->start(magic);
+        if (!u.aborting())
+        {
+            u.start();
+        }
+    }
+    else
+    {
+        u.start(peek, sizeof(peek));
+    }
+
+    // Note: The checks to u.aborting() are necessary to gracefully
+    // terminate processing when the first segment throws an error.
+
+    for (;;)
+    {
+        if (u.aborting())
+            break;
+
+        // Each trip through this loop unpacks one segment
+        // and then resets the unpacker.
+        for (unpacker::file *filep; (filep = u.get_next_file()) != nullptr;)
+        {
+            if (u.aborting())
+                break;
+            u.write_file_to_jar(filep);
+        }
+        if (u.aborting())
+            break;
+
+        // Peek ahead for more data.
+        magic = read_magic(&u, peek, (int)sizeof(peek));
+        if (magic != (int)JAVA_PACKAGE_MAGIC)
+        {
+            if (magic != EOF_MAGIC)
+                u.abort("garbage after end of pack archive");
+            break; // all done
+        }
+
+        // Release all storage from parsing the old segment.
+        u.reset();
+
+        // Restart, beginning with the peek-ahead.
+        u.start(peek, sizeof(peek));
+    }
+
+    int status = 0;
+    if (u.aborting())
+    {
+        fprintf(stderr, "Error: %s\n", u.get_abort_message());
+        status = 1;
+    }
+
+    if (u.infileptr != nullptr)
+    {
+        fclose(u.infileptr);
+        u.infileptr = nullptr;
+    }
+
+    if (!u.aborting() && remove_source)
+        remove(source_file);
+
+    if (verbose != 0)
+    {
+        fprintf(stderr, "unpacker completed with status=%d\n", status);
+    }
+
+    u.finish();
+
+    u.free(); // tidy up malloc blocks
+    set_current_unpacker(nullptr); // clean up global pointer
+
+    return status;
+}
diff --git a/depends/pack200/src/unpack.cpp b/depends/pack200/src/unpack.cpp
new file mode 100644
index 00000000..722d67b5
--- /dev/null
+++ b/depends/pack200/src/unpack.cpp
@@ -0,0 +1,5105 @@
+/*
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +// -*- C++ -*- +// Program for unpacking specially compressed Java packages. +// John R. Rose + +/* + * When compiling for a 64bit LP64 system (longs and pointers being 64bits), + * the printf format %ld is correct and use of %lld will cause warning + * errors from some compilers (gcc/g++). + * _LP64 can be explicitly set (used on Linux). + * Solaris compilers will define __sparcv9 or __x86_64 on 64bit compilations. 
+ */
+// The printf format has to follow the jlong typedef in defines.h: jlong is
+// `long` only for non-MSVC compilers that define _LP64 and `long long`
+// everywhere else. Keying on __x86_64 would pick "%ld" for a `long long`
+// argument on 64-bit targets whose `long` is 32 bits wide (e.g. Win64).
+#if defined(_LP64) && !defined(_MSC_VER)
+#define LONG_LONG_FORMAT "%ld"
+#define LONG_LONG_HEX_FORMAT "%lx"
+#else
+#define LONG_LONG_FORMAT "%lld"
+#define LONG_LONG_HEX_FORMAT "%016llx"
+#endif
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "defines.h"
+#include "bytes.h"
+#include "utils.h"
+#include "coding.h"
+#include "bands.h"
+
+#include "constants.h"
+
+#include "zip.h"
+
+#include "unpack.h"
+
+// tags, in canonical order:
+static const byte TAGS_IN_ORDER[] = {
+    CONSTANT_Utf8, CONSTANT_Integer, CONSTANT_Float, CONSTANT_Long,
+    CONSTANT_Double, CONSTANT_String, CONSTANT_Class, CONSTANT_Signature,
+    CONSTANT_NameandType, CONSTANT_Fieldref, CONSTANT_Methodref, CONSTANT_InterfaceMethodref};
+#define N_TAGS_IN_ORDER (sizeof TAGS_IN_ORDER)
+
+// REQUESTED must be -2 for u2 and REQUESTED_LDC must be -1 for u1
+enum
+{
+    NOT_REQUESTED = 0,
+    REQUESTED = -2,
+    REQUESTED_LDC = -1
+};
+
+#define NO_INORD ((uint) - 1)
+
+// One constant pool entry: a tag, references to other entries and a value.
+// The accessors assert that the tag fits the question being asked.
+struct entry
+{
+    byte tag;
+
+#if 0
+    byte bits;
+    enum {
+        //EB_EXTRA = 1,
+        EB_SUPER = 2
+    };
+#endif
+    unsigned short nrefs; // pack w/ tag
+
+    int outputIndex;
+    uint inord; // &cp.entries[cp.tag_base[this->tag]+this->inord] == this
+
+    entry **refs;
+
+    // put last to pack best
+    union
+    {
+        bytes b;
+        int i;
+        jlong l;
+    } value;
+
+    void requestOutputIndex(cpool &cp, int req = REQUESTED);
+
+    // Output index; only valid once one has been assigned (> NOT_REQUESTED).
+    int getOutputIndex()
+    {
+        assert(outputIndex > NOT_REQUESTED);
+        return outputIndex;
+    }
+
+    // The refnum-th referenced entry.
+    entry *ref(int refnum)
+    {
+        assert((uint)refnum < nrefs);
+        return refs[refnum];
+    }
+
+    // The Utf8 value as a C string (asserted to contain no embedded NUL).
+    const char *utf8String()
+    {
+        assert(tagMatches(CONSTANT_Utf8));
+        assert(value.b.len == strlen((const char *)value.b.ptr));
+        return (const char *)value.b.ptr;
+    }
+
+    entry *className()
+    {
+        assert(tagMatches(CONSTANT_Class));
+        return ref(0);
+    }
+
+    entry *memberClass()
+    {
+        assert(tagMatches(CONSTANT_Member));
+        return ref(0);
+    }
+
+    entry *memberDescr()
+    {
+        assert(tagMatches(CONSTANT_Member));
+        return ref(1);
+    }
+
+    entry *descrName()
+    {
+        assert(tagMatches(CONSTANT_NameandType));
+        return ref(0);
+    }
+
+    entry *descrType()
+    {
+        assert(tagMatches(CONSTANT_NameandType));
+        return ref(1);
+    }
+
+    int typeSize();
+
+    bytes &asUtf8();
+    int asInteger()
+    {
+        assert(tag == CONSTANT_Integer);
+        return value.i;
+    }
+
+    bool isUtf8(bytes &b)
+    {
+        return tagMatches(CONSTANT_Utf8) && value.b.equals(b);
+    }
+
+    bool isDoubleWord()
+    {
+        return tag == CONSTANT_Double || tag == CONSTANT_Long;
+    }
+
+    // True when the tags are equal; a Signature entry also matches Utf8.
+    bool tagMatches(byte tag2)
+    {
+        return (tag2 == tag) || (tag2 == CONSTANT_Utf8 && tag == CONSTANT_Signature);
+    }
+};
+
+// Returns the i-th entry of this index, or nullptr when i is out of range.
+// base1 addresses the entries directly, base2 through a table of pointers.
+entry *cpindex::get(uint i)
+{
+    if (i >= len)
+        return nullptr;
+    else if (base1 != nullptr)
+        // primary index
+        return &base1[i];
+    else
+        // secondary index
+        return base2[i];
+}
+
+inline bytes &entry::asUtf8()
+{
+    assert(tagMatches(CONSTANT_Utf8));
+    return value.b;
+}
+
+// Size, in slots, of the type described by this Utf8 descriptor.
+// A field descriptor yields 2 for 'D'/'J' and 1 otherwise. A method
+// descriptor (leading '(') yields the sum over its parameters, again with
+// 'D'/'J' counting 2 and everything else, arrays included, counting 1.
+// A malformed method descriptor calls unpack_abort("bad data") and yields 0.
+int entry::typeSize()
+{
+    assert(tagMatches(CONSTANT_Utf8));
+    const char *sigp = (char *)value.b.ptr;
+    switch (*sigp)
+    {
+    case '(':
+        sigp++;
+        break; // skip opening '('
+    case 'D':
+    case 'J':
+        return 2; // double field
+    default:
+        return 1; // field
+    }
+    int siglen = 0;
+    for (;;)
+    {
+        int ch = *sigp++;
+        switch (ch)
+        {
+        case '\0':
+            // Descriptor ended before the closing ')': stop here instead of
+            // scanning past the end of the string.
+            unpack_abort("bad data");
+            return 0;
+        case 'D':
+        case 'J':
+            siglen += 1;
+            break;
+        case '[':
+            // Skip rest of array info.
+            while (ch == '[')
+            {
+                ch = *sigp++;
+            }
+            if (ch == '\0')
+            {
+                // Truncated right after the array brackets.
+                unpack_abort("bad data");
+                return 0;
+            }
+            if (ch != 'L')
+                break;
+        // else fall through
+        case 'L':
+            sigp = strchr(sigp, ';');
+            if (sigp == nullptr)
+            {
+                unpack_abort("bad data");
+                return 0;
+            }
+            sigp += 1;
+            break;
+        case ')': // closing ')'
+            return siglen;
+        }
+        siglen += 1;
+    }
+}
+
+// member_indexes holds two consecutive indexes per class, addressed by the
+// class entry's inord: the field index first, then the method index.
+inline cpindex *cpool::getFieldIndex(entry *classRef)
+{
+    assert(classRef->tagMatches(CONSTANT_Class));
+    assert((uint)classRef->inord < (uint)tag_count[CONSTANT_Class]);
+    return &member_indexes[classRef->inord * 2 + 0];
+}
+inline cpindex *cpool::getMethodIndex(entry *classRef)
+{
+    assert(classRef->tagMatches(CONSTANT_Class));
+    assert((uint)classRef->inord < (uint)tag_count[CONSTANT_Class]);
+    return &member_indexes[classRef->inord * 2 + 1];
+}
+
+struct inner_class
+{
+    entry *inner;
+    entry *outer;
+    entry *name;
+    int flags;
+    inner_class *next_sibling;
+    bool requested;
+};
+
+// Here is where everything gets deallocated:
+// The caller must already have closed infileptr (asserted below).
+void unpacker::free()
+{
+    int i;
+    assert(infileptr == nullptr); // caller resp.
+    if (jarout != nullptr)
+        jarout->reset();
+    if (gzin != nullptr)
+    {
+        gzin->free();
+        gzin = nullptr;
+    }
+    if (free_input)
+        input.free();
+    // free everybody ever allocated with U_NEW or (recently) with T_NEW
+    assert(smallbuf.base() == nullptr || mallocs.contains(smallbuf.base()));
+    assert(tsmallbuf.base() == nullptr || tmallocs.contains(tsmallbuf.base()));
+    mallocs.freeAll();
+    tmallocs.freeAll();
+    smallbuf.init();
+    tsmallbuf.init();
+    bcimap.free();
+    class_fixup_type.free();
+    class_fixup_offset.free();
+    class_fixup_ref.free();
+    code_fixup_type.free();
+    code_fixup_offset.free();
+    code_fixup_source.free();
+    requested_ics.free();
+    cur_classfile_head.free();
+    cur_classfile_tail.free();
+    for (i = 0; i < ATTR_CONTEXT_LIMIT; i++)
+        attr_defs[i].free();
+
+    // free CP state
+    cp.outputEntries.free();
+    for (i = 0; i < CONSTANT_Limit; i++)
+        cp.tag_extras[i].free();
+}
+
+// input handling
+// Attempts to advance rplimit so that (rplimit-rp) is at least 'more'.
+// Will eagerly read ahead by larger chunks, if possible.
+// Returns false if (rplimit-rp) is not at least 'more',
+// unless rplimit hits input.limit().
+bool unpacker::ensure_input(jlong more)
+{
+    julong want = more - input_remaining();
+    if ((jlong)want <= 0)
+        return true; // it's already in the buffer
+    if (rplimit == input.limit())
+        return true; // not expecting any more
+
+    if (read_input_fn == nullptr)
+    {
+        // assume it is already all there
+        bytes_read += input.limit() - rplimit;
+        rplimit = input.limit();
+        return true;
+    }
+    CHECK_0;
+
+    julong remaining = (input.limit() - rplimit); // how much left to read?
+    byte *rpgoal = (want >= remaining) ? input.limit() : rplimit + (size_t)want;
+    enum
+    {
+        CHUNK_SIZE = (1 << 14)
+    };
+    // Ask for at least one chunk; when that would take most of the free
+    // space anyway, ask for all of it.
+    julong fetch = want;
+    if (fetch < CHUNK_SIZE)
+        fetch = CHUNK_SIZE;
+    if (fetch > remaining * 3 / 4)
+        fetch = remaining;
+    // Try to fetch at least "more" bytes.
+    while ((jlong)fetch > 0)
+    {
+        jlong nr = (*read_input_fn)(this, rplimit, fetch, remaining);
+        if (nr <= 0)
+        {
+            return (rplimit >= rpgoal);
+        }
+        remaining -= nr;
+        rplimit += nr;
+        fetch -= nr;
+        bytes_read += nr;
+        assert(remaining == (julong)(input.limit() - rplimit));
+    }
+    return true;
+}
+
+// output handling
+
+// Ends writing to the current output segment: stores wp as the segment's
+// limit and clears wp/wplimit. When `which` is null the segment is found by
+// comparing wpbase with the class file head buffer; otherwise the tail
+// buffer is assumed. Returns the segment that was closed.
+fillbytes *unpacker::close_output(fillbytes *which)
+{
+    assert(wp != nullptr);
+    if (which == nullptr)
+    {
+        if (wpbase == cur_classfile_head.base())
+        {
+            which = &cur_classfile_head;
+        }
+        else
+        {
+            which = &cur_classfile_tail;
+        }
+    }
+    assert(wpbase == which->base());
+    assert(wplimit == which->end());
+    which->setLimit(wp);
+    wp = nullptr;
+    wplimit = nullptr;
+    // wpbase = nullptr;
+    return which;
+}
+
+// maybe_inline
+// Makes sure `size` more bytes fit at wp, growing the current segment and
+// re-deriving wpbase/wplimit/wp from it when they do not.
+void unpacker::ensure_put_space(size_t size)
+{
+    if (wp + size <= wplimit)
+        return;
+    // Determine which segment needs expanding.
+    fillbytes *which = close_output();
+    byte *wp0 = which->grow(size);
+    wpbase = which->base();
+    wplimit = which->end();
+    wp = wp0;
+}
+
+// Reserves `size` output bytes and returns a pointer to their start.
+byte *unpacker::put_space(size_t size)
+{
+    byte *wp0 = wp;
+    byte *wp1 = wp0 + size;
+    if (wp1 > wplimit)
+    {
+        ensure_put_space(size);
+        wp0 = wp;
+        wp1 = wp0 + size;
+    }
+    wp = wp1;
+    return wp0;
+}
+
+// Stores n big-endian in two bytes at wp; a value that does not fit in 16
+// unsigned bits aborts with ERROR_OVERFLOW and writes nothing.
+void unpacker::putu2_at(byte *wp, int n)
+{
+    if (n != (unsigned short)n)
+    {
+        unpack_abort(ERROR_OVERFLOW);
+        return;
+    }
+    wp[0] = (n) >> 8;
+    wp[1] = (n) >> 0;
+}
+
+// Stores n big-endian in four bytes at wp.
+void unpacker::putu4_at(byte *wp, int n)
+{
+    wp[0] = (n) >> 24;
+    wp[1] = (n) >> 16;
+    wp[2] = (n) >> 8;
+    wp[3] = (n) >> 0;
+}
+
+// Stores n big-endian in eight bytes at wp (high word first).
+void unpacker::putu8_at(byte *wp, jlong n)
+{
+    putu4_at(wp + 0, (int)((julong)n >> 32));
+    putu4_at(wp + 4, (int)((julong)n >> 0));
+}
+
+// Append a 2-, 4- or 8-byte big-endian value to the output.
+void unpacker::putu2(int n)
+{
+    putu2_at(put_space(2), n);
+}
+
+void unpacker::putu4(int n)
+{
+    putu4_at(put_space(4), n);
+}
+
+void unpacker::putu8(jlong n)
+{
+    putu8_at(put_space(8), n);
+}
+
+// Returns the output index to write for e, or 0 when e is null or has no
+// index yet. In the latter case an index is requested and a fixup (size,
+// current output offset, entry) is recorded so the bytes can be patched
+// later. A Signature entry is resolved through its first reference.
+int unpacker::putref_index(entry *e, int size)
+{
+    if (e == nullptr)
+        return 0;
+    else if (e->outputIndex > NOT_REQUESTED)
+        return e->outputIndex;
+    else if (e->tag == CONSTANT_Signature)
+        return putref_index(e->ref(0), size);
+    else
+    {
+        e->requestOutputIndex(cp, -size);
+        // Later on we'll fix the bits.
+        class_fixup_type.addByte(size);
+        class_fixup_offset.add((int)wpoffset());
+        class_fixup_ref.add(e);
+        return 0;
+    }
+}
+
+// Appends a two-byte reference to e.
+void unpacker::putref(entry *e)
+{
+    int oidx = putref_index(e, 2);
+    putu2_at(put_space(2), oidx);
+}
+
+// Appends a one-byte reference to e.
+void unpacker::putu1ref(entry *e)
+{
+    int oidx = putref_index(e, 1);
+    putu1_at(put_space(1), oidx);
+}
+
+static int total_cp_size[] = {0, 0};
+static int largest_cp_ref[] = {0, 0};
+static int hash_probes[] = {0, 0};
+
+// Allocation of small and large blocks.
+
+enum
+{
+    CHUNK = (1 << 14),
+    SMALL = (1 << 9)
+};
+
+// Call malloc. Try to combine small blocks and free much later.
+// Requests larger than SMALL (or with smallOK false) get their own block,
+// recorded in mallocs or tmallocs; smaller ones are carved out of a shared
+// CHUNK-sized buffer in 8-byte steps. `temp` selects the temporary pools.
+void *unpacker::alloc_heap(size_t size, bool smallOK, bool temp)
+{
+    if (!smallOK || size > SMALL)
+    {
+        void *res = must_malloc((int)size);
+        (temp ? &tmallocs : &mallocs)->add(res);
+        return res;
+    }
+    fillbytes &xsmallbuf = *(temp ? &tsmallbuf : &smallbuf);
+    if (!xsmallbuf.canAppend(size + 1))
+    {
+        xsmallbuf.init(CHUNK);
+        (temp ? &tmallocs : &mallocs)->add(xsmallbuf.base());
+    }
+    int growBy = (int)size;
+    growBy += -growBy & 7; // round up mod 8
+    return xsmallbuf.grow(growBy);
+}
+
+// Copies len bytes from ptr into newly allocated storage (one byte larger
+// than len) and points b at it; b ends up with length 0 when the
+// allocation put the unpacker into the aborting state.
+void unpacker::saveTo(bytes &b, byte *ptr, size_t len)
+{
+    b.ptr = U_NEW(byte, add_size(len, 1));
+    if (aborting())
+    {
+        b.len = 0;
+        return;
+    }
+    b.len = len;
+    b.copyFrom(ptr, len);
+}
+
+// Read up through band_headers.
+// Do the archive_size dance to set the size of the input mega-buffer.
+void unpacker::read_file_header()
+{
+    // Read file header to determine file type and total size.
+    enum
+    {
+        MAGIC_BYTES = 4,
+        AH_LENGTH_0 = 3, // minver, majver, options are outside of archive_size
+        AH_LENGTH_0_MAX = AH_LENGTH_0 + 1, // options might have 2 bytes
+        AH_LENGTH = 26, // maximum archive header length (w/ all fields)
+        // Length contributions from optional header fields:
+        AH_FILE_HEADER_LEN = 5, // sizehi/lo/next/modtime/files
+        AH_ARCHIVE_SIZE_LEN = 2, // sizehi/lo only; part of AH_FILE_HEADER_LEN
+        AH_CP_NUMBER_LEN = 4, // int/float/long/double
+        AH_SPECIAL_FORMAT_LEN = 2, // layouts/band-headers
+        AH_LENGTH_MIN =
+            AH_LENGTH - (AH_FILE_HEADER_LEN + AH_SPECIAL_FORMAT_LEN + AH_CP_NUMBER_LEN),
+        ARCHIVE_SIZE_MIN = AH_LENGTH_MIN - (AH_LENGTH_0 + AH_ARCHIVE_SIZE_LEN),
+        FIRST_READ = MAGIC_BYTES + AH_LENGTH_MIN
+    };
+
+    assert(AH_LENGTH_MIN == 15); // # of UNSIGNED5 fields required after archive_magic
+    assert(ARCHIVE_SIZE_MIN == 10); // # of UNSIGNED5 fields required after archive_size
+    // An absolute minimum null archive is magic[4], {minver,majver,options}[3],
+    // archive_size[0], cp_counts[8], class_counts[4], for a total of 19 bytes.
+    // (Note that archive_size is optional; it may be 0..10 bytes in length.)
+    // The first read must capture everything up through the options field.
+    // This happens to work even if {minver,majver,options} is a pathological
+    // 15 bytes long. Legal pack files limit those three fields to 1+1+2 bytes.
+    assert(FIRST_READ >= MAGIC_BYTES + AH_LENGTH_0 * B_MAX);
+
+    // Up through archive_size, the largest possible archive header is
+    // magic[4], {minver,majver,options}[4], archive_size[10].
+    // (Note only the low 12 bits of options are allowed to be non-zero.)
+    // In order to parse archive_size, we need at least this many bytes
+    // in the first read. Of course, if archive_size_hi is more than
+    // a byte, we probably will fail to allocate the buffer, since it
+    // will be many gigabytes long. This is a practical, not an
+    // architectural limit to Pack200 archive sizes.
+    assert(FIRST_READ >= MAGIC_BYTES + AH_LENGTH_0_MAX + 2 * B_MAX);
+
+    bool foreign_buf = (read_input_fn == nullptr);
+    byte initbuf[(int)FIRST_READ + (int)C_SLOP + 200]; // 200 is for JAR I/O
+    if (foreign_buf)
+    {
+        // inbytes is all there is
+        input.set(inbytes);
+        rp = input.base();
+        rplimit = input.limit();
+    }
+    else
+    {
+        // inbytes, if not empty, contains some read-ahead we must use first
+        // ensure_input will take care of copying it into initbuf,
+        // then querying read_input_fn for any additional data needed.
+        // However, the caller must assume that we use up all of inbytes.
+        // There is no way to tell the caller that we used only part of them.
+        // Therefore, the caller must use only a bare minimum of read-ahead.
+        if (inbytes.len > FIRST_READ)
+        {
+            abort("too much read-ahead");
+            return;
+        }
+        input.set(initbuf, sizeof(initbuf));
+        input.b.clear();
+        input.b.copyFrom(inbytes);
+        rplimit = rp = input.base();
+        rplimit += inbytes.len;
+        bytes_read += inbytes.len;
+    }
+    // Read only 19 bytes, which is certain to contain #archive_options fields,
+    // but is certain not to overflow past the archive_header.
+    input.b.len = FIRST_READ;
+    if (!ensure_input(FIRST_READ))
+        abort("EOF reading archive magic number");
+
+    if (rp[0] == 'P' && rp[1] == 'K')
+    {
+        // In the Unix-style program, we simply simulate a copy command.
+        // Copy until EOF; assume the JAR file is the last segment.
+        fprintf(stderr, "Copy-mode.\n");
+        for (;;)
+        {
+            jarout->write_data(rp, (int)input_remaining());
+            if (foreign_buf)
+                break; // one-time use of a passed in buffer
+            if (input.size() < CHUNK)
+            {
+                // Get some breathing room.
+                input.set(U_NEW(byte, (size_t)CHUNK + C_SLOP), (size_t)CHUNK);
+                CHECK;
+            }
+            rp = rplimit = input.base();
+            if (!ensure_input(1))
+                break;
+        }
+        jarout->closeJarFile(false);
+        return;
+    }
+
+    // Read the magic number.
+    magic = 0;
+    for (int i1 = 0; i1 < (int)sizeof(magic); i1++)
+    {
+        magic <<= 8;
+        magic += (*rp++ & 0xFF);
+    }
+
+    // Read the first 3 values from the header.
+    value_stream hdr;
+    int hdrVals = 0;
+    int hdrValsSkipped = 0; // debug only
+    hdr.init(rp, rplimit, UNSIGNED5_spec);
+    minver = hdr.getInt();
+    majver = hdr.getInt();
+    hdrVals += 2;
+
+    if (magic != (int)JAVA_PACKAGE_MAGIC ||
+        (majver != JAVA5_PACKAGE_MAJOR_VERSION && majver != JAVA6_PACKAGE_MAJOR_VERSION) ||
+        (minver != JAVA5_PACKAGE_MINOR_VERSION && minver != JAVA6_PACKAGE_MINOR_VERSION))
+    {
+        // Three %08X and six %d conversions plus the fixed text stay well
+        // below the 200 bytes reserved here.
+        char message[200];
+        sprintf(message, "@" ERROR_FORMAT ": magic/ver = "
+                         "%08X/%d.%d should be %08X/%d.%d OR %08X/%d.%d\n",
+                magic, majver, minver, JAVA_PACKAGE_MAGIC, JAVA5_PACKAGE_MAJOR_VERSION,
+                JAVA5_PACKAGE_MINOR_VERSION, JAVA_PACKAGE_MAGIC, JAVA6_PACKAGE_MAJOR_VERSION,
+                JAVA6_PACKAGE_MINOR_VERSION);
+        abort(message);
+    }
+    CHECK;
+
+    archive_options = hdr.getInt();
+    hdrVals += 1;
+    assert(hdrVals == AH_LENGTH_0); // first three fields only
+
+    // OR together every option bit listed by ARCHIVE_BIT_DO.
+#define ORBIT(bit) | (bit)
+    int OPTION_LIMIT = (0 ARCHIVE_BIT_DO(ORBIT));
+#undef ORBIT
+    if ((archive_options & ~OPTION_LIMIT) != 0)
+    {
+        fprintf(stderr, "Warning: Illegal archive options 0x%x\n", archive_options);
+        abort("illegal archive options");
+        return;
+    }
+
+    if ((archive_options & AO_HAVE_FILE_HEADERS) != 0)
+    {
+        uint hi = hdr.getInt();
+        uint lo = hdr.getInt();
+        julong x = band::makeLong(hi, lo);
+        archive_size = (size_t)x;
+        if (archive_size != x)
+        {
+            // Silly size specified; force overflow.
+            archive_size = PSIZE_MAX + 1;
+        }
+        hdrVals += 2;
+    }
+    else
+    {
+        hdrValsSkipped += 2;
+    }
+
+    // Now we can size the whole archive.
+    // Read everything else into a mega-buffer.
+    rp = hdr.rp;
+    int header_size_0 = (int)(rp - input.base()); // used-up header (4byte + 3int)
+    int header_size_1 = (int)(rplimit - rp); // buffered unused initial fragment
+    int header_size = header_size_0 + header_size_1;
+    unsized_bytes_read = header_size_0;
+    CHECK;
+    if (foreign_buf)
+    {
+        if (archive_size > (size_t)header_size_1)
+        {
+            abort("EOF reading fixed input buffer");
+            return;
+        }
+    }
+    else if (archive_size != 0)
+    {
+        if (archive_size < ARCHIVE_SIZE_MIN)
+        {
+            abort("impossible archive size"); // bad input data
+            return;
+        }
+        if (archive_size < header_size_1)
+        {
+            abort("too much read-ahead"); // somehow we pre-fetched too much?
+            return;
+        }
+        input.set(U_NEW(byte, add_size(header_size_0, archive_size, C_SLOP)),
+                  (size_t)header_size_0 + archive_size);
+        CHECK;
+        assert(input.limit()[0] == 0);
+        // Move all the bytes we read initially into the real buffer.
+        input.b.copyFrom(initbuf, header_size);
+        rp = input.b.ptr + header_size_0;
+        rplimit = input.b.ptr + header_size;
+    }
+    else
+    {
+        // It's more complicated and painful.
+        // A zero archive_size means that we must read until EOF.
+        input.init(CHUNK * 2);
+        CHECK;
+        input.b.len = input.allocated;
+        rp = rplimit = input.base();
+        // Set up input buffer as if we already read the header:
+        input.b.copyFrom(initbuf, header_size);
+        CHECK;
+        rplimit += header_size;
+        while (ensure_input(input.limit() - rp))
+        {
+            size_t dataSoFar = input_remaining();
+            size_t nextSize = add_size(dataSoFar, CHUNK);
+            input.ensureSize(nextSize);
+            CHECK;
+            input.b.len = input.allocated;
+            rp = rplimit = input.base();
+            rplimit += dataSoFar;
+        }
+        size_t dataSize = (rplimit - input.base());
+        input.b.len = dataSize;
+        input.grow(C_SLOP);
+        CHECK;
+        free_input = true; // free it later
+        input.b.len = dataSize;
+        assert(input.limit()[0] == 0);
+        rp = rplimit = input.base();
+        rplimit += dataSize;
+        rp += header_size_0; // already scanned these bytes...
+    }
+    live_input = true; // mark as "do not reuse"
+    if (aborting())
+    {
+        abort("cannot allocate large input buffer for package file");
+        return;
+    }
+
+    // read the rest of the header fields
+    ensure_input((AH_LENGTH - AH_LENGTH_0) * B_MAX);
+    CHECK;
+    hdr.rp = rp;
+    hdr.rplimit = rplimit;
+
+    if ((archive_options & AO_HAVE_FILE_HEADERS) != 0)
+    {
+        archive_next_count = hdr.getInt();
+        CHECK_COUNT(archive_next_count);
+        archive_modtime = hdr.getInt();
+        file_count = hdr.getInt();
+        CHECK_COUNT(file_count);
+        hdrVals += 3;
+    }
+    else
+    {
+        hdrValsSkipped += 3;
+    }
+
+    if ((archive_options & AO_HAVE_SPECIAL_FORMATS) != 0)
+    {
+        band_headers_size = hdr.getInt();
+        CHECK_COUNT(band_headers_size);
+        attr_definition_count = hdr.getInt();
+        CHECK_COUNT(attr_definition_count);
+        hdrVals += 2;
+    }
+    else
+    {
+        hdrValsSkipped += 2;
+    }
+
+    // One count per tag of TAGS_IN_ORDER; the four numeric tags are absent
+    // from the header (count 0) unless AO_HAVE_CP_NUMBERS is set.
+    int cp_counts[N_TAGS_IN_ORDER];
+    for (int k = 0; k < (int)N_TAGS_IN_ORDER; k++)
+    {
+        if (!(archive_options & AO_HAVE_CP_NUMBERS))
+        {
+            switch (TAGS_IN_ORDER[k])
+            {
+            case CONSTANT_Integer:
+            case CONSTANT_Float:
+            case CONSTANT_Long:
+            case CONSTANT_Double:
+                cp_counts[k] = 0;
+                hdrValsSkipped += 1;
+                continue;
+            }
+        }
+        cp_counts[k] = hdr.getInt();
+        CHECK_COUNT(cp_counts[k]);
+        hdrVals += 1;
+    }
+
+    ic_count = hdr.getInt();
+    CHECK_COUNT(ic_count);
+    default_class_minver = hdr.getInt();
+    default_class_majver = hdr.getInt();
+    class_count = hdr.getInt();
+    CHECK_COUNT(class_count);
+    hdrVals += 4;
+
+    // done with archive_header
+    hdrVals += hdrValsSkipped;
+    assert(hdrVals == AH_LENGTH);
+
+    rp = hdr.rp;
+    if (rp > rplimit)
+        abort("EOF reading archive header");
+
+    // Now size the CP.
+    cp.init(this, cp_counts);
+    CHECK;
+
+    default_file_modtime = archive_modtime;
+    if (default_file_modtime == 0 && !(archive_options & AO_HAVE_FILE_MODTIME))
+        default_file_modtime = DEFAULT_ARCHIVE_MODTIME; // taken from driver
+    if ((archive_options & AO_DEFLATE_HINT) != 0)
+        default_file_options |= FO_DEFLATE_HINT;
+
+    // meta-bytes, if any, immediately follow archive header
+    // band_headers.readData(band_headers_size);
+    ensure_input(band_headers_size);
+    if (input_remaining() < (size_t)band_headers_size)
+    {
+        abort("EOF reading band headers");
+        return;
+    }
+    bytes band_headers;
+    // The "1+" allows an initial byte to be pushed on the front.
+    band_headers.set(1 + U_NEW(byte, 1 + band_headers_size + C_SLOP), band_headers_size);
+    CHECK;
+    // Start scanning band headers here:
+    band_headers.copyFrom(rp, band_headers.len);
+    rp += band_headers.len;
+    assert(rp <= rplimit);
+    meta_rp = band_headers.ptr;
+    // Put evil meta-codes at the end of the band headers,
+    // so we are sure to throw an error if we run off the end.
+    bytes::of(band_headers.limit(), C_SLOP).clear(_meta_error);
+}
+
+// Prints the read/write totals to stderr when verbose >= 1 and closes the
+// output jar if there is one.
+void unpacker::finish()
+{
+    if (verbose >= 1)
+    {
+        fprintf(stderr, "A total of " LONG_LONG_FORMAT " bytes were read in %d segment(s).\n",
+                (bytes_read_before_reset + bytes_read), segments_read_before_reset + 1);
+        fprintf(stderr, "A total of " LONG_LONG_FORMAT " file content bytes were written.\n",
+                (bytes_written_before_reset + bytes_written));
+        fprintf(stderr,
+                "A total of %d files (of which %d are classes) were written to output.\n",
+                files_written_before_reset + files_written,
+                classes_written_before_reset + classes_written);
+    }
+    if (jarout != nullptr)
+        jarout->closeJarFile(true);
+}
+
+// Cf. PackageReader.readConstantPoolCounts
+// Sizes the constant pool from the per-tag counts (ordered as
+// TAGS_IN_ORDER), allocates the entry array with head-room for entries
+// added later, and sets up the per-tag indexes and the hash table.
+// Aborts when a count or the running total exceeds the Pack200 limit.
+void cpool::init(unpacker *u_, int counts[NUM_COUNTS])
+{
+    this->u = u_;
+
+    // Fill-pointer for CP.
+    int next_entry = 0;
+
+    // Size the constant pool:
+    for (int k = 0; k < (int)N_TAGS_IN_ORDER; k++)
+    {
+        byte tag = TAGS_IN_ORDER[k];
+        int len = counts[k];
+        tag_count[tag] = len;
+        tag_base[tag] = next_entry;
+        // Detect and defend against constant pool size overflow.
+        // (Pack200 forbids the sum of CP counts to exceed 2^29-1.)
+        enum
+        {
+            CP_SIZE_LIMIT = (1 << 29),
+            IMPLICIT_ENTRY_COUNT = 1 // empty Utf8 string
+        };
+        // Validate len before it is added: adding an unchecked count to the
+        // running total could overflow the int.
+        if (len >= CP_SIZE_LIMIT || len < 0)
+        {
+            abort("archive too large: constant pool limit exceeded");
+            return;
+        }
+        next_entry += len;
+        if (next_entry >= CP_SIZE_LIMIT + IMPLICIT_ENTRY_COUNT)
+        {
+            abort("archive too large: constant pool limit exceeded");
+            return;
+        }
+    }
+
+    // Close off the end of the CP:
+    nentries = next_entry;
+
+    // place a limit on future CP growth:
+    int generous = 0;
+    generous = add_size(generous, u->ic_count); // implicit name
+    generous = add_size(generous, u->ic_count); // outer
+    generous = add_size(generous, u->ic_count); // outer.utf8
+    generous = add_size(generous, 40); // WKUs, misc
+    generous = add_size(generous, u->class_count); // implicit SourceFile strings
+    maxentries = add_size(nentries, generous);
+
+    // Note that this CP does not include "empty" entries
+    // for longs and doubles. Those are introduced when
+    // the entries are renumbered for classfile output.
+
+    entries = U_NEW(entry, maxentries);
+    CHECK;
+
+    first_extra_entry = &entries[nentries];
+
+    // Initialize the standard indexes.
+    tag_count[CONSTANT_All] = nentries;
+    tag_base[CONSTANT_All] = 0;
+    for (int tag = 0; tag < CONSTANT_Limit; tag++)
+    {
+        entry *cpMap = &entries[tag_base[tag]];
+        tag_index[tag].init(tag_count[tag], cpMap, tag);
+    }
+
+    // Initialize hashTab to a generous power-of-two size.
+    uint pow2 = 1;
+    uint target = maxentries + maxentries / 2; // 60% full
+    while (pow2 < target)
+        pow2 <<= 1;
+    hashTab = U_NEW(entry *, hashTabLength = pow2);
+}
+
+// Writes ch at cp in the modified UTF-8 form used by class files and
+// returns the position after it: one byte for 0x0001..0x007F, two bytes up
+// to 0x07FF (which includes 0x0000, stored as C0 80), three bytes otherwise.
+static byte *store_Utf8_char(byte *cp, unsigned short ch)
+{
+    if (ch >= 0x001 && ch <= 0x007F)
+    {
+        *cp++ = (byte)ch;
+    }
+    else if (ch <= 0x07FF)
+    {
+        *cp++ = (byte)(0xC0 | ((ch >> 6) & 0x1F));
+        *cp++ = (byte)(0x80 | ((ch >> 0) & 0x3F));
+    }
+    else
+    {
+        *cp++ = (byte)(0xE0 | ((ch >> 12) & 0x0F));
+        *cp++ = (byte)(0x80 | ((ch >> 6) & 0x3F));
+        *cp++ = (byte)(0x80 | ((ch >> 0) & 0x3F));
+    }
+    return cp;
+}
+
+// Advances cp over len encoded characters and returns the new position.
+// Continuation bytes (10xxxxxx) are passed over without being counted.
+static byte *skip_Utf8_chars(byte *cp, int len)
+{
+    for (;; cp++)
+    {
+        int ch = *cp & 0xFF;
+        if ((ch & 0xC0) != 0x80)
+        {
+            if (len-- == 0)
+                return cp;
+            if (ch < 0x80 && len == 0)
+                return cp + 1;
+        }
+    }
+}
+
+// Three-way comparison of two encoded strings in Java char order: negative,
+// zero or positive as b1 sorts before, equal to or after b2.
+static int compare_Utf8_chars(bytes &b1, bytes &b2)
+{
+    int l1 = (int)b1.len;
+    int l2 = (int)b2.len;
+    int l0 = (l1 < l2) ? l1 : l2;
+    byte *p1 = b1.ptr;
+    byte *p2 = b2.ptr;
+    int c0 = 0;
+    for (int i = 0; i < l0; i++)
+    {
+        int c1 = p1[i] & 0xFF;
+        int c2 = p2[i] & 0xFF;
+        if (c1 != c2)
+        {
+            // Before returning the obvious answer,
+            // check to see if c1 or c2 is part of a 0x0000,
+            // which encodes as {0xC0,0x80}. The 0x0000 is the
+            // lowest-sorting Java char value, and yet it encodes
+            // as if it were the first char after 0x7F, which causes
+            // strings containing nulls to sort too high. All other
+            // comparisons are consistent between Utf8 and Java chars.
+            if (c1 == 0xC0 && (p1[i + 1] & 0xFF) == 0x80)
+                c1 = 0;
+            if (c2 == 0xC0 && (p2[i + 1] & 0xFF) == 0x80)
+                c2 = 0;
+            if (c0 == 0xC0)
+            {
+                assert(((c1 | c2) & 0xC0) == 0x80); // c1 & c2 are extension chars
+                if (c1 == 0x80)
+                    c1 = 0; // will sort below c2
+                if (c2 == 0x80)
+                    c2 = 0; // will sort below c1
+            }
+            return c1 - c2;
+        }
+        c0 = c1; // save away previous char
+    }
+    // common prefix is identical; return length difference if any
+    return l1 - l2;
+}
+
+// Cf.
PackageReader.readUtf8Bands +void unpacker::read_Utf8_values(entry *cpMap, int len) +{ + // Implicit first Utf8 string is the empty string. + enum + { + // certain bands begin with implicit zeroes + PREFIX_SKIP_2 = 2, + SUFFIX_SKIP_1 = 1 + }; + + int i; + + // First band: Read lengths of shared prefixes. + if (len > PREFIX_SKIP_2) + cp_Utf8_prefix.readData(len - PREFIX_SKIP_2); + + // Second band: Read lengths of unshared suffixes: + if (len > SUFFIX_SKIP_1) + cp_Utf8_suffix.readData(len - SUFFIX_SKIP_1); + + bytes *allsuffixes = T_NEW(bytes, len); + CHECK; + + int nbigsuf = 0; + fillbytes charbuf; // buffer to allocate small strings + charbuf.init(); + + // Third band: Read the char values in the unshared suffixes: + cp_Utf8_chars.readData(cp_Utf8_suffix.getIntTotal()); + for (i = 0; i < len; i++) + { + int suffix = (i < SUFFIX_SKIP_1) ? 0 : cp_Utf8_suffix.getInt(); + if (suffix < 0) + { + abort("bad utf8 suffix"); + return; + } + if (suffix == 0 && i >= SUFFIX_SKIP_1) + { + // chars are packed in cp_Utf8_big_chars + nbigsuf += 1; + continue; + } + bytes &chars = allsuffixes[i]; + uint size3 = suffix * 3; // max Utf8 length + bool isMalloc = (suffix > SMALL); + if (isMalloc) + { + chars.malloc(size3); + } + else + { + if (!charbuf.canAppend(size3 + 1)) + { + assert(charbuf.allocated == 0 || tmallocs.contains(charbuf.base())); + charbuf.init(CHUNK); // Reset to new buffer. + tmallocs.add(charbuf.base()); + } + chars.set(charbuf.grow(size3 + 1), size3); + } + CHECK; + byte *chp = chars.ptr; + for (int j = 0; j < suffix; j++) + { + unsigned short ch = cp_Utf8_chars.getInt(); + chp = store_Utf8_char(chp, ch); + } + // shrink to fit: + if (isMalloc) + { + chars.realloc(chp - chars.ptr); + CHECK; + tmallocs.add(chars.ptr); // free it later + } + else + { + int shrink = (int)(chars.limit() - chp); + chars.len -= shrink; + charbuf.b.len -= shrink; // ungrow to reclaim buffer space + // Note that we did not reclaim the final '\0'. 
+ assert(chars.limit() == charbuf.limit() - 1); + assert(strlen((char *)chars.ptr) == chars.len); + } + } + // cp_Utf8_chars.done(); + + // Fourth band: Go back and size the specially packed strings. + int maxlen = 0; + cp_Utf8_big_suffix.readData(nbigsuf); + cp_Utf8_suffix.rewind(); + for (i = 0; i < len; i++) + { + int suffix = (i < SUFFIX_SKIP_1) ? 0 : cp_Utf8_suffix.getInt(); + int prefix = (i < PREFIX_SKIP_2) ? 0 : cp_Utf8_prefix.getInt(); + if (prefix < 0 || prefix + suffix < 0) + { + abort("bad utf8 prefix"); + return; + } + bytes &chars = allsuffixes[i]; + if (suffix == 0 && i >= SUFFIX_SKIP_1) + { + suffix = cp_Utf8_big_suffix.getInt(); + assert(chars.ptr == nullptr); + chars.len = suffix; // just a momentary hack + } + else + { + assert(chars.ptr != nullptr); + } + if (maxlen < prefix + suffix) + { + maxlen = prefix + suffix; + } + } + // cp_Utf8_suffix.done(); // will use allsuffixes[i].len (ptr!=nullptr) + // cp_Utf8_big_suffix.done(); // will use allsuffixes[i].len + + // Fifth band(s): Get the specially packed characters. + cp_Utf8_big_suffix.rewind(); + for (i = 0; i < len; i++) + { + bytes &chars = allsuffixes[i]; + if (chars.ptr != nullptr) + continue; // already input + int suffix = (int)chars.len; // pick up the hack + uint size3 = suffix * 3; + if (suffix == 0) + continue; // done with empty string + chars.malloc(size3); + byte *chp = chars.ptr; + band saved_band = cp_Utf8_big_chars; + cp_Utf8_big_chars.readData(suffix); + for (int j = 0; j < suffix; j++) + { + unsigned short ch = cp_Utf8_big_chars.getInt(); + chp = store_Utf8_char(chp, ch); + } + chars.realloc(chp - chars.ptr); + CHECK; + tmallocs.add(chars.ptr); // free it later + // cp_Utf8_big_chars.done(); + cp_Utf8_big_chars = saved_band; // reset the band for the next string + } + cp_Utf8_big_chars.readData(0); // zero chars + // cp_Utf8_big_chars.done(); + + // Finally, sew together all the prefixes and suffixes. 
+ bytes bigbuf; + bigbuf.malloc(maxlen * 3 + 1); // max Utf8 length, plus slop for nullptr + CHECK; + int prevlen = 0; // previous string length (in chars) + tmallocs.add(bigbuf.ptr); // free after this block + cp_Utf8_prefix.rewind(); + for (i = 0; i < len; i++) + { + bytes &chars = allsuffixes[i]; + int prefix = (i < PREFIX_SKIP_2) ? 0 : cp_Utf8_prefix.getInt(); + int suffix = (int)chars.len; + byte *fillp; + // by induction, the buffer is already filled with the prefix + // make sure the prefix value is not corrupted, though: + if (prefix > prevlen) + { + abort("utf8 prefix overflow"); + return; + } + fillp = skip_Utf8_chars(bigbuf.ptr, prefix); + // copy the suffix into the same buffer: + fillp = chars.writeTo(fillp); + assert(bigbuf.inBounds(fillp)); + *fillp = 0; // bigbuf must contain a well-formed Utf8 string + int length = (int)(fillp - bigbuf.ptr); + bytes &value = cpMap[i].value.b; + value.set(U_NEW(byte, add_size(length, 1)), length); + value.copyFrom(bigbuf.ptr, length); + CHECK; + // Index all Utf8 strings + entry *&htref = cp.hashTabRef(CONSTANT_Utf8, value); + if (htref == nullptr) + { + // Note that if two identical strings are transmitted, + // the first is taken to be the canonical one. + htref = &cpMap[i]; + } + prevlen = prefix + suffix; + } + // cp_Utf8_prefix.done(); + + // Free intermediate buffers. 
+ free_temps(); +} + +void unpacker::read_single_words(band &cp_band, entry *cpMap, int len) +{ + cp_band.readData(len); + for (int i = 0; i < len; i++) + { + cpMap[i].value.i = cp_band.getInt(); // coding handles signs OK + } +} + +void unpacker::read_double_words(band &cp_bands, entry *cpMap, int len) +{ + band &cp_band_hi = cp_bands; + band &cp_band_lo = cp_bands.nextBand(); + cp_band_hi.readData(len); + cp_band_lo.readData(len); + for (int i = 0; i < len; i++) + { + cpMap[i].value.l = cp_band_hi.getLong(cp_band_lo, true); + } + // cp_band_hi.done(); + // cp_band_lo.done(); +} + +void unpacker::read_single_refs(band &cp_band, byte refTag, entry *cpMap, int len) +{ + assert(refTag == CONSTANT_Utf8); + cp_band.setIndexByTag(refTag); + cp_band.readData(len); + CHECK; + int indexTag = (cp_band.bn == e_cp_Class) ? CONSTANT_Class : 0; + for (int i = 0; i < len; i++) + { + entry &e = cpMap[i]; + e.refs = U_NEW(entry *, e.nrefs = 1); + entry *utf = cp_band.getRef(); + CHECK; + e.refs[0] = utf; + e.value.b = utf->value.b; // copy value of Utf8 string to self + if (indexTag != 0) + { + // Maintain cross-reference: + entry *&htref = cp.hashTabRef(indexTag, e.value.b); + if (htref == nullptr) + { + // Note that if two identical classes are transmitted, + // the first is taken to be the canonical one. + htref = &e; + } + } + } + // cp_band.done(); +} + +void unpacker::read_double_refs(band &cp_band, byte ref1Tag, byte ref2Tag, entry *cpMap, + int len) +{ + band &cp_band1 = cp_band; + band &cp_band2 = cp_band.nextBand(); + cp_band1.setIndexByTag(ref1Tag); + cp_band2.setIndexByTag(ref2Tag); + cp_band1.readData(len); + cp_band2.readData(len); + CHECK; + for (int i = 0; i < len; i++) + { + entry &e = cpMap[i]; + e.refs = U_NEW(entry *, e.nrefs = 2); + e.refs[0] = cp_band1.getRef(); + e.refs[1] = cp_band2.getRef(); + CHECK; + } + // cp_band1.done(); + // cp_band2.done(); +} + +// Cf. 
PackageReader.readSignatureBands +void unpacker::read_signature_values(entry *cpMap, int len) +{ + cp_Signature_form.setIndexByTag(CONSTANT_Utf8); + cp_Signature_form.readData(len); + CHECK; + int ncTotal = 0; + int i; + for (i = 0; i < len; i++) + { + entry &e = cpMap[i]; + entry &form = *cp_Signature_form.getRef(); + CHECK; + int nc = 0; + + for (const char *ncp = form.utf8String(); *ncp; ncp++) + { + if (*ncp == 'L') + nc++; + } + + ncTotal += nc; + e.refs = U_NEW(entry *, cpMap[i].nrefs = 1 + nc); + CHECK; + e.refs[0] = &form; + } + // cp_Signature_form.done(); + cp_Signature_classes.setIndexByTag(CONSTANT_Class); + cp_Signature_classes.readData(ncTotal); + for (i = 0; i < len; i++) + { + entry &e = cpMap[i]; + for (int j = 1; j < e.nrefs; j++) + { + e.refs[j] = cp_Signature_classes.getRef(); + CHECK; + } + } + // cp_Signature_classes.done(); +} + +// Cf. PackageReader.readConstantPool +void unpacker::read_cp() +{ + byte *rp0 = rp; + + int i; + + for (int k = 0; k < (int)N_TAGS_IN_ORDER; k++) + { + byte tag = TAGS_IN_ORDER[k]; + int len = cp.tag_count[tag]; + int base = cp.tag_base[tag]; + + entry *cpMap = &cp.entries[base]; + for (i = 0; i < len; i++) + { + cpMap[i].tag = tag; + cpMap[i].inord = i; + } + + switch (tag) + { + case CONSTANT_Utf8: + read_Utf8_values(cpMap, len); + break; + case CONSTANT_Integer: + read_single_words(cp_Int, cpMap, len); + break; + case CONSTANT_Float: + read_single_words(cp_Float, cpMap, len); + break; + case CONSTANT_Long: + read_double_words(cp_Long_hi /*& cp_Long_lo*/, cpMap, len); + break; + case CONSTANT_Double: + read_double_words(cp_Double_hi /*& cp_Double_lo*/, cpMap, len); + break; + case CONSTANT_String: + read_single_refs(cp_String, CONSTANT_Utf8, cpMap, len); + break; + case CONSTANT_Class: + read_single_refs(cp_Class, CONSTANT_Utf8, cpMap, len); + break; + case CONSTANT_Signature: + read_signature_values(cpMap, len); + break; + case CONSTANT_NameandType: + read_double_refs(cp_Descr_name /*& cp_Descr_type*/, 
CONSTANT_Utf8, + CONSTANT_Signature, cpMap, len); + break; + case CONSTANT_Fieldref: + read_double_refs(cp_Field_class /*& cp_Field_desc*/, CONSTANT_Class, + CONSTANT_NameandType, cpMap, len); + break; + case CONSTANT_Methodref: + read_double_refs(cp_Method_class /*& cp_Method_desc*/, CONSTANT_Class, + CONSTANT_NameandType, cpMap, len); + break; + case CONSTANT_InterfaceMethodref: + read_double_refs(cp_Imethod_class /*& cp_Imethod_desc*/, CONSTANT_Class, + CONSTANT_NameandType, cpMap, len); + break; + default: + assert(false); + break; + } + CHECK; + } + + cp.expandSignatures(); + CHECK; + cp.initMemberIndexes(); + CHECK; + +#define SNAME(n, s) #s "\0" + const char *symNames = (ALL_ATTR_DO(SNAME) ""); +#undef SNAME + + for (int sn = 0; sn < cpool::s_LIMIT; sn++) + { + assert(symNames[0] >= '0' && symNames[0] <= 'Z'); // sanity + bytes name; + name.set(symNames); + if (name.len > 0 && name.ptr[0] != '0') + { + cp.sym[sn] = cp.ensureUtf8(name); + } + symNames += name.len + 1; // skip trailing nullptr to next name + } + + band::initIndexes(this); +} + +static band *no_bands[] = {nullptr}; // shared empty body + +inline band &unpacker::attr_definitions::fixed_band(int e_class_xxx) +{ + return u->all_bands[xxx_flags_hi_bn + (e_class_xxx - e_class_flags_hi)]; +} +inline band &unpacker::attr_definitions::xxx_flags_hi() +{ + return fixed_band(e_class_flags_hi); +} +inline band &unpacker::attr_definitions::xxx_flags_lo() +{ + return fixed_band(e_class_flags_lo); +} +inline band &unpacker::attr_definitions::xxx_attr_count() +{ + return fixed_band(e_class_attr_count); +} +inline band &unpacker::attr_definitions::xxx_attr_indexes() +{ + return fixed_band(e_class_attr_indexes); +} +inline band &unpacker::attr_definitions::xxx_attr_calls() +{ + return fixed_band(e_class_attr_calls); +} + +inline unpacker::layout_definition * +unpacker::attr_definitions::defineLayout(int idx, entry *nameEntry, const char *layout) +{ + const char *name = nameEntry->value.b.strval(); + 
layout_definition *lo = defineLayout(idx, name, layout); + CHECK_0; + lo->nameEntry = nameEntry; + return lo; +} + +unpacker::layout_definition *unpacker::attr_definitions::defineLayout(int idx, const char *name, + const char *layout) +{ + assert(flag_limit != 0); // must be set up already + if (idx >= 0) + { + // Fixed attr. + if (idx >= (int)flag_limit) + abort("attribute index too large"); + if (isRedefined(idx)) + abort("redefined attribute index"); + redef |= ((julong)1 << idx); + } + else + { + idx = flag_limit + overflow_count.length(); + overflow_count.add(0); // make a new counter + } + layout_definition *lo = U_NEW(layout_definition, 1); + CHECK_0; + lo->idx = idx; + lo->name = name; + lo->layout = layout; + for (int adds = (idx + 1) - layouts.length(); adds > 0; adds--) + { + layouts.add(nullptr); + } + CHECK_0; + layouts.get(idx) = lo; + return lo; +} + +band **unpacker::attr_definitions::buildBands(unpacker::layout_definition *lo) +{ + int i; + if (lo->elems != nullptr) + return lo->bands(); + if (lo->layout[0] == '\0') + { + lo->elems = no_bands; + } + else + { + // Create bands for this attribute by parsing the layout. + bool hasCallables = lo->hasCallables(); + bands_made = 0x10000; // base number for bands made + const char *lp = lo->layout; + lp = parseLayout(lp, lo->elems, -1); + CHECK_0; + if (lp[0] != '\0' || band_stack.length() > 0) + { + abort("garbage at end of layout"); + } + band_stack.popTo(0); + CHECK_0; + + // Fix up callables to point at their callees. + band **bands = lo->elems; + assert(bands == lo->bands()); + int num_callables = 0; + if (hasCallables) + { + while (bands[num_callables] != nullptr) + { + if (bands[num_callables]->le_kind != EK_CBLE) + { + abort("garbage mixed with callables"); + break; + } + num_callables += 1; + } + } + for (i = 0; i < calls_to_link.length(); i++) + { + band &call = *(band *)calls_to_link.get(i); + assert(call.le_kind == EK_CALL); + // Determine the callee. 
+ int call_num = call.le_len; + if (call_num < 0 || call_num >= num_callables) + { + abort("bad call in layout"); + break; + } + band &cble = *bands[call_num]; + // Link the call to it. + call.le_body[0] = &cble; + // Distinguish backward calls and callables: + assert(cble.le_kind == EK_CBLE); + assert(cble.le_len == call_num); + cble.le_back |= call.le_back; + } + calls_to_link.popTo(0); + } + return lo->elems; +} + +/* attribute layout language parser + + attribute_layout: + ( layout_element )* | ( callable )+ + layout_element: + ( integral | replication | union | call | reference ) + + callable: + '[' body ']' + body: + ( layout_element )+ + + integral: + ( unsigned_int | signed_int | bc_index | bc_offset | flag ) + unsigned_int: + uint_type + signed_int: + 'S' uint_type + any_int: + ( unsigned_int | signed_int ) + bc_index: + ( 'P' uint_type | 'PO' uint_type ) + bc_offset: + 'O' any_int + flag: + 'F' uint_type + uint_type: + ( 'B' | 'H' | 'I' | 'V' ) + + replication: + 'N' uint_type '[' body ']' + + union: + 'T' any_int (union_case)* '(' ')' '[' (body)? ']' + union_case: + '(' union_case_tag (',' union_case_tag)* ')' '[' (body)? ']' + union_case_tag: + ( numeral | numeral '-' numeral ) + call: + '(' numeral ')' + + reference: + reference_type ( 'N' )? uint_type + reference_type: + ( constant_ref | schema_ref | utf8_ref | untyped_ref ) + constant_ref: + ( 'KI' | 'KJ' | 'KF' | 'KD' | 'KS' | 'KQ' ) + schema_ref: + ( 'RC' | 'RS' | 'RD' | 'RF' | 'RM' | 'RI' ) + utf8_ref: + 'RU' + untyped_ref: + 'RQ' + + numeral: + '(' ('-')? (digit)+ ')' + digit: + ( '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' ) + +*/ + +const char *unpacker::attr_definitions::parseIntLayout(const char *lp, band *&res, byte le_kind, + bool can_be_signed) +{ + const char *lp0 = lp; + band *b = U_NEW(band, 1); + CHECK_(lp); + char le = *lp++; + int spec = UNSIGNED5_spec; + if (le == 'S' && can_be_signed) + { + // Note: This is the last use of sign. There is no 'EF_SIGN'. 
+ spec = SIGNED5_spec; + le = *lp++; + } + else if (le == 'B') + { + spec = BYTE1_spec; // unsigned byte + } + b->init(u, bands_made++, spec); + b->le_kind = le_kind; + int le_len = 0; + switch (le) + { + case 'B': + le_len = 1; + break; + case 'H': + le_len = 2; + break; + case 'I': + le_len = 4; + break; + case 'V': + le_len = 0; + break; + default: + abort("bad layout element"); + } + b->le_len = le_len; + band_stack.add(b); + res = b; + return lp; +} + +const char *unpacker::attr_definitions::parseNumeral(const char *lp, int &res) +{ + const char *lp0 = lp; + bool sgn = false; + if (*lp == '0') + { + res = 0; + return lp + 1; + } // special case '0' + if (*lp == '-') + { + sgn = true; + lp++; + } + const char *dp = lp; + int con = 0; + while (*dp >= '0' && *dp <= '9') + { + int con0 = con; + con *= 10; + con += (*dp++) - '0'; + if (con <= con0) + { + con = -1; + break; + } // numeral overflow + } + if (lp == dp) + { + abort("missing numeral in layout"); + return ""; + } + lp = dp; + if (con < 0 && !(sgn && con == -con)) + { + // (Portability note: Misses the error if int is not 32 bits.) + abort("numeral overflow"); + return ""; + } + if (sgn) + con = -con; + res = con; + return lp; +} + +band **unpacker::attr_definitions::popBody(int bs_base) +{ + // Return everything that was pushed, as a nullptr-terminated pointer array. 
+ int bs_limit = band_stack.length(); + if (bs_base == bs_limit) + { + return no_bands; + } + else + { + int nb = bs_limit - bs_base; + band **res = U_NEW(band *, add_size(nb, 1)); + CHECK_(no_bands); + for (int i = 0; i < nb; i++) + { + band *b = (band *)band_stack.get(bs_base + i); + res[i] = b; + } + band_stack.popTo(bs_base); + return res; + } +} + +const char *unpacker::attr_definitions::parseLayout(const char *lp, band **&res, int curCble) +{ + const char *lp0 = lp; + int bs_base = band_stack.length(); + bool top_level = (bs_base == 0); + band *b; + enum + { + can_be_signed = true + }; // optional arg to parseIntLayout + + for (bool done = false; !done;) + { + switch (*lp++) + { + case 'B': + case 'H': + case 'I': + case 'V': // unsigned_int + case 'S': // signed_int + --lp; // reparse + case 'F': + lp = parseIntLayout(lp, b, EK_INT); + break; + case 'P': + { + int le_bci = EK_BCI; + if (*lp == 'O') + { + ++lp; + le_bci = EK_BCID; + } + assert(*lp != 'S'); // no PSH, etc. + lp = parseIntLayout(lp, b, EK_INT); + b->le_bci = le_bci; + if (le_bci == EK_BCI) + b->defc = coding::findBySpec(BCI5_spec); + else + b->defc = coding::findBySpec(BRANCH5_spec); + } + break; + case 'O': + lp = parseIntLayout(lp, b, EK_INT, can_be_signed); + b->le_bci = EK_BCO; + b->defc = coding::findBySpec(BRANCH5_spec); + break; + case 'N': // replication: 'N' uint '[' elem ... ']' + lp = parseIntLayout(lp, b, EK_REPL); + assert(*lp == '['); + ++lp; + lp = parseLayout(lp, b->le_body, curCble); + CHECK_(lp); + break; + case 'T': // union: 'T' any_int union_case* '(' ')' '[' body ']' + lp = parseIntLayout(lp, b, EK_UN, can_be_signed); + { + int union_base = band_stack.length(); + for (;;) + { // for each case + band &k_case = *U_NEW(band, 1); + CHECK_(lp); + band_stack.add(&k_case); + k_case.le_kind = EK_CASE; + k_case.bn = bands_made++; + if (*lp++ != '(') + { + abort("bad union case"); + return ""; + } + if (*lp++ != ')') + { + --lp; // reparse + // Read some case values. 
(Use band_stack for temp. storage.) + int case_base = band_stack.length(); + for (;;) + { + int caseval = 0; + lp = parseNumeral(lp, caseval); + band_stack.add((void *)(size_t)caseval); + if (*lp == '-') + { + // new in version 160, allow (1-5) for (1,2,3,4,5) + if (u->majver < JAVA6_PACKAGE_MAJOR_VERSION) + { + abort("bad range in union case label (old archive format)"); + return ""; + } + int caselimit = caseval; + lp++; + lp = parseNumeral(lp, caselimit); + if (caseval >= caselimit || + (uint)(caselimit - caseval) > 0x10000) + { + // Note: 0x10000 is arbitrary implementation restriction. + // We can remove it later if it's important to. + abort("bad range in union case label"); + return ""; + } + for (;;) + { + ++caseval; + band_stack.add((void *)(size_t)caseval); + if (caseval == caselimit) + break; + } + } + if (*lp != ',') + break; + lp++; + } + if (*lp++ != ')') + { + abort("bad case label"); + return ""; + } + // save away the case labels + int ntags = band_stack.length() - case_base; + int *tags = U_NEW(int, add_size(ntags, 1)); + CHECK_(lp); + k_case.le_casetags = tags; + *tags++ = ntags; + for (int i = 0; i < ntags; i++) + { + *tags++ = ptrlowbits(band_stack.get(case_base + i)); + } + band_stack.popTo(case_base); + CHECK_(lp); + } + // Got le_casetags. Now grab the body. 
+ assert(*lp == '['); + ++lp; + lp = parseLayout(lp, k_case.le_body, curCble); + CHECK_(lp); + if (k_case.le_casetags == nullptr) + break; // done + } + b->le_body = popBody(union_base); + } + break; + case '(': // call: '(' -?NN* ')' + { + band &call = *U_NEW(band, 1); + CHECK_(lp); + band_stack.add(&call); + call.le_kind = EK_CALL; + call.bn = bands_made++; + call.le_body = U_NEW(band *, 2); // fill in later + int call_num = 0; + lp = parseNumeral(lp, call_num); + call.le_back = (call_num <= 0); + call_num += curCble; // numeral is self-relative offset + call.le_len = call_num; // use le_len as scratch + calls_to_link.add(&call); + CHECK_(lp); + if (*lp++ != ')') + { + abort("bad call label"); + return ""; + } + } + break; + case 'K': // reference_type: constant_ref + case 'R': // reference_type: schema_ref + { + int ixTag = CONSTANT_None; + if (lp[-1] == 'K') + { + switch (*lp++) + { + case 'I': + ixTag = CONSTANT_Integer; + break; + case 'J': + ixTag = CONSTANT_Long; + break; + case 'F': + ixTag = CONSTANT_Float; + break; + case 'D': + ixTag = CONSTANT_Double; + break; + case 'S': + ixTag = CONSTANT_String; + break; + case 'Q': + ixTag = CONSTANT_Literal; + break; + } + } + else + { + switch (*lp++) + { + case 'C': + ixTag = CONSTANT_Class; + break; + case 'S': + ixTag = CONSTANT_Signature; + break; + case 'D': + ixTag = CONSTANT_NameandType; + break; + case 'F': + ixTag = CONSTANT_Fieldref; + break; + case 'M': + ixTag = CONSTANT_Methodref; + break; + case 'I': + ixTag = CONSTANT_InterfaceMethodref; + break; + case 'U': + ixTag = CONSTANT_Utf8; + break; // utf8_ref + case 'Q': + ixTag = CONSTANT_All; + break; // untyped_ref + } + } + if (ixTag == CONSTANT_None) + { + abort("bad reference layout"); + break; + } + bool nullOK = false; + if (*lp == 'N') + { + nullOK = true; + lp++; + } + lp = parseIntLayout(lp, b, EK_REF); + b->defc = coding::findBySpec(UNSIGNED5_spec); + b->initRef(ixTag, nullOK); + } + break; + case '[': + { + // [callable1][callable2]... 
+ if (!top_level) + { + abort("bad nested callable"); + break; + } + curCble += 1; + band &cble = *U_NEW(band, 1); + CHECK_(lp); + band_stack.add(&cble); + cble.le_kind = EK_CBLE; + cble.bn = bands_made++; + lp = parseLayout(lp, cble.le_body, curCble); + } + break; + case ']': + // Hit a closing brace. This ends whatever body we were in. + done = true; + break; + case '\0': + // Hit a nullptr. Also ends the (top-level) body. + --lp; // back up, so caller can see the nullptr also + done = true; + break; + default: + abort("bad layout"); + break; + } + CHECK_(lp); + } + + // Return the accumulated bands: + res = popBody(bs_base); + return lp; +} + +void unpacker::read_attr_defs() +{ + int i; + + // Tell each AD which attrc it is and where its fixed flags are: + attr_defs[ATTR_CONTEXT_CLASS].attrc = ATTR_CONTEXT_CLASS; + attr_defs[ATTR_CONTEXT_CLASS].xxx_flags_hi_bn = e_class_flags_hi; + attr_defs[ATTR_CONTEXT_FIELD].attrc = ATTR_CONTEXT_FIELD; + attr_defs[ATTR_CONTEXT_FIELD].xxx_flags_hi_bn = e_field_flags_hi; + attr_defs[ATTR_CONTEXT_METHOD].attrc = ATTR_CONTEXT_METHOD; + attr_defs[ATTR_CONTEXT_METHOD].xxx_flags_hi_bn = e_method_flags_hi; + attr_defs[ATTR_CONTEXT_CODE].attrc = ATTR_CONTEXT_CODE; + attr_defs[ATTR_CONTEXT_CODE].xxx_flags_hi_bn = e_code_flags_hi; + + // Decide whether bands for the optional high flag words are present. + attr_defs[ATTR_CONTEXT_CLASS] + .setHaveLongFlags((archive_options & AO_HAVE_CLASS_FLAGS_HI) != 0); + attr_defs[ATTR_CONTEXT_FIELD] + .setHaveLongFlags((archive_options & AO_HAVE_FIELD_FLAGS_HI) != 0); + attr_defs[ATTR_CONTEXT_METHOD] + .setHaveLongFlags((archive_options & AO_HAVE_METHOD_FLAGS_HI) != 0); + attr_defs[ATTR_CONTEXT_CODE] + .setHaveLongFlags((archive_options & AO_HAVE_CODE_FLAGS_HI) != 0); + + // Set up built-in attrs. + // (The simple ones are hard-coded. The metadata layouts are not.) 
+ const char *md_layout = ( +// parameter annotations: +#define MDL0 "[NB[(1)]]" + MDL0 +// annotations: +#define MDL1 \ + "[NH[(1)]]" \ + "[RSHNH[RUH(1)]]" + MDL1 + // member_value: + "[TB" + "(66,67,73,83,90)[KIH]" + "(68)[KDH]" + "(70)[KFH]" + "(74)[KJH]" + "(99)[RSH]" + "(101)[RSHRUH]" + "(115)[RUH]" + "(91)[NH[(0)]]" + "(64)[" + // nested annotation: + "RSH" + "NH[RUH(0)]" + "]" + "()[]" + "]"); + + const char *md_layout_P = md_layout; + const char *md_layout_A = md_layout + strlen(MDL0); + const char *md_layout_V = md_layout + strlen(MDL0 MDL1); + assert(0 == strncmp(&md_layout_A[-3], ")]][", 4)); + assert(0 == strncmp(&md_layout_V[-3], ")]][", 4)); + + for (i = 0; i < ATTR_CONTEXT_LIMIT; i++) + { + attr_definitions &ad = attr_defs[i]; + ad.defineLayout(X_ATTR_RuntimeVisibleAnnotations, "RuntimeVisibleAnnotations", + md_layout_A); + ad.defineLayout(X_ATTR_RuntimeInvisibleAnnotations, "RuntimeInvisibleAnnotations", + md_layout_A); + if (i != ATTR_CONTEXT_METHOD) + continue; + ad.defineLayout(METHOD_ATTR_RuntimeVisibleParameterAnnotations, + "RuntimeVisibleParameterAnnotations", md_layout_P); + ad.defineLayout(METHOD_ATTR_RuntimeInvisibleParameterAnnotations, + "RuntimeInvisibleParameterAnnotations", md_layout_P); + ad.defineLayout(METHOD_ATTR_AnnotationDefault, "AnnotationDefault", md_layout_V); + } + + attr_definition_headers.readData(attr_definition_count); + attr_definition_name.readData(attr_definition_count); + attr_definition_layout.readData(attr_definition_count); + + CHECK; + +// Initialize correct predef bits, to distinguish predefs from new defs. 
+#define ORBIT(n, s) | ((julong)1 << n) + attr_defs[ATTR_CONTEXT_CLASS].predef = (0 X_ATTR_DO(ORBIT) CLASS_ATTR_DO(ORBIT)); + attr_defs[ATTR_CONTEXT_FIELD].predef = (0 X_ATTR_DO(ORBIT) FIELD_ATTR_DO(ORBIT)); + attr_defs[ATTR_CONTEXT_METHOD].predef = (0 X_ATTR_DO(ORBIT) METHOD_ATTR_DO(ORBIT)); + attr_defs[ATTR_CONTEXT_CODE].predef = (0 O_ATTR_DO(ORBIT) CODE_ATTR_DO(ORBIT)); +#undef ORBIT + // Clear out the redef bits, folding them back into predef. + for (i = 0; i < ATTR_CONTEXT_LIMIT; i++) + { + attr_defs[i].predef |= attr_defs[i].redef; + attr_defs[i].redef = 0; + } + + // Now read the transmitted locally defined attrs. + // This will set redef bits again. + for (i = 0; i < attr_definition_count; i++) + { + int header = attr_definition_headers.getByte(); + int attrc = ADH_BYTE_CONTEXT(header); + int idx = ADH_BYTE_INDEX(header); + entry *name = attr_definition_name.getRef(); + entry *layout = attr_definition_layout.getRef(); + CHECK; + attr_defs[attrc].defineLayout(idx, name, layout->value.b.strval()); + } +} + +#define NO_ENTRY_YET ((entry *)-1) + +static bool isDigitString(bytes &x, int beg, int end) +{ + if (beg == end) + return false; // nullptr string + byte *xptr = x.ptr; + for (int i = beg; i < end; i++) + { + char ch = xptr[i]; + if (!(ch >= '0' && ch <= '9')) + return false; + } + return true; +} + +enum +{ // constants for parsing class names + SLASH_MIN = '.', + SLASH_MAX = '/', + DOLLAR_MIN = 0, + DOLLAR_MAX = '-'}; + +static int lastIndexOf(int chmin, int chmax, bytes &x, int pos) +{ + byte *ptr = x.ptr; + for (byte *cp = ptr + pos; --cp >= ptr;) + { + assert(x.inBounds(cp)); + if (*cp >= chmin && *cp <= chmax) + return (int)(cp - ptr); + } + return -1; +} + +inner_class *cpool::getIC(entry *inner) +{ + if (inner == nullptr) + return nullptr; + assert(inner->tag == CONSTANT_Class); + if (inner->inord == NO_INORD) + return nullptr; + inner_class *ic = ic_index[inner->inord]; + assert(ic == nullptr || ic->inner == inner); + return ic; +} + +inner_class 
*cpool::getFirstChildIC(entry *outer)
{
    // Head of the list of inner classes whose outer class is `outer`;
    // null for a null entry, an entry without an input ordinal, or no children.
    if (outer == nullptr)
        return nullptr;
    assert(outer->tag == CONSTANT_Class);
    if (outer->inord == NO_INORD)
        return nullptr;
    inner_class *ic = ic_child_index[outer->inord];
    assert(ic == nullptr || ic->outer == outer);
    return ic;
}

// Next inner class sharing the same outer class as `child`, or null.
inner_class *cpool::getNextChildIC(inner_class *child)
{
    inner_class *ic = child->next_sibling;
    assert(ic == nullptr || ic->outer == child->outer);
    return ic;
}

// Read the inner-class bands into `ics` and build two per-class indexes:
// cp.ic_index (class -> its own record) and cp.ic_child_index (class -> first
// record naming it as outer class).  Records flagged ACC_IC_LONG_FORM carry
// explicit outer/name references; for all others the outer class and simple
// name are derived from the inner class's own name.
void unpacker::read_ics()
{
    int i;
    int index_size = cp.tag_count[CONSTANT_Class];
    inner_class **ic_index = U_NEW(inner_class *, index_size);
    inner_class **ic_child_index = U_NEW(inner_class *, index_size);
    cp.ic_index = ic_index;
    cp.ic_child_index = ic_child_index;
    ics = U_NEW(inner_class, ic_count);
    ic_this_class.readData(ic_count);
    ic_flags.readData(ic_count);
    CHECK;
    // Scan flags to get count of long-form bands.
    int long_forms = 0;
    for (i = 0; i < ic_count; i++)
    {
        int flags = ic_flags.getInt(); // may be long form!
        if ((flags & ACC_IC_LONG_FORM) != 0)
        {
            long_forms += 1;
            // Mark the record so the second pass knows to read it in long form.
            ics[i].name = NO_ENTRY_YET;
        }
        flags &= ~ACC_IC_LONG_FORM;
        entry *inner = ic_this_class.getRef();
        CHECK;
        uint inord = inner->inord;
        assert(inord < (uint)cp.tag_count[CONSTANT_Class]);
        // Each class may appear as an inner class at most once.
        if (ic_index[inord] != nullptr)
        {
            abort("identical inner class");
            break;
        }
        ic_index[inord] = &ics[i];
        ics[i].inner = inner;
        ics[i].flags = flags;
        assert(cp.getIC(inner) == &ics[i]);
    }
    CHECK;
    // ic_this_class.done();
    // ic_flags.done();
    ic_outer_class.readData(long_forms);
    ic_name.readData(long_forms);
    for (i = 0; i < ic_count; i++)
    {
        if (ics[i].name == NO_ENTRY_YET)
        {
            // Long form.
            ics[i].outer = ic_outer_class.getRefN();
            ics[i].name = ic_name.getRefN();
        }
        else
        {
            // Fill in outer and name based on inner.
            bytes &n = ics[i].inner->value.b;
            bytes pkgOuter;
            bytes number;
            bytes name;
            // Parse n into pkgOuter and name (and number).
            int dollar1, dollar2; // indexes of the '$' separators in n
            // parse n = (PKG/)*OUTER($NUMBER)?($NAME)?
            // (Any byte in [DOLLAR_MIN, DOLLAR_MAX] counts as a '$' separator
            // and any byte in [SLASH_MIN, SLASH_MAX] as a package separator.)
            int nlen = (int)n.len;
            int pkglen = lastIndexOf(SLASH_MIN, SLASH_MAX, n, nlen) + 1;
            dollar2 = lastIndexOf(DOLLAR_MIN, DOLLAR_MAX, n, nlen);
            if (dollar2 < 0)
            {
                // No separator at all: the name cannot be a short-form inner class.
                abort();
                return;
            }
            assert(dollar2 >= pkglen);
            if (isDigitString(n, dollar2 + 1, nlen))
            {
                // n = (PKG/)*OUTER$NUMBER
                number = n.slice(dollar2 + 1, nlen);
                name.set(nullptr, 0);
                dollar1 = dollar2;
            }
            // Note: dollar1 is assigned inside the condition below.
            else if (pkglen < (dollar1 = lastIndexOf(DOLLAR_MIN, DOLLAR_MAX, n, dollar2 - 1)) &&
                     isDigitString(n, dollar1 + 1, dollar2))
            {
                // n = (PKG/)*OUTER$NUMBER$NAME
                number = n.slice(dollar1 + 1, dollar2);
                name = n.slice(dollar2 + 1, nlen);
            }
            else
            {
                // n = (PKG/)*OUTER$NAME
                dollar1 = dollar2;
                number.set(nullptr, 0);
                name = n.slice(dollar2 + 1, nlen);
            }
            // Only a name without a NUMBER part yields an outer class.
            if (number.ptr == nullptr)
                pkgOuter = n.slice(0, dollar1);
            else
                pkgOuter.set(nullptr, 0);

            if (pkgOuter.ptr != nullptr)
                ics[i].outer = cp.ensureClass(pkgOuter);

            if (name.ptr != nullptr)
                ics[i].name = cp.ensureUtf8(name);
        }

        // update child/sibling list
        if (ics[i].outer != nullptr)
        {
            uint outord = ics[i].outer->inord;
            if (outord != NO_INORD)
            {
                assert(outord < (uint)cp.tag_count[CONSTANT_Class]);
                // Push this record on the front of its outer class's child list.
                ics[i].next_sibling = ic_child_index[outord];
                ic_child_index[outord] = &ics[i];
            }
        }
    }
    // ic_outer_class.done();
    // ic_name.done();
}

// Read the per-class bands: this/super/interfaces, field and method counts
// and descriptors, the attributes of fields, methods and classes (in that
// order), and finally the code headers.
void unpacker::read_classes()
{
    class_this.readData(class_count);
    class_super.readData(class_count);
    class_interface_count.readData(class_count);
    class_interface.readData(class_interface_count.getIntTotal());

    CHECK;

#if 0
  int i;
  // Make a little mark on super-classes.
  for (i = 0; i < class_count; i++) {
    entry* e = class_super.getRefN();
    if (e != nullptr)  e->bits |= entry::EB_SUPER;
  }
  class_super.rewind();
#endif

    // Members.
    class_field_count.readData(class_count);
    class_method_count.readData(class_count);

    CHECK;

    int field_count = class_field_count.getIntTotal();
    int method_count = class_method_count.getIntTotal();

    field_descr.readData(field_count);
    read_attrs(ATTR_CONTEXT_FIELD, field_count);
    CHECK;

    method_descr.readData(method_count);
    read_attrs(ATTR_CONTEXT_METHOD, method_count);

    CHECK;

    read_attrs(ATTR_CONTEXT_CLASS, class_count);
    CHECK;

    read_code_headers();
}

// Occurrence count recorded for attribute index idx, or 0 when idx is not a
// predefined attribute.
int unpacker::attr_definitions::predefCount(uint idx)
{
    return isPredefined(idx) ? flag_count[idx] : 0;
}

void unpacker::read_attrs(int attrc, int obj_count)
{
    attr_definitions &ad = attr_defs[attrc];
    assert(ad.attrc == attrc);

    int i, idx, count;

    CHECK;

    bool haveLongFlags = ad.haveLongFlags();

    band &xxx_flags_hi = ad.xxx_flags_hi();
    assert(endsWith(xxx_flags_hi.name, "_flags_hi"));
    if (haveLongFlags)
        xxx_flags_hi.readData(obj_count);
    CHECK;

    band &xxx_flags_lo = ad.xxx_flags_lo();
    assert(endsWith(xxx_flags_lo.name, "_flags_lo"));
    xxx_flags_lo.readData(obj_count);
    CHECK;

    // pre-scan flags, counting occurrences of each index bit
    julong indexMask = ad.flagIndexMask(); // which flag bits are index bits?
+ for (i = 0; i < obj_count; i++) + { + julong indexBits = xxx_flags_hi.getLong(xxx_flags_lo, haveLongFlags); + if ((indexBits & ~indexMask) > (ushort) - 1) + { + abort("undefined attribute flag bit"); + return; + } + indexBits &= indexMask; // ignore classfile flag bits + for (idx = 0; indexBits != 0; idx++, indexBits >>= 1) + { + ad.flag_count[idx] += (int)(indexBits & 1); + } + } + // we'll scan these again later for output: + xxx_flags_lo.rewind(); + xxx_flags_hi.rewind(); + + band &xxx_attr_count = ad.xxx_attr_count(); + assert(endsWith(xxx_attr_count.name, "_attr_count")); + // There is one count element for each 1<<16 bit set in flags: + xxx_attr_count.readData(ad.predefCount(X_ATTR_OVERFLOW)); + CHECK; + + band &xxx_attr_indexes = ad.xxx_attr_indexes(); + assert(endsWith(xxx_attr_indexes.name, "_attr_indexes")); + int overflowIndexCount = xxx_attr_count.getIntTotal(); + xxx_attr_indexes.readData(overflowIndexCount); + CHECK; + // pre-scan attr indexes, counting occurrences of each value + for (i = 0; i < overflowIndexCount; i++) + { + idx = xxx_attr_indexes.getInt(); + if (!ad.isIndex(idx)) + { + abort("attribute index out of bounds"); + return; + } + ad.getCount(idx) += 1; + } + xxx_attr_indexes.rewind(); // we'll scan it again later for output + + // We will need a backward call count for each used backward callable. + int backwardCounts = 0; + for (idx = 0; idx < ad.layouts.length(); idx++) + { + layout_definition *lo = ad.getLayout(idx); + if (lo != nullptr && ad.getCount(idx) != 0) + { + // Build the bands lazily, only when they are used. + band **bands = ad.buildBands(lo); + CHECK; + if (lo->hasCallables()) + { + for (i = 0; bands[i] != nullptr; i++) + { + if (bands[i]->le_back) + { + assert(bands[i]->le_kind == EK_CBLE); + backwardCounts += 1; + } + } + } + } + } + ad.xxx_attr_calls().readData(backwardCounts); + CHECK; + + // Read built-in bands. + // Mostly, these are hand-coded equivalents to readBandData(). 
+ switch (attrc) + { + case ATTR_CONTEXT_CLASS: + + count = ad.predefCount(CLASS_ATTR_SourceFile); + class_SourceFile_RUN.readData(count); + CHECK; + + count = ad.predefCount(CLASS_ATTR_EnclosingMethod); + class_EnclosingMethod_RC.readData(count); + class_EnclosingMethod_RDN.readData(count); + CHECK; + + count = ad.predefCount(X_ATTR_Signature); + class_Signature_RS.readData(count); + CHECK; + + ad.readBandData(X_ATTR_RuntimeVisibleAnnotations); + ad.readBandData(X_ATTR_RuntimeInvisibleAnnotations); + + count = ad.predefCount(CLASS_ATTR_InnerClasses); + class_InnerClasses_N.readData(count); + CHECK; + + count = class_InnerClasses_N.getIntTotal(); + class_InnerClasses_RC.readData(count); + class_InnerClasses_F.readData(count); + CHECK; + // Drop remaining columns wherever flags are zero: + count -= class_InnerClasses_F.getIntCount(0); + class_InnerClasses_outer_RCN.readData(count); + class_InnerClasses_name_RUN.readData(count); + CHECK; + + count = ad.predefCount(CLASS_ATTR_ClassFile_version); + class_ClassFile_version_minor_H.readData(count); + class_ClassFile_version_major_H.readData(count); + CHECK; + break; + + case ATTR_CONTEXT_FIELD: + + count = ad.predefCount(FIELD_ATTR_ConstantValue); + field_ConstantValue_KQ.readData(count); + CHECK; + + count = ad.predefCount(X_ATTR_Signature); + field_Signature_RS.readData(count); + CHECK; + + ad.readBandData(X_ATTR_RuntimeVisibleAnnotations); + ad.readBandData(X_ATTR_RuntimeInvisibleAnnotations); + CHECK; + break; + + case ATTR_CONTEXT_METHOD: + + code_count = ad.predefCount(METHOD_ATTR_Code); + // Code attrs are handled very specially below... 
+ + count = ad.predefCount(METHOD_ATTR_Exceptions); + method_Exceptions_N.readData(count); + count = method_Exceptions_N.getIntTotal(); + method_Exceptions_RC.readData(count); + CHECK; + + count = ad.predefCount(X_ATTR_Signature); + method_Signature_RS.readData(count); + CHECK; + + ad.readBandData(X_ATTR_RuntimeVisibleAnnotations); + ad.readBandData(X_ATTR_RuntimeInvisibleAnnotations); + ad.readBandData(METHOD_ATTR_RuntimeVisibleParameterAnnotations); + ad.readBandData(METHOD_ATTR_RuntimeInvisibleParameterAnnotations); + ad.readBandData(METHOD_ATTR_AnnotationDefault); + CHECK; + break; + + case ATTR_CONTEXT_CODE: + // (keep this code aligned with its brother in unpacker::write_attrs) + count = ad.predefCount(CODE_ATTR_StackMapTable); + // disable this feature in old archives! + if (count != 0 && majver < JAVA6_PACKAGE_MAJOR_VERSION) + { + abort("undefined StackMapTable attribute (old archive format)"); + return; + } + code_StackMapTable_N.readData(count); + CHECK; + count = code_StackMapTable_N.getIntTotal(); + code_StackMapTable_frame_T.readData(count); + CHECK; + // the rest of it depends in a complicated way on frame tags + { + int fat_frame_count = 0; + int offset_count = 0; + int type_count = 0; + for (int k = 0; k < count; k++) + { + int tag = code_StackMapTable_frame_T.getByte(); + if (tag <= 127) + { + // (64-127) [(2)] + if (tag >= 64) + type_count++; + } + else if (tag <= 251) + { + // (247) [(1)(2)] + // (248-251) [(1)] + if (tag >= 247) + offset_count++; + if (tag == 247) + type_count++; + } + else if (tag <= 254) + { + // (252) [(1)(2)] + // (253) [(1)(2)(2)] + // (254) [(1)(2)(2)(2)] + offset_count++; + type_count += (tag - 251); + } + else + { + // (255) [(1)NH[(2)]NH[(2)]] + fat_frame_count++; + } + } + + // done pre-scanning frame tags: + code_StackMapTable_frame_T.rewind(); + + // deal completely with fat frames: + offset_count += fat_frame_count; + code_StackMapTable_local_N.readData(fat_frame_count); + CHECK; + type_count += 
code_StackMapTable_local_N.getIntTotal(); + code_StackMapTable_stack_N.readData(fat_frame_count); + type_count += code_StackMapTable_stack_N.getIntTotal(); + CHECK; + // read the rest: + code_StackMapTable_offset.readData(offset_count); + code_StackMapTable_T.readData(type_count); + CHECK; + // (7) [RCH] + count = code_StackMapTable_T.getIntCount(7); + code_StackMapTable_RC.readData(count); + CHECK; + // (8) [PH] + count = code_StackMapTable_T.getIntCount(8); + code_StackMapTable_P.readData(count); + CHECK; + } + + count = ad.predefCount(CODE_ATTR_LineNumberTable); + code_LineNumberTable_N.readData(count); + count = code_LineNumberTable_N.getIntTotal(); + code_LineNumberTable_bci_P.readData(count); + code_LineNumberTable_line.readData(count); + + count = ad.predefCount(CODE_ATTR_LocalVariableTable); + code_LocalVariableTable_N.readData(count); + count = code_LocalVariableTable_N.getIntTotal(); + code_LocalVariableTable_bci_P.readData(count); + code_LocalVariableTable_span_O.readData(count); + code_LocalVariableTable_name_RU.readData(count); + code_LocalVariableTable_type_RS.readData(count); + code_LocalVariableTable_slot.readData(count); + + count = ad.predefCount(CODE_ATTR_LocalVariableTypeTable); + code_LocalVariableTypeTable_N.readData(count); + count = code_LocalVariableTypeTable_N.getIntTotal(); + code_LocalVariableTypeTable_bci_P.readData(count); + code_LocalVariableTypeTable_span_O.readData(count); + code_LocalVariableTypeTable_name_RU.readData(count); + code_LocalVariableTypeTable_type_RS.readData(count); + code_LocalVariableTypeTable_slot.readData(count); + break; + } + + // Read compressor-defined bands. + for (idx = 0; idx < ad.layouts.length(); idx++) + { + if (ad.getLayout(idx) == nullptr) + continue; // none at this fixed index <32 + if (idx < (int)ad.flag_limit && ad.isPredefined(idx)) + continue; // already handled + if (ad.getCount(idx) == 0) + continue; // no attributes of this type (then why transmit layouts?) 
+ ad.readBandData(idx); + } +} + +void unpacker::attr_definitions::readBandData(int idx) +{ + int j; + uint count = getCount(idx); + if (count == 0) + return; + layout_definition *lo = getLayout(idx); + bool hasCallables = lo->hasCallables(); + band **bands = lo->bands(); + if (!hasCallables) + { + // Read through the rest of the bands in a regular way. + readBandData(bands, count); + } + else + { + // Deal with the callables. + // First set up the forward entry count for each callable. + // This is stored on band::length of the callable. + bands[0]->expectMoreLength(count); + for (j = 0; bands[j] != nullptr; j++) + { + band &j_cble = *bands[j]; + assert(j_cble.le_kind == EK_CBLE); + if (j_cble.le_back) + { + // Add in the predicted effects of backward calls, too. + int back_calls = xxx_attr_calls().getInt(); + j_cble.expectMoreLength(back_calls); + // In a moment, more forward calls may increment j_cble.length. + } + } + // Now consult whichever callables have non-zero entry counts. + readBandData(bands, (uint) - 1); + } +} + +// Recursive helper to the previous function: +void unpacker::attr_definitions::readBandData(band **body, uint count) +{ + int j, k; + for (j = 0; body[j] != nullptr; j++) + { + band &b = *body[j]; + if (b.defc != nullptr) + { + // It has data, so read it. + b.readData(count); + } + switch (b.le_kind) + { + case EK_REPL: + { + int reps = b.getIntTotal(); + readBandData(b.le_body, reps); + } + break; + case EK_UN: + { + int remaining = count; + for (k = 0; b.le_body[k] != nullptr; k++) + { + band &k_case = *b.le_body[k]; + int k_count = 0; + if (k_case.le_casetags == nullptr) + { + k_count = remaining; // last (empty) case + } + else + { + int *tags = k_case.le_casetags; + int ntags = *tags++; // 1st element is length (why not?) 
+ while (ntags-- > 0) + { + int tag = *tags++; + k_count += b.getIntCount(tag); + } + } + readBandData(k_case.le_body, k_count); + remaining -= k_count; + } + assert(remaining == 0); + } + break; + case EK_CALL: + // Push the count forward, if it is not a backward call. + if (!b.le_back) + { + band &cble = *b.le_body[0]; + assert(cble.le_kind == EK_CBLE); + cble.expectMoreLength(count); + } + break; + case EK_CBLE: + assert((int)count == -1); // incoming count is meaningless + k = b.length; + assert(k >= 0); + // This is intended and required for non production mode. + assert((b.length = -1)); // make it unable to accept more calls now. + readBandData(b.le_body, k); + break; + } + } +} + +static inline band **findMatchingCase(int matchTag, band **cases) +{ + for (int k = 0; cases[k] != nullptr; k++) + { + band &k_case = *cases[k]; + if (k_case.le_casetags != nullptr) + { + // If it has tags, it must match a tag. + int *tags = k_case.le_casetags; + int ntags = *tags++; // 1st element is length + for (; ntags > 0; ntags--) + { + int tag = *tags++; + if (tag == matchTag) + break; + } + if (ntags == 0) + continue; // does not match + } + return k_case.le_body; + } + return nullptr; +} + +// write attribute band data: +void unpacker::putlayout(band **body) +{ + int i; + int prevBII = -1; + int prevBCI = -1; + if (body == NULL) + { + abort("putlayout: unexpected NULL for body"); + return; + } + for (i = 0; body[i] != nullptr; i++) + { + band &b = *body[i]; + byte le_kind = b.le_kind; + + // Handle scalar part, if any. + int x = 0; + entry *e = nullptr; + if (b.defc != nullptr) + { + // It has data, so unparse an element. 
+ if (b.ixTag != CONSTANT_None) + { + assert(le_kind == EK_REF); + if (b.ixTag == CONSTANT_Literal) + e = b.getRefUsing(cp.getKQIndex()); + else + e = b.getRefN(); + switch (b.le_len) + { + case 0: + break; + case 1: + putu1ref(e); + break; + case 2: + putref(e); + break; + case 4: + putu2(0); + putref(e); + break; + default: + assert(false); + } + } + else + { + assert(le_kind == EK_INT || le_kind == EK_REPL || le_kind == EK_UN); + x = b.getInt(); + + assert(!b.le_bci || prevBCI == (int)to_bci(prevBII)); + switch (b.le_bci) + { + case EK_BCI: // PH: transmit R(bci), store bci + x = to_bci(prevBII = x); + prevBCI = x; + break; + case EK_BCID: // POH: transmit D(R(bci)), store bci + x = to_bci(prevBII += x); + prevBCI = x; + break; + case EK_BCO: // OH: transmit D(R(bci)), store D(bci) + x = to_bci(prevBII += x) - prevBCI; + prevBCI += x; + break; + } + assert(!b.le_bci || prevBCI == (int)to_bci(prevBII)); + + switch (b.le_len) + { + case 0: + break; + case 1: + putu1(x); + break; + case 2: + putu2(x); + break; + case 4: + putu4(x); + break; + default: + assert(false); + } + } + } + + // Handle subparts, if any. 
+ switch (le_kind) + { + case EK_REPL: + // x is the repeat count + while (x-- > 0) + { + putlayout(b.le_body); + } + break; + case EK_UN: + // x is the tag + putlayout(findMatchingCase(x, b.le_body)); + break; + case EK_CALL: + { + band &cble = *b.le_body[0]; + assert(cble.le_kind == EK_CBLE); + assert(cble.le_len == b.le_len); + putlayout(cble.le_body); + } + break; + + case EK_CBLE: + case EK_CASE: + assert(false); // should not reach here + } + } +} + +void unpacker::read_files() +{ + file_name.readData(file_count); + if ((archive_options & AO_HAVE_FILE_SIZE_HI) != 0) + file_size_hi.readData(file_count); + file_size_lo.readData(file_count); + if ((archive_options & AO_HAVE_FILE_MODTIME) != 0) + file_modtime.readData(file_count); + int allFiles = file_count + class_count; + if ((archive_options & AO_HAVE_FILE_OPTIONS) != 0) + { + file_options.readData(file_count); + // FO_IS_CLASS_STUB might be set, causing overlap between classes and files + for (int i = 0; i < file_count; i++) + { + if ((file_options.getInt() & FO_IS_CLASS_STUB) != 0) + { + allFiles -= 1; // this one counts as both class and file + } + } + file_options.rewind(); + } + assert((default_file_options & FO_IS_CLASS_STUB) == 0); + files_remaining = allFiles; +} + +void unpacker::get_code_header(int &max_stack, int &max_na_locals, int &handler_count, + int &cflags) +{ + int sc = code_headers.getByte(); + if (sc == 0) + { + max_stack = max_na_locals = handler_count = cflags = -1; + return; + } + // Short code header is the usual case: + int nh; + int mod; + if (sc < 1 + 12 * 12) + { + sc -= 1; + nh = 0; + mod = 12; + } + else if (sc < 1 + 12 * 12 + 8 * 8) + { + sc -= 1 + 12 * 12; + nh = 1; + mod = 8; + } + else + { + assert(sc < 1 + 12 * 12 + 8 * 8 + 7 * 7); + sc -= 1 + 12 * 12 + 8 * 8; + nh = 2; + mod = 7; + } + max_stack = sc % mod; + max_na_locals = sc / mod; // caller must add static, siglen + handler_count = nh; + if ((archive_options & AO_HAVE_ALL_CODE_FLAGS) != 0) + cflags = -1; + else + cflags 
= 0; // this one has no attributes
}

// Cf. PackageReader.readCodeHeaders
//
// Reads the code_headers band and every band whose length depends on it.
// A first pass over the headers counts how many methods need an explicit
// max_stack / max_na_locals / handler_count value and how many carry code
// flags; get_code_header() reports a field as negative when the header byte
// does not encode it.  The dependent bands are then read with those counts,
// followed by the exception-handler bands and the Code attribute bands.
void unpacker::read_code_headers()
{
    code_headers.readData(code_count);
    CHECK;
    int totalHandlerCount = 0;
    int totalFlagsCount = 0;
    for (int i = 0; i < code_count; i++)
    {
        int max_stack, max_locals, handler_count, cflags;
        get_code_header(max_stack, max_locals, handler_count, cflags);
        // A negative field means "not in the header": one more element is
        // expected in the corresponding explicit band.
        if (max_stack < 0)
            code_max_stack.expectMoreLength(1);
        if (max_locals < 0)
            code_max_na_locals.expectMoreLength(1);
        if (handler_count < 0)
            code_handler_count.expectMoreLength(1);
        else
            totalHandlerCount += handler_count;
        if (cflags < 0)
            totalFlagsCount += 1;
    }
    code_headers.rewind(); // replay later during writing

    // Lengths were accumulated through expectMoreLength() above.
    code_max_stack.readData();
    code_max_na_locals.readData();
    code_handler_count.readData();
    // Add the handler counts that were transmitted explicitly.
    totalHandlerCount += code_handler_count.getIntTotal();
    CHECK;

    // Read handler specifications.
    // Cf. PackageReader.readCodeHandlers.
    code_handler_start_P.readData(totalHandlerCount);
    code_handler_end_PO.readData(totalHandlerCount);
    code_handler_catch_PO.readData(totalHandlerCount);
    code_handler_class_RCN.readData(totalHandlerCount);
    CHECK;

    read_attrs(ATTR_CONTEXT_CODE, totalFlagsCount);
    CHECK;
}

// Returns true iff min <= n <= max, using a single comparison: when n < min
// the unsigned subtraction wraps around to a value larger than max - min.
// Requires min <= max.
static inline bool is_in_range(uint n, uint min, uint max)
{
    return n - min <= max - min; // unsigned arithmetic!
}
// True for the opcodes bc_getstatic through bc_putfield (inclusive).
static inline bool is_field_op(int bc)
{
    return is_in_range(bc, bc_getstatic, bc_putfield);
}
// True for the opcodes in [_invokeinit_op, _invokeinit_limit).
static inline bool is_invoke_init_op(int bc)
{
    return is_in_range(bc, _invokeinit_op, _invokeinit_limit - 1);
}
// True for the opcodes in [_self_linker_op, _self_linker_limit).
static inline bool is_self_linker_op(int bc)
{
    return is_in_range(bc, _self_linker_op, _self_linker_limit - 1);
}
// True for the opcodes bc_ifeq..bc_jsr and bc_ifnull..bc_jsr_w (inclusive).
static bool is_branch_op(int bc)
{
    return is_in_range(bc, bc_ifeq, bc_jsr) || is_in_range(bc, bc_ifnull, bc_jsr_w);
}
// True for the opcodes bc_iload..bc_aload and bc_istore..bc_astore
// (inclusive), plus bc_iinc and bc_ret.
static bool is_local_slot_op(int bc)
{
    return is_in_range(bc, bc_iload, bc_aload) || is_in_range(bc, bc_istore, bc_astore) ||
           bc == bc_iinc || bc == bc_ret;
}
// Returns the band that supplies the constant-pool reference operand of
// opcode `bc`, or nullptr when `bc` is not one of the opcodes listed below.
band *unpacker::ref_band_for_op(int bc)
{
    switch (bc)
    {
    // Typed ldc variants, one band per constant kind.
    case bc_ildc:
    case bc_ildc_w:
        return &bc_intref;
    case bc_fldc:
    case bc_fldc_w:
        return &bc_floatref;
    case bc_lldc2_w:
        return &bc_longref;
    case bc_dldc2_w:
        return &bc_doubleref;
    case bc_aldc:
    case bc_aldc_w:
        return &bc_stringref;
    case bc_cldc:
    case bc_cldc_w:
        return &bc_classref;

    // Field access.
    case bc_getstatic:
    case bc_putstatic:
    case bc_getfield:
    case bc_putfield:
        return &bc_fieldref;

    // Method invocation.
    case bc_invokevirtual:
    case bc_invokespecial:
    case bc_invokestatic:
        return &bc_methodref;
    case bc_invokeinterface:
        return &bc_imethodref;

    // Instructions with a class operand.
    case bc_new:
    case bc_anewarray:
    case bc_checkcast:
    case bc_instanceof:
    case bc_multianewarray:
        return &bc_classref;
    }
    return nullptr;
}

band *unpacker::ref_band_for_self_op(int bc, bool &isAloadVar, int &origBCVar)
{
    if (!is_self_linker_op(bc))
        return nullptr;
    // Peel the "super" and "aload" flags off the opcode offset; what remains
    // is the offset of the original opcode from _first_linker_op.
    int idx = (bc - _self_linker_op);
    bool isSuper = (idx >= _self_linker_super_flag);
    if (isSuper)
        idx -= _self_linker_super_flag;
    bool isAload = (idx >= _self_linker_aload_flag);
    if (isAload)
        idx -= _self_linker_aload_flag;
    int origBC = _first_linker_op + idx;
    bool isField = is_field_op(origBC);
    isAloadVar = isAload;
    origBCVar = _first_linker_op + idx;
    if (!isSuper)
        return isField ?
&bc_thisfield : &bc_thismethod; + else + return isField ? &bc_superfield : &bc_supermethod; +} + +// Cf. PackageReader.readByteCodes +inline // called exactly once => inline + void +unpacker::read_bcs() +{ + // read from bc_codes and bc_case_count + fillbytes all_switch_ops; + all_switch_ops.init(); + CHECK; + + // Read directly from rp/rplimit. + // Do this later: bc_codes.readData(...) + byte *rp0 = rp; + + band *bc_which; + byte *opptr = rp; + byte *oplimit = rplimit; + + bool isAload; // passed by ref and then ignored + int junkBC; // passed by ref and then ignored + for (int k = 0; k < code_count; k++) + { + // Scan one method: + for (;;) + { + if (opptr + 2 > oplimit) + { + rp = opptr; + ensure_input(2); + oplimit = rplimit; + rp = rp0; // back up + } + if (opptr == oplimit) + { + abort(); + break; + } + int bc = *opptr++ & 0xFF; + bool isWide = false; + if (bc == bc_wide) + { + if (opptr == oplimit) + { + abort(); + break; + } + bc = *opptr++ & 0xFF; + isWide = true; + } + // Adjust expectations of various band sizes. + switch (bc) + { + case bc_tableswitch: + case bc_lookupswitch: + all_switch_ops.addByte(bc); + break; + case bc_iinc: + bc_local.expectMoreLength(1); + bc_which = isWide ? 
&bc_short : &bc_byte; + bc_which->expectMoreLength(1); + break; + case bc_sipush: + bc_short.expectMoreLength(1); + break; + case bc_bipush: + bc_byte.expectMoreLength(1); + break; + case bc_newarray: + bc_byte.expectMoreLength(1); + break; + case bc_multianewarray: + assert(ref_band_for_op(bc) == &bc_classref); + bc_classref.expectMoreLength(1); + bc_byte.expectMoreLength(1); + break; + case bc_ref_escape: + bc_escrefsize.expectMoreLength(1); + bc_escref.expectMoreLength(1); + break; + case bc_byte_escape: + bc_escsize.expectMoreLength(1); + // bc_escbyte will have to be counted too + break; + default: + if (is_invoke_init_op(bc)) + { + bc_initref.expectMoreLength(1); + break; + } + bc_which = ref_band_for_self_op(bc, isAload, junkBC); + if (bc_which != nullptr) + { + bc_which->expectMoreLength(1); + break; + } + if (is_branch_op(bc)) + { + bc_label.expectMoreLength(1); + break; + } + bc_which = ref_band_for_op(bc); + if (bc_which != nullptr) + { + bc_which->expectMoreLength(1); + assert(bc != bc_multianewarray); // handled elsewhere + break; + } + if (is_local_slot_op(bc)) + { + bc_local.expectMoreLength(1); + break; + } + break; + case bc_end_marker: + // Increment k and test against code_count. + goto doneScanningMethod; + } + } + doneScanningMethod: + { + } + if (aborting()) + break; + } + + // Go through the formality, so we can use it in a regular fashion later: + assert(rp == rp0); + bc_codes.readData((int)(opptr - rp)); + + int i = 0; + + // To size instruction bands correctly, we need info on switches: + bc_case_count.readData((int)all_switch_ops.size()); + for (i = 0; i < (int)all_switch_ops.size(); i++) + { + int caseCount = bc_case_count.getInt(); + int bc = all_switch_ops.getByte(i); + bc_label.expectMoreLength(1 + caseCount); // default label + cases + bc_case_value.expectMoreLength(bc == bc_tableswitch ? 
1 : caseCount); + } + bc_case_count.rewind(); // uses again for output + + all_switch_ops.free(); + + for (i = e_bc_case_value; i <= e_bc_escsize; i++) + { + all_bands[i].readData(); + } + + // The bc_escbyte band is counted by the immediately previous band. + bc_escbyte.readData(bc_escsize.getIntTotal()); +} + +void unpacker::read_bands() +{ + byte *rp0 = rp; + + read_file_header(); + CHECK; + + if (cp.nentries == 0) + { + // read_file_header failed to read a CP, because it copied a JAR. + return; + } + + // Do this after the file header has been read: + check_options(); + + read_cp(); + CHECK; + read_attr_defs(); + CHECK; + read_ics(); + CHECK; + read_classes(); + CHECK; + read_bcs(); + CHECK; + read_files(); +} + +/// CP routines + +entry *&cpool::hashTabRef(byte tag, bytes &b) +{ + uint hash = tag + (int)b.len; + for (int i = 0; i < (int)b.len; i++) + { + hash = hash * 31 + (0xFF & b.ptr[i]); + } + entry **ht = hashTab; + int hlen = hashTabLength; + assert((hlen & (hlen - 1)) == 0); // must be power of 2 + uint hash1 = hash & (hlen - 1); // == hash % hlen + uint hash2 = 0; // lazily computed (requires mod op.) + int probes = 0; + while (ht[hash1] != nullptr) + { + entry &e = *ht[hash1]; + if (e.value.b.equals(b) && e.tag == tag) + break; + if (hash2 == 0) + // Note: hash2 must be relatively prime to hlen, hence the "|1". + hash2 = (((hash % 499) & (hlen - 1)) | 1); + hash1 += hash2; + if (hash1 >= (uint)hlen) + hash1 -= hlen; + assert(hash1 < (uint)hlen); + assert(++probes < hlen); + } + return ht[hash1]; +} + +static void insert_extra(entry *e, ptrlist &extras) +{ + // This ordering helps implement the Pack200 requirement + // of a predictable CP order in the class files produced. + e->inord = NO_INORD; // mark as an "extra" + extras.add(e); + // Note: We will sort the list (by string-name) later. +} + +entry *cpool::ensureUtf8(bytes &b) +{ + entry *&ix = hashTabRef(CONSTANT_Utf8, b); + if (ix != nullptr) + return ix; + // Make one. 
+ if (nentries == maxentries) + { + abort("cp utf8 overflow"); + return &entries[tag_base[CONSTANT_Utf8]]; // return something + } + entry &e = entries[nentries++]; + e.tag = CONSTANT_Utf8; + u->saveTo(e.value.b, b); + assert(&e >= first_extra_entry); + insert_extra(&e, tag_extras[CONSTANT_Utf8]); + return ix = &e; +} + +entry *cpool::ensureClass(bytes &b) +{ + entry *&ix = hashTabRef(CONSTANT_Class, b); + if (ix != nullptr) + return ix; + // Make one. + if (nentries == maxentries) + { + abort("cp class overflow"); + return &entries[tag_base[CONSTANT_Class]]; // return something + } + entry &e = entries[nentries++]; + e.tag = CONSTANT_Class; + e.nrefs = 1; + e.refs = U_NEW(entry *, 1); + ix = &e; // hold my spot in the index + entry *utf = ensureUtf8(b); + e.refs[0] = utf; + e.value.b = utf->value.b; + assert(&e >= first_extra_entry); + insert_extra(&e, tag_extras[CONSTANT_Class]); + return &e; +} + +void cpool::expandSignatures() +{ + int i; + int nsigs = 0; + int nreused = 0; + int first_sig = tag_base[CONSTANT_Signature]; + int sig_limit = tag_count[CONSTANT_Signature] + first_sig; + fillbytes buf; + buf.init(1 << 10); + CHECK; + for (i = first_sig; i < sig_limit; i++) + { + entry &e = entries[i]; + assert(e.tag == CONSTANT_Signature); + int refnum = 0; + bytes form = e.refs[refnum++]->asUtf8(); + buf.empty(); + for (int j = 0; j < (int)form.len; j++) + { + int c = form.ptr[j]; + buf.addByte(c); + if (c == 'L') + { + entry *cls = e.refs[refnum++]; + buf.append(cls->className()->asUtf8()); + } + } + assert(refnum == e.nrefs); + bytes &sig = buf.b; + + // try to find a pre-existing Utf8: + entry *&e2 = hashTabRef(CONSTANT_Utf8, sig); + if (e2 != nullptr) + { + assert(e2->isUtf8(sig)); + e.value.b = e2->value.b; + e.refs[0] = e2; + e.nrefs = 1; + nreused++; + } + else + { + // there is no other replacement; reuse this CP entry as a Utf8 + u->saveTo(e.value.b, sig); + e.tag = CONSTANT_Utf8; + e.nrefs = 0; + e2 = &e; + } + nsigs++; + } + buf.free(); + + // go expunge 
all references to remaining signatures: + for (i = 0; i < (int)nentries; i++) + { + entry &e = entries[i]; + for (int j = 0; j < e.nrefs; j++) + { + entry *&e2 = e.refs[j]; + if (e2 != nullptr && e2->tag == CONSTANT_Signature) + e2 = e2->refs[0]; + } + } +} + +void cpool::initMemberIndexes() +{ + // This function does NOT refer to any class schema. + // It is totally internal to the cpool. + int i, j; + + // Get the pre-existing indexes: + int nclasses = tag_count[CONSTANT_Class]; + entry *classes = tag_base[CONSTANT_Class] + entries; + int nfields = tag_count[CONSTANT_Fieldref]; + entry *fields = tag_base[CONSTANT_Fieldref] + entries; + int nmethods = tag_count[CONSTANT_Methodref]; + entry *methods = tag_base[CONSTANT_Methodref] + entries; + + int *field_counts = T_NEW(int, nclasses); + int *method_counts = T_NEW(int, nclasses); + cpindex *all_indexes = U_NEW(cpindex, nclasses * 2); + entry **field_ix = U_NEW(entry *, add_size(nfields, nclasses)); + entry **method_ix = U_NEW(entry *, add_size(nmethods, nclasses)); + + for (j = 0; j < nfields; j++) + { + entry &f = fields[j]; + i = f.memberClass()->inord; + assert(i < nclasses); + field_counts[i]++; + } + for (j = 0; j < nmethods; j++) + { + entry &m = methods[j]; + i = m.memberClass()->inord; + assert(i < nclasses); + method_counts[i]++; + } + + int fbase = 0, mbase = 0; + for (i = 0; i < nclasses; i++) + { + int fc = field_counts[i]; + int mc = method_counts[i]; + all_indexes[i * 2 + 0].init(fc, field_ix + fbase, CONSTANT_Fieldref + SUBINDEX_BIT); + all_indexes[i * 2 + 1].init(mc, method_ix + mbase, CONSTANT_Methodref + SUBINDEX_BIT); + // reuse field_counts and member_counts as fill pointers: + field_counts[i] = fbase; + method_counts[i] = mbase; + fbase += fc + 1; + mbase += mc + 1; + // (the +1 leaves a space between every subarray) + } + assert(fbase == nfields + nclasses); + assert(mbase == nmethods + nclasses); + + for (j = 0; j < nfields; j++) + { + entry &f = fields[j]; + i = f.memberClass()->inord; + 
field_ix[field_counts[i]++] = &f; + } + for (j = 0; j < nmethods; j++) + { + entry &m = methods[j]; + i = m.memberClass()->inord; + method_ix[method_counts[i]++] = &m; + } + + member_indexes = all_indexes; + + // Free intermediate buffers. + u->free_temps(); +} + +void entry::requestOutputIndex(cpool &cp, int req) +{ + assert(outputIndex <= NOT_REQUESTED); // must not have assigned indexes yet + if (tag == CONSTANT_Signature) + { + ref(0)->requestOutputIndex(cp, req); + return; + } + assert(req == REQUESTED || req == REQUESTED_LDC); + if (outputIndex != NOT_REQUESTED) + { + if (req == REQUESTED_LDC) + outputIndex = req; // this kind has precedence + return; + } + outputIndex = req; + // assert(!cp.outputEntries.contains(this)); + assert(tag != CONSTANT_Signature); + cp.outputEntries.add(this); + for (int j = 0; j < nrefs; j++) + { + ref(j)->requestOutputIndex(cp); + } +} + +void cpool::resetOutputIndexes() +{ + int i; + int noes = outputEntries.length(); + entry **oes = (entry **)outputEntries.base(); + for (i = 0; i < noes; i++) + { + entry &e = *oes[i]; + e.outputIndex = NOT_REQUESTED; + } + outputIndexLimit = 0; + outputEntries.empty(); +} + +static const byte TAG_ORDER[CONSTANT_Limit] = {0, 1, 0, 2, 3, 4, 5, 7, 6, 10, 11, 12, 9, 8}; + +extern "C" int outputEntry_cmp(const void *e1p, const void *e2p) +{ + // Sort entries according to the Pack200 rules for deterministic + // constant pool ordering. + // + // The four sort keys as follows, in order of decreasing importance: + // 1. ldc first, then non-ldc guys + // 2. normal cp_All entries by input order (i.e., address order) + // 3. 
after that, extra entries by lexical order (as in tag_extras[*]) + entry &e1 = *(entry *)*(void **)e1p; + entry &e2 = *(entry *)*(void **)e2p; + int oi1 = e1.outputIndex; + int oi2 = e2.outputIndex; + assert(oi1 == REQUESTED || oi1 == REQUESTED_LDC); + assert(oi2 == REQUESTED || oi2 == REQUESTED_LDC); + if (oi1 != oi2) + { + if (oi1 == REQUESTED_LDC) + return 0 - 1; + if (oi2 == REQUESTED_LDC) + return 1 - 0; + // Else fall through; neither is an ldc request. + } + if (e1.inord != NO_INORD || e2.inord != NO_INORD) + { + // One or both is normal. Use input order. + if (&e1 > &e2) + return 1 - 0; + if (&e1 < &e2) + return 0 - 1; + return 0; // equal pointers + } + // Both are extras. Sort by tag and then by value. + if (e1.tag != e2.tag) + { + return TAG_ORDER[e1.tag] - TAG_ORDER[e2.tag]; + } + // If the tags are the same, use string comparison. + return compare_Utf8_chars(e1.value.b, e2.value.b); +} + +void cpool::computeOutputIndexes() +{ + int i; + + int noes = outputEntries.length(); + entry **oes = (entry **)outputEntries.base(); + + // Sort the output constant pool into the order required by Pack200. + PTRLIST_QSORT(outputEntries, outputEntry_cmp); + + // Allocate a new index for each entry that needs one. + // We do this in two passes, one for LDC entries and one for the rest. + int nextIndex = 1; // always skip index #0 in output cpool + for (i = 0; i < noes; i++) + { + entry &e = *oes[i]; + assert(e.outputIndex == REQUESTED || e.outputIndex == REQUESTED_LDC); + e.outputIndex = nextIndex++; + if (e.isDoubleWord()) + nextIndex++; // do not use the next index + } + outputIndexLimit = nextIndex; +} + +// Unpacker Start + +const char str_tf[] = "true\0false"; +#undef STR_TRUE +#undef STR_FALSE +#define STR_TRUE (&str_tf[0]) +#define STR_FALSE (&str_tf[5]) + +const char *unpacker::get_option(const char *prop) +{ + if (prop == nullptr) + return nullptr; + if (strcmp(prop, UNPACK_DEFLATE_HINT) == 0) + { + return deflate_hint_or_zero == 0 ? 
nullptr : STR_TF(deflate_hint_or_zero > 0); +#ifdef HAVE_STRIP + } + else if (strcmp(prop, UNPACK_STRIP_COMPILE) == 0) + { + return STR_TF(strip_compile); + } + else if (strcmp(prop, UNPACK_STRIP_DEBUG) == 0) + { + return STR_TF(strip_debug); + } + else if (strcmp(prop, UNPACK_STRIP_JCOV) == 0) + { + return STR_TF(strip_jcov); +#endif /*HAVE_STRIP*/ + } + else if (strcmp(prop, UNPACK_REMOVE_PACKFILE) == 0) + { + return STR_TF(remove_packfile); + } + else if (strcmp(prop, DEBUG_VERBOSE) == 0) + { + return saveIntStr(verbose); + } + else if (strcmp(prop, UNPACK_MODIFICATION_TIME) == 0) + { + return (modification_time_or_zero == 0) ? nullptr + : saveIntStr(modification_time_or_zero); + } + else + { + return NULL; // unknown option ignore + } +} + +bool unpacker::set_option(const char *prop, const char *value) +{ + if (prop == NULL) + return false; + if (strcmp(prop, UNPACK_DEFLATE_HINT) == 0) + { + deflate_hint_or_zero = + ((value == nullptr || strcmp(value, "keep") == 0) ? 0 : BOOL_TF(value) ? +1 : -1); +#ifdef HAVE_STRIP + } + else if (strcmp(prop, UNPACK_STRIP_COMPILE) == 0) + { + strip_compile = STR_TF(value); + } + else if (strcmp(prop, UNPACK_STRIP_DEBUG) == 0) + { + strip_debug = STR_TF(value); + } + else if (strcmp(prop, UNPACK_STRIP_JCOV) == 0) + { + strip_jcov = STR_TF(value); +#endif /*HAVE_STRIP*/ + } + else if (strcmp(prop, UNPACK_REMOVE_PACKFILE) == 0) + { + remove_packfile = STR_TF(value); + } + else if (strcmp(prop, DEBUG_VERBOSE) == 0) + { + verbose = (value == nullptr) ? 
0 : atoi(value); + } + else if (strcmp(prop, UNPACK_MODIFICATION_TIME) == 0) + { + if (value == nullptr || (strcmp(value, "keep") == 0)) + { + modification_time_or_zero = 0; + } + else if (strcmp(value, "now") == 0) + { + time_t now; + time(&now); + modification_time_or_zero = (int)now; + } + else + { + modification_time_or_zero = atoi(value); + if (modification_time_or_zero == 0) + modification_time_or_zero = 1; // make non-zero + } + } + else + { + return false; // unknown option ignore + } + return true; +} + +// Deallocate all internal storage and reset to a clean state. +// Do not disturb any input or output connections, including +// infileptr, infileno, inbytes, read_input_fn, jarout, or errstrm. +// Do not reset any unpack options. +void unpacker::reset() +{ + bytes_read_before_reset += bytes_read; + bytes_written_before_reset += bytes_written; + files_written_before_reset += files_written; + classes_written_before_reset += classes_written; + segments_read_before_reset += 1; + if (verbose >= 2) + { + fprintf(stderr, "After segment %d, " LONG_LONG_FORMAT + " bytes read and " LONG_LONG_FORMAT " bytes written.\n", + segments_read_before_reset - 1, bytes_read_before_reset, + bytes_written_before_reset); + fprintf(stderr, + "After segment %d, %d files (of which %d are classes) written to output.\n", + segments_read_before_reset - 1, files_written_before_reset, + classes_written_before_reset); + if (archive_next_count != 0) + { + fprintf(stderr, "After segment %d, %d segment%s remaining (estimated).\n", + segments_read_before_reset - 1, archive_next_count, + archive_next_count == 1 ? 
"" : "s"); + } + } + + unpacker save_u = (*this); // save bytewise image + infileptr = nullptr; // make asserts happy + jarout = nullptr; // do not close the output jar + gzin = nullptr; // do not close the input gzip stream + this->free(); + this->init(read_input_fn); + +// restore selected interface state: +#define SAVE(x) this->x = save_u.x + SAVE(infileptr); // buffered + SAVE(infileno); // unbuffered + SAVE(inbytes); // direct + SAVE(jarout); + SAVE(gzin); + SAVE(verbose); // verbose level, 0 means no output + SAVE(strip_compile); + SAVE(strip_debug); + SAVE(strip_jcov); + SAVE(remove_packfile); + SAVE(deflate_hint_or_zero); // ==0 means not set, otherwise -1 or 1 + SAVE(modification_time_or_zero); + SAVE(bytes_read_before_reset); + SAVE(bytes_written_before_reset); + SAVE(files_written_before_reset); + SAVE(classes_written_before_reset); + SAVE(segments_read_before_reset); +#undef SAVE + // Note: If we use strip_names, watch out: They get nuked here. +} + +void unpacker::init(read_input_fn_t input_fn) +{ + int i; + BYTES_OF(*this).clear(); + this->u = this; // self-reference for U_NEW macro + read_input_fn = input_fn; + all_bands = band::makeBands(this); + // Make a default jar buffer; caller may safely overwrite it. 
+ jarout = U_NEW(jar, 1); + jarout->init(this); + for (i = 0; i < ATTR_CONTEXT_LIMIT; i++) + attr_defs[i].u = u; // set up outer ptr +} + +const char *unpacker::get_abort_message() +{ + return abort_message; +} + +void unpacker::dump_options() +{ + static const char *opts[] = { + UNPACK_DEFLATE_HINT, +#ifdef HAVE_STRIP + UNPACK_STRIP_COMPILE, UNPACK_STRIP_DEBUG, UNPACK_STRIP_JCOV, +#endif /*HAVE_STRIP*/ + UNPACK_REMOVE_PACKFILE, DEBUG_VERBOSE, UNPACK_MODIFICATION_TIME, nullptr}; + for (int i = 0; opts[i] != nullptr; i++) + { + const char *str = get_option(opts[i]); + if (str == nullptr) + { + if (verbose == 0) + continue; + str = "(not set)"; + } + fprintf(stderr, "%s=%s\n", opts[i], str); + } +} + +// Usage: unpack a byte buffer +// packptr is a reference to byte buffer containing a +// packed file and len is the length of the buffer. +// If nullptr, the callback is used to fill an internal buffer. +void unpacker::start(void *packptr, size_t len) +{ + if (packptr != nullptr && len != 0) + { + inbytes.set((byte *)packptr, len); + } + read_bands(); +} + +void unpacker::check_options() +{ + const char *strue = "true"; + const char *sfalse = "false"; + if (deflate_hint_or_zero != 0) + { + bool force_deflate_hint = (deflate_hint_or_zero > 0); + if (force_deflate_hint) + default_file_options |= FO_DEFLATE_HINT; + else + default_file_options &= ~FO_DEFLATE_HINT; + // Turn off per-file deflate hint by force. + suppress_file_options |= FO_DEFLATE_HINT; + } + if (modification_time_or_zero != 0) + { + default_file_modtime = modification_time_or_zero; + // Turn off per-file modtime by force. + archive_options &= ~AO_HAVE_FILE_MODTIME; + } + // %%% strip_compile, etc... 
+} + +// classfile writing + +void unpacker::reset_cur_classfile() +{ + // set defaults + cur_class_minver = default_class_minver; + cur_class_majver = default_class_majver; + + // reset constant pool state + cp.resetOutputIndexes(); + + // reset fixups + class_fixup_type.empty(); + class_fixup_offset.empty(); + class_fixup_ref.empty(); + requested_ics.empty(); +} + +cpindex *cpool::getKQIndex() +{ + char ch = '?'; + if (u->cur_descr != nullptr) + { + entry *type = u->cur_descr->descrType(); + ch = type->value.b.ptr[0]; + } + byte tag = CONSTANT_Integer; + switch (ch) + { + case 'L': + tag = CONSTANT_String; + break; + case 'I': + tag = CONSTANT_Integer; + break; + case 'J': + tag = CONSTANT_Long; + break; + case 'F': + tag = CONSTANT_Float; + break; + case 'D': + tag = CONSTANT_Double; + break; + case 'B': + case 'S': + case 'C': + case 'Z': + tag = CONSTANT_Integer; + break; + default: + abort("bad KQ reference"); + break; + } + return getIndex(tag); +} + +uint unpacker::to_bci(uint bii) +{ + uint len = bcimap.length(); + uint *map = (uint *)bcimap.base(); + assert(len > 0); // must be initialized before using to_bci + if (bii < len) + return map[bii]; + // Else it's a fractional or out-of-range BCI. + uint key = bii - len; + for (int i = len;; i--) + { + if (map[i - 1] - (i - 1) <= key) + break; + else + --bii; + } + return bii; +} + +void unpacker::put_stackmap_type() +{ + int tag = code_StackMapTable_T.getByte(); + putu1(tag); + switch (tag) + { + case 7: // (7) [RCH] + putref(code_StackMapTable_RC.getRef()); + break; + case 8: // (8) [PH] + putu2(to_bci(code_StackMapTable_P.getInt())); + break; + } +} + +// Functions for writing code. 
// Record a branch/switch label that cannot be written yet.
//
// curIP  index of the instruction that owns the label
// size   width of the label field in bytes (callers pass 2 or 4)
//
// put_empty(size) supplies the output offset of the field; the width, that
// offset and curIP are appended to the three code_fixup_* lists, which
// write_bc_ops() walks after all instructions have been emitted.
void unpacker::put_label(int curIP, int size)
{
    code_fixup_type.addByte(size);
    code_fixup_offset.add((int)put_empty(size));
    code_fixup_source.add(curIP);
}

// Expand the opcode stream in bc_codes into output bytecodes.
// While doing so it records in bcimap the output offset (relative to the
// start of the code) of every instruction and collects label fixups via
// put_label().
inline // called exactly once => inline
    void
    unpacker::write_bc_ops()
{
    // Start with an empty instruction map and no pending label fixups.
    bcimap.empty();
    code_fixup_type.empty();
    code_fixup_offset.empty();
    code_fixup_source.empty();

    band *bc_which;

    byte *opptr = bc_codes.curRP();
    // No need for oplimit, since the codes are pre-counted.

    // Output offset at which this method's bytecodes begin.
    size_t codeBase = wpoffset();

    bool isAload; // copy-out result
    int origBC;

    entry *thisClass = cur_class;
    entry *superClass = cur_super;
    entry *newClass = nullptr; // class of last _new opcode

    // overwrite any prior index on these bands; it changes w/ current class:
    bc_thisfield.setIndex(cp.getFieldIndex(thisClass));
    bc_thismethod.setIndex(cp.getMethodIndex(thisClass));
    if (superClass != nullptr)
    {
        bc_superfield.setIndex(cp.getFieldIndex(superClass));
        bc_supermethod.setIndex(cp.getMethodIndex(superClass));
    }

    // One iteration per instruction; the loop has no exit condition here.
    for (int curIP = 0;; curIP++)
    {
        // Offset of this instruction relative to codeBase.
        int curPC = (int)(wpoffset() - codeBase);
        bcimap.add(curPC);
        ensure_put_space(10); // covers most instrs w/o further bounds check
        int bc = *opptr++ & 0xFF;

        putu1_fast(bc);
        // Note: See '--wp' below for pseudo-bytecodes like bc_end_marker.
+ + bool isWide = false; + if (bc == bc_wide) + { + bc = *opptr++ & 0xFF; + putu1_fast(bc); + isWide = true; + } + switch (bc) + { + case bc_end_marker: + --wp; // not really part of the code + assert(opptr <= bc_codes.maxRP()); + bc_codes.curRP() = opptr; // advance over this in bc_codes + goto doneScanningMethod; + case bc_tableswitch: // apc: (df, lo, hi, (hi-lo+1)*(label)) + case bc_lookupswitch: // apc: (df, nc, nc*(case, label)) + { + int caseCount = bc_case_count.getInt(); + while (((wpoffset() - codeBase) % 4) != 0) + putu1_fast(0); + ensure_put_space(30 + caseCount * 8); + put_label(curIP, 4); // int df = bc_label.getInt(); + if (bc == bc_tableswitch) + { + int lo = bc_case_value.getInt(); + int hi = lo + caseCount - 1; + putu4(lo); + putu4(hi); + for (int j = 0; j < caseCount; j++) + { + put_label(curIP, 4); // int lVal = bc_label.getInt(); + // int cVal = lo + j; + } + } + else + { + putu4(caseCount); + for (int j = 0; j < caseCount; j++) + { + int cVal = bc_case_value.getInt(); + putu4(cVal); + put_label(curIP, 4); // int lVal = bc_label.getInt(); + } + } + assert((int)to_bci(curIP) == curPC); + continue; + } + case bc_iinc: + { + int local = bc_local.getInt(); + int delta = (isWide ? bc_short : bc_byte).getInt(); + if (isWide) + { + putu2(local); + putu2(delta); + } + else + { + putu1_fast(local); + putu1_fast(delta); + } + continue; + } + case bc_sipush: + { + int val = bc_short.getInt(); + putu2(val); + continue; + } + case bc_bipush: + case bc_newarray: + { + int val = bc_byte.getByte(); + putu1_fast(val); + continue; + } + case bc_ref_escape: + { + // Note that insnMap has one entry for this. + --wp; // not really part of the code + int size = bc_escrefsize.getInt(); + entry *ref = bc_escref.getRefN(); + CHECK; + switch (size) + { + case 1: + putu1ref(ref); + break; + case 2: + putref(ref); + break; + default: + assert(false); + } + continue; + } + case bc_byte_escape: + { + // Note that insnMap has one entry for all these bytes. 
+ --wp; // not really part of the code + int size = bc_escsize.getInt(); + ensure_put_space(size); + for (int j = 0; j < size; j++) + putu1_fast(bc_escbyte.getByte()); + continue; + } + default: + if (is_invoke_init_op(bc)) + { + origBC = bc_invokespecial; + entry *classRef; + switch (bc - _invokeinit_op) + { + case _invokeinit_self_option: + classRef = thisClass; + break; + case _invokeinit_super_option: + classRef = superClass; + break; + default: + assert(bc == _invokeinit_op + _invokeinit_new_option); + case _invokeinit_new_option: + classRef = newClass; + break; + } + wp[-1] = origBC; // overwrite with origBC + int coding = bc_initref.getInt(); + // Find the nth overloading of in classRef. + entry *ref = nullptr; + cpindex *ix = (classRef == nullptr) ? nullptr : cp.getMethodIndex(classRef); + for (int j = 0, which_init = 0;; j++) + { + ref = (ix == nullptr) ? nullptr : ix->get(j); + if (ref == nullptr) + break; // oops, bad input + assert(ref->tag == CONSTANT_Methodref); + if (ref->memberDescr()->descrName() == cp.sym[cpool::s_lt_init_gt]) + { + if (which_init++ == coding) + break; + } + } + putref(ref); + continue; + } + bc_which = ref_band_for_self_op(bc, isAload, origBC); + if (bc_which != nullptr) + { + if (!isAload) + { + wp[-1] = origBC; // overwrite with origBC + } + else + { + wp[-1] = bc_aload_0; // overwrite with _aload_0 + // Note: insnMap keeps the _aload_0 separate. 
+ bcimap.add(++curPC); + ++curIP; + putu1_fast(origBC); + } + entry *ref = bc_which->getRef(); + CHECK; + putref(ref); + continue; + } + if (is_branch_op(bc)) + { + // int lVal = bc_label.getInt(); + if (bc < bc_goto_w) + { + put_label(curIP, 2); // putu2(lVal & 0xFFFF); + } + else + { + assert(bc <= bc_jsr_w); + put_label(curIP, 4); // putu4(lVal); + } + assert((int)to_bci(curIP) == curPC); + continue; + } + bc_which = ref_band_for_op(bc); + if (bc_which != nullptr) + { + entry *ref = bc_which->getRefCommon(bc_which->ix, bc_which->nullOK); + CHECK; + if (ref == nullptr && bc_which == &bc_classref) + { + // Shorthand for class self-references. + ref = thisClass; + } + origBC = bc; + switch (bc) + { + case bc_ildc: + case bc_cldc: + case bc_fldc: + case bc_aldc: + origBC = bc_ldc; + break; + case bc_ildc_w: + case bc_cldc_w: + case bc_fldc_w: + case bc_aldc_w: + origBC = bc_ldc_w; + break; + case bc_lldc2_w: + case bc_dldc2_w: + origBC = bc_ldc2_w; + break; + case bc_new: + newClass = ref; + break; + } + wp[-1] = origBC; // overwrite with origBC + if (origBC == bc_ldc) + { + putu1ref(ref); + } + else + { + putref(ref); + } + if (origBC == bc_multianewarray) + { + // Copy the trailing byte also. + int val = bc_byte.getByte(); + putu1_fast(val); + } + else if (origBC == bc_invokeinterface) + { + int argSize = ref->memberDescr()->descrType()->typeSize(); + putu1_fast(1 + argSize); + putu1_fast(0); + } + continue; + } + if (is_local_slot_op(bc)) + { + int local = bc_local.getInt(); + if (isWide) + { + putu2(local); + if (bc == bc_iinc) + { + int iVal = bc_short.getInt(); + putu2(iVal); + } + } + else + { + putu1_fast(local); + if (bc == bc_iinc) + { + int iVal = bc_byte.getByte(); + putu1_fast(iVal); + } + } + continue; + } + // Random bytecode. Just copy it. + assert(bc < bc_bytecode_limit); + } + } +doneScanningMethod: +{ +} + // bcimap.add(curPC); // PC limit is already also in map, from bc_end_marker + + // Armed with a bcimap, we can now fix up all the labels. 
for (int i = 0; i < (int)code_fixup_type.size(); i++)
    {
        // Each fixup is (field width, output offset, owning instruction).
        int type = code_fixup_type.getByte(i);
        byte *bp = wp_at(code_fixup_offset.get(i));
        int curIP = code_fixup_source.get(i);
        // bc_label holds the target relative to the owning instruction,
        // counted in instructions; convert both ends to byte offsets.
        int destIP = curIP + bc_label.getInt();
        int span = to_bci(destIP) - to_bci(curIP);
        switch (type)
        {
        case 2:
            putu2_at(bp, (ushort)span);
            break;
        case 4:
            putu4_at(bp, span);
            break;
        default:
            assert(false);
        }
    }
}

// Write the body of one Code attribute: max_stack, max_locals, the
// bytecodes with their length, the exception handler table and finally the
// attributes attached to the code itself.
inline // called exactly once => inline
    void
    unpacker::write_code()
{
    int j;

    int max_stack, max_locals, handler_count, cflags;
    get_code_header(max_stack, max_locals, handler_count, cflags);

    // A negative header value means the real value must be read from its band.
    if (max_stack < 0)
        max_stack = code_max_stack.getInt();
    if (max_locals < 0)
        max_locals = code_max_na_locals.getInt();
    if (handler_count < 0)
        handler_count = code_handler_count.getInt();

    // Add the size of the method's own signature, plus one more slot
    // when the method is not static.
    int siglen = cur_descr->descrType()->typeSize();
    CHECK;
    if ((cur_descr_flags & ACC_STATIC) == 0)
        siglen++;
    max_locals += siglen;

    putu2(max_stack);
    putu2(max_locals);
    // Reserve the 4-byte length field; it is filled in once the bytecodes
    // have been written.
    size_t bcbase = put_empty(4);

    // Write the bytecodes themselves.
    write_bc_ops();
    CHECK;

    byte *bcbasewp = wp_at(bcbase);
    putu4_at(bcbasewp, (int)(wp - (bcbasewp + 4))); // number of bytecode bytes just written

    putu2(handler_count);
    for (j = 0; j < handler_count; j++)
    {
        // The three positions are read cumulatively from their bands:
        // start, then start+end delta, then that plus the catch delta.
        int bii = code_handler_start_P.getInt();
        putu2(to_bci(bii));
        bii += code_handler_end_PO.getInt();
        putu2(to_bci(bii));
        bii += code_handler_catch_PO.getInt();
        putu2(to_bci(bii));
        putref(code_handler_class_RCN.getRefN());
        CHECK;
    }

    // A negative cflags means the attribute bits come from the flag bands.
    julong indexBits = cflags;
    if (cflags < 0)
    {
        bool haveLongFlags = attr_defs[ATTR_CONTEXT_CODE].haveLongFlags();
        indexBits = code_flags_hi.getLong(code_flags_lo, haveLongFlags);
    }
    write_attrs(ATTR_CONTEXT_CODE, indexBits);
}

int unpacker::write_attrs(int attrc, julong indexBits)
{
    CHECK_0;
    if (indexBits == 0)
    {
        // Quick short-circuit.
+ putu2(0); + return 0; + } + + attr_definitions &ad = attr_defs[attrc]; + + int i, j, j2, idx, count; + + int oiCount = 0; + if (ad.isPredefined(X_ATTR_OVERFLOW) && (indexBits & ((julong)1 << X_ATTR_OVERFLOW)) != 0) + { + indexBits -= ((julong)1 << X_ATTR_OVERFLOW); + oiCount = ad.xxx_attr_count().getInt(); + } + + int bitIndexes[X_ATTR_LIMIT_FLAGS_HI]; + int biCount = 0; + + // Fill bitIndexes with index bits, in order. + for (idx = 0; indexBits != 0; idx++, indexBits >>= 1) + { + if ((indexBits & 1) != 0) + bitIndexes[biCount++] = idx; + } + assert(biCount <= (int)lengthof(bitIndexes)); + + // Write a provisional attribute count, perhaps to be corrected later. + int naOffset = (int)wpoffset(); + int na0 = biCount + oiCount; + putu2(na0); + + int na = 0; + for (i = 0; i < na0; i++) + { + if (i < biCount) + idx = bitIndexes[i]; + else + idx = ad.xxx_attr_indexes().getInt(); + assert(ad.isIndex(idx)); + entry *aname = nullptr; + entry *ref; // scratch + size_t abase = put_empty(2 + 4); + CHECK_0; + if (idx < (int)ad.flag_limit && ad.isPredefined(idx)) + { + // Switch on the attrc and idx simultaneously. 
+ switch (ADH_BYTE(attrc, idx)) + { + + case ADH_BYTE(ATTR_CONTEXT_CLASS, X_ATTR_OVERFLOW) : + case ADH_BYTE(ATTR_CONTEXT_FIELD, X_ATTR_OVERFLOW) : + case ADH_BYTE(ATTR_CONTEXT_METHOD, X_ATTR_OVERFLOW) : + case ADH_BYTE(ATTR_CONTEXT_CODE, X_ATTR_OVERFLOW) : + // no attribute at all, so back up on this one + wp = wp_at(abase); + continue; + + case ADH_BYTE(ATTR_CONTEXT_CLASS, CLASS_ATTR_ClassFile_version) : + cur_class_minver = class_ClassFile_version_minor_H.getInt(); + cur_class_majver = class_ClassFile_version_major_H.getInt(); + // back up; not a real attribute + wp = wp_at(abase); + continue; + + case ADH_BYTE(ATTR_CONTEXT_CLASS, CLASS_ATTR_InnerClasses) : + // note the existence of this attr, but save for later + if (cur_class_has_local_ics) + abort("too many InnerClasses attrs"); + cur_class_has_local_ics = true; + wp = wp_at(abase); + continue; + + case ADH_BYTE(ATTR_CONTEXT_CLASS, CLASS_ATTR_SourceFile) : + aname = cp.sym[cpool::s_SourceFile]; + ref = class_SourceFile_RUN.getRefN(); + CHECK_0; + if (ref == nullptr) + { + bytes &n = cur_class->ref(0)->value.b; + // parse n = (/)*?($)* + int pkglen = lastIndexOf(SLASH_MIN, SLASH_MAX, n, (int)n.len) + 1; + bytes prefix = n.slice(pkglen, n.len); + for (;;) + { + // Work backwards, finding all '$', '#', etc. 
+ int dollar = + lastIndexOf(DOLLAR_MIN, DOLLAR_MAX, prefix, (int)prefix.len); + if (dollar < 0) + break; + prefix = prefix.slice(0, dollar); + } + const char *suffix = ".java"; + int len = (int)(prefix.len + strlen(suffix)); + bytes name; + name.set(T_NEW(byte, add_size(len, 1)), len); + name.strcat(prefix).strcat(suffix); + ref = cp.ensureUtf8(name); + } + putref(ref); + break; + + case ADH_BYTE(ATTR_CONTEXT_CLASS, CLASS_ATTR_EnclosingMethod) : + aname = cp.sym[cpool::s_EnclosingMethod]; + putref(class_EnclosingMethod_RC.getRefN()); + putref(class_EnclosingMethod_RDN.getRefN()); + break; + + case ADH_BYTE(ATTR_CONTEXT_FIELD, FIELD_ATTR_ConstantValue) : + aname = cp.sym[cpool::s_ConstantValue]; + putref(field_ConstantValue_KQ.getRefUsing(cp.getKQIndex())); + break; + + case ADH_BYTE(ATTR_CONTEXT_METHOD, METHOD_ATTR_Code) : + aname = cp.sym[cpool::s_Code]; + write_code(); + break; + + case ADH_BYTE(ATTR_CONTEXT_METHOD, METHOD_ATTR_Exceptions) : + aname = cp.sym[cpool::s_Exceptions]; + putu2(count = method_Exceptions_N.getInt()); + for (j = 0; j < count; j++) + { + putref(method_Exceptions_RC.getRefN()); + } + break; + + case ADH_BYTE(ATTR_CONTEXT_CODE, CODE_ATTR_StackMapTable) : + aname = cp.sym[cpool::s_StackMapTable]; + // (keep this code aligned with its brother in unpacker::read_attrs) + putu2(count = code_StackMapTable_N.getInt()); + for (j = 0; j < count; j++) + { + int tag = code_StackMapTable_frame_T.getByte(); + putu1(tag); + if (tag <= 127) + { + // (64-127) [(2)] + if (tag >= 64) + put_stackmap_type(); + } + else if (tag <= 251) + { + // (247) [(1)(2)] + // (248-251) [(1)] + if (tag >= 247) + putu2(code_StackMapTable_offset.getInt()); + if (tag == 247) + put_stackmap_type(); + } + else if (tag <= 254) + { + // (252) [(1)(2)] + // (253) [(1)(2)(2)] + // (254) [(1)(2)(2)(2)] + putu2(code_StackMapTable_offset.getInt()); + for (int k = (tag - 251); k > 0; k--) + { + put_stackmap_type(); + } + } + else + { + // (255) [(1)NH[(2)]NH[(2)]] + 
putu2(code_StackMapTable_offset.getInt()); + putu2(j2 = code_StackMapTable_local_N.getInt()); + while (j2-- > 0) + put_stackmap_type(); + putu2(j2 = code_StackMapTable_stack_N.getInt()); + while (j2-- > 0) + put_stackmap_type(); + } + } + break; + + case ADH_BYTE(ATTR_CONTEXT_CODE, CODE_ATTR_LineNumberTable) : + aname = cp.sym[cpool::s_LineNumberTable]; + putu2(count = code_LineNumberTable_N.getInt()); + for (j = 0; j < count; j++) + { + putu2(to_bci(code_LineNumberTable_bci_P.getInt())); + putu2(code_LineNumberTable_line.getInt()); + } + break; + + case ADH_BYTE(ATTR_CONTEXT_CODE, CODE_ATTR_LocalVariableTable) : + aname = cp.sym[cpool::s_LocalVariableTable]; + putu2(count = code_LocalVariableTable_N.getInt()); + for (j = 0; j < count; j++) + { + int bii = code_LocalVariableTable_bci_P.getInt(); + int bci = to_bci(bii); + putu2(bci); + bii += code_LocalVariableTable_span_O.getInt(); + putu2(to_bci(bii) - bci); + putref(code_LocalVariableTable_name_RU.getRefN()); + putref(code_LocalVariableTable_type_RS.getRefN()); + putu2(code_LocalVariableTable_slot.getInt()); + } + break; + + case ADH_BYTE(ATTR_CONTEXT_CODE, CODE_ATTR_LocalVariableTypeTable) : + aname = cp.sym[cpool::s_LocalVariableTypeTable]; + putu2(count = code_LocalVariableTypeTable_N.getInt()); + for (j = 0; j < count; j++) + { + int bii = code_LocalVariableTypeTable_bci_P.getInt(); + int bci = to_bci(bii); + putu2(bci); + bii += code_LocalVariableTypeTable_span_O.getInt(); + putu2(to_bci(bii) - bci); + putref(code_LocalVariableTypeTable_name_RU.getRefN()); + putref(code_LocalVariableTypeTable_type_RS.getRefN()); + putu2(code_LocalVariableTypeTable_slot.getInt()); + } + break; + + case ADH_BYTE(ATTR_CONTEXT_CLASS, X_ATTR_Signature) : + aname = cp.sym[cpool::s_Signature]; + putref(class_Signature_RS.getRefN()); + break; + + case ADH_BYTE(ATTR_CONTEXT_FIELD, X_ATTR_Signature) : + aname = cp.sym[cpool::s_Signature]; + putref(field_Signature_RS.getRefN()); + break; + + case ADH_BYTE(ATTR_CONTEXT_METHOD, 
X_ATTR_Signature) : + aname = cp.sym[cpool::s_Signature]; + putref(method_Signature_RS.getRefN()); + break; + + case ADH_BYTE(ATTR_CONTEXT_CLASS, X_ATTR_Deprecated) : + case ADH_BYTE(ATTR_CONTEXT_FIELD, X_ATTR_Deprecated) : + case ADH_BYTE(ATTR_CONTEXT_METHOD, X_ATTR_Deprecated) : + aname = cp.sym[cpool::s_Deprecated]; + // no data + break; + } + } + + if (aname == nullptr) + { + // Unparse a compressor-defined attribute. + layout_definition *lo = ad.getLayout(idx); + if (lo == nullptr) + { + abort("bad layout index"); + break; + } + assert((int)lo->idx == idx); + aname = lo->nameEntry; + if (aname == nullptr) + { + bytes nameb; + nameb.set(lo->name); + aname = cp.ensureUtf8(nameb); + // Cache the name entry for next time. + lo->nameEntry = aname; + } + // Execute all the layout elements. + band **bands = lo->bands(); + if (lo->hasCallables()) + { + band &cble = *bands[0]; + assert(cble.le_kind == EK_CBLE); + bands = cble.le_body; + } + putlayout(bands); + } + + if (aname == nullptr) + abort("bad attribute index"); + CHECK_0; + + byte *wp1 = wp; + wp = wp_at(abase); + + // DTRT if this attr is on the strip-list. + // (Note that we emptied the data out of the band first.) + if (ad.strip_names.contains(aname)) + { + continue; + } + + // patch the name and length + putref(aname); + putu4((int)(wp1 - (wp + 4))); // put the attr size + wp = wp1; + na++; // count the attrs actually written + } + + if (na != na0) + // Refresh changed count. 
+ putu2_at(wp_at(naOffset), na); + return na; +} + +void unpacker::write_members(int num, int attrc) +{ + CHECK; + attr_definitions &ad = attr_defs[attrc]; + band &member_flags_hi = ad.xxx_flags_hi(); + band &member_flags_lo = ad.xxx_flags_lo(); + band &member_descr = (&member_flags_hi)[e_field_descr - e_field_flags_hi]; + assert(endsWith(member_descr.name, "_descr")); + assert(endsWith(member_flags_lo.name, "_flags_lo")); + assert(endsWith(member_flags_lo.name, "_flags_lo")); + bool haveLongFlags = ad.haveLongFlags(); + + putu2(num); + julong indexMask = attr_defs[attrc].flagIndexMask(); + for (int i = 0; i < num; i++) + { + julong mflags = member_flags_hi.getLong(member_flags_lo, haveLongFlags); + entry *mdescr = member_descr.getRef(); + cur_descr = mdescr; + putu2(cur_descr_flags = (ushort)(mflags & ~indexMask)); + CHECK; + putref(mdescr->descrName()); + putref(mdescr->descrType()); + write_attrs(attrc, (mflags & indexMask)); + CHECK; + } + cur_descr = nullptr; +} + +extern "C" int raw_address_cmp(const void *p1p, const void *p2p) +{ + void *p1 = *(void **)p1p; + void *p2 = *(void **)p2p; + return (p1 > p2) ? 1 : (p1 < p2) ? 
-1 : 0; +} + +void unpacker::write_classfile_tail() +{ + cur_classfile_tail.empty(); + set_output(&cur_classfile_tail); + + int i, num; + + attr_definitions &ad = attr_defs[ATTR_CONTEXT_CLASS]; + + bool haveLongFlags = ad.haveLongFlags(); + julong kflags = class_flags_hi.getLong(class_flags_lo, haveLongFlags); + julong indexMask = ad.flagIndexMask(); + + cur_class = class_this.getRef(); + cur_super = class_super.getRef(); + + CHECK; + + if (cur_super == cur_class) + cur_super = nullptr; + // special representation for java/lang/Object + + putu2((ushort)(kflags & ~indexMask)); + putref(cur_class); + putref(cur_super); + + putu2(num = class_interface_count.getInt()); + for (i = 0; i < num; i++) + { + putref(class_interface.getRef()); + } + + write_members(class_field_count.getInt(), ATTR_CONTEXT_FIELD); + write_members(class_method_count.getInt(), ATTR_CONTEXT_METHOD); + CHECK; + + cur_class_has_local_ics = false; // may be set true by write_attrs + + int naOffset = (int)wpoffset(); + int na = write_attrs(ATTR_CONTEXT_CLASS, (kflags & indexMask)); + +// at the very last, choose which inner classes (if any) pertain to k: +#ifdef ASSERT + for (i = 0; i < ic_count; i++) + { + assert(!ics[i].requested); + } +#endif + // First, consult the global table and the local constant pool, + // and decide on the globally implied inner classes. + // (Note that we read the cpool's outputIndex fields, but we + // do not yet write them, since the local IC attribute might + // reverse a global decision to declare an IC.) + assert(requested_ics.length() == 0); // must start out empty + // Always include all members of the current class. + for (inner_class *child = cp.getFirstChildIC(cur_class); child != nullptr; + child = cp.getNextChildIC(child)) + { + child->requested = true; + requested_ics.add(child); + } + // And, for each inner class mentioned in the constant pool, + // include it and all its outers. 
+ int noes = cp.outputEntries.length(); + entry **oes = (entry **)cp.outputEntries.base(); + for (i = 0; i < noes; i++) + { + entry &e = *oes[i]; + if (e.tag != CONSTANT_Class) + continue; // wrong sort + for (inner_class *ic = cp.getIC(&e); ic != nullptr; ic = cp.getIC(ic->outer)) + { + if (ic->requested) + break; // already processed + ic->requested = true; + requested_ics.add(ic); + } + } + int local_ics = requested_ics.length(); + // Second, consult a local attribute (if any) and adjust the global set. + inner_class *extra_ics = nullptr; + int num_extra_ics = 0; + if (cur_class_has_local_ics) + { + // adjust the set of ICs by symmetric set difference w/ the locals + num_extra_ics = class_InnerClasses_N.getInt(); + if (num_extra_ics == 0) + { + // Explicit zero count has an irregular meaning: It deletes the attr. + local_ics = 0; // (short-circuit all tests of requested bits) + } + else + { + extra_ics = T_NEW(inner_class, num_extra_ics); + // Note: extra_ics will be freed up by next call to get_next_file(). + } + } + for (i = 0; i < num_extra_ics; i++) + { + inner_class &extra_ic = extra_ics[i]; + extra_ic.inner = class_InnerClasses_RC.getRef(); + CHECK; + // Find the corresponding equivalent global IC: + inner_class *global_ic = cp.getIC(extra_ic.inner); + int flags = class_InnerClasses_F.getInt(); + if (flags == 0) + { + // The extra IC is simply a copy of a global IC. + if (global_ic == nullptr) + { + abort("bad reference to inner class"); + break; + } + extra_ic = (*global_ic); // fill in rest of fields + } + else + { + flags &= ~ACC_IC_LONG_FORM; // clear high bit if set to get clean zero + extra_ic.flags = flags; + extra_ic.outer = class_InnerClasses_outer_RCN.getRefN(); + extra_ic.name = class_InnerClasses_name_RUN.getRefN(); + // Detect if this is an exact copy of the global tuple. 
+ if (global_ic != nullptr) + { + if (global_ic->flags != extra_ic.flags || global_ic->outer != extra_ic.outer || + global_ic->name != extra_ic.name) + { + global_ic = nullptr; // not really the same, so break the link + } + } + } + if (global_ic != nullptr && global_ic->requested) + { + // This local repetition reverses the globally implied request. + global_ic->requested = false; + extra_ic.requested = false; + local_ics -= 1; + } + else + { + // The global either does not exist, or is not yet requested. + extra_ic.requested = true; + local_ics += 1; + } + } + // Finally, if there are any that survived, put them into an attribute. + // (Note that a zero-count attribute is always deleted.) + // The putref calls below will tell the constant pool to add any + // necessary local CP references to support the InnerClasses attribute. + // This step must be the last round of additions to the local CP. + if (local_ics > 0) + { + // append the new attribute: + putref(cp.sym[cpool::s_InnerClasses]); + putu4(2 + 2 * 4 * local_ics); + putu2(local_ics); + PTRLIST_QSORT(requested_ics, raw_address_cmp); + int num_global_ics = requested_ics.length(); + for (i = -num_global_ics; i < num_extra_ics; i++) + { + inner_class *ic; + if (i < 0) + ic = (inner_class *)requested_ics.get(num_global_ics + i); + else + ic = &extra_ics[i]; + if (ic->requested) + { + putref(ic->inner); + putref(ic->outer); + putref(ic->name); + putu2(ic->flags); + } + } + assert(local_ics == 0); // must balance + putu2_at(wp_at(naOffset), ++na); // increment class attr count + } + + // Tidy up global 'requested' bits: + for (i = requested_ics.length(); --i >= 0;) + { + inner_class *ic = (inner_class *)requested_ics.get(i); + ic->requested = false; + } + requested_ics.empty(); + + CHECK; + close_output(); + + // rewrite CP references in the tail + cp.computeOutputIndexes(); + int nextref = 0; + for (i = 0; i < (int)class_fixup_type.size(); i++) + { + int type = class_fixup_type.getByte(i); + byte *fixp = 
// Emit the head of the current classfile into cur_classfile_head:
// magic, minor/major version, the constant-pool count (cp.outputIndexLimit)
// and then one record per entry of cp.outputEntries, selected by entry tag.
// An unexpected tag aborts with ERROR_INTERNAL.
void unpacker::write_classfile_head()
{
    cur_classfile_head.empty();
    set_output(&cur_classfile_head);

    putu4(JAVA_MAGIC);
    putu2(cur_class_minver);
    putu2(cur_class_majver);
    putu2(cp.outputIndexLimit);

    // checkIndex is only touched inside assert(), so it tracks the expected
    // output index in assert-enabled builds and vanishes otherwise.
    int checkIndex = 1;
    int noes = cp.outputEntries.length();
    entry **oes = (entry **)cp.outputEntries.base();
    for (int i = 0; i < noes; i++)
    {
        entry &e = *oes[i];
        assert(e.getOutputIndex() == checkIndex++);
        byte tag = e.tag;
        assert(tag != CONSTANT_Signature);
        putu1(tag);
        switch (tag)
        {
        case CONSTANT_Utf8:
            // 2-byte length followed by the raw bytes
            putu2((int)e.value.b.len);
            put_bytes(e.value.b);
            break;
        case CONSTANT_Integer:
        case CONSTANT_Float:
            putu4(e.value.i);
            break;
        case CONSTANT_Long:
        case CONSTANT_Double:
            putu8(e.value.l);
            // the expected index advances by one extra for 8-byte constants
            assert(checkIndex++);
            break;
        case CONSTANT_Class:
        case CONSTANT_String:
            // just write the ref
            putu2(e.refs[0]->getOutputIndex());
            break;
        case CONSTANT_Fieldref:
        case CONSTANT_Methodref:
        case CONSTANT_InterfaceMethodref:
        case CONSTANT_NameandType:
            // two refs, written in order
            putu2(e.refs[0]->getOutputIndex());
            putu2(e.refs[1]->getOutputIndex());
            break;
        default:
            abort(ERROR_INTERNAL);
        }
    }
    close_output();
}

// Produce the next file record in cur_file and return a pointer to it.
// Returns nullptr when files_remaining is already zero (after checking the
// predicted archive size against bytes_read), or after an abort() call.
// For class stubs the classfile tail and head are generated here and exposed
// as data[1] and data[0]; for other files data[0] points at whatever part of
// the file is already buffered at rp, and the rest is left for the caller.
unpacker::file *unpacker::get_next_file()
{
    CHECK_0;
    free_temps();
    if (files_remaining == 0)
    {
        // Leave a clue that we're exhausted.
        cur_file.name = nullptr;
        cur_file.size = 0;
        if (archive_size != 0)
        {
            julong predicted_size = unsized_bytes_read + archive_size;
            if (predicted_size != bytes_read)
                abort("archive header had incorrect size");
        }
        return nullptr;
    }
    files_remaining -= 1;
    assert(files_written < file_count || classes_written < class_count);
    // Start from the archive-wide defaults; the bands below may override them.
    cur_file.name = "";
    cur_file.size = 0;
    cur_file.modtime = default_file_modtime;
    cur_file.options = default_file_options;
    cur_file.data[0].set(nullptr, 0);
    cur_file.data[1].set(nullptr, 0);
    if (files_written < file_count)
    {
        // The band reads below happen in this fixed order; do not reorder.
        entry *e = file_name.getRef();
        CHECK_0;
        cur_file.name = e->utf8String();
        bool haveLongSize = ((archive_options & AO_HAVE_FILE_SIZE_HI) != 0);
        cur_file.size = file_size_hi.getLong(file_size_lo, haveLongSize);
        if ((archive_options & AO_HAVE_FILE_MODTIME) != 0)
            cur_file.modtime += file_modtime.getInt(); // relative to archive modtime
        if ((archive_options & AO_HAVE_FILE_OPTIONS) != 0)
            cur_file.options |= file_options.getInt() & ~suppress_file_options;
    }
    else if (classes_written < class_count)
    {
        // there is a class for a missing file record
        cur_file.options |= FO_IS_CLASS_STUB;
    }
    if ((cur_file.options & FO_IS_CLASS_STUB) != 0)
    {
        assert(classes_written < class_count);
        classes_written += 1;
        // A class stub must not carry a transmitted size; it is computed below.
        if (cur_file.size != 0)
        {
            abort("class file size transmitted");
            return nullptr;
        }
        reset_cur_classfile();

        // write the meat of the classfile:
        write_classfile_tail();
        cur_file.data[1] = cur_classfile_tail.b;
        CHECK_0;

        // write the CP of the classfile, second:
        write_classfile_head();
        cur_file.data[0] = cur_classfile_head.b;
        CHECK_0;

        cur_file.size += cur_file.data[0].len;
        cur_file.size += cur_file.data[1].len;
        if (cur_file.name[0] == '\0')
        {
            // No name was transmitted: build "<class name>.class" in temp storage.
            bytes &prefix = cur_class->ref(0)->value.b;
            const char *suffix = ".class";
            int len = (int)(prefix.len + strlen(suffix));
            bytes name;
            name.set(T_NEW(byte, add_size(len, 1)), len);
            cur_file.name = name.strcat(prefix).strcat(suffix).strval();
        }
    }
    else
    {
        // If there is buffered file data, produce a pointer to it.
        if (cur_file.size != (size_t)cur_file.size)
        {
            // Silly size specified.
            abort("resource file too large");
            return nullptr;
        }
        size_t rpleft = input_remaining();
        if (rpleft > 0)
        {
            // Hand out at most cur_file.size bytes of the buffered input.
            if (rpleft > cur_file.size)
                rpleft = (size_t)cur_file.size;
            cur_file.data[0].set(rp, rpleft);
            rp += rpleft;
        }
        if (rpleft < cur_file.size)
        {
            // Caller must read the rest.
            size_t fleft = (size_t)cur_file.size - rpleft;
            bytes_read += fleft; // Credit it to the overall archive size.
        }
    }
    CHECK_0;
    bytes_written += cur_file.size;
    files_written += 1;
    return &cur_file;
}

// Write a file to jarout.
// When data[0] and data[1] together already hold f->size bytes they are passed
// straight to addJarEntry. Otherwise data[0] is copied aside, the missing
// bytes are read into the input buffer, and both parts are written.
void unpacker::write_file_to_jar(unpacker::file *f)
{
    size_t htsize = f->data[0].len + f->data[1].len;
    julong fsize = f->size;
    if (htsize == fsize)
    {
        jarout->addJarEntry(f->name, f->deflate_hint(), f->modtime, f->data[0], f->data[1]);
    }
    else
    {
        assert(input_remaining() == 0);
        bytes part1, part2;
        // Copy data[0] before the input buffer it points into is replaced below.
        part1.len = f->data[0].len;
        part1.set(T_NEW(byte, part1.len), part1.len);
        part1.copyFrom(f->data[0]);
        assert(f->data[1].len == 0);
        part2.set(nullptr, 0);
        size_t fleft = (size_t)fsize - part1.len;
        assert(bytes_read > fleft); // part2 already credited by get_next_file
        bytes_read -= fleft;
        if (fleft > 0)
        {
            // Must read some more.
            if (live_input)
            {
                // Stop using the input buffer. Make a new one:
                if (free_input)
                    input.free();
                // new buffer holds at least 4096 bytes
                input.init(fleft > (1 << 12) ? fleft : (1 << 12));
                free_input = true;
                live_input = false;
            }
            else
            {
                // Make it large enough.
                assert(free_input); // must be reallocable
                input.ensureSize(fleft);
            }
            rplimit = rp = input.base();
            CHECK;
            input.setLimit(rp + fleft);
            if (!ensure_input(fleft))
                abort("EOF reading resource file");
            part2.ptr = input_scan();
            part2.len = input_remaining();
            // rewind the read pointers to the start of the buffer
            rplimit = rp = input.base();
        }
        jarout->addJarEntry(f->name, f->deflate_hint(), f->modtime, part1, part2);
    }
    if (verbose >= 3)
    {
        fprintf(stderr, "Wrote " LONG_LONG_FORMAT " bytes to: %s\n", fsize, f->name);
    }
}

// Report a fatal unpacking error on stderr and terminate the process with
// exit status -1; this function does not return.
// A null message is replaced by a generic one, and a leading '@' is skipped
// before printing.
void unpacker::abort(const char *message)
{
    if (message == nullptr)
        message = "error unpacking archive";
    if (message[0] == '@')
        ++message;
    fprintf(stderr, "%s\n", message);
    fflush(stderr);
    exit(-1);
}
// Global Structures
struct jar;
struct gunzip;
struct band;
struct cpool;
struct entry;
struct cpindex;
struct inner_class;
struct value_stream;

// An index over constant-pool entries. Exactly one of base1/base2 is set by
// the init() overloads below; the other is nulled.
struct cpindex
{
    uint len;
    entry *base1; // base of primary index
    entry **base2; // base of secondary index
    byte ixTag; // type of entries (!= CONSTANT_None), plus 64 if sub-index
    enum
    {
        SUB_TAG = 64
    };

    entry *get(uint i);

    // Set up as a primary index over a contiguous array of entries.
    void init(int len_, entry *base1_, int ixTag_)
    {
        len = len_;
        base1 = base1_;
        base2 = nullptr;
        ixTag = ixTag_;
    }
    // Set up as a secondary index over an array of entry pointers.
    void init(int len_, entry **base2_, int ixTag_)
    {
        len = len_;
        base1 = nullptr;
        base2 = base2_;
        ixTag = ixTag_;
    }
};

// Constant-pool state: the entry array, per-tag subranges and indexes,
// inner-class lookup tables, output renumbering state and a Utf8/class hash table.
struct cpool
{
    uint nentries;
    entry *entries;
    entry *first_extra_entry;
    uint maxentries; // total allocated size of entries

    // Position and size of each homogeneous subrange:
    int tag_count[CONSTANT_Limit];
    int tag_base[CONSTANT_Limit];
    cpindex tag_index[CONSTANT_Limit];
    ptrlist tag_extras[CONSTANT_Limit];

    cpindex *member_indexes; // indexed by 2*CONSTANT_Class.inord
    cpindex *getFieldIndex(entry *classRef);
    cpindex *getMethodIndex(entry *classRef);

    inner_class **ic_index;
    inner_class **ic_child_index;
    inner_class *getIC(entry *inner);
    inner_class *getFirstChildIC(entry *outer);
    inner_class *getNextChildIC(inner_class *child);

    int outputIndexLimit; // index limit after renumbering
    ptrlist outputEntries; // list of entry* needing output idx assigned

    entry **hashTab;
    uint hashTabLength;
    entry *&hashTabRef(byte tag, bytes &b);
    entry *ensureUtf8(bytes &b);
    entry *ensureClass(bytes &b);

    // Well-known Utf8 symbols.
    // One enumerator s_<name> is generated per ALL_ATTR_DO item, followed by
    // s_lt_init_gt and the s_LIMIT count.
    enum
    {
#define SNAME(n, s) s_##s,
        ALL_ATTR_DO(SNAME)
#undef SNAME
        s_lt_init_gt, // presumably the "<init>" symbol; the original comment text is missing
        s_LIMIT
    };
    entry *sym[s_LIMIT];

    // read counts from hdr, allocate main arrays
    enum
    {
        NUM_COUNTS = 12
    };
    void init(unpacker *u, int counts[NUM_COUNTS]);

    // pointer to outer unpacker, for error checks etc.
    unpacker *u;

    // Number of entries with the given tag (tag must be < CONSTANT_Limit).
    int getCount(byte tag)
    {
        assert((uint)tag < CONSTANT_Limit);
        return tag_count[tag];
    }
    // Index over the entries with the given tag (tag must be < CONSTANT_Limit).
    cpindex *getIndex(byte tag)
    {
        assert((uint)tag < CONSTANT_Limit);
        return &tag_index[tag];
    }
    cpindex *getKQIndex(); // uses cur_descr

    void expandSignatures();
    void initMemberIndexes();

    void computeOutputOrder();
    void computeOutputIndexes();
    void resetOutputIndexes();

    // error handling
    // (both forward to the owning unpacker; defined after struct unpacker)
    inline void abort(const char *msg);
    inline bool aborting();
};

/*
 * The unpacker provides the entry points to the unpack engine,
 * as well as maintains the state of the engine.
 */
struct unpacker
{
    // One element of the resulting JAR.
    struct file
    {
        const char *name;
        julong size;
        int modtime;
        int options;
        bytes data[2];
        // Note: If Sum(data[*].len) < size,
        // remaining bytes must be read directly from the input stream.
        // True when the FO_DEFLATE_HINT bit is set in options.
        bool deflate_hint()
        {
            return ((options & FO_DEFLATE_HINT) != 0);
        }
    };

    // global pointer to self, if not running under JNI (not multi-thread safe)
    static unpacker *non_mt_current;

    // if running Unix-style, here are the inputs and outputs
    FILE *infileptr; // buffered
    int infileno; // unbuffered
    bytes inbytes; // direct
    gunzip *gzin; // gunzip filter, if any
    jar *jarout; // output JAR file

    // pointer to self, for U_NEW macro
    unpacker *u;

    // private abort message string, allocated to PATH_MAX*2
    const char *abort_message;
    ptrlist mallocs; // list of guys to free when we are all done
    ptrlist tmallocs; // list of guys to free on next client request
    fillbytes smallbuf; // supplies small alloc requests
    fillbytes tsmallbuf; // supplies temporary small alloc requests

    // option management members
    int verbose; // verbose level, 0 means no output
    bool strip_compile;
    bool strip_debug;
    bool strip_jcov;
    bool remove_packfile;
    int deflate_hint_or_zero; // ==0 means not set, otherwise -1 or 1
    int modification_time_or_zero;

    // input stream
    fillbytes input; // the whole block (size is predicted, has slop too)
    bool live_input; // is the data in this block live?
    bool free_input; // must the input buffer be freed?
    byte *rp; // read pointer (< rplimit <= input.limit())
    byte *rplimit; // how much of the input block has been read?
    julong bytes_read;
    int unsized_bytes_read;

    // callback to read at least one byte, up to available input
    typedef jlong (*read_input_fn_t)(unpacker *self, void *buf, jlong minlen, jlong maxlen);
    read_input_fn_t read_input_fn;

    // archive header fields
    int magic, minver, majver;
    size_t archive_size;
    int archive_next_count, archive_options, archive_modtime;
    int band_headers_size;
    int file_count, attr_definition_count, ic_count, class_count;
    int default_class_minver, default_class_majver;
    int default_file_options, suppress_file_options; // not header fields
    int default_archive_modtime, default_file_modtime; // not header fields
    int code_count; // not a header field
    int files_remaining; // not a header field

    // engine state
    band *all_bands; // indexed by band_number
    byte *meta_rp; // read-pointer into (copy of) band_headers
    cpool cp; // all constant pool information
    inner_class *ics; // InnerClasses

    // output stream
    bytes output; // output block (either classfile head or tail)
    byte *wp; // write pointer (< wplimit == output.limit())
    byte *wpbase; // write pointer starting address (<= wp)
    byte *wplimit; // how much of the output block has been written?

    // output state
    file cur_file;
    entry *cur_class; // CONSTANT_Class entry
    entry *cur_super; // CONSTANT_Class entry or nullptr
    entry *cur_descr; // CONSTANT_NameandType entry
    int cur_descr_flags; // flags corresponding to cur_descr
    int cur_class_minver, cur_class_majver;
    bool cur_class_has_local_ics;
    fillbytes cur_classfile_head;
    fillbytes cur_classfile_tail;
    int files_written; // also tells which file we're working on
    int classes_written; // also tells which class we're working on
    julong bytes_written;
    intlist bcimap;
    fillbytes class_fixup_type;
    intlist class_fixup_offset;
    ptrlist class_fixup_ref;
    fillbytes code_fixup_type; // which format of branch operand?
    intlist code_fixup_offset; // location of operand needing fixup
    intlist code_fixup_source; // encoded ID of branch insn
    ptrlist requested_ics; // which ics need output?

    // stats pertaining to multiple segments (updated on reset)
    julong bytes_read_before_reset;
    julong bytes_written_before_reset;
    int files_written_before_reset;
    int classes_written_before_reset;
    int segments_read_before_reset;

    // attribute state
    // One attribute layout: its index, name, unparsed layout string and the
    // bands built from it.
    struct layout_definition
    {
        uint idx; // index (0..31...) which identifies this layout
        const char *name; // name of layout
        entry *nameEntry;
        const char *layout; // string of layout (not yet parsed)
        band **elems; // array of top-level layout elems (or callables)

        // A layout string starting with '[' is treated as having callables.
        bool hasCallables()
        {
            return layout[0] == '[';
        }
        // Returns elems; asserts that the bands have been built.
        band **bands()
        {
            assert(elems != nullptr);
            return elems;
        }
    };
    // Attribute definitions and per-flag counts for one attribute context.
    struct attr_definitions
    {
        unpacker *u; // pointer to self, for U_NEW macro
        int xxx_flags_hi_bn; // locator for flags, count, indexes, calls bands
        int attrc; // ATTR_CONTEXT_CLASS, etc.
        uint flag_limit; // 32 or 63, depending on archive_options bit
        julong predef; // mask of built-in definitions
        julong redef; // mask of local flag definitions or redefinitions
        ptrlist layouts; // local (compressor-defined) defs, in index order
        int flag_count[X_ATTR_LIMIT_FLAGS_HI];
        intlist overflow_count;
        ptrlist strip_names; // what attribute names are being stripped?
        ptrlist band_stack; // Temp., used during layout parsing.
        ptrlist calls_to_link; // (ditto)
        int bands_made; // (ditto)

        // Release the storage held by the list members.
        void free()
        {
            layouts.free();
            overflow_count.free();
            strip_names.free();
            band_stack.free();
            calls_to_link.free();
        }

        // Locate the five fixed bands.
        band &xxx_flags_hi();
        band &xxx_flags_lo();
        band &xxx_attr_count();
        band &xxx_attr_indexes();
        band &xxx_attr_calls();
        band &fixed_band(int e_class_xxx);

        // Register a new layout, and make bands for it.
        layout_definition *defineLayout(int idx, const char *name, const char *layout);
        layout_definition *defineLayout(int idx, entry *nameEntry, const char *layout);
        band **buildBands(layout_definition *lo);

        // Parse a layout string or part of one, recursively if necessary.
        const char *parseLayout(const char *lp, band **&res, int curCble);
        const char *parseNumeral(const char *lp, int &res);
        const char *parseIntLayout(const char *lp, band *&res, byte le_kind,
                                   bool can_be_signed = false);
        band **popBody(int band_stack_base); // pops a body off band_stack

        // Read data into the bands of the idx-th layout.
        void readBandData(int idx); // parse layout, make bands, read data
        void readBandData(band **body, uint count); // recursive helper

        // Layout registered at idx, or nullptr when idx is past the end of layouts.
        layout_definition *getLayout(uint idx)
        {
            if (idx >= (uint)layouts.length())
                return nullptr;
            return (layout_definition *)layouts.get(idx);
        }

        // Choose the flag limit; may only be called while flag_limit is still 0.
        void setHaveLongFlags(bool z)
        {
            assert(flag_limit == 0); // not set up yet
            flag_limit = (z ? X_ATTR_LIMIT_FLAGS_HI : X_ATTR_LIMIT_NO_FLAGS_HI);
        }
        // True when the larger flag limit was selected by setHaveLongFlags().
        bool haveLongFlags()
        {
            assert(flag_limit == X_ATTR_LIMIT_NO_FLAGS_HI ||
                   flag_limit == X_ATTR_LIMIT_FLAGS_HI);
            return flag_limit == X_ATTR_LIMIT_FLAGS_HI;
        }

        // Return flag_count if idx is predef and not redef, else zero.
        int predefCount(uint idx);

        // True when bit idx of redef is set (always false for idx >= flag_limit).
        bool isRedefined(uint idx)
        {
            if (idx >= flag_limit)
                return false;
            return (bool)((redef >> idx) & 1);
        }
        // True when bit idx is set in predef and clear in redef
        // (always false for idx >= flag_limit).
        bool isPredefined(uint idx)
        {
            if (idx >= flag_limit)
                return false;
            return (bool)(((predef & ~redef) >> idx) & 1);
        }
        // Union of the predefined and redefined flag bits.
        julong flagIndexMask()
        {
            return (predef | redef);
        }
        // True when idx names a defined flag bit, or, at or above flag_limit,
        // an existing slot of overflow_count.
        bool isIndex(uint idx)
        {
            assert(flag_limit != 0); // must be set up already
            if (idx < flag_limit)
                return (bool)(((predef | redef) >> idx) & 1);
            else
                return (idx - flag_limit < (uint)overflow_count.length());
        }
        // Mutable counter for idx: flag_count below flag_limit, overflow_count above.
        int &getCount(uint idx)
        {
            assert(isIndex(idx));
            if (idx < flag_limit)
                return flag_count[idx];
            else
                return overflow_count.get(idx - flag_limit);
        }
        // Error handling forwards to the owning unpacker.
        bool aborting()
        {
            return u->aborting();
        }
        void abort(const char *msg)
        {
            u->abort(msg);
        }
    };

    attr_definitions attr_defs[ATTR_CONTEXT_LIMIT];

    // Initialization
    void init(read_input_fn_t input_fn = nullptr);
    // Resets to a known sane state
    void reset();
    // Deallocates all storage.
    void free();
    // Deallocates temporary storage (volatile after next client call).
    void free_temps()
    {
        tsmallbuf.init();
        tmallocs.freeAll();
    }

    // Option management methods
    bool set_option(const char *option, const char *value);
    const char *get_option(const char *option);

    void dump_options();

    // Fetching input.
    bool ensure_input(jlong more);
    // Current read position.
    byte *input_scan()
    {
        return rp;
    }
    // Bytes between the read pointer and rplimit.
    size_t input_remaining()
    {
        return rplimit - rp;
    }
    // Bytes between the start of the input buffer and the read pointer.
    size_t input_consumed()
    {
        return rp - input.base();
    }

    // Entry points to the unpack engine
    static int run(int argc, char **argv); // Unix-style entry point.
    void check_options();
    void start(void *packptr = nullptr, size_t len = 0);
    void write_file_to_jar(file *f);
    void finish();

    // Public post unpack methods
    int get_files_remaining()
    {
        return files_remaining;
    }
    int get_segments_remaining()
    {
        return archive_next_count;
    }
    file *get_next_file(); // returns nullptr once no files remain

    // General purpose methods
    // alloc() and temp_alloc() both allow small allocations; temp_alloc()
    // additionally requests temporary storage.
    void *alloc(size_t size)
    {
        return alloc_heap(size, true);
    }
    void *temp_alloc(size_t size)
    {
        return alloc_heap(size, true, true);
    }
    void *alloc_heap(size_t size, bool smallOK = false, bool temp = false);
    void saveTo(bytes &b, const char *str)
    {
        saveTo(b, (byte *)str, strlen(str));
    }
    void saveTo(bytes &b, bytes &data)
    {
        saveTo(b, data.ptr, data.len);
    }
    void saveTo(bytes &b, byte *ptr, size_t len); //{ b.ptr = U_NEW...}
    // Save a copy of str via saveTo() and return it as a C string.
    const char *saveStr(const char *str)
    {
        bytes buf;
        saveTo(buf, str);
        return buf.strval();
    }
    // Format num in decimal and save the resulting string.
    const char *saveIntStr(int num)
    {
        char buf[30];
        sprintf(buf, "%d", num);
        return saveStr(buf);
    }
    const char *get_abort_message();
    void abort(const char *s = nullptr);
    // True once abort_message has been set.
    bool aborting()
    {
        return abort_message != nullptr;
    }
    static unpacker *current(); // find current instance

    // Output management
    // Direct subsequent put* calls at `which`; no output may be open (wp == nullptr).
    void set_output(fillbytes *which)
    {
        assert(wp == nullptr);
        which->ensureSize(1 << 12); // covers the average classfile
        wpbase = which->base();
        wp = which->limit();
        wplimit = which->end();
    }
    fillbytes *close_output(fillbytes *which = nullptr); // inverse of set_output

    // These take an implicit parameter of wp/wplimit, and resize as necessary:
    byte *put_space(size_t len); // allocates space at wp, returns pointer
    // Reserve s bytes and return their offset from wpbase.
    size_t put_empty(size_t s)
    {
        byte *p = put_space(s);
        return p - wpbase;
    }
    void ensure_put_space(size_t len);
    void put_bytes(bytes &b)
    {
        b.writeTo(put_space(b.len));
    }
    void putu1(int n)
    {
        putu1_at(put_space(1), n);
    }
    // Writes at wp without calling put_space(); presumably the caller has
    // already reserved room (e.g. via ensure_put_space) -- confirm at call sites.
    void putu1_fast(int n)
    {
        putu1_at(wp++, n);
    }
    void putu2(int n); // { putu2_at(put_space(2), n); }
    void putu4(int n); // { putu4_at(put_space(4), n); }
    void putu8(jlong n); // { putu8_at(put_space(8), n); }
    void putref(entry *e); // { putu2_at(put_space(2), putref_index(e, 2)); }
    void putu1ref(entry *e); // { putu1_at(put_space(1), putref_index(e, 1)); }
    int putref_index(entry *e, int size); // size in [1..2]
    void put_label(int curIP, int size); // size in {2,4}
    void putlayout(band **body);
    void put_stackmap_type();

    // Offset of the write pointer from the start of the output block.
    size_t wpoffset()
    {
        return (size_t)(wp - wpbase);
    } // (unvariant across overflow)
    // Address of the given offset within the output block.
    byte *wp_at(size_t offset)
    {
        return wpbase + offset;
    }
    uint to_bci(uint bii);
    void get_code_header(int &max_stack, int &max_na_locals, int &handler_count, int &cflags);
    band *ref_band_for_self_op(int bc, bool &isAloadVar, int &origBCVar);
    band *ref_band_for_op(int bc);

    // Definitions of standard classfile int formats:
    // Store one byte; n must already fit in 8 bits.
    static void putu1_at(byte *wp, int n)
    {
        assert(n == (n & 0xFF));
        wp[0] = n;
    }
    static void putu2_at(byte *wp, int n);
    static void putu4_at(byte *wp, int n);
    static void putu8_at(byte *wp, jlong n);

    // Private stuff
    void reset_cur_classfile();
    void write_classfile_tail();
    void write_classfile_head();
    void write_code();
    void write_bc_ops();
    void write_members(int num, int attrc); // attrc=ATTR_CONTEXT_FIELD/METHOD
    int write_attrs(int attrc, julong indexBits);

    // The readers
    void read_bands();
    void read_file_header();
    void read_cp();
    void read_cp_counts(value_stream &hdr);
    void read_attr_defs();
    void read_ics();
    void read_attrs(int attrc, int obj_count);
    void read_classes();
    void read_code_headers();
    void read_bcs();
    void read_bc_ops();
    void read_files();
    void read_Utf8_values(entry *cpMap, int len);
    void read_single_words(band &cp_band, entry *cpMap, int len);
    void read_double_words(band &cp_bands, entry *cpMap, int len);
    void read_single_refs(band &cp_band, byte refTag, entry *cpMap, int len);
    void read_double_refs(band &cp_band, byte ref1Tag, byte ref2Tag, entry *cpMap, int len);
    void read_signature_values(entry *cpMap, int len);
};

// cpool error handling forwards to the owning unpacker; defined here because
// these need the complete unpacker type.
inline void cpool::abort(const char *msg)
{
    u->abort(msg);
}
inline bool cpool::aborting()
{
    return u->aborting();
}
+ */ + +#include +#include +#include +#include +#include +#include + +#include + +#ifdef _MSC_VER +#include +#include +#include +#else +#include +#endif + +#include "constants.h" +#include "defines.h" +#include "bytes.h" +#include "utils.h" + +#include "unpack.h" + +void *must_malloc(size_t size) +{ + size_t msize = size; + void *ptr = (msize > PSIZE_MAX) ? nullptr : malloc(msize); + if (ptr != nullptr) + { + memset(ptr, 0, size); + } + else + { + unpack_abort(ERROR_ENOMEM); + } + return ptr; +} + +void unpack_abort(const char *msg, unpacker *u) +{ + if (msg == nullptr) + msg = "corrupt pack file or internal error"; + if (u == nullptr) + u = unpacker::current(); + if (u == nullptr) + { + fprintf(stderr, "Error: unpacker: %s\n", msg); + ::abort(); + return; + } + u->abort(msg); +} + +bool unpack_aborting(unpacker *u) +{ + if (u == nullptr) + u = unpacker::current(); + if (u == nullptr) + { + fprintf(stderr, "Error: unpacker: no current instance\n"); + ::abort(); + return true; + } + return u->aborting(); +} diff --git a/depends/pack200/src/utils.h b/depends/pack200/src/utils.h new file mode 100644 index 00000000..0ce6b7d8 --- /dev/null +++ b/depends/pack200/src/utils.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2001, 2008, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
// Definitions of our util functions

// Allocate `size` zero-filled bytes or abort the unpacker.
void *must_malloc(size_t size);

// overflow management
// OVERFLOW is the "size computation overflowed" marker (all bits set);
// any size above PSIZE_MAX is treated as an overflow.
#define OVERFLOW ((size_t) - 1)
#define PSIZE_MAX (OVERFLOW / 2) /* normal size limit */

// Multiply size by scale, returning OVERFLOW when the product would exceed
// PSIZE_MAX. A scale of zero yields 0 (instead of dividing by zero), unless
// size is itself already out of range, in which case OVERFLOW is propagated.
inline size_t scale_size(size_t size, size_t scale)
{
    if (scale == 0)
    {
        return (size > PSIZE_MAX) ? OVERFLOW : 0;
    }
    return (size > PSIZE_MAX / scale) ? OVERFLOW : size * scale;
}

// Add two sizes, returning OVERFLOW when either operand or the sum exceeds
// PSIZE_MAX. OR-ing the three values exposes a set top bit in any of them.
inline size_t add_size(size_t size1, size_t size2)
{
    return ((size1 | size2 | (size1 + size2)) > PSIZE_MAX) ? OVERFLOW : size1 + size2;
}

// Three-operand form of add_size. size3 is converted to size_t, so a negative
// value is out of range and produces OVERFLOW.
inline size_t add_size(size_t size1, size_t size2, int size3)
{
    return add_size(add_size(size1, size2), size3);
}

// These may be expensive, because they have to go via Java TSD,
// if the optional u argument is missing.
struct unpacker;
extern void unpack_abort(const char *msg, unpacker *u = nullptr);
extern bool unpack_aborting(unpacker *u = nullptr);
Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * Note: Lifted from uncrunch.c from jdk sources + */ +#include +#include +#include +#include + +#include +#include + +#ifndef _MSC_VER +#include +#endif + +#include "defines.h" +#include "bytes.h" +#include "utils.h" + +#include "constants.h" +#include "unpack.h" + +#include "zip.h" + +#ifdef NO_ZLIB + +inline bool jar::deflate_bytes(bytes &head, bytes &tail) +{ + return false; +} +inline uint jar::get_crc32(uint c, uchar *ptr, uint len) +{ + return 0; +} +#define Z_NULL NULL + +#else // Have ZLIB + +#include + +inline uint jar::get_crc32(uint c, uchar *ptr, uint len) +{ + return crc32(c, ptr, len); +} + +#endif // End of ZLIB + +#ifdef sparc +#define SWAP_BYTES(a) ((((a) << 8) & 0xff00) | 0x00ff) & (((a) >> 8) | 0xff00) +#else +#define SWAP_BYTES(a) (a) +#endif + +#define GET_INT_LO(a) SWAP_BYTES(a & 0xFFFF) + +#define GET_INT_HI(a) SWAP_BYTES((a >> 16) & 0xFFFF); + +void jar::init(unpacker *u_) +{ + BYTES_OF(*this).clear(); + u = u_; + u->jarout = this; +} + +// Write data to the ZIP output stream. 
+void jar::write_data(void *buff, int len) +{ + while (len > 0) + { + int rc = (int)fwrite(buff, 1, len, jarfp); + if (rc <= 0) + { + fprintf(stderr, "Error: write on output file failed err=%d\n", errno); + exit(1); // Called only from the native standalone unpacker + } + output_file_offset += rc; + buff = ((char *)buff) + rc; + len -= rc; + } +} + +void jar::add_to_jar_directory(const char *fname, bool store, int modtime, int len, int clen, + uint32_t crc) +{ + uint fname_length = (uint)strlen(fname); + ushort header[23]; + if (modtime == 0) + modtime = default_modtime; + uint32_t dostime = get_dostime(modtime); + + header[0] = (ushort)SWAP_BYTES(0x4B50); + header[1] = (ushort)SWAP_BYTES(0x0201); + header[2] = (ushort)SWAP_BYTES(0xA); + + // required version + header[3] = (ushort)SWAP_BYTES(0xA); + + // flags 02 = maximum sub-compression flag + header[4] = (store) ? 0x0 : SWAP_BYTES(0x2); + + // Compression method 8=deflate. + header[5] = (store) ? 0x0 : SWAP_BYTES(0x08); + + // Last modified date and time. + header[6] = (ushort)GET_INT_LO(dostime); + header[7] = (ushort)GET_INT_HI(dostime); + + // CRC + header[8] = (ushort)GET_INT_LO(crc); + header[9] = (ushort)GET_INT_HI(crc); + + // Compressed length: + header[10] = (ushort)GET_INT_LO(clen); + header[11] = (ushort)GET_INT_HI(clen); + + // Uncompressed length. + header[12] = (ushort)GET_INT_LO(len); + header[13] = (ushort)GET_INT_HI(len); + + // Filename length + header[14] = (ushort)SWAP_BYTES(fname_length); + // So called "extra field" length. + header[15] = 0; + // So called "comment" length. + header[16] = 0; + // Disk number start + header[17] = 0; + // File flags => binary + header[18] = 0; + // More file flags + header[19] = 0; + header[20] = 0; + // Offset within ZIP file. + header[21] = (ushort)GET_INT_LO(output_file_offset); + header[22] = (ushort)GET_INT_HI(output_file_offset); + + // Copy the whole thing into the central directory. 
+ central_directory.append(header, sizeof(header)); + + // Copy the fname to the header. + central_directory.append(fname, fname_length); + + central_directory_count++; +} + +void jar::write_jar_header(const char *fname, bool store, int modtime, int len, int clen, + uint crc) +{ + uint fname_length = (uint)strlen(fname); + ushort header[15]; + if (modtime == 0) + modtime = default_modtime; + uint32_t dostime = get_dostime(modtime); + + // ZIP LOC magic. + header[0] = (ushort)SWAP_BYTES(0x4B50); + header[1] = (ushort)SWAP_BYTES(0x0403); + + // Version + header[2] = (ushort)SWAP_BYTES(0xA); + + // flags 02 = maximum sub-compression flag + header[3] = (store) ? 0x0 : SWAP_BYTES(0x2); + + // Compression method = deflate + header[4] = (store) ? 0x0 : SWAP_BYTES(0x08); + + // Last modified date and time. + header[5] = (ushort)GET_INT_LO(dostime); + header[6] = (ushort)GET_INT_HI(dostime); + + // CRC + header[7] = (ushort)GET_INT_LO(crc); + header[8] = (ushort)GET_INT_HI(crc); + + // Compressed length: + header[9] = (ushort)GET_INT_LO(clen); + header[10] = (ushort)GET_INT_HI(clen); + + // Uncompressed length. + header[11] = (ushort)GET_INT_LO(len); + header[12] = (ushort)GET_INT_HI(len); + + // Filename length + header[13] = (ushort)SWAP_BYTES(fname_length); + // So called "extra field" length. + header[14] = 0; + + // Write the LOC header to the output file. + write_data(header, (int)sizeof(header)); + + // Copy the fname to the header. + write_data((char *)fname, (int)fname_length); +} + +static const char marker_comment[] = ZIP_ARCHIVE_MARKER_COMMENT; + +void jar::write_central_directory() +{ + bytes mc; + mc.set(marker_comment); + + ushort header[11]; + + // Create the End of Central Directory structure. + header[0] = (ushort)SWAP_BYTES(0x4B50); + header[1] = (ushort)SWAP_BYTES(0x0605); + // disk numbers + header[2] = 0; + header[3] = 0; + // Number of entries in central directory. 
+ header[4] = (ushort)SWAP_BYTES(central_directory_count); + header[5] = (ushort)SWAP_BYTES(central_directory_count); + // Size of the central directory} + header[6] = (ushort)GET_INT_LO((int)central_directory.size()); + header[7] = (ushort)GET_INT_HI((int)central_directory.size()); + // Offset of central directory within disk. + header[8] = (ushort)GET_INT_LO(output_file_offset); + header[9] = (ushort)GET_INT_HI(output_file_offset); + // zipfile comment length; + header[10] = (ushort)SWAP_BYTES((int)mc.len); + + // Write the central directory. + write_data(central_directory.b); + + // Write the End of Central Directory structure. + write_data(header, (int)sizeof(header)); + + // Write the comment. + write_data(mc); +} + +// Public API + +// Open a Jar file and initialize. +void jar::openJarFile(const char *fname) +{ + if (!jarfp) + { + jarfp = fopen(fname, "wb"); + if (!jarfp) + { + fprintf(stderr, "Error: Could not open jar file: %s\n", fname); + exit(3); // Called only from the native standalone unpacker + } + } +} + +// Add a ZIP entry and copy the file data +void jar::addJarEntry(const char *fname, bool deflate_hint, int modtime, bytes &head, + bytes &tail) +{ + int len = (int)(head.len + tail.len); + int clen = 0; + + uint crc = get_crc32(0, Z_NULL, 0); + if (head.len != 0) + crc = get_crc32(crc, (uchar *)head.ptr, (uint)head.len); + if (tail.len != 0) + crc = get_crc32(crc, (uchar *)tail.ptr, (uint)tail.len); + + bool deflate = (deflate_hint && len > 0); + + if (deflate) + { + if (deflate_bytes(head, tail) == false) + { + deflate = false; + } + } + clen = (int)((deflate) ? 
deflated.size() : len); + add_to_jar_directory(fname, !deflate, modtime, len, clen, crc); + write_jar_header(fname, !deflate, modtime, len, clen, crc); + + if (deflate) + { + write_data(deflated.b); + } + else + { + write_data(head); + write_data(tail); + } +} + +// Add a ZIP entry for a directory name no data +void jar::addDirectoryToJarFile(const char *dir_name) +{ + bool store = true; + add_to_jar_directory((const char *)dir_name, store, default_modtime, 0, 0, 0); + write_jar_header((const char *)dir_name, store, default_modtime, 0, 0, 0); +} + +// Write out the central directory and close the jar file. +void jar::closeJarFile(bool central) +{ + if (jarfp) + { + fflush(jarfp); + if (central) + write_central_directory(); + fflush(jarfp); + fclose(jarfp); + } + reset(); +} + +/* Convert the date y/n/d and time h:m:s to a four byte DOS date and + * time (date in high two bytes, time in low two bytes allowing magnitude + * comparison). + */ +inline uint32_t jar::dostime(int y, int n, int d, int h, int m, int s) +{ + return y < 1980 ? 
dostime(1980, 1, 1, 0, 0, 0) + : (((uint32_t)y - 1980) << 25) | ((uint32_t)n << 21) | ((uint32_t)d << 16) | + ((uint32_t)h << 11) | ((uint32_t)m << 5) | ((uint32_t)s >> 1); +} + +#ifdef _REENTRANT // solaris +extern "C" struct tm *gmtime_r(const time_t *, struct tm *); +#else +#define gmtime_r(t, s) gmtime(t) +#endif +/* + * Return the Unix time in DOS format + */ +uint32_t jar::get_dostime(int modtime) +{ + // see defines.h + if (modtime != 0 && modtime == modtime_cache) + return dostime_cache; + if (modtime != 0 && default_modtime == 0) + default_modtime = modtime; // catch a reasonable default + time_t t = modtime; + struct tm sbuf; + (void)memset((void *)&sbuf, 0, sizeof(sbuf)); + struct tm *s = gmtime_r(&t, &sbuf); + modtime_cache = modtime; + dostime_cache = + dostime(s->tm_year + 1900, s->tm_mon + 1, s->tm_mday, s->tm_hour, s->tm_min, s->tm_sec); + // printf("modtime %d => %d\n", modtime_cache, dostime_cache); + return dostime_cache; +} + +/* Returns true on success, and will set the clen to the compressed + length, the caller should verify if true and clen less than the + input data +*/ +bool jar::deflate_bytes(bytes &head, bytes &tail) +{ + int len = (int)(head.len + tail.len); + + z_stream zs; + BYTES_OF(zs).clear(); + + // NOTE: the window size should always be -MAX_WBITS normally -15. 
+ // unzip/zipup.c and java/Deflater.c + + int error = + deflateInit2(&zs, Z_BEST_COMPRESSION, Z_DEFLATED, -MAX_WBITS, 8, Z_DEFAULT_STRATEGY); + if (error != Z_OK) + { + /* + switch (error) + { + case Z_MEM_ERROR: + PRINTCR((2, "Error: deflate error : Out of memory \n")); + break; + case Z_STREAM_ERROR: + PRINTCR((2, "Error: deflate error : Invalid compression level \n")); + break; + case Z_VERSION_ERROR: + PRINTCR((2, "Error: deflate error : Invalid version\n")); + break; + default: + PRINTCR((2, "Error: Internal deflate error error = %d\n", error)); + } + */ + return false; + } + + deflated.empty(); + zs.next_out = (uchar *)deflated.grow(len + (len / 2)); + zs.avail_out = (int)deflated.size(); + + zs.next_in = (uchar *)head.ptr; + zs.avail_in = (int)head.len; + + bytes *first = &head; + bytes *last = &tail; + if (last->len == 0) + { + first = nullptr; + last = &head; + } + else if (first->len == 0) + { + first = nullptr; + } + + if (first != nullptr && error == Z_OK) + { + zs.next_in = (uchar *)first->ptr; + zs.avail_in = (int)first->len; + error = deflate(&zs, Z_NO_FLUSH); + } + if (error == Z_OK) + { + zs.next_in = (uchar *)last->ptr; + zs.avail_in = (int)last->len; + error = deflate(&zs, Z_FINISH); + } + if (error == Z_STREAM_END) + { + if (len > (int)zs.total_out) + { + deflated.b.len = zs.total_out; + deflateEnd(&zs); + return true; + } + deflateEnd(&zs); + return false; + } + + deflateEnd(&zs); + return false; +} + +// Callback for fetching data from a GZIP input stream +static jlong read_input_via_gzip(unpacker *u, void *buf, jlong minlen, jlong maxlen) +{ + assert(minlen <= maxlen); // don't talk nonsense + jlong numread = 0; + char *bufptr = (char *)buf; + char *inbuf = u->gzin->inbuf; + size_t inbuflen = sizeof(u->gzin->inbuf); + unpacker::read_input_fn_t read_gzin_fn = (unpacker::read_input_fn_t)u->gzin->read_input_fn; + z_stream &zs = *(z_stream *)u->gzin->zstream; + while (numread < minlen) + { + int readlen = (1 << 16); // pretty arbitrary + if 
(readlen > (maxlen - numread)) + readlen = (int)(maxlen - numread); + zs.next_out = (uchar *)bufptr; + zs.avail_out = readlen; + if (zs.avail_in == 0) + { + zs.avail_in = (int)read_gzin_fn(u, inbuf, 1, inbuflen); + zs.next_in = (uchar *)inbuf; + } + int error = inflate(&zs, Z_NO_FLUSH); + if (error != Z_OK && error != Z_STREAM_END) + { + u->abort("error inflating input"); + break; + } + int nr = readlen - zs.avail_out; + numread += nr; + bufptr += nr; + assert(numread <= maxlen); + if (error == Z_STREAM_END) + { + enum + { + TRAILER_LEN = 8 + }; + // skip 8-byte trailer + if (zs.avail_in >= TRAILER_LEN) + { + zs.avail_in -= TRAILER_LEN; + } + else + { + // Bug: 5023768,we read past the TRAILER_LEN to see if there is + // any extraneous data, as we dont support concatenated .gz + // files just yet. + int extra = (int)read_gzin_fn(u, inbuf, 1, inbuflen); + zs.avail_in += extra - TRAILER_LEN; + } + // %%% should check final CRC and length here + // %%% should check for concatenated *.gz files here + if (zs.avail_in > 0) + u->abort("garbage after end of deflated input stream"); + // pop this filter off: + u->gzin->free(); + break; + } + } + + // fprintf(u->errstrm, "readInputFn(%d,%d) => %d (gunzip)\n", + // (int)minlen, (int)maxlen, (int)numread); + return numread; +} + +void gunzip::init(unpacker *u_) +{ + BYTES_OF(*this).clear(); + u = u_; + assert(u->gzin == nullptr); // once only, please + read_input_fn = (void *)u->read_input_fn; + zstream = NEW(z_stream, 1); + u->gzin = this; + u->read_input_fn = read_input_via_gzip; +} + +void gunzip::start(int magic) +{ + assert((magic & GZIP_MAGIC_MASK) == GZIP_MAGIC); + int gz_flg = (magic & 0xFF); // keep "flg", discard other 3 bytes + enum + { + FHCRC = (1 << 1), + FEXTRA = (1 << 2), + FNAME = (1 << 3), + FCOMMENT = (1 << 4) + }; + char gz_mtime[4]; + char gz_xfl[1]; + char gz_os[1]; + char gz_extra_len[2]; + char gz_hcrc[2]; + char gz_ignore; + // do not save extra, name, comment + read_fixed_field(gz_mtime, 
sizeof(gz_mtime)); + read_fixed_field(gz_xfl, sizeof(gz_xfl)); + read_fixed_field(gz_os, sizeof(gz_os)); + if (gz_flg & FEXTRA) + { + read_fixed_field(gz_extra_len, sizeof(gz_extra_len)); + int extra_len = gz_extra_len[0] & 0xFF; + extra_len += (gz_extra_len[1] & 0xFF) << 8; + for (; extra_len > 0; extra_len--) + { + read_fixed_field(&gz_ignore, 1); + } + } + int null_terms = 0; + if (gz_flg & FNAME) + null_terms++; + if (gz_flg & FCOMMENT) + null_terms++; + for (; null_terms; null_terms--) + { + for (;;) + { + gz_ignore = 0; + read_fixed_field(&gz_ignore, 1); + if (gz_ignore == 0) + break; + } + } + if (gz_flg & FHCRC) + read_fixed_field(gz_hcrc, sizeof(gz_hcrc)); + + if (aborting()) + return; + + // now the input stream is ready to read into the inflater + int error = inflateInit2((z_stream *)zstream, -MAX_WBITS); + if (error != Z_OK) + { + abort("cannot create input"); + return; + } +} + +void gunzip::free() +{ + assert(u->gzin == this); + u->gzin = nullptr; + u->read_input_fn = (unpacker::read_input_fn_t) this->read_input_fn; + inflateEnd((z_stream *)zstream); + ::free(zstream); + zstream = nullptr; + ::free(this); +} + +void gunzip::read_fixed_field(char *buf, size_t buflen) +{ + if (aborting()) + return; + jlong nr = ((unpacker::read_input_fn_t)read_input_fn)(u, buf, buflen, buflen); + if ((size_t)nr != buflen) + u->abort("short stream header"); +} diff --git a/depends/pack200/src/zip.h b/depends/pack200/src/zip.h new file mode 100644 index 00000000..1b6a8b02 --- /dev/null +++ b/depends/pack200/src/zip.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2001, 2008, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
// Writer for the output JAR (ZIP) file.
// Entries are streamed to `jarfp` as they are added, while their central
// directory records are collected in memory and written out on close.
struct jar
{
    // JAR file writer
    FILE *jarfp;         // output stream; opened by openJarFile()
    int default_modtime; // used for entries whose modtime is 0

    // One-entry cache used by get_dostime(): last Unix time converted
    // and its DOS-format result.
    int modtime_cache;
    uint32_t dostime_cache;

    // Private members
    fillbytes central_directory;    // accumulated central directory records
    ushort central_directory_count; // number of records in central_directory
    uint output_file_offset;        // bytes written to jarfp so far
    fillbytes deflated;             // temporary buffer

    // pointer to outer unpacker, for error checks etc.
    unpacker *u;

    // Public Methods
    void openJarFile(const char *fname);
    // Adds an entry whose data is `head` followed by `tail`.
    void addJarEntry(const char *fname, bool deflate_hint, int modtime, bytes &head,
                     bytes &tail);
    void addDirectoryToJarFile(const char *dir_name);
    // `central` selects whether the central directory is written before closing.
    void closeJarFile(bool central);

    void init(unpacker *u_);

    // Release the two internal buffers.
    void free()
    {
        central_directory.free();
        deflated.free();
    }

    // Release the buffers and re-initialize for the same unpacker.
    void reset()
    {
        free();
        init(u);
    }

    // Private Methods
    void write_data(void *ptr, int len);
    // Convenience overload: write the whole byte range `b`.
    void write_data(bytes &b)
    {
        write_data(b.ptr, (int)b.len);
    }
    void add_to_jar_directory(const char *fname, bool store, int modtime, int len, int clen,
                              uint32_t crc);
    void write_jar_header(const char *fname, bool store, int modtime, int len, int clen,
                          unsigned int crc);
    void write_central_directory();
    // Packs a calendar date/time into the four byte DOS format.
    uint32_t dostime(int y, int n, int d, int h, int m, int s);
    // Converts a Unix time to DOS format (cached, see above).
    uint32_t get_dostime(int modtime);

    // The definitions of these depend on the NO_ZLIB option:
    bool deflate_bytes(bytes &head, bytes &tail);
    static uint get_crc32(uint c, unsigned char *ptr, uint len);

    // error handling (forwarded to the owning unpacker)
    void abort(const char *msg)
    {
        unpack_abort(msg, u);
    }
    bool aborting()
    {
        return unpack_aborting(u);
    }
};
+ unpacker *u; + + void *read_input_fn; // underlying byte stream + void *zstream; // inflater state + char inbuf[1 << 14]; // input buffer + + void init(unpacker *u_); // pushes new value on u->read_input_fn + + void free(); + + void start(int magic); + + // private stuff + void read_fixed_field(char *buf, size_t buflen); + + // error handling + void abort(const char *msg) + { + unpack_abort(msg, u); + } + bool aborting() + { + return unpack_aborting(u); + } +}; diff --git a/logic/OneSixUpdate.cpp b/logic/OneSixUpdate.cpp index d0af8b93..73bd9403 100644 --- a/logic/OneSixUpdate.cpp +++ b/logic/OneSixUpdate.cpp @@ -189,5 +189,8 @@ void OneSixUpdate::jarlibFinished() void OneSixUpdate::jarlibFailed() { - emitFailed("Failed to download the binary garbage. Try again. Maybe. IF YOU DARE"); + QStringList failed = jarlibDownloadJob->getFailedFiles(); + QString failed_all = failed.join("\n"); + emitFailed("Failed to download the following files:\n" + failed_all + + "\n\nPlease try again."); } diff --git a/logic/lists/MinecraftVersionList.cpp b/logic/lists/MinecraftVersionList.cpp index 86ba0792..35f7251e 100644 --- a/logic/lists/MinecraftVersionList.cpp +++ b/logic/lists/MinecraftVersionList.cpp @@ -152,7 +152,7 @@ void MCVListLoadTask::executeTask() void MCVListLoadTask::list_downloaded() { - if(vlistReply->error() != QNetworkReply::QNetworkReply::NoError) + if(vlistReply->error() != QNetworkReply::NoError) { vlistReply->deleteLater(); emitFailed("Failed to load Minecraft main version list" + vlistReply->errorString()); diff --git a/logic/net/ByteArrayDownload.cpp b/logic/net/ByteArrayDownload.cpp index 6ae3f121..61ecc298 100644 --- a/logic/net/ByteArrayDownload.cpp +++ b/logic/net/ByteArrayDownload.cpp @@ -31,7 +31,7 @@ void ByteArrayDownload::downloadProgress ( qint64 bytesReceived, qint64 bytesTot void ByteArrayDownload::downloadError ( QNetworkReply::NetworkError error ) { // error happened during download. 
- // TODO: log the reason why + qDebug() << "URL:" << m_url.toString().toLocal8Bit() << "Network error: " << error; m_status = Job_Failed; } diff --git a/logic/net/DownloadJob.cpp b/logic/net/DownloadJob.cpp index 3acba050..8da1f39b 100644 --- a/logic/net/DownloadJob.cpp +++ b/logic/net/DownloadJob.cpp @@ -7,47 +7,48 @@ #include -ByteArrayDownloadPtr DownloadJob::add ( QUrl url ) +ByteArrayDownloadPtr DownloadJob::add(QUrl url) { - ByteArrayDownloadPtr ptr (new ByteArrayDownload(url)); + ByteArrayDownloadPtr ptr(new ByteArrayDownload(url)); ptr->index_within_job = downloads.size(); downloads.append(ptr); - parts_progress.append(QPair(0,1)); + parts_progress.append(part_info()); total_progress++; return ptr; } -FileDownloadPtr DownloadJob::add ( QUrl url, QString rel_target_path) +FileDownloadPtr DownloadJob::add(QUrl url, QString rel_target_path) { - FileDownloadPtr ptr (new FileDownload(url, rel_target_path)); + FileDownloadPtr ptr(new FileDownload(url, rel_target_path)); ptr->index_within_job = downloads.size(); downloads.append(ptr); - parts_progress.append(QPair(0,1)); + parts_progress.append(part_info()); total_progress++; return ptr; } -CacheDownloadPtr DownloadJob::add ( QUrl url, MetaEntryPtr entry) +CacheDownloadPtr DownloadJob::add(QUrl url, MetaEntryPtr entry) { - CacheDownloadPtr ptr (new CacheDownload(url, entry)); + CacheDownloadPtr ptr(new CacheDownload(url, entry)); ptr->index_within_job = downloads.size(); downloads.append(ptr); - parts_progress.append(QPair(0,1)); + parts_progress.append(part_info()); total_progress++; return ptr; } -void DownloadJob::partSucceeded ( int index ) +void DownloadJob::partSucceeded(int index) { // do progress. 
all slots are 1 in size at least - auto & slot = parts_progress[index]; - partProgress ( index, slot.second , slot.second ); - + auto &slot = parts_progress[index]; + partProgress(index, slot.total_progress, slot.total_progress); + num_succeeded++; - qDebug() << m_job_name.toLocal8Bit() << " progress: " << num_succeeded << "/" << downloads.size(); - if(num_failed + num_succeeded == downloads.size()) + qDebug() << m_job_name.toLocal8Bit() << " progress: " << num_succeeded << "/" + << downloads.size(); + if (num_failed + num_succeeded == downloads.size()) { - if(num_failed) + if (num_failed) { qDebug() << m_job_name.toLocal8Bit() << " failed."; emit failed(); @@ -60,39 +61,65 @@ void DownloadJob::partSucceeded ( int index ) } } -void DownloadJob::partFailed ( int index ) +void DownloadJob::partFailed(int index) { - num_failed++; - if(num_failed + num_succeeded == downloads.size()) + auto &slot = parts_progress[index]; + if (slot.failures == 3) { - qDebug() << m_job_name.toLocal8Bit() << " failed."; - emit failed(); + qDebug() << "Part " << index << " failed 3 times (" << downloads[index]->m_url << ")"; + num_failed++; + if (num_failed + num_succeeded == downloads.size()) + { + qDebug() << m_job_name.toLocal8Bit() << " failed."; + emit failed(); + } + } + else + { + qDebug() << "Part " << index << " failed, restarting (" << downloads[index]->m_url + << ")"; + // restart the job + slot.failures++; + downloads[index]->start(); } } -void DownloadJob::partProgress ( int index, qint64 bytesReceived, qint64 bytesTotal ) +void DownloadJob::partProgress(int index, qint64 bytesReceived, qint64 bytesTotal) { - auto & slot = parts_progress[index]; - - current_progress -= slot.first; - slot.first = bytesReceived; - current_progress += slot.first; - - total_progress -= slot.second; - slot.second = bytesTotal; - total_progress += slot.second; + auto &slot = parts_progress[index]; + + current_progress -= slot.current_progress; + slot.current_progress = bytesReceived; + 
current_progress += slot.current_progress; + + total_progress -= slot.total_progress; + slot.total_progress = bytesTotal; + total_progress += slot.total_progress; emit progress(current_progress, total_progress); } - void DownloadJob::start() { qDebug() << m_job_name.toLocal8Bit() << " started."; - for(auto iter: downloads) + for (auto iter : downloads) { connect(iter.data(), SIGNAL(succeeded(int)), SLOT(partSucceeded(int))); connect(iter.data(), SIGNAL(failed(int)), SLOT(partFailed(int))); - connect(iter.data(), SIGNAL(progress(int,qint64,qint64)), SLOT(partProgress(int,qint64,qint64))); + connect(iter.data(), SIGNAL(progress(int, qint64, qint64)), + SLOT(partProgress(int, qint64, qint64))); iter->start(); } } + +QStringList DownloadJob::getFailedFiles() +{ + QStringList failed; + for (auto download : downloads) + { + if (download->m_status == Job_Failed) + { + failed.push_back(download->m_url.toString()); + } + } + return failed; +} diff --git a/logic/net/DownloadJob.h b/logic/net/DownloadJob.h index c8f6a9d7..5d5ba01a 100644 --- a/logic/net/DownloadJob.h +++ b/logic/net/DownloadJob.h @@ -51,6 +51,7 @@ public: { return m_running; }; + QStringList getFailedFiles(); signals: void started(); void progress(qint64 current, qint64 total); @@ -63,9 +64,15 @@ private slots: void partSucceeded(int index); void partFailed(int index); private: + struct part_info + { + qint64 current_progress = 0; + qint64 total_progress = 1; + int failures = 0; + }; QString m_job_name; QList downloads; - QList> parts_progress; + QList parts_progress; qint64 current_progress = 0; qint64 total_progress = 0; int num_succeeded = 0; -- cgit v1.2.3