diff options
Diffstat (limited to 'modules/brotli/dec/transform.h')
-rw-r--r-- | modules/brotli/dec/transform.h | 300 |
1 files changed, 300 insertions, 0 deletions
diff --git a/modules/brotli/dec/transform.h b/modules/brotli/dec/transform.h new file mode 100644 index 000000000..8c08f3fc0 --- /dev/null +++ b/modules/brotli/dec/transform.h @@ -0,0 +1,300 @@ +/* Copyright 2013 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +/* Transformations on dictionary words. */ + +#ifndef BROTLI_DEC_TRANSFORM_H_ +#define BROTLI_DEC_TRANSFORM_H_ + +#include "./port.h" +#include "./types.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +enum WordTransformType { + kIdentity = 0, + kOmitLast1 = 1, + kOmitLast2 = 2, + kOmitLast3 = 3, + kOmitLast4 = 4, + kOmitLast5 = 5, + kOmitLast6 = 6, + kOmitLast7 = 7, + kOmitLast8 = 8, + kOmitLast9 = 9, + kUppercaseFirst = 10, + kUppercaseAll = 11, + kOmitFirst1 = 12, + kOmitFirst2 = 13, + kOmitFirst3 = 14, + kOmitFirst4 = 15, + kOmitFirst5 = 16, + kOmitFirst6 = 17, + kOmitFirst7 = 18, + kOmitFirst8 = 19, + kOmitFirst9 = 20 +}; + +typedef struct { + const uint8_t prefix_id; + const uint8_t transform; + const uint8_t suffix_id; +} Transform; + +static const char kPrefixSuffix[208] = + "\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0" + " for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0" + " is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. This \0" + " not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous "; + +enum { + /* EMPTY = "" + SP = " " + DQUOT = "\"" + SQUOT = "'" + CLOSEBR = "]" + OPEN = "(" + SLASH = "/" + NBSP = non-breaking space "\0xc2\xa0" + */ + kPFix_EMPTY = 0, + kPFix_SP = 1, + kPFix_COMMASP = 3, + kPFix_SPofSPtheSP = 6, + kPFix_SPtheSP = 9, + kPFix_eSP = 12, + kPFix_SPofSP = 15, + kPFix_sSP = 20, + kPFix_DOT = 23, + kPFix_SPandSP = 25, + kPFix_SPinSP = 31, + kPFix_DQUOT = 36, + kPFix_SPtoSP = 38, + kPFix_DQUOTGT = 43, + kPFix_NEWLINE = 46, + kPFix_DOTSP = 48, + kPFix_CLOSEBR = 51, + kPFix_SPforSP = 53, + kPFix_SPaSP = 59, + kPFix_SPthatSP = 63, + kPFix_SQUOT = 70, + kPFix_SPwithSP = 72, + kPFix_SPfromSP = 79, + kPFix_SPbySP = 86, + kPFix_OPEN = 91, + kPFix_DOTSPTheSP = 93, + kPFix_SPonSP = 100, + kPFix_SPasSP = 105, + kPFix_SPisSP = 110, + kPFix_ingSP = 115, + kPFix_NEWLINETAB = 120, + kPFix_COLON = 123, + kPFix_edSP = 125, + kPFix_EQDQUOT = 129, + kPFix_SPatSP = 132, + kPFix_lySP = 137, + kPFix_COMMA = 141, + kPFix_EQSQUOT = 143, + kPFix_DOTcomSLASH = 146, + kPFix_DOTSPThisSP = 152, + kPFix_SPnotSP = 160, + kPFix_erSP = 166, + kPFix_alSP = 170, + kPFix_fulSP = 174, + kPFix_iveSP = 179, + kPFix_lessSP = 184, + kPFix_estSP = 190, + kPFix_izeSP = 195, + kPFix_NBSP = 200, + kPFix_ousSP = 203 +}; + +static const Transform kTransforms[] = { + { kPFix_EMPTY, kIdentity, kPFix_EMPTY }, + { kPFix_EMPTY, kIdentity, kPFix_SP }, + { kPFix_SP, kIdentity, kPFix_SP }, + { kPFix_EMPTY, kOmitFirst1, kPFix_EMPTY }, + { kPFix_EMPTY, kUppercaseFirst, kPFix_SP }, + { kPFix_EMPTY, kIdentity, kPFix_SPtheSP }, + { kPFix_SP, kIdentity, kPFix_EMPTY }, + { kPFix_sSP, kIdentity, kPFix_SP }, + { kPFix_EMPTY, kIdentity, kPFix_SPofSP }, + { kPFix_EMPTY, kUppercaseFirst, kPFix_EMPTY }, + { kPFix_EMPTY, kIdentity, kPFix_SPandSP }, + { kPFix_EMPTY, kOmitFirst2, kPFix_EMPTY }, + { kPFix_EMPTY, kOmitLast1, kPFix_EMPTY }, + { kPFix_COMMASP, kIdentity, kPFix_SP }, + { kPFix_EMPTY, kIdentity, kPFix_COMMASP }, + { kPFix_SP, kUppercaseFirst, kPFix_SP }, + { kPFix_EMPTY, kIdentity, kPFix_SPinSP }, + { kPFix_EMPTY, kIdentity, kPFix_SPtoSP }, + { kPFix_eSP, kIdentity, kPFix_SP }, + { kPFix_EMPTY, kIdentity, kPFix_DQUOT }, + { kPFix_EMPTY, kIdentity, kPFix_DOT }, + { kPFix_EMPTY, kIdentity, kPFix_DQUOTGT }, + { kPFix_EMPTY, kIdentity, kPFix_NEWLINE }, + { kPFix_EMPTY, kOmitLast3, kPFix_EMPTY }, + { kPFix_EMPTY, kIdentity, kPFix_CLOSEBR }, + { kPFix_EMPTY, kIdentity, kPFix_SPforSP }, + { kPFix_EMPTY, kOmitFirst3, kPFix_EMPTY }, + { kPFix_EMPTY, kOmitLast2, kPFix_EMPTY }, + { kPFix_EMPTY, kIdentity, kPFix_SPaSP }, + { kPFix_EMPTY, kIdentity, kPFix_SPthatSP }, + { kPFix_SP, kUppercaseFirst, kPFix_EMPTY }, + { kPFix_EMPTY, kIdentity, kPFix_DOTSP }, + { kPFix_DOT, kIdentity, kPFix_EMPTY }, + { kPFix_SP, kIdentity, kPFix_COMMASP }, + { kPFix_EMPTY, kOmitFirst4, kPFix_EMPTY }, + { kPFix_EMPTY, kIdentity, kPFix_SPwithSP }, + { kPFix_EMPTY, kIdentity, kPFix_SQUOT }, + { kPFix_EMPTY, kIdentity, kPFix_SPfromSP }, + { kPFix_EMPTY, kIdentity, kPFix_SPbySP }, + { kPFix_EMPTY, kOmitFirst5, kPFix_EMPTY }, + { kPFix_EMPTY, kOmitFirst6, kPFix_EMPTY }, + { kPFix_SPtheSP, kIdentity, kPFix_EMPTY }, + { kPFix_EMPTY, kOmitLast4, kPFix_EMPTY }, + { kPFix_EMPTY, kIdentity, kPFix_DOTSPTheSP }, + { kPFix_EMPTY, kUppercaseAll, kPFix_EMPTY }, + { kPFix_EMPTY, kIdentity, kPFix_SPonSP }, + { kPFix_EMPTY, kIdentity, kPFix_SPasSP }, + { kPFix_EMPTY, kIdentity, kPFix_SPisSP }, + { kPFix_EMPTY, kOmitLast7, kPFix_EMPTY }, + { kPFix_EMPTY, kOmitLast1, kPFix_ingSP }, + { kPFix_EMPTY, kIdentity, kPFix_NEWLINETAB }, + { kPFix_EMPTY, kIdentity, kPFix_COLON }, + { kPFix_SP, kIdentity, kPFix_DOTSP }, + { kPFix_EMPTY, kIdentity, kPFix_edSP }, + { kPFix_EMPTY, kOmitFirst9, kPFix_EMPTY }, + { kPFix_EMPTY, kOmitFirst7, kPFix_EMPTY }, + { kPFix_EMPTY, kOmitLast6, kPFix_EMPTY }, + { kPFix_EMPTY, kIdentity, kPFix_OPEN }, + { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMASP }, + { kPFix_EMPTY, kOmitLast8, kPFix_EMPTY }, + { kPFix_EMPTY, kIdentity, kPFix_SPatSP }, + { kPFix_EMPTY, kIdentity, kPFix_lySP }, + { kPFix_SPtheSP, kIdentity, kPFix_SPofSP }, + { kPFix_EMPTY, kOmitLast5, kPFix_EMPTY }, + { kPFix_EMPTY, kOmitLast9, kPFix_EMPTY }, + { kPFix_SP, kUppercaseFirst, kPFix_COMMASP }, + { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOT }, + { kPFix_DOT, kIdentity, kPFix_OPEN }, + { kPFix_EMPTY, kUppercaseAll, kPFix_SP }, + { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOTGT }, + { kPFix_EMPTY, kIdentity, kPFix_EQDQUOT }, + { kPFix_SP, kIdentity, kPFix_DOT }, + { kPFix_DOTcomSLASH, kIdentity, kPFix_EMPTY }, + { kPFix_SPtheSP, kIdentity, kPFix_SPofSPtheSP }, + { kPFix_EMPTY, kUppercaseFirst, kPFix_SQUOT }, + { kPFix_EMPTY, kIdentity, kPFix_DOTSPThisSP }, + { kPFix_EMPTY, kIdentity, kPFix_COMMA }, + { kPFix_DOT, kIdentity, kPFix_SP }, + { kPFix_EMPTY, kUppercaseFirst, kPFix_OPEN }, + { kPFix_EMPTY, kUppercaseFirst, kPFix_DOT }, + { kPFix_EMPTY, kIdentity, kPFix_SPnotSP }, + { kPFix_SP, kIdentity, kPFix_EQDQUOT }, + { kPFix_EMPTY, kIdentity, kPFix_erSP }, + { kPFix_SP, kUppercaseAll, kPFix_SP }, + { kPFix_EMPTY, kIdentity, kPFix_alSP }, + { kPFix_SP, kUppercaseAll, kPFix_EMPTY }, + { kPFix_EMPTY, kIdentity, kPFix_EQSQUOT }, + { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOT }, + { kPFix_EMPTY, kUppercaseFirst, kPFix_DOTSP }, + { kPFix_SP, kIdentity, kPFix_OPEN }, + { kPFix_EMPTY, kIdentity, kPFix_fulSP }, + { kPFix_SP, kUppercaseFirst, kPFix_DOTSP }, + { kPFix_EMPTY, kIdentity, kPFix_iveSP }, + { kPFix_EMPTY, kIdentity, kPFix_lessSP }, + { kPFix_EMPTY, kUppercaseAll, kPFix_SQUOT }, + { kPFix_EMPTY, kIdentity, kPFix_estSP }, + { kPFix_SP, kUppercaseFirst, kPFix_DOT }, + { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOTGT }, + { kPFix_SP, kIdentity, kPFix_EQSQUOT }, + { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMA }, + { kPFix_EMPTY, kIdentity, kPFix_izeSP }, + { kPFix_EMPTY, kUppercaseAll, kPFix_DOT }, + { kPFix_NBSP, kIdentity, kPFix_EMPTY }, + { kPFix_SP, kIdentity, kPFix_COMMA }, + { kPFix_EMPTY, kUppercaseFirst, kPFix_EQDQUOT }, + { kPFix_EMPTY, kUppercaseAll, kPFix_EQDQUOT }, + { kPFix_EMPTY, kIdentity, kPFix_ousSP }, + { kPFix_EMPTY, kUppercaseAll, kPFix_COMMASP }, + { kPFix_EMPTY, kUppercaseFirst, kPFix_EQSQUOT }, + { kPFix_SP, kUppercaseFirst, kPFix_COMMA }, + { kPFix_SP, kUppercaseAll, kPFix_EQDQUOT }, + { kPFix_SP, kUppercaseAll, kPFix_COMMASP }, + { kPFix_EMPTY, kUppercaseAll, kPFix_COMMA }, + { kPFix_EMPTY, kUppercaseAll, kPFix_OPEN }, + { kPFix_EMPTY, kUppercaseAll, kPFix_DOTSP }, + { kPFix_SP, kUppercaseAll, kPFix_DOT }, + { kPFix_EMPTY, kUppercaseAll, kPFix_EQSQUOT }, + { kPFix_SP, kUppercaseAll, kPFix_DOTSP }, + { kPFix_SP, kUppercaseFirst, kPFix_EQDQUOT }, + { kPFix_SP, kUppercaseAll, kPFix_EQSQUOT }, + { kPFix_SP, kUppercaseFirst, kPFix_EQSQUOT }, +}; + +static const int kNumTransforms = sizeof(kTransforms) / sizeof(kTransforms[0]); + +static int ToUpperCase(uint8_t* p) { + if (p[0] < 0xc0) { + if (p[0] >= 'a' && p[0] <= 'z') { + p[0] ^= 32; + } + return 1; + } + /* An overly simplified uppercasing model for utf-8. */ + if (p[0] < 0xe0) { + p[1] ^= 32; + return 2; + } + /* An arbitrary transform for three byte characters. */ + p[2] ^= 5; + return 3; +} + +static BROTLI_NOINLINE int TransformDictionaryWord( + uint8_t* dst, const uint8_t* word, int len, int transform) { + int idx = 0; + { + const char* prefix = &kPrefixSuffix[kTransforms[transform].prefix_id]; + while (*prefix) { dst[idx++] = (uint8_t)*prefix++; } + } + { + const int t = kTransforms[transform].transform; + int i = 0; + int skip = t - (kOmitFirst1 - 1); + if (skip > 0) { + word += skip; + len -= skip; + } else if (t <= kOmitLast9) { + len -= t; + } + while (i < len) { dst[idx++] = word[i++]; } + if (t == kUppercaseFirst) { + ToUpperCase(&dst[idx - len]); + } else if (t == kUppercaseAll) { + uint8_t* uppercase = &dst[idx - len]; + while (len > 0) { + int step = ToUpperCase(uppercase); + uppercase += step; + len -= step; + } + } + } + { + const char* suffix = &kPrefixSuffix[kTransforms[transform].suffix_id]; + while (*suffix) { dst[idx++] = (uint8_t)*suffix++; } + return idx; + } +} + +#if defined(__cplusplus) || defined(c_plusplus) +} /* extern "C" */ +#endif + +#endif /* BROTLI_DEC_TRANSFORM_H_ */ |