summary refs log tree commit diff stats
path: root/modules/brotli/dec/transform.h
diff options
context:
space:
mode:
Diffstat (limited to 'modules/brotli/dec/transform.h')
-rw-r--r-- modules/brotli/dec/transform.h 300
1 files changed, 300 insertions, 0 deletions
diff --git a/modules/brotli/dec/transform.h b/modules/brotli/dec/transform.h
new file mode 100644
index 000000000..8c08f3fc0
--- /dev/null
+++ b/modules/brotli/dec/transform.h
@@ -0,0 +1,300 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Transformations on dictionary words. */
+
+#ifndef BROTLI_DEC_TRANSFORM_H_
+#define BROTLI_DEC_TRANSFORM_H_
+
+#include "./port.h"
+#include "./types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+enum WordTransformType { /* Word operations named by Transform.transform. */
+ kIdentity = 0, /* word is copied unchanged */
+ kOmitLast1 = 1, /* kOmitLastN == N: TransformDictionaryWord does len -= N */
+ kOmitLast2 = 2,
+ kOmitLast3 = 3,
+ kOmitLast4 = 4,
+ kOmitLast5 = 5,
+ kOmitLast6 = 6,
+ kOmitLast7 = 7,
+ kOmitLast8 = 8,
+ kOmitLast9 = 9, /* last value handled by the "t <= kOmitLast9" branch */
+ kUppercaseFirst = 10, /* ToUpperCase applied once, at the start of the copied word */
+ kUppercaseAll = 11, /* ToUpperCase applied repeatedly across the copied word */
+ kOmitFirst1 = 12, /* kOmitFirstN == 11 + N: the first N bytes of the word are skipped */
+ kOmitFirst2 = 13,
+ kOmitFirst3 = 14,
+ kOmitFirst4 = 15,
+ kOmitFirst5 = 16,
+ kOmitFirst6 = 17,
+ kOmitFirst7 = 18,
+ kOmitFirst8 = 19,
+ kOmitFirst9 = 20
+};
+
+typedef struct { /* One row of kTransforms: prefix, word operation, suffix. */
+ const uint8_t prefix_id; /* offset in kPrefixSuffix of the NUL-terminated prefix */
+ const uint8_t transform; /* a WordTransformType value */
+ const uint8_t suffix_id; /* offset in kPrefixSuffix of the NUL-terminated suffix */
+} Transform;
+
+static const char kPrefixSuffix[208] = /* NUL-separated affix strings; addressed by the kPFix_* byte offsets */
+ "\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0"
+ " for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0"
+ " is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. This \0"
+ " not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous ";
+
+enum { /* Byte offsets into kPrefixSuffix; each names the string starting there. */
+ /* EMPTY = ""
+ SP = " "
+ DQUOT = "\""
+ SQUOT = "'"
+ CLOSEBR = "]"
+ OPEN = "("
+ SLASH = "/"
+ NBSP = non-breaking space "\xc2\xa0"
+ */
+ kPFix_EMPTY = 0,
+ kPFix_SP = 1,
+ kPFix_COMMASP = 3,
+ kPFix_SPofSPtheSP = 6,
+ kPFix_SPtheSP = 9, /* " the " is the tail of " of the " stored at offset 6 */
+ kPFix_eSP = 12, /* "e " is likewise the tail of " of the " */
+ kPFix_SPofSP = 15,
+ kPFix_sSP = 20,
+ kPFix_DOT = 23,
+ kPFix_SPandSP = 25,
+ kPFix_SPinSP = 31,
+ kPFix_DQUOT = 36,
+ kPFix_SPtoSP = 38,
+ kPFix_DQUOTGT = 43,
+ kPFix_NEWLINE = 46,
+ kPFix_DOTSP = 48,
+ kPFix_CLOSEBR = 51,
+ kPFix_SPforSP = 53,
+ kPFix_SPaSP = 59,
+ kPFix_SPthatSP = 63,
+ kPFix_SQUOT = 70,
+ kPFix_SPwithSP = 72,
+ kPFix_SPfromSP = 79,
+ kPFix_SPbySP = 86,
+ kPFix_OPEN = 91,
+ kPFix_DOTSPTheSP = 93,
+ kPFix_SPonSP = 100,
+ kPFix_SPasSP = 105,
+ kPFix_SPisSP = 110,
+ kPFix_ingSP = 115,
+ kPFix_NEWLINETAB = 120,
+ kPFix_COLON = 123,
+ kPFix_edSP = 125,
+ kPFix_EQDQUOT = 129,
+ kPFix_SPatSP = 132,
+ kPFix_lySP = 137,
+ kPFix_COMMA = 141,
+ kPFix_EQSQUOT = 143,
+ kPFix_DOTcomSLASH = 146,
+ kPFix_DOTSPThisSP = 152,
+ kPFix_SPnotSP = 160,
+ kPFix_erSP = 166,
+ kPFix_alSP = 170,
+ kPFix_fulSP = 174,
+ kPFix_iveSP = 179,
+ kPFix_lessSP = 184,
+ kPFix_estSP = 190,
+ kPFix_izeSP = 195,
+ kPFix_NBSP = 200,
+ kPFix_ousSP = 203 /* last entry: "ous " plus the literal's implicit NUL ends the 208-byte array */
+};
+
+static const Transform kTransforms[] = { /* rows are { prefix offset, word operation, suffix offset } */
+ { kPFix_EMPTY, kIdentity, kPFix_EMPTY },
+ { kPFix_EMPTY, kIdentity, kPFix_SP },
+ { kPFix_SP, kIdentity, kPFix_SP },
+ { kPFix_EMPTY, kOmitFirst1, kPFix_EMPTY },
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_SP },
+ { kPFix_EMPTY, kIdentity, kPFix_SPtheSP },
+ { kPFix_SP, kIdentity, kPFix_EMPTY },
+ { kPFix_sSP, kIdentity, kPFix_SP },
+ { kPFix_EMPTY, kIdentity, kPFix_SPofSP },
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_EMPTY },
+ { kPFix_EMPTY, kIdentity, kPFix_SPandSP },
+ { kPFix_EMPTY, kOmitFirst2, kPFix_EMPTY },
+ { kPFix_EMPTY, kOmitLast1, kPFix_EMPTY },
+ { kPFix_COMMASP, kIdentity, kPFix_SP },
+ { kPFix_EMPTY, kIdentity, kPFix_COMMASP },
+ { kPFix_SP, kUppercaseFirst, kPFix_SP },
+ { kPFix_EMPTY, kIdentity, kPFix_SPinSP },
+ { kPFix_EMPTY, kIdentity, kPFix_SPtoSP },
+ { kPFix_eSP, kIdentity, kPFix_SP },
+ { kPFix_EMPTY, kIdentity, kPFix_DQUOT },
+ { kPFix_EMPTY, kIdentity, kPFix_DOT },
+ { kPFix_EMPTY, kIdentity, kPFix_DQUOTGT },
+ { kPFix_EMPTY, kIdentity, kPFix_NEWLINE },
+ { kPFix_EMPTY, kOmitLast3, kPFix_EMPTY },
+ { kPFix_EMPTY, kIdentity, kPFix_CLOSEBR },
+ { kPFix_EMPTY, kIdentity, kPFix_SPforSP },
+ { kPFix_EMPTY, kOmitFirst3, kPFix_EMPTY },
+ { kPFix_EMPTY, kOmitLast2, kPFix_EMPTY },
+ { kPFix_EMPTY, kIdentity, kPFix_SPaSP },
+ { kPFix_EMPTY, kIdentity, kPFix_SPthatSP },
+ { kPFix_SP, kUppercaseFirst, kPFix_EMPTY },
+ { kPFix_EMPTY, kIdentity, kPFix_DOTSP },
+ { kPFix_DOT, kIdentity, kPFix_EMPTY },
+ { kPFix_SP, kIdentity, kPFix_COMMASP },
+ { kPFix_EMPTY, kOmitFirst4, kPFix_EMPTY },
+ { kPFix_EMPTY, kIdentity, kPFix_SPwithSP },
+ { kPFix_EMPTY, kIdentity, kPFix_SQUOT },
+ { kPFix_EMPTY, kIdentity, kPFix_SPfromSP },
+ { kPFix_EMPTY, kIdentity, kPFix_SPbySP },
+ { kPFix_EMPTY, kOmitFirst5, kPFix_EMPTY },
+ { kPFix_EMPTY, kOmitFirst6, kPFix_EMPTY },
+ { kPFix_SPtheSP, kIdentity, kPFix_EMPTY },
+ { kPFix_EMPTY, kOmitLast4, kPFix_EMPTY },
+ { kPFix_EMPTY, kIdentity, kPFix_DOTSPTheSP },
+ { kPFix_EMPTY, kUppercaseAll, kPFix_EMPTY },
+ { kPFix_EMPTY, kIdentity, kPFix_SPonSP },
+ { kPFix_EMPTY, kIdentity, kPFix_SPasSP },
+ { kPFix_EMPTY, kIdentity, kPFix_SPisSP },
+ { kPFix_EMPTY, kOmitLast7, kPFix_EMPTY },
+ { kPFix_EMPTY, kOmitLast1, kPFix_ingSP },
+ { kPFix_EMPTY, kIdentity, kPFix_NEWLINETAB },
+ { kPFix_EMPTY, kIdentity, kPFix_COLON },
+ { kPFix_SP, kIdentity, kPFix_DOTSP },
+ { kPFix_EMPTY, kIdentity, kPFix_edSP },
+ { kPFix_EMPTY, kOmitFirst9, kPFix_EMPTY },
+ { kPFix_EMPTY, kOmitFirst7, kPFix_EMPTY },
+ { kPFix_EMPTY, kOmitLast6, kPFix_EMPTY },
+ { kPFix_EMPTY, kIdentity, kPFix_OPEN },
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMASP },
+ { kPFix_EMPTY, kOmitLast8, kPFix_EMPTY },
+ { kPFix_EMPTY, kIdentity, kPFix_SPatSP },
+ { kPFix_EMPTY, kIdentity, kPFix_lySP },
+ { kPFix_SPtheSP, kIdentity, kPFix_SPofSP },
+ { kPFix_EMPTY, kOmitLast5, kPFix_EMPTY },
+ { kPFix_EMPTY, kOmitLast9, kPFix_EMPTY },
+ { kPFix_SP, kUppercaseFirst, kPFix_COMMASP },
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOT },
+ { kPFix_DOT, kIdentity, kPFix_OPEN },
+ { kPFix_EMPTY, kUppercaseAll, kPFix_SP },
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOTGT },
+ { kPFix_EMPTY, kIdentity, kPFix_EQDQUOT },
+ { kPFix_SP, kIdentity, kPFix_DOT },
+ { kPFix_DOTcomSLASH, kIdentity, kPFix_EMPTY },
+ { kPFix_SPtheSP, kIdentity, kPFix_SPofSPtheSP },
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_SQUOT },
+ { kPFix_EMPTY, kIdentity, kPFix_DOTSPThisSP },
+ { kPFix_EMPTY, kIdentity, kPFix_COMMA },
+ { kPFix_DOT, kIdentity, kPFix_SP },
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_OPEN },
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_DOT },
+ { kPFix_EMPTY, kIdentity, kPFix_SPnotSP },
+ { kPFix_SP, kIdentity, kPFix_EQDQUOT },
+ { kPFix_EMPTY, kIdentity, kPFix_erSP },
+ { kPFix_SP, kUppercaseAll, kPFix_SP },
+ { kPFix_EMPTY, kIdentity, kPFix_alSP },
+ { kPFix_SP, kUppercaseAll, kPFix_EMPTY },
+ { kPFix_EMPTY, kIdentity, kPFix_EQSQUOT },
+ { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOT },
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_DOTSP },
+ { kPFix_SP, kIdentity, kPFix_OPEN },
+ { kPFix_EMPTY, kIdentity, kPFix_fulSP },
+ { kPFix_SP, kUppercaseFirst, kPFix_DOTSP },
+ { kPFix_EMPTY, kIdentity, kPFix_iveSP },
+ { kPFix_EMPTY, kIdentity, kPFix_lessSP },
+ { kPFix_EMPTY, kUppercaseAll, kPFix_SQUOT },
+ { kPFix_EMPTY, kIdentity, kPFix_estSP },
+ { kPFix_SP, kUppercaseFirst, kPFix_DOT },
+ { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOTGT },
+ { kPFix_SP, kIdentity, kPFix_EQSQUOT },
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMA },
+ { kPFix_EMPTY, kIdentity, kPFix_izeSP },
+ { kPFix_EMPTY, kUppercaseAll, kPFix_DOT },
+ { kPFix_NBSP, kIdentity, kPFix_EMPTY },
+ { kPFix_SP, kIdentity, kPFix_COMMA },
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_EQDQUOT },
+ { kPFix_EMPTY, kUppercaseAll, kPFix_EQDQUOT },
+ { kPFix_EMPTY, kIdentity, kPFix_ousSP },
+ { kPFix_EMPTY, kUppercaseAll, kPFix_COMMASP },
+ { kPFix_EMPTY, kUppercaseFirst, kPFix_EQSQUOT },
+ { kPFix_SP, kUppercaseFirst, kPFix_COMMA },
+ { kPFix_SP, kUppercaseAll, kPFix_EQDQUOT },
+ { kPFix_SP, kUppercaseAll, kPFix_COMMASP },
+ { kPFix_EMPTY, kUppercaseAll, kPFix_COMMA },
+ { kPFix_EMPTY, kUppercaseAll, kPFix_OPEN },
+ { kPFix_EMPTY, kUppercaseAll, kPFix_DOTSP },
+ { kPFix_SP, kUppercaseAll, kPFix_DOT },
+ { kPFix_EMPTY, kUppercaseAll, kPFix_EQSQUOT },
+ { kPFix_SP, kUppercaseAll, kPFix_DOTSP },
+ { kPFix_SP, kUppercaseFirst, kPFix_EQDQUOT },
+ { kPFix_SP, kUppercaseAll, kPFix_EQSQUOT },
+ { kPFix_SP, kUppercaseFirst, kPFix_EQSQUOT },
+}; /* row position is the value passed as "transform" to TransformDictionaryWord */
+
+/* Row count of kTransforms, derived from the array so the two cannot drift. */
+static const int kNumTransforms =
+    (int)(sizeof(kTransforms) / sizeof(kTransforms[0]));
+
+/* Case-flips, in place, the character that starts at p[0], using a coarse
+   UTF-8 model keyed on the lead byte alone. Returns the number of bytes the
+   character is taken to occupy (1, 2 or 3) so the caller can step past it.
+   For lead bytes >= 0xc0 the byte at p[1] (or p[2]) is modified, so those
+   bytes must be writable by the caller. */
+static int ToUpperCase(uint8_t* p) {
+  const uint8_t lead = p[0];
+  if (lead >= 0xe0) {
+    /* Taken as a three-byte character: arbitrary tweak of its last byte. */
+    p[2] ^= 5;
+    return 3;
+  }
+  if (lead >= 0xc0) {
+    /* Taken as a two-byte character: overly simplified utf-8 uppercasing. */
+    p[1] ^= 32;
+    return 2;
+  }
+  /* Single byte: only ASCII lowercase letters are changed. */
+  if (lead >= 'a' && lead <= 'z') {
+    p[0] = (uint8_t)(lead ^ 32);
+  }
+  return 1;
+}
+
+/* Applies ToUpperCase at p only when the byte it would modify lies within the
+   avail (>= 1) bytes that belong to the word. ToUpperCase changes the last
+   byte of the 1-, 2- or 3-byte sequence selected by the lead byte, so the
+   flip is skipped when that sequence would run past the word. Returns the
+   sequence length, i.e. the same step ToUpperCase reports. */
+static int ToUpperCaseWithin(uint8_t* p, int avail) {
+  const int need = (p[0] < 0xc0) ? 1 : ((p[0] < 0xe0) ? 2 : 3);
+  if (need <= avail) {
+    ToUpperCase(p);
+  }
+  return need;
+}
+
+/* Writes prefix + transformed word + suffix to dst, as described by row
+   `transform` of kTransforms, and returns the number of bytes written.
+
+   dst:       output buffer. No size is passed in, so the caller must provide
+              room for the prefix, the (possibly shortened) word and the
+              suffix.
+   word, len: source word. kOmitFirstN skips its first N bytes, kOmitLastN
+              drops its last N bytes; if that leaves a non-positive length
+              no word bytes are copied.
+   transform: index into kTransforms. It is not range-checked here.
+
+   The uppercase transforms never touch bytes of dst beyond the copied word:
+   a lead byte too close to the end of the word is left as is instead of
+   having ToUpperCase read and modify bytes past the output. */
+static BROTLI_NOINLINE int TransformDictionaryWord(
+    uint8_t* dst, const uint8_t* word, int len, int transform) {
+  const Transform* const entry = &kTransforms[transform];
+  const char* prefix = &kPrefixSuffix[entry->prefix_id];
+  const char* suffix = &kPrefixSuffix[entry->suffix_id];
+  const int t = entry->transform;
+  /* Positive only for kOmitFirst1..kOmitFirst9, where it equals N. */
+  const int skip = t - (kOmitFirst1 - 1);
+  int idx = 0;
+  int i;
+
+  while (*prefix) { dst[idx++] = (uint8_t)*prefix++; }
+
+  if (skip > 0) {
+    word += skip;
+    len -= skip;
+  } else if (t <= kOmitLast9) {
+    /* kIdentity is 0 and kOmitLastN is N, so this covers both. */
+    len -= t;
+  }
+  for (i = 0; i < len; ++i) { dst[idx++] = word[i]; }
+
+  if (len > 0) {
+    if (t == kUppercaseFirst) {
+      ToUpperCaseWithin(&dst[idx - len], len);
+    } else if (t == kUppercaseAll) {
+      uint8_t* cursor = &dst[idx - len];
+      int remaining = len;
+      while (remaining > 0) {
+        const int step = ToUpperCaseWithin(cursor, remaining);
+        cursor += step;
+        remaining -= step;
+      }
+    }
+  }
+
+  while (*suffix) { dst[idx++] = (uint8_t)*suffix++; }
+  return idx;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
+
+#endif /* BROTLI_DEC_TRANSFORM_H_ */