summaryrefslogtreecommitdiffstats
path: root/mailnews/extensions/fts3/src
diff options
context:
space:
mode:
Diffstat (limited to 'mailnews/extensions/fts3/src')
-rw-r--r--mailnews/extensions/fts3/src/Normalize.c1929
-rw-r--r--mailnews/extensions/fts3/src/README.mozilla3
-rw-r--r--mailnews/extensions/fts3/src/fts3_porter.c1150
-rw-r--r--mailnews/extensions/fts3/src/fts3_tokenizer.h148
-rw-r--r--mailnews/extensions/fts3/src/moz.build18
-rw-r--r--mailnews/extensions/fts3/src/nsFts3Tokenizer.cpp72
-rw-r--r--mailnews/extensions/fts3/src/nsFts3Tokenizer.h26
-rw-r--r--mailnews/extensions/fts3/src/nsFts3TokenizerCID.h16
-rw-r--r--mailnews/extensions/fts3/src/nsGlodaRankerFunction.cpp145
-rw-r--r--mailnews/extensions/fts3/src/nsGlodaRankerFunction.h25
10 files changed, 3532 insertions, 0 deletions
diff --git a/mailnews/extensions/fts3/src/Normalize.c b/mailnews/extensions/fts3/src/Normalize.c
new file mode 100644
index 000000000..92ac400e2
--- /dev/null
+++ b/mailnews/extensions/fts3/src/Normalize.c
@@ -0,0 +1,1929 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* THIS FILE IS GENERATED BY generate_table.py. DON'T EDIT THIS */
+
+static const unsigned short gNormalizeTable0040[] = {
+ /* U+0040 */
+ 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
+ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+ 0x0078, 0x0079, 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
+ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
+ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+ 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
+ };
+
+static const unsigned short gNormalizeTable0080[] = {
+ /* U+0080 */
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
+ 0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
+ 0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x0020, 0x00ae, 0x0020,
+ 0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7,
+ 0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf,
+ };
+
+static const unsigned short gNormalizeTable00c0[] = {
+ /* U+00c0 */
+ 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063,
+ 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
+ 0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00d7,
+ 0x00f8, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0073,
+ 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063,
+ 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
+ 0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7,
+ 0x00f8, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079,
+ };
+
+static const unsigned short gNormalizeTable0100[] = {
+ /* U+0100 */
+ 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0063, 0x0063,
+ 0x0063, 0x0063, 0x0063, 0x0063, 0x0063, 0x0063, 0x0064, 0x0064,
+ 0x0111, 0x0111, 0x0065, 0x0065, 0x0065, 0x0065, 0x0065, 0x0065,
+ 0x0065, 0x0065, 0x0065, 0x0065, 0x0067, 0x0067, 0x0067, 0x0067,
+ 0x0067, 0x0067, 0x0067, 0x0067, 0x0068, 0x0068, 0x0127, 0x0127,
+ 0x0069, 0x0069, 0x0069, 0x0069, 0x0069, 0x0069, 0x0069, 0x0069,
+ 0x0069, 0x0131, 0x0069, 0x0069, 0x006a, 0x006a, 0x006b, 0x006b,
+ 0x0138, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c,
+ };
+
+static const unsigned short gNormalizeTable0140[] = {
+ /* U+0140 */
+ 0x006c, 0x0142, 0x0142, 0x006e, 0x006e, 0x006e, 0x006e, 0x006e,
+ 0x006e, 0x02bc, 0x014b, 0x014b, 0x006f, 0x006f, 0x006f, 0x006f,
+ 0x006f, 0x006f, 0x0153, 0x0153, 0x0072, 0x0072, 0x0072, 0x0072,
+ 0x0072, 0x0072, 0x0073, 0x0073, 0x0073, 0x0073, 0x0073, 0x0073,
+ 0x0073, 0x0073, 0x0074, 0x0074, 0x0074, 0x0074, 0x0167, 0x0167,
+ 0x0075, 0x0075, 0x0075, 0x0075, 0x0075, 0x0075, 0x0075, 0x0075,
+ 0x0075, 0x0075, 0x0075, 0x0075, 0x0077, 0x0077, 0x0079, 0x0079,
+ 0x0079, 0x007a, 0x007a, 0x007a, 0x007a, 0x007a, 0x007a, 0x0073,
+ };
+
+static const unsigned short gNormalizeTable0180[] = {
+ /* U+0180 */
+ 0x0180, 0x0253, 0x0183, 0x0183, 0x0185, 0x0185, 0x0254, 0x0188,
+ 0x0188, 0x0256, 0x0257, 0x018c, 0x018c, 0x018d, 0x01dd, 0x0259,
+ 0x025b, 0x0192, 0x0192, 0x0260, 0x0263, 0x0195, 0x0269, 0x0268,
+ 0x0199, 0x0199, 0x019a, 0x019b, 0x026f, 0x0272, 0x019e, 0x0275,
+ 0x006f, 0x006f, 0x01a3, 0x01a3, 0x01a5, 0x01a5, 0x0280, 0x01a8,
+ 0x01a8, 0x0283, 0x01aa, 0x01ab, 0x01ad, 0x01ad, 0x0288, 0x0075,
+ 0x0075, 0x028a, 0x028b, 0x01b4, 0x01b4, 0x01b6, 0x01b6, 0x0292,
+ 0x01b9, 0x01b9, 0x01ba, 0x01bb, 0x01bd, 0x01bd, 0x01be, 0x01bf,
+ };
+
+static const unsigned short gNormalizeTable01c0[] = {
+ /* U+01c0 */
+ 0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0064, 0x0064, 0x0064, 0x006c,
+ 0x006c, 0x006c, 0x006e, 0x006e, 0x006e, 0x0061, 0x0061, 0x0069,
+ 0x0069, 0x006f, 0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0075,
+ 0x0075, 0x0075, 0x0075, 0x0075, 0x0075, 0x01dd, 0x0061, 0x0061,
+ 0x0061, 0x0061, 0x00e6, 0x00e6, 0x01e5, 0x01e5, 0x0067, 0x0067,
+ 0x006b, 0x006b, 0x006f, 0x006f, 0x006f, 0x006f, 0x0292, 0x0292,
+ 0x006a, 0x0064, 0x0064, 0x0064, 0x0067, 0x0067, 0x0195, 0x01bf,
+ 0x006e, 0x006e, 0x0061, 0x0061, 0x00e6, 0x00e6, 0x00f8, 0x00f8,
+ };
+
+static const unsigned short gNormalizeTable0200[] = {
+ /* U+0200 */
+ 0x0061, 0x0061, 0x0061, 0x0061, 0x0065, 0x0065, 0x0065, 0x0065,
+ 0x0069, 0x0069, 0x0069, 0x0069, 0x006f, 0x006f, 0x006f, 0x006f,
+ 0x0072, 0x0072, 0x0072, 0x0072, 0x0075, 0x0075, 0x0075, 0x0075,
+ 0x0073, 0x0073, 0x0074, 0x0074, 0x021d, 0x021d, 0x0068, 0x0068,
+ 0x019e, 0x0221, 0x0223, 0x0223, 0x0225, 0x0225, 0x0061, 0x0061,
+ 0x0065, 0x0065, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f,
+ 0x006f, 0x006f, 0x0079, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237,
+ 0x0238, 0x0239, 0x2c65, 0x023c, 0x023c, 0x019a, 0x2c66, 0x023f,
+ };
+
+static const unsigned short gNormalizeTable0240[] = {
+ /* U+0240 */
+ 0x0240, 0x0242, 0x0242, 0x0180, 0x0289, 0x028c, 0x0247, 0x0247,
+ 0x0249, 0x0249, 0x024b, 0x024b, 0x024d, 0x024d, 0x024f, 0x024f,
+ 0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257,
+ 0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f,
+ 0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267,
+ 0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f,
+ 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277,
+ 0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f,
+ };
+
+static const unsigned short gNormalizeTable0280[] = {
+ /* U+0280 */
+ 0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287,
+ 0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f,
+ 0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297,
+ 0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f,
+ 0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7,
+ 0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af,
+ 0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077,
+ 0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf,
+ };
+
+static const unsigned short gNormalizeTable02c0[] = {
+ /* U+02c0 */
+ 0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7,
+ 0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf,
+ 0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7,
+ 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df,
+ 0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7,
+ 0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef,
+ 0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7,
+ 0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff,
+ };
+
+static const unsigned short gNormalizeTable0340[] = {
+ /* U+0340 */
+ 0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x03b9, 0x0346, 0x0347,
+ 0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x0020,
+ 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357,
+ 0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f,
+ 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367,
+ 0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f,
+ 0x0371, 0x0371, 0x0373, 0x0373, 0x02b9, 0x0375, 0x0377, 0x0377,
+ 0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f,
+ };
+
+static const unsigned short gNormalizeTable0380[] = {
+ /* U+0380 */
+ 0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x0020, 0x03b1, 0x00b7,
+ 0x03b5, 0x03b7, 0x03b9, 0x038b, 0x03bf, 0x038d, 0x03c5, 0x03c9,
+ 0x03b9, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
+ 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
+ 0x03c0, 0x03c1, 0x03a2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
+ 0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03b1, 0x03b5, 0x03b7, 0x03b9,
+ 0x03c5, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
+ 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
+ };
+
+static const unsigned short gNormalizeTable03c0[] = {
+ /* U+03c0 */
+ 0x03c0, 0x03c1, 0x03c3, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
+ 0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03d7,
+ 0x03b2, 0x03b8, 0x03c5, 0x03c5, 0x03c5, 0x03c6, 0x03c0, 0x03d7,
+ 0x03d9, 0x03d9, 0x03db, 0x03db, 0x03dd, 0x03dd, 0x03df, 0x03df,
+ 0x03e1, 0x03e1, 0x03e3, 0x03e3, 0x03e5, 0x03e5, 0x03e7, 0x03e7,
+ 0x03e9, 0x03e9, 0x03eb, 0x03eb, 0x03ed, 0x03ed, 0x03ef, 0x03ef,
+ 0x03ba, 0x03c1, 0x03c3, 0x03f3, 0x03b8, 0x03b5, 0x03f6, 0x03f8,
+ 0x03f8, 0x03c3, 0x03fb, 0x03fb, 0x03fc, 0x037b, 0x037c, 0x037d,
+ };
+
+static const unsigned short gNormalizeTable0400[] = {
+ /* U+0400 */
+ 0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
+ 0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f,
+ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
+ 0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
+ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
+ 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
+ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
+ 0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
+ };
+
+static const unsigned short gNormalizeTable0440[] = {
+ /* U+0440 */
+ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
+ 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
+ 0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
+ 0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f,
+ 0x0461, 0x0461, 0x0463, 0x0463, 0x0465, 0x0465, 0x0467, 0x0467,
+ 0x0469, 0x0469, 0x046b, 0x046b, 0x046d, 0x046d, 0x046f, 0x046f,
+ 0x0471, 0x0471, 0x0473, 0x0473, 0x0475, 0x0475, 0x0475, 0x0475,
+ 0x0479, 0x0479, 0x047b, 0x047b, 0x047d, 0x047d, 0x047f, 0x047f,
+ };
+
+static const unsigned short gNormalizeTable0480[] = {
+ /* U+0480 */
+ 0x0481, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487,
+ 0x0488, 0x0489, 0x048b, 0x048b, 0x048d, 0x048d, 0x048f, 0x048f,
+ 0x0491, 0x0491, 0x0493, 0x0493, 0x0495, 0x0495, 0x0497, 0x0497,
+ 0x0499, 0x0499, 0x049b, 0x049b, 0x049d, 0x049d, 0x049f, 0x049f,
+ 0x04a1, 0x04a1, 0x04a3, 0x04a3, 0x04a5, 0x04a5, 0x04a7, 0x04a7,
+ 0x04a9, 0x04a9, 0x04ab, 0x04ab, 0x04ad, 0x04ad, 0x04af, 0x04af,
+ 0x04b1, 0x04b1, 0x04b3, 0x04b3, 0x04b5, 0x04b5, 0x04b7, 0x04b7,
+ 0x04b9, 0x04b9, 0x04bb, 0x04bb, 0x04bd, 0x04bd, 0x04bf, 0x04bf,
+ };
+
+static const unsigned short gNormalizeTable04c0[] = {
+ /* U+04c0 */
+ 0x04cf, 0x0436, 0x0436, 0x04c4, 0x04c4, 0x04c6, 0x04c6, 0x04c8,
+ 0x04c8, 0x04ca, 0x04ca, 0x04cc, 0x04cc, 0x04ce, 0x04ce, 0x04cf,
+ 0x0430, 0x0430, 0x0430, 0x0430, 0x04d5, 0x04d5, 0x0435, 0x0435,
+ 0x04d9, 0x04d9, 0x04d9, 0x04d9, 0x0436, 0x0436, 0x0437, 0x0437,
+ 0x04e1, 0x04e1, 0x0438, 0x0438, 0x0438, 0x0438, 0x043e, 0x043e,
+ 0x04e9, 0x04e9, 0x04e9, 0x04e9, 0x044d, 0x044d, 0x0443, 0x0443,
+ 0x0443, 0x0443, 0x0443, 0x0443, 0x0447, 0x0447, 0x04f7, 0x04f7,
+ 0x044b, 0x044b, 0x04fb, 0x04fb, 0x04fd, 0x04fd, 0x04ff, 0x04ff,
+ };
+
+static const unsigned short gNormalizeTable0500[] = {
+ /* U+0500 */
+ 0x0501, 0x0501, 0x0503, 0x0503, 0x0505, 0x0505, 0x0507, 0x0507,
+ 0x0509, 0x0509, 0x050b, 0x050b, 0x050d, 0x050d, 0x050f, 0x050f,
+ 0x0511, 0x0511, 0x0513, 0x0513, 0x0515, 0x0515, 0x0517, 0x0517,
+ 0x0519, 0x0519, 0x051b, 0x051b, 0x051d, 0x051d, 0x051f, 0x051f,
+ 0x0521, 0x0521, 0x0523, 0x0523, 0x0525, 0x0525, 0x0526, 0x0527,
+ 0x0528, 0x0529, 0x052a, 0x052b, 0x052c, 0x052d, 0x052e, 0x052f,
+ 0x0530, 0x0561, 0x0562, 0x0563, 0x0564, 0x0565, 0x0566, 0x0567,
+ 0x0568, 0x0569, 0x056a, 0x056b, 0x056c, 0x056d, 0x056e, 0x056f,
+ };
+
+static const unsigned short gNormalizeTable0540[] = {
+ /* U+0540 */
+ 0x0570, 0x0571, 0x0572, 0x0573, 0x0574, 0x0575, 0x0576, 0x0577,
+ 0x0578, 0x0579, 0x057a, 0x057b, 0x057c, 0x057d, 0x057e, 0x057f,
+ 0x0580, 0x0581, 0x0582, 0x0583, 0x0584, 0x0585, 0x0586, 0x0557,
+ 0x0558, 0x0559, 0x055a, 0x055b, 0x055c, 0x055d, 0x055e, 0x055f,
+ 0x0560, 0x0561, 0x0562, 0x0563, 0x0564, 0x0565, 0x0566, 0x0567,
+ 0x0568, 0x0569, 0x056a, 0x056b, 0x056c, 0x056d, 0x056e, 0x056f,
+ 0x0570, 0x0571, 0x0572, 0x0573, 0x0574, 0x0575, 0x0576, 0x0577,
+ 0x0578, 0x0579, 0x057a, 0x057b, 0x057c, 0x057d, 0x057e, 0x057f,
+ };
+
+static const unsigned short gNormalizeTable0580[] = {
+ /* U+0580 */
+ 0x0580, 0x0581, 0x0582, 0x0583, 0x0584, 0x0585, 0x0586, 0x0565,
+ 0x0588, 0x0589, 0x058a, 0x058b, 0x058c, 0x058d, 0x058e, 0x058f,
+ 0x0590, 0x0591, 0x0592, 0x0593, 0x0594, 0x0595, 0x0596, 0x0597,
+ 0x0598, 0x0599, 0x059a, 0x059b, 0x059c, 0x059d, 0x059e, 0x059f,
+ 0x05a0, 0x05a1, 0x05a2, 0x05a3, 0x05a4, 0x05a5, 0x05a6, 0x05a7,
+ 0x05a8, 0x05a9, 0x05aa, 0x05ab, 0x05ac, 0x05ad, 0x05ae, 0x05af,
+ 0x05b0, 0x05b1, 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7,
+ 0x05b8, 0x05b9, 0x05ba, 0x05bb, 0x05bc, 0x05bd, 0x05be, 0x05bf,
+ };
+
+static const unsigned short gNormalizeTable0600[] = {
+ /* U+0600 */
+ 0x0600, 0x0601, 0x0602, 0x0603, 0x0604, 0x0605, 0x0606, 0x0607,
+ 0x0608, 0x0609, 0x060a, 0x060b, 0x060c, 0x060d, 0x060e, 0x060f,
+ 0x0610, 0x0611, 0x0612, 0x0613, 0x0614, 0x0615, 0x0616, 0x0617,
+ 0x0618, 0x0619, 0x061a, 0x061b, 0x061c, 0x061d, 0x061e, 0x061f,
+ 0x0620, 0x0621, 0x0627, 0x0627, 0x0648, 0x0627, 0x064a, 0x0627,
+ 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
+ 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
+ 0x0638, 0x0639, 0x063a, 0x063b, 0x063c, 0x063d, 0x063e, 0x063f,
+ };
+
+static const unsigned short gNormalizeTable0640[] = {
+ /* U+0640 */
+ 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
+ 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
+ 0x0650, 0x0651, 0x0652, 0x0653, 0x0654, 0x0655, 0x0656, 0x0657,
+ 0x0658, 0x0659, 0x065a, 0x065b, 0x065c, 0x065d, 0x065e, 0x065f,
+ 0x0660, 0x0661, 0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667,
+ 0x0668, 0x0669, 0x066a, 0x066b, 0x066c, 0x066d, 0x066e, 0x066f,
+ 0x0670, 0x0671, 0x0672, 0x0673, 0x0674, 0x0627, 0x0648, 0x06c7,
+ 0x064a, 0x0679, 0x067a, 0x067b, 0x067c, 0x067d, 0x067e, 0x067f,
+ };
+
+static const unsigned short gNormalizeTable06c0[] = {
+ /* U+06c0 */
+ 0x06d5, 0x06c1, 0x06c1, 0x06c3, 0x06c4, 0x06c5, 0x06c6, 0x06c7,
+ 0x06c8, 0x06c9, 0x06ca, 0x06cb, 0x06cc, 0x06cd, 0x06ce, 0x06cf,
+ 0x06d0, 0x06d1, 0x06d2, 0x06d2, 0x06d4, 0x06d5, 0x06d6, 0x06d7,
+ 0x06d8, 0x06d9, 0x06da, 0x06db, 0x06dc, 0x06dd, 0x06de, 0x06df,
+ 0x06e0, 0x06e1, 0x06e2, 0x06e3, 0x06e4, 0x06e5, 0x06e6, 0x06e7,
+ 0x06e8, 0x06e9, 0x06ea, 0x06eb, 0x06ec, 0x06ed, 0x06ee, 0x06ef,
+ 0x06f0, 0x06f1, 0x06f2, 0x06f3, 0x06f4, 0x06f5, 0x06f6, 0x06f7,
+ 0x06f8, 0x06f9, 0x06fa, 0x06fb, 0x06fc, 0x06fd, 0x06fe, 0x06ff,
+ };
+
+static const unsigned short gNormalizeTable0900[] = {
+ /* U+0900 */
+ 0x0900, 0x0901, 0x0902, 0x0903, 0x0904, 0x0905, 0x0906, 0x0907,
+ 0x0908, 0x0909, 0x090a, 0x090b, 0x090c, 0x090d, 0x090e, 0x090f,
+ 0x0910, 0x0911, 0x0912, 0x0913, 0x0914, 0x0915, 0x0916, 0x0917,
+ 0x0918, 0x0919, 0x091a, 0x091b, 0x091c, 0x091d, 0x091e, 0x091f,
+ 0x0920, 0x0921, 0x0922, 0x0923, 0x0924, 0x0925, 0x0926, 0x0927,
+ 0x0928, 0x0928, 0x092a, 0x092b, 0x092c, 0x092d, 0x092e, 0x092f,
+ 0x0930, 0x0930, 0x0932, 0x0933, 0x0933, 0x0935, 0x0936, 0x0937,
+ 0x0938, 0x0939, 0x093a, 0x093b, 0x093c, 0x093d, 0x093e, 0x093f,
+ };
+
+static const unsigned short gNormalizeTable0940[] = {
+ /* U+0940 */
+ 0x0940, 0x0941, 0x0942, 0x0943, 0x0944, 0x0945, 0x0946, 0x0947,
+ 0x0948, 0x0949, 0x094a, 0x094b, 0x094c, 0x094d, 0x094e, 0x094f,
+ 0x0950, 0x0951, 0x0952, 0x0953, 0x0954, 0x0955, 0x0956, 0x0957,
+ 0x0915, 0x0916, 0x0917, 0x091c, 0x0921, 0x0922, 0x092b, 0x092f,
+ 0x0960, 0x0961, 0x0962, 0x0963, 0x0964, 0x0965, 0x0966, 0x0967,
+ 0x0968, 0x0969, 0x096a, 0x096b, 0x096c, 0x096d, 0x096e, 0x096f,
+ 0x0970, 0x0971, 0x0972, 0x0973, 0x0974, 0x0975, 0x0976, 0x0977,
+ 0x0978, 0x0979, 0x097a, 0x097b, 0x097c, 0x097d, 0x097e, 0x097f,
+ };
+
+static const unsigned short gNormalizeTable09c0[] = {
+ /* U+09c0 */
+ 0x09c0, 0x09c1, 0x09c2, 0x09c3, 0x09c4, 0x09c5, 0x09c6, 0x09c7,
+ 0x09c8, 0x09c9, 0x09ca, 0x09c7, 0x09c7, 0x09cd, 0x09ce, 0x09cf,
+ 0x09d0, 0x09d1, 0x09d2, 0x09d3, 0x09d4, 0x09d5, 0x09d6, 0x09d7,
+ 0x09d8, 0x09d9, 0x09da, 0x09db, 0x09a1, 0x09a2, 0x09de, 0x09af,
+ 0x09e0, 0x09e1, 0x09e2, 0x09e3, 0x09e4, 0x09e5, 0x09e6, 0x09e7,
+ 0x09e8, 0x09e9, 0x09ea, 0x09eb, 0x09ec, 0x09ed, 0x09ee, 0x09ef,
+ 0x09f0, 0x09f1, 0x09f2, 0x09f3, 0x09f4, 0x09f5, 0x09f6, 0x09f7,
+ 0x09f8, 0x09f9, 0x09fa, 0x09fb, 0x09fc, 0x09fd, 0x09fe, 0x09ff,
+ };
+
+static const unsigned short gNormalizeTable0a00[] = {
+ /* U+0a00 */
+ 0x0a00, 0x0a01, 0x0a02, 0x0a03, 0x0a04, 0x0a05, 0x0a06, 0x0a07,
+ 0x0a08, 0x0a09, 0x0a0a, 0x0a0b, 0x0a0c, 0x0a0d, 0x0a0e, 0x0a0f,
+ 0x0a10, 0x0a11, 0x0a12, 0x0a13, 0x0a14, 0x0a15, 0x0a16, 0x0a17,
+ 0x0a18, 0x0a19, 0x0a1a, 0x0a1b, 0x0a1c, 0x0a1d, 0x0a1e, 0x0a1f,
+ 0x0a20, 0x0a21, 0x0a22, 0x0a23, 0x0a24, 0x0a25, 0x0a26, 0x0a27,
+ 0x0a28, 0x0a29, 0x0a2a, 0x0a2b, 0x0a2c, 0x0a2d, 0x0a2e, 0x0a2f,
+ 0x0a30, 0x0a31, 0x0a32, 0x0a32, 0x0a34, 0x0a35, 0x0a38, 0x0a37,
+ 0x0a38, 0x0a39, 0x0a3a, 0x0a3b, 0x0a3c, 0x0a3d, 0x0a3e, 0x0a3f,
+ };
+
+static const unsigned short gNormalizeTable0a40[] = {
+ /* U+0a40 */
+ 0x0a40, 0x0a41, 0x0a42, 0x0a43, 0x0a44, 0x0a45, 0x0a46, 0x0a47,
+ 0x0a48, 0x0a49, 0x0a4a, 0x0a4b, 0x0a4c, 0x0a4d, 0x0a4e, 0x0a4f,
+ 0x0a50, 0x0a51, 0x0a52, 0x0a53, 0x0a54, 0x0a55, 0x0a56, 0x0a57,
+ 0x0a58, 0x0a16, 0x0a17, 0x0a1c, 0x0a5c, 0x0a5d, 0x0a2b, 0x0a5f,
+ 0x0a60, 0x0a61, 0x0a62, 0x0a63, 0x0a64, 0x0a65, 0x0a66, 0x0a67,
+ 0x0a68, 0x0a69, 0x0a6a, 0x0a6b, 0x0a6c, 0x0a6d, 0x0a6e, 0x0a6f,
+ 0x0a70, 0x0a71, 0x0a72, 0x0a73, 0x0a74, 0x0a75, 0x0a76, 0x0a77,
+ 0x0a78, 0x0a79, 0x0a7a, 0x0a7b, 0x0a7c, 0x0a7d, 0x0a7e, 0x0a7f,
+ };
+
+static const unsigned short gNormalizeTable0b40[] = {
+ /* U+0b40 */
+ 0x0b40, 0x0b41, 0x0b42, 0x0b43, 0x0b44, 0x0b45, 0x0b46, 0x0b47,
+ 0x0b47, 0x0b49, 0x0b4a, 0x0b47, 0x0b47, 0x0b4d, 0x0b4e, 0x0b4f,
+ 0x0b50, 0x0b51, 0x0b52, 0x0b53, 0x0b54, 0x0b55, 0x0b56, 0x0b57,
+ 0x0b58, 0x0b59, 0x0b5a, 0x0b5b, 0x0b21, 0x0b22, 0x0b5e, 0x0b5f,
+ 0x0b60, 0x0b61, 0x0b62, 0x0b63, 0x0b64, 0x0b65, 0x0b66, 0x0b67,
+ 0x0b68, 0x0b69, 0x0b6a, 0x0b6b, 0x0b6c, 0x0b6d, 0x0b6e, 0x0b6f,
+ 0x0b70, 0x0b71, 0x0b72, 0x0b73, 0x0b74, 0x0b75, 0x0b76, 0x0b77,
+ 0x0b78, 0x0b79, 0x0b7a, 0x0b7b, 0x0b7c, 0x0b7d, 0x0b7e, 0x0b7f,
+ };
+
+static const unsigned short gNormalizeTable0b80[] = {
+ /* U+0b80 */
+ 0x0b80, 0x0b81, 0x0b82, 0x0b83, 0x0b84, 0x0b85, 0x0b86, 0x0b87,
+ 0x0b88, 0x0b89, 0x0b8a, 0x0b8b, 0x0b8c, 0x0b8d, 0x0b8e, 0x0b8f,
+ 0x0b90, 0x0b91, 0x0b92, 0x0b93, 0x0b92, 0x0b95, 0x0b96, 0x0b97,
+ 0x0b98, 0x0b99, 0x0b9a, 0x0b9b, 0x0b9c, 0x0b9d, 0x0b9e, 0x0b9f,
+ 0x0ba0, 0x0ba1, 0x0ba2, 0x0ba3, 0x0ba4, 0x0ba5, 0x0ba6, 0x0ba7,
+ 0x0ba8, 0x0ba9, 0x0baa, 0x0bab, 0x0bac, 0x0bad, 0x0bae, 0x0baf,
+ 0x0bb0, 0x0bb1, 0x0bb2, 0x0bb3, 0x0bb4, 0x0bb5, 0x0bb6, 0x0bb7,
+ 0x0bb8, 0x0bb9, 0x0bba, 0x0bbb, 0x0bbc, 0x0bbd, 0x0bbe, 0x0bbf,
+ };
+
+static const unsigned short gNormalizeTable0bc0[] = {
+ /* U+0bc0 */
+ 0x0bc0, 0x0bc1, 0x0bc2, 0x0bc3, 0x0bc4, 0x0bc5, 0x0bc6, 0x0bc7,
+ 0x0bc8, 0x0bc9, 0x0bc6, 0x0bc7, 0x0bc6, 0x0bcd, 0x0bce, 0x0bcf,
+ 0x0bd0, 0x0bd1, 0x0bd2, 0x0bd3, 0x0bd4, 0x0bd5, 0x0bd6, 0x0bd7,
+ 0x0bd8, 0x0bd9, 0x0bda, 0x0bdb, 0x0bdc, 0x0bdd, 0x0bde, 0x0bdf,
+ 0x0be0, 0x0be1, 0x0be2, 0x0be3, 0x0be4, 0x0be5, 0x0be6, 0x0be7,
+ 0x0be8, 0x0be9, 0x0bea, 0x0beb, 0x0bec, 0x0bed, 0x0bee, 0x0bef,
+ 0x0bf0, 0x0bf1, 0x0bf2, 0x0bf3, 0x0bf4, 0x0bf5, 0x0bf6, 0x0bf7,
+ 0x0bf8, 0x0bf9, 0x0bfa, 0x0bfb, 0x0bfc, 0x0bfd, 0x0bfe, 0x0bff,
+ };
+
+static const unsigned short gNormalizeTable0c40[] = {
+ /* U+0c40 */
+ 0x0c40, 0x0c41, 0x0c42, 0x0c43, 0x0c44, 0x0c45, 0x0c46, 0x0c47,
+ 0x0c46, 0x0c49, 0x0c4a, 0x0c4b, 0x0c4c, 0x0c4d, 0x0c4e, 0x0c4f,
+ 0x0c50, 0x0c51, 0x0c52, 0x0c53, 0x0c54, 0x0c55, 0x0c56, 0x0c57,
+ 0x0c58, 0x0c59, 0x0c5a, 0x0c5b, 0x0c5c, 0x0c5d, 0x0c5e, 0x0c5f,
+ 0x0c60, 0x0c61, 0x0c62, 0x0c63, 0x0c64, 0x0c65, 0x0c66, 0x0c67,
+ 0x0c68, 0x0c69, 0x0c6a, 0x0c6b, 0x0c6c, 0x0c6d, 0x0c6e, 0x0c6f,
+ 0x0c70, 0x0c71, 0x0c72, 0x0c73, 0x0c74, 0x0c75, 0x0c76, 0x0c77,
+ 0x0c78, 0x0c79, 0x0c7a, 0x0c7b, 0x0c7c, 0x0c7d, 0x0c7e, 0x0c7f,
+ };
+
+static const unsigned short gNormalizeTable0cc0[] = {
+ /* U+0cc0 */
+ 0x0cbf, 0x0cc1, 0x0cc2, 0x0cc3, 0x0cc4, 0x0cc5, 0x0cc6, 0x0cc6,
+ 0x0cc6, 0x0cc9, 0x0cc6, 0x0cc6, 0x0ccc, 0x0ccd, 0x0cce, 0x0ccf,
+ 0x0cd0, 0x0cd1, 0x0cd2, 0x0cd3, 0x0cd4, 0x0cd5, 0x0cd6, 0x0cd7,
+ 0x0cd8, 0x0cd9, 0x0cda, 0x0cdb, 0x0cdc, 0x0cdd, 0x0cde, 0x0cdf,
+ 0x0ce0, 0x0ce1, 0x0ce2, 0x0ce3, 0x0ce4, 0x0ce5, 0x0ce6, 0x0ce7,
+ 0x0ce8, 0x0ce9, 0x0cea, 0x0ceb, 0x0cec, 0x0ced, 0x0cee, 0x0cef,
+ 0x0cf0, 0x0cf1, 0x0cf2, 0x0cf3, 0x0cf4, 0x0cf5, 0x0cf6, 0x0cf7,
+ 0x0cf8, 0x0cf9, 0x0cfa, 0x0cfb, 0x0cfc, 0x0cfd, 0x0cfe, 0x0cff,
+ };
+
+static const unsigned short gNormalizeTable0d40[] = {
+ /* U+0d40 */
+ 0x0d40, 0x0d41, 0x0d42, 0x0d43, 0x0d44, 0x0d45, 0x0d46, 0x0d47,
+ 0x0d48, 0x0d49, 0x0d46, 0x0d47, 0x0d46, 0x0d4d, 0x0d4e, 0x0d4f,
+ 0x0d50, 0x0d51, 0x0d52, 0x0d53, 0x0d54, 0x0d55, 0x0d56, 0x0d57,
+ 0x0d58, 0x0d59, 0x0d5a, 0x0d5b, 0x0d5c, 0x0d5d, 0x0d5e, 0x0d5f,
+ 0x0d60, 0x0d61, 0x0d62, 0x0d63, 0x0d64, 0x0d65, 0x0d66, 0x0d67,
+ 0x0d68, 0x0d69, 0x0d6a, 0x0d6b, 0x0d6c, 0x0d6d, 0x0d6e, 0x0d6f,
+ 0x0d70, 0x0d71, 0x0d72, 0x0d73, 0x0d74, 0x0d75, 0x0d76, 0x0d77,
+ 0x0d78, 0x0d79, 0x0d7a, 0x0d7b, 0x0d7c, 0x0d7d, 0x0d7e, 0x0d7f,
+ };
+
+static const unsigned short gNormalizeTable0dc0[] = {
+ /* U+0dc0 */
+ 0x0dc0, 0x0dc1, 0x0dc2, 0x0dc3, 0x0dc4, 0x0dc5, 0x0dc6, 0x0dc7,
+ 0x0dc8, 0x0dc9, 0x0dca, 0x0dcb, 0x0dcc, 0x0dcd, 0x0dce, 0x0dcf,
+ 0x0dd0, 0x0dd1, 0x0dd2, 0x0dd3, 0x0dd4, 0x0dd5, 0x0dd6, 0x0dd7,
+ 0x0dd8, 0x0dd9, 0x0dd9, 0x0ddb, 0x0dd9, 0x0dd9, 0x0dd9, 0x0ddf,
+ 0x0de0, 0x0de1, 0x0de2, 0x0de3, 0x0de4, 0x0de5, 0x0de6, 0x0de7,
+ 0x0de8, 0x0de9, 0x0dea, 0x0deb, 0x0dec, 0x0ded, 0x0dee, 0x0def,
+ 0x0df0, 0x0df1, 0x0df2, 0x0df3, 0x0df4, 0x0df5, 0x0df6, 0x0df7,
+ 0x0df8, 0x0df9, 0x0dfa, 0x0dfb, 0x0dfc, 0x0dfd, 0x0dfe, 0x0dff,
+ };
+
+static const unsigned short gNormalizeTable0e00[] = {
+ /* U+0e00 */
+ 0x0e00, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
+ 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
+ 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
+ 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
+ 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
+ 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
+ 0x0e30, 0x0e31, 0x0e32, 0x0e4d, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
+ 0x0e38, 0x0e39, 0x0e3a, 0x0e3b, 0x0e3c, 0x0e3d, 0x0e3e, 0x0e3f,
+ };
+
+static const unsigned short gNormalizeTable0e80[] = {
+ /* U+0e80 */
+ 0x0e80, 0x0e81, 0x0e82, 0x0e83, 0x0e84, 0x0e85, 0x0e86, 0x0e87,
+ 0x0e88, 0x0e89, 0x0e8a, 0x0e8b, 0x0e8c, 0x0e8d, 0x0e8e, 0x0e8f,
+ 0x0e90, 0x0e91, 0x0e92, 0x0e93, 0x0e94, 0x0e95, 0x0e96, 0x0e97,
+ 0x0e98, 0x0e99, 0x0e9a, 0x0e9b, 0x0e9c, 0x0e9d, 0x0e9e, 0x0e9f,
+ 0x0ea0, 0x0ea1, 0x0ea2, 0x0ea3, 0x0ea4, 0x0ea5, 0x0ea6, 0x0ea7,
+ 0x0ea8, 0x0ea9, 0x0eaa, 0x0eab, 0x0eac, 0x0ead, 0x0eae, 0x0eaf,
+ 0x0eb0, 0x0eb1, 0x0eb2, 0x0ecd, 0x0eb4, 0x0eb5, 0x0eb6, 0x0eb7,
+ 0x0eb8, 0x0eb9, 0x0eba, 0x0ebb, 0x0ebc, 0x0ebd, 0x0ebe, 0x0ebf,
+ };
+
+static const unsigned short gNormalizeTable0ec0[] = {
+ /* U+0ec0 */
+ 0x0ec0, 0x0ec1, 0x0ec2, 0x0ec3, 0x0ec4, 0x0ec5, 0x0ec6, 0x0ec7,
+ 0x0ec8, 0x0ec9, 0x0eca, 0x0ecb, 0x0ecc, 0x0ecd, 0x0ece, 0x0ecf,
+ 0x0ed0, 0x0ed1, 0x0ed2, 0x0ed3, 0x0ed4, 0x0ed5, 0x0ed6, 0x0ed7,
+ 0x0ed8, 0x0ed9, 0x0eda, 0x0edb, 0x0eab, 0x0eab, 0x0ede, 0x0edf,
+ 0x0ee0, 0x0ee1, 0x0ee2, 0x0ee3, 0x0ee4, 0x0ee5, 0x0ee6, 0x0ee7,
+ 0x0ee8, 0x0ee9, 0x0eea, 0x0eeb, 0x0eec, 0x0eed, 0x0eee, 0x0eef,
+ 0x0ef0, 0x0ef1, 0x0ef2, 0x0ef3, 0x0ef4, 0x0ef5, 0x0ef6, 0x0ef7,
+ 0x0ef8, 0x0ef9, 0x0efa, 0x0efb, 0x0efc, 0x0efd, 0x0efe, 0x0eff,
+ };
+
+static const unsigned short gNormalizeTable0f00[] = {
+ /* U+0f00 */
+ 0x0f00, 0x0f01, 0x0f02, 0x0f03, 0x0f04, 0x0f05, 0x0f06, 0x0f07,
+ 0x0f08, 0x0f09, 0x0f0a, 0x0f0b, 0x0f0b, 0x0f0d, 0x0f0e, 0x0f0f,
+ 0x0f10, 0x0f11, 0x0f12, 0x0f13, 0x0f14, 0x0f15, 0x0f16, 0x0f17,
+ 0x0f18, 0x0f19, 0x0f1a, 0x0f1b, 0x0f1c, 0x0f1d, 0x0f1e, 0x0f1f,
+ 0x0f20, 0x0f21, 0x0f22, 0x0f23, 0x0f24, 0x0f25, 0x0f26, 0x0f27,
+ 0x0f28, 0x0f29, 0x0f2a, 0x0f2b, 0x0f2c, 0x0f2d, 0x0f2e, 0x0f2f,
+ 0x0f30, 0x0f31, 0x0f32, 0x0f33, 0x0f34, 0x0f35, 0x0f36, 0x0f37,
+ 0x0f38, 0x0f39, 0x0f3a, 0x0f3b, 0x0f3c, 0x0f3d, 0x0f3e, 0x0f3f,
+ };
+
+static const unsigned short gNormalizeTable0f40[] = {
+ /* U+0f40 */
+ 0x0f40, 0x0f41, 0x0f42, 0x0f42, 0x0f44, 0x0f45, 0x0f46, 0x0f47,
+ 0x0f48, 0x0f49, 0x0f4a, 0x0f4b, 0x0f4c, 0x0f4c, 0x0f4e, 0x0f4f,
+ 0x0f50, 0x0f51, 0x0f51, 0x0f53, 0x0f54, 0x0f55, 0x0f56, 0x0f56,
+ 0x0f58, 0x0f59, 0x0f5a, 0x0f5b, 0x0f5b, 0x0f5d, 0x0f5e, 0x0f5f,
+ 0x0f60, 0x0f61, 0x0f62, 0x0f63, 0x0f64, 0x0f65, 0x0f66, 0x0f67,
+ 0x0f68, 0x0f40, 0x0f6a, 0x0f6b, 0x0f6c, 0x0f6d, 0x0f6e, 0x0f6f,
+ 0x0f70, 0x0f71, 0x0f72, 0x0f71, 0x0f74, 0x0f71, 0x0fb2, 0x0fb2,
+ 0x0fb3, 0x0fb3, 0x0f7a, 0x0f7b, 0x0f7c, 0x0f7d, 0x0f7e, 0x0f7f,
+ };
+
+static const unsigned short gNormalizeTable0f80[] = {
+ /* U+0f80 */
+ 0x0f80, 0x0f71, 0x0f82, 0x0f83, 0x0f84, 0x0f85, 0x0f86, 0x0f87,
+ 0x0f88, 0x0f89, 0x0f8a, 0x0f8b, 0x0f8c, 0x0f8d, 0x0f8e, 0x0f8f,
+ 0x0f90, 0x0f91, 0x0f92, 0x0f92, 0x0f94, 0x0f95, 0x0f96, 0x0f97,
+ 0x0f98, 0x0f99, 0x0f9a, 0x0f9b, 0x0f9c, 0x0f9c, 0x0f9e, 0x0f9f,
+ 0x0fa0, 0x0fa1, 0x0fa1, 0x0fa3, 0x0fa4, 0x0fa5, 0x0fa6, 0x0fa6,
+ 0x0fa8, 0x0fa9, 0x0faa, 0x0fab, 0x0fab, 0x0fad, 0x0fae, 0x0faf,
+ 0x0fb0, 0x0fb1, 0x0fb2, 0x0fb3, 0x0fb4, 0x0fb5, 0x0fb6, 0x0fb7,
+ 0x0fb8, 0x0f90, 0x0fba, 0x0fbb, 0x0fbc, 0x0fbd, 0x0fbe, 0x0fbf,
+ };
+
+static const unsigned short gNormalizeTable1000[] = {
+ /* U+1000 */
+ 0x1000, 0x1001, 0x1002, 0x1003, 0x1004, 0x1005, 0x1006, 0x1007,
+ 0x1008, 0x1009, 0x100a, 0x100b, 0x100c, 0x100d, 0x100e, 0x100f,
+ 0x1010, 0x1011, 0x1012, 0x1013, 0x1014, 0x1015, 0x1016, 0x1017,
+ 0x1018, 0x1019, 0x101a, 0x101b, 0x101c, 0x101d, 0x101e, 0x101f,
+ 0x1020, 0x1021, 0x1022, 0x1023, 0x1024, 0x1025, 0x1025, 0x1027,
+ 0x1028, 0x1029, 0x102a, 0x102b, 0x102c, 0x102d, 0x102e, 0x102f,
+ 0x1030, 0x1031, 0x1032, 0x1033, 0x1034, 0x1035, 0x1036, 0x1037,
+ 0x1038, 0x1039, 0x103a, 0x103b, 0x103c, 0x103d, 0x103e, 0x103f,
+ };
+
+static const unsigned short gNormalizeTable1080[] = {
+ /* U+1080 */
+ 0x1080, 0x1081, 0x1082, 0x1083, 0x1084, 0x1085, 0x1086, 0x1087,
+ 0x1088, 0x1089, 0x108a, 0x108b, 0x108c, 0x108d, 0x108e, 0x108f,
+ 0x1090, 0x1091, 0x1092, 0x1093, 0x1094, 0x1095, 0x1096, 0x1097,
+ 0x1098, 0x1099, 0x109a, 0x109b, 0x109c, 0x109d, 0x109e, 0x109f,
+ 0x2d00, 0x2d01, 0x2d02, 0x2d03, 0x2d04, 0x2d05, 0x2d06, 0x2d07,
+ 0x2d08, 0x2d09, 0x2d0a, 0x2d0b, 0x2d0c, 0x2d0d, 0x2d0e, 0x2d0f,
+ 0x2d10, 0x2d11, 0x2d12, 0x2d13, 0x2d14, 0x2d15, 0x2d16, 0x2d17,
+ 0x2d18, 0x2d19, 0x2d1a, 0x2d1b, 0x2d1c, 0x2d1d, 0x2d1e, 0x2d1f,
+ };
+
+static const unsigned short gNormalizeTable10c0[] = {
+ /* U+10c0 */
+ 0x2d20, 0x2d21, 0x2d22, 0x2d23, 0x2d24, 0x2d25, 0x10c6, 0x10c7,
+ 0x10c8, 0x10c9, 0x10ca, 0x10cb, 0x10cc, 0x10cd, 0x10ce, 0x10cf,
+ 0x10d0, 0x10d1, 0x10d2, 0x10d3, 0x10d4, 0x10d5, 0x10d6, 0x10d7,
+ 0x10d8, 0x10d9, 0x10da, 0x10db, 0x10dc, 0x10dd, 0x10de, 0x10df,
+ 0x10e0, 0x10e1, 0x10e2, 0x10e3, 0x10e4, 0x10e5, 0x10e6, 0x10e7,
+ 0x10e8, 0x10e9, 0x10ea, 0x10eb, 0x10ec, 0x10ed, 0x10ee, 0x10ef,
+ 0x10f0, 0x10f1, 0x10f2, 0x10f3, 0x10f4, 0x10f5, 0x10f6, 0x10f7,
+ 0x10f8, 0x10f9, 0x10fa, 0x10fb, 0x10dc, 0x10fd, 0x10fe, 0x10ff,
+ };
+
+static const unsigned short gNormalizeTable1140[] = {
+ /* U+1140 */
+ 0x1140, 0x1141, 0x1142, 0x1143, 0x1144, 0x1145, 0x1146, 0x1147,
+ 0x1148, 0x1149, 0x114a, 0x114b, 0x114c, 0x114d, 0x114e, 0x114f,
+ 0x1150, 0x1151, 0x1152, 0x1153, 0x1154, 0x1155, 0x1156, 0x1157,
+ 0x1158, 0x1159, 0x115a, 0x115b, 0x115c, 0x115d, 0x115e, 0x0020,
+ 0x0020, 0x1161, 0x1162, 0x1163, 0x1164, 0x1165, 0x1166, 0x1167,
+ 0x1168, 0x1169, 0x116a, 0x116b, 0x116c, 0x116d, 0x116e, 0x116f,
+ 0x1170, 0x1171, 0x1172, 0x1173, 0x1174, 0x1175, 0x1176, 0x1177,
+ 0x1178, 0x1179, 0x117a, 0x117b, 0x117c, 0x117d, 0x117e, 0x117f,
+ };
+
+static const unsigned short gNormalizeTable1780[] = {
+ /* U+1780 */
+ 0x1780, 0x1781, 0x1782, 0x1783, 0x1784, 0x1785, 0x1786, 0x1787,
+ 0x1788, 0x1789, 0x178a, 0x178b, 0x178c, 0x178d, 0x178e, 0x178f,
+ 0x1790, 0x1791, 0x1792, 0x1793, 0x1794, 0x1795, 0x1796, 0x1797,
+ 0x1798, 0x1799, 0x179a, 0x179b, 0x179c, 0x179d, 0x179e, 0x179f,
+ 0x17a0, 0x17a1, 0x17a2, 0x17a3, 0x17a4, 0x17a5, 0x17a6, 0x17a7,
+ 0x17a8, 0x17a9, 0x17aa, 0x17ab, 0x17ac, 0x17ad, 0x17ae, 0x17af,
+ 0x17b0, 0x17b1, 0x17b2, 0x17b3, 0x0020, 0x0020, 0x17b6, 0x17b7,
+ 0x17b8, 0x17b9, 0x17ba, 0x17bb, 0x17bc, 0x17bd, 0x17be, 0x17bf,
+ };
+
+static const unsigned short gNormalizeTable1800[] = {
+ /* U+1800 */
+ 0x1800, 0x1801, 0x1802, 0x1803, 0x1804, 0x1805, 0x1806, 0x1807,
+ 0x1808, 0x1809, 0x180a, 0x0020, 0x0020, 0x0020, 0x180e, 0x180f,
+ 0x1810, 0x1811, 0x1812, 0x1813, 0x1814, 0x1815, 0x1816, 0x1817,
+ 0x1818, 0x1819, 0x181a, 0x181b, 0x181c, 0x181d, 0x181e, 0x181f,
+ 0x1820, 0x1821, 0x1822, 0x1823, 0x1824, 0x1825, 0x1826, 0x1827,
+ 0x1828, 0x1829, 0x182a, 0x182b, 0x182c, 0x182d, 0x182e, 0x182f,
+ 0x1830, 0x1831, 0x1832, 0x1833, 0x1834, 0x1835, 0x1836, 0x1837,
+ 0x1838, 0x1839, 0x183a, 0x183b, 0x183c, 0x183d, 0x183e, 0x183f,
+ };
+
+static const unsigned short gNormalizeTable1b00[] = {
+ /* U+1b00 */
+ 0x1b00, 0x1b01, 0x1b02, 0x1b03, 0x1b04, 0x1b05, 0x1b05, 0x1b07,
+ 0x1b07, 0x1b09, 0x1b09, 0x1b0b, 0x1b0b, 0x1b0d, 0x1b0d, 0x1b0f,
+ 0x1b10, 0x1b11, 0x1b11, 0x1b13, 0x1b14, 0x1b15, 0x1b16, 0x1b17,
+ 0x1b18, 0x1b19, 0x1b1a, 0x1b1b, 0x1b1c, 0x1b1d, 0x1b1e, 0x1b1f,
+ 0x1b20, 0x1b21, 0x1b22, 0x1b23, 0x1b24, 0x1b25, 0x1b26, 0x1b27,
+ 0x1b28, 0x1b29, 0x1b2a, 0x1b2b, 0x1b2c, 0x1b2d, 0x1b2e, 0x1b2f,
+ 0x1b30, 0x1b31, 0x1b32, 0x1b33, 0x1b34, 0x1b35, 0x1b36, 0x1b37,
+ 0x1b38, 0x1b39, 0x1b3a, 0x1b3a, 0x1b3c, 0x1b3c, 0x1b3e, 0x1b3f,
+ };
+
+static const unsigned short gNormalizeTable1b40[] = {
+ /* U+1b40 */
+ 0x1b3e, 0x1b3f, 0x1b42, 0x1b42, 0x1b44, 0x1b45, 0x1b46, 0x1b47,
+ 0x1b48, 0x1b49, 0x1b4a, 0x1b4b, 0x1b4c, 0x1b4d, 0x1b4e, 0x1b4f,
+ 0x1b50, 0x1b51, 0x1b52, 0x1b53, 0x1b54, 0x1b55, 0x1b56, 0x1b57,
+ 0x1b58, 0x1b59, 0x1b5a, 0x1b5b, 0x1b5c, 0x1b5d, 0x1b5e, 0x1b5f,
+ 0x1b60, 0x1b61, 0x1b62, 0x1b63, 0x1b64, 0x1b65, 0x1b66, 0x1b67,
+ 0x1b68, 0x1b69, 0x1b6a, 0x1b6b, 0x1b6c, 0x1b6d, 0x1b6e, 0x1b6f,
+ 0x1b70, 0x1b71, 0x1b72, 0x1b73, 0x1b74, 0x1b75, 0x1b76, 0x1b77,
+ 0x1b78, 0x1b79, 0x1b7a, 0x1b7b, 0x1b7c, 0x1b7d, 0x1b7e, 0x1b7f,
+ };
+
+static const unsigned short gNormalizeTable1d00[] = {
+ /* U+1d00 */
+ 0x1d00, 0x1d01, 0x1d02, 0x1d03, 0x1d04, 0x1d05, 0x1d06, 0x1d07,
+ 0x1d08, 0x1d09, 0x1d0a, 0x1d0b, 0x1d0c, 0x1d0d, 0x1d0e, 0x1d0f,
+ 0x1d10, 0x1d11, 0x1d12, 0x1d13, 0x1d14, 0x1d15, 0x1d16, 0x1d17,
+ 0x1d18, 0x1d19, 0x1d1a, 0x1d1b, 0x1d1c, 0x1d1d, 0x1d1e, 0x1d1f,
+ 0x1d20, 0x1d21, 0x1d22, 0x1d23, 0x1d24, 0x1d25, 0x1d26, 0x1d27,
+ 0x1d28, 0x1d29, 0x1d2a, 0x1d2b, 0x0061, 0x00e6, 0x0062, 0x1d2f,
+ 0x0064, 0x0065, 0x01dd, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b,
+ 0x006c, 0x006d, 0x006e, 0x1d3b, 0x006f, 0x0223, 0x0070, 0x0072,
+ };
+
+static const unsigned short gNormalizeTable1d40[] = {
+ /* U+1d40 */
+ 0x0074, 0x0075, 0x0077, 0x0061, 0x0250, 0x0251, 0x1d02, 0x0062,
+ 0x0064, 0x0065, 0x0259, 0x025b, 0x025c, 0x0067, 0x1d4e, 0x006b,
+ 0x006d, 0x014b, 0x006f, 0x0254, 0x1d16, 0x1d17, 0x0070, 0x0074,
+ 0x0075, 0x1d1d, 0x026f, 0x0076, 0x1d25, 0x03b2, 0x03b3, 0x03b4,
+ 0x03c6, 0x03c7, 0x0069, 0x0072, 0x0075, 0x0076, 0x03b2, 0x03b3,
+ 0x03c1, 0x03c6, 0x03c7, 0x1d6b, 0x1d6c, 0x1d6d, 0x1d6e, 0x1d6f,
+ 0x1d70, 0x1d71, 0x1d72, 0x1d73, 0x1d74, 0x1d75, 0x1d76, 0x1d77,
+ 0x043d, 0x1d79, 0x1d7a, 0x1d7b, 0x1d7c, 0x1d7d, 0x1d7e, 0x1d7f,
+ };
+
+static const unsigned short gNormalizeTable1d80[] = {
+ /* U+1d80 */
+ 0x1d80, 0x1d81, 0x1d82, 0x1d83, 0x1d84, 0x1d85, 0x1d86, 0x1d87,
+ 0x1d88, 0x1d89, 0x1d8a, 0x1d8b, 0x1d8c, 0x1d8d, 0x1d8e, 0x1d8f,
+ 0x1d90, 0x1d91, 0x1d92, 0x1d93, 0x1d94, 0x1d95, 0x1d96, 0x1d97,
+ 0x1d98, 0x1d99, 0x1d9a, 0x0252, 0x0063, 0x0255, 0x00f0, 0x025c,
+ 0x0066, 0x025f, 0x0261, 0x0265, 0x0268, 0x0269, 0x026a, 0x1d7b,
+ 0x029d, 0x026d, 0x1d85, 0x029f, 0x0271, 0x0270, 0x0272, 0x0273,
+ 0x0274, 0x0275, 0x0278, 0x0282, 0x0283, 0x01ab, 0x0289, 0x028a,
+ 0x1d1c, 0x028b, 0x028c, 0x007a, 0x0290, 0x0291, 0x0292, 0x03b8,
+ };
+
+static const unsigned short gNormalizeTable1e00[] = {
+ /* U+1e00 */
+ 0x0061, 0x0061, 0x0062, 0x0062, 0x0062, 0x0062, 0x0062, 0x0062,
+ 0x0063, 0x0063, 0x0064, 0x0064, 0x0064, 0x0064, 0x0064, 0x0064,
+ 0x0064, 0x0064, 0x0064, 0x0064, 0x0065, 0x0065, 0x0065, 0x0065,
+ 0x0065, 0x0065, 0x0065, 0x0065, 0x0065, 0x0065, 0x0066, 0x0066,
+ 0x0067, 0x0067, 0x0068, 0x0068, 0x0068, 0x0068, 0x0068, 0x0068,
+ 0x0068, 0x0068, 0x0068, 0x0068, 0x0069, 0x0069, 0x0069, 0x0069,
+ 0x006b, 0x006b, 0x006b, 0x006b, 0x006b, 0x006b, 0x006c, 0x006c,
+ 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006d, 0x006d,
+ };
+
+static const unsigned short gNormalizeTable1e40[] = {
+ /* U+1e40 */
+ 0x006d, 0x006d, 0x006d, 0x006d, 0x006e, 0x006e, 0x006e, 0x006e,
+ 0x006e, 0x006e, 0x006e, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f,
+ 0x006f, 0x006f, 0x006f, 0x006f, 0x0070, 0x0070, 0x0070, 0x0070,
+ 0x0072, 0x0072, 0x0072, 0x0072, 0x0072, 0x0072, 0x0072, 0x0072,
+ 0x0073, 0x0073, 0x0073, 0x0073, 0x0073, 0x0073, 0x0073, 0x0073,
+ 0x0073, 0x0073, 0x0074, 0x0074, 0x0074, 0x0074, 0x0074, 0x0074,
+ 0x0074, 0x0074, 0x0075, 0x0075, 0x0075, 0x0075, 0x0075, 0x0075,
+ 0x0075, 0x0075, 0x0075, 0x0075, 0x0076, 0x0076, 0x0076, 0x0076,
+ };
+
+static const unsigned short gNormalizeTable1e80[] = {
+ /* U+1e80 */
+ 0x0077, 0x0077, 0x0077, 0x0077, 0x0077, 0x0077, 0x0077, 0x0077,
+ 0x0077, 0x0077, 0x0078, 0x0078, 0x0078, 0x0078, 0x0079, 0x0079,
+ 0x007a, 0x007a, 0x007a, 0x007a, 0x007a, 0x007a, 0x0068, 0x0074,
+ 0x0077, 0x0079, 0x0061, 0x0073, 0x1e9c, 0x1e9d, 0x0073, 0x1e9f,
+ 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061,
+ 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061,
+ 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061,
+ 0x0065, 0x0065, 0x0065, 0x0065, 0x0065, 0x0065, 0x0065, 0x0065,
+ };
+
+static const unsigned short gNormalizeTable1ec0[] = {
+ /* U+1ec0 */
+ 0x0065, 0x0065, 0x0065, 0x0065, 0x0065, 0x0065, 0x0065, 0x0065,
+ 0x0069, 0x0069, 0x0069, 0x0069, 0x006f, 0x006f, 0x006f, 0x006f,
+ 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f,
+ 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f,
+ 0x006f, 0x006f, 0x006f, 0x006f, 0x0075, 0x0075, 0x0075, 0x0075,
+ 0x0075, 0x0075, 0x0075, 0x0075, 0x0075, 0x0075, 0x0075, 0x0075,
+ 0x0075, 0x0075, 0x0079, 0x0079, 0x0079, 0x0079, 0x0079, 0x0079,
+ 0x0079, 0x0079, 0x1efb, 0x1efb, 0x1efd, 0x1efd, 0x1eff, 0x1eff,
+ };
+
+static const unsigned short gNormalizeTable1f00[] = {
+ /* U+1f00 */
+ 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x03b1,
+ 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x03b1,
+ 0x03b5, 0x03b5, 0x03b5, 0x03b5, 0x03b5, 0x03b5, 0x1f16, 0x1f17,
+ 0x03b5, 0x03b5, 0x03b5, 0x03b5, 0x03b5, 0x03b5, 0x1f1e, 0x1f1f,
+ 0x03b7, 0x03b7, 0x03b7, 0x03b7, 0x03b7, 0x03b7, 0x03b7, 0x03b7,
+ 0x03b7, 0x03b7, 0x03b7, 0x03b7, 0x03b7, 0x03b7, 0x03b7, 0x03b7,
+ 0x03b9, 0x03b9, 0x03b9, 0x03b9, 0x03b9, 0x03b9, 0x03b9, 0x03b9,
+ 0x03b9, 0x03b9, 0x03b9, 0x03b9, 0x03b9, 0x03b9, 0x03b9, 0x03b9,
+ };
+
+static const unsigned short gNormalizeTable1f40[] = {
+ /* U+1f40 */
+ 0x03bf, 0x03bf, 0x03bf, 0x03bf, 0x03bf, 0x03bf, 0x1f46, 0x1f47,
+ 0x03bf, 0x03bf, 0x03bf, 0x03bf, 0x03bf, 0x03bf, 0x1f4e, 0x1f4f,
+ 0x03c5, 0x03c5, 0x03c5, 0x03c5, 0x03c5, 0x03c5, 0x03c5, 0x03c5,
+ 0x1f58, 0x03c5, 0x1f5a, 0x03c5, 0x1f5c, 0x03c5, 0x1f5e, 0x03c5,
+ 0x03c9, 0x03c9, 0x03c9, 0x03c9, 0x03c9, 0x03c9, 0x03c9, 0x03c9,
+ 0x03c9, 0x03c9, 0x03c9, 0x03c9, 0x03c9, 0x03c9, 0x03c9, 0x03c9,
+ 0x03b1, 0x03b1, 0x03b5, 0x03b5, 0x03b7, 0x03b7, 0x03b9, 0x03b9,
+ 0x03bf, 0x03bf, 0x03c5, 0x03c5, 0x03c9, 0x03c9, 0x1f7e, 0x1f7f,
+ };
+
+static const unsigned short gNormalizeTable1f80[] = {
+ /* U+1f80 */
+ 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x03b1,
+ 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x03b1,
+ 0x03b7, 0x03b7, 0x03b7, 0x03b7, 0x03b7, 0x03b7, 0x03b7, 0x03b7,
+ 0x03b7, 0x03b7, 0x03b7, 0x03b7, 0x03b7, 0x03b7, 0x03b7, 0x03b7,
+ 0x03c9, 0x03c9, 0x03c9, 0x03c9, 0x03c9, 0x03c9, 0x03c9, 0x03c9,
+ 0x03c9, 0x03c9, 0x03c9, 0x03c9, 0x03c9, 0x03c9, 0x03c9, 0x03c9,
+ 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x1fb5, 0x03b1, 0x03b1,
+ 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x03b1, 0x0020, 0x03b9, 0x0020,
+ };
+
+static const unsigned short gNormalizeTable1fc0[] = {
+ /* U+1fc0 */
+ 0x0020, 0x0020, 0x03b7, 0x03b7, 0x03b7, 0x1fc5, 0x03b7, 0x03b7,
+ 0x03b5, 0x03b5, 0x03b7, 0x03b7, 0x03b7, 0x0020, 0x0020, 0x0020,
+ 0x03b9, 0x03b9, 0x03b9, 0x03b9, 0x1fd4, 0x1fd5, 0x03b9, 0x03b9,
+ 0x03b9, 0x03b9, 0x03b9, 0x03b9, 0x1fdc, 0x0020, 0x0020, 0x0020,
+ 0x03c5, 0x03c5, 0x03c5, 0x03c5, 0x03c1, 0x03c1, 0x03c5, 0x03c5,
+ 0x03c5, 0x03c5, 0x03c5, 0x03c5, 0x03c1, 0x0020, 0x0020, 0x0060,
+ 0x1ff0, 0x1ff1, 0x03c9, 0x03c9, 0x03c9, 0x1ff5, 0x03c9, 0x03c9,
+ 0x03bf, 0x03bf, 0x03c9, 0x03c9, 0x03c9, 0x0020, 0x0020, 0x1fff,
+ };
+
+static const unsigned short gNormalizeTable2000[] = {
+ /* U+2000 */
+ 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020,
+ 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020,
+ 0x2010, 0x2010, 0x2012, 0x2013, 0x2014, 0x2015, 0x2016, 0x0020,
+ 0x2018, 0x2019, 0x201a, 0x201b, 0x201c, 0x201d, 0x201e, 0x201f,
+ 0x2020, 0x2021, 0x2022, 0x2023, 0x002e, 0x002e, 0x002e, 0x2027,
+ 0x2028, 0x2029, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020,
+ 0x2030, 0x2031, 0x2032, 0x2032, 0x2032, 0x2035, 0x2035, 0x2035,
+ 0x2038, 0x2039, 0x203a, 0x203b, 0x0021, 0x203d, 0x0020, 0x203f,
+ };
+
+static const unsigned short gNormalizeTable2040[] = {
+ /* U+2040 */
+ 0x2040, 0x2041, 0x2042, 0x2043, 0x2044, 0x2045, 0x2046, 0x003f,
+ 0x003f, 0x0021, 0x204a, 0x204b, 0x204c, 0x204d, 0x204e, 0x204f,
+ 0x2050, 0x2051, 0x2052, 0x2053, 0x2054, 0x2055, 0x2056, 0x2032,
+ 0x2058, 0x2059, 0x205a, 0x205b, 0x205c, 0x205d, 0x205e, 0x0020,
+ 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020,
+ 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020,
+ 0x0030, 0x0069, 0x2072, 0x2073, 0x0034, 0x0035, 0x0036, 0x0037,
+ 0x0038, 0x0039, 0x002b, 0x2212, 0x003d, 0x0028, 0x0029, 0x006e,
+ };
+
+static const unsigned short gNormalizeTable2080[] = {
+ /* U+2080 */
+ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+ 0x0038, 0x0039, 0x002b, 0x2212, 0x003d, 0x0028, 0x0029, 0x208f,
+ 0x0061, 0x0065, 0x006f, 0x0078, 0x0259, 0x2095, 0x2096, 0x2097,
+ 0x2098, 0x2099, 0x209a, 0x209b, 0x209c, 0x209d, 0x209e, 0x209f,
+ 0x20a0, 0x20a1, 0x20a2, 0x20a3, 0x20a4, 0x20a5, 0x20a6, 0x20a7,
+ 0x0072, 0x20a9, 0x20aa, 0x20ab, 0x20ac, 0x20ad, 0x20ae, 0x20af,
+ 0x20b0, 0x20b1, 0x20b2, 0x20b3, 0x20b4, 0x20b5, 0x20b6, 0x20b7,
+ 0x20b8, 0x20b9, 0x20ba, 0x20bb, 0x20bc, 0x20bd, 0x20be, 0x20bf,
+ };
+
+static const unsigned short gNormalizeTable2100[] = {
+ /* U+2100 */
+ 0x0061, 0x0061, 0x0063, 0x00b0, 0x2104, 0x0063, 0x0063, 0x025b,
+ 0x2108, 0x00b0, 0x0067, 0x0068, 0x0068, 0x0068, 0x0068, 0x0127,
+ 0x0069, 0x0069, 0x006c, 0x006c, 0x2114, 0x006e, 0x006e, 0x2117,
+ 0x2118, 0x0070, 0x0071, 0x0072, 0x0072, 0x0072, 0x211e, 0x211f,
+ 0x0073, 0x0074, 0x0074, 0x2123, 0x007a, 0x2125, 0x03c9, 0x2127,
+ 0x007a, 0x2129, 0x006b, 0x0061, 0x0062, 0x0063, 0x212e, 0x0065,
+ 0x0065, 0x0066, 0x214e, 0x006d, 0x006f, 0x05d0, 0x05d1, 0x05d2,
+ 0x05d3, 0x0069, 0x213a, 0x0066, 0x03c0, 0x03b3, 0x03b3, 0x03c0,
+ };
+
+static const unsigned short gNormalizeTable2140[] = {
+ /* U+2140 */
+ 0x2211, 0x2141, 0x2142, 0x2143, 0x2144, 0x0064, 0x0064, 0x0065,
+ 0x0069, 0x006a, 0x214a, 0x214b, 0x214c, 0x214d, 0x214e, 0x214f,
+ 0x0031, 0x0031, 0x0031, 0x0031, 0x0032, 0x0031, 0x0032, 0x0033,
+ 0x0034, 0x0031, 0x0035, 0x0031, 0x0033, 0x0035, 0x0037, 0x0031,
+ 0x0069, 0x0069, 0x0069, 0x0069, 0x0076, 0x0076, 0x0076, 0x0076,
+ 0x0069, 0x0078, 0x0078, 0x0078, 0x006c, 0x0063, 0x0064, 0x006d,
+ 0x0069, 0x0069, 0x0069, 0x0069, 0x0076, 0x0076, 0x0076, 0x0076,
+ 0x0069, 0x0078, 0x0078, 0x0078, 0x006c, 0x0063, 0x0064, 0x006d,
+ };
+
+static const unsigned short gNormalizeTable2180[] = {
+ /* U+2180 */
+ 0x2180, 0x2181, 0x2182, 0x2184, 0x2184, 0x2185, 0x2186, 0x2187,
+ 0x2188, 0x0030, 0x218a, 0x218b, 0x218c, 0x218d, 0x218e, 0x218f,
+ 0x2190, 0x2191, 0x2192, 0x2193, 0x2194, 0x2195, 0x2196, 0x2197,
+ 0x2198, 0x2199, 0x2190, 0x2192, 0x219c, 0x219d, 0x219e, 0x219f,
+ 0x21a0, 0x21a1, 0x21a2, 0x21a3, 0x21a4, 0x21a5, 0x21a6, 0x21a7,
+ 0x21a8, 0x21a9, 0x21aa, 0x21ab, 0x21ac, 0x21ad, 0x2194, 0x21af,
+ 0x21b0, 0x21b1, 0x21b2, 0x21b3, 0x21b4, 0x21b5, 0x21b6, 0x21b7,
+ 0x21b8, 0x21b9, 0x21ba, 0x21bb, 0x21bc, 0x21bd, 0x21be, 0x21bf,
+ };
+
+static const unsigned short gNormalizeTable21c0[] = {
+ /* U+21c0 */
+ 0x21c0, 0x21c1, 0x21c2, 0x21c3, 0x21c4, 0x21c5, 0x21c6, 0x21c7,
+ 0x21c8, 0x21c9, 0x21ca, 0x21cb, 0x21cc, 0x21d0, 0x21d4, 0x21d2,
+ 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x21d5, 0x21d6, 0x21d7,
+ 0x21d8, 0x21d9, 0x21da, 0x21db, 0x21dc, 0x21dd, 0x21de, 0x21df,
+ 0x21e0, 0x21e1, 0x21e2, 0x21e3, 0x21e4, 0x21e5, 0x21e6, 0x21e7,
+ 0x21e8, 0x21e9, 0x21ea, 0x21eb, 0x21ec, 0x21ed, 0x21ee, 0x21ef,
+ 0x21f0, 0x21f1, 0x21f2, 0x21f3, 0x21f4, 0x21f5, 0x21f6, 0x21f7,
+ 0x21f8, 0x21f9, 0x21fa, 0x21fb, 0x21fc, 0x21fd, 0x21fe, 0x21ff,
+ };
+
+static const unsigned short gNormalizeTable2200[] = {
+ /* U+2200 */
+ 0x2200, 0x2201, 0x2202, 0x2203, 0x2203, 0x2205, 0x2206, 0x2207,
+ 0x2208, 0x2208, 0x220a, 0x220b, 0x220b, 0x220d, 0x220e, 0x220f,
+ 0x2210, 0x2211, 0x2212, 0x2213, 0x2214, 0x2215, 0x2216, 0x2217,
+ 0x2218, 0x2219, 0x221a, 0x221b, 0x221c, 0x221d, 0x221e, 0x221f,
+ 0x2220, 0x2221, 0x2222, 0x2223, 0x2223, 0x2225, 0x2225, 0x2227,
+ 0x2228, 0x2229, 0x222a, 0x222b, 0x222b, 0x222b, 0x222e, 0x222e,
+ 0x222e, 0x2231, 0x2232, 0x2233, 0x2234, 0x2235, 0x2236, 0x2237,
+ 0x2238, 0x2239, 0x223a, 0x223b, 0x223c, 0x223d, 0x223e, 0x223f,
+ };
+
+static const unsigned short gNormalizeTable2240[] = {
+ /* U+2240 */
+ 0x2240, 0x223c, 0x2242, 0x2243, 0x2243, 0x2245, 0x2246, 0x2245,
+ 0x2248, 0x2248, 0x224a, 0x224b, 0x224c, 0x224d, 0x224e, 0x224f,
+ 0x2250, 0x2251, 0x2252, 0x2253, 0x2254, 0x2255, 0x2256, 0x2257,
+ 0x2258, 0x2259, 0x225a, 0x225b, 0x225c, 0x225d, 0x225e, 0x225f,
+ 0x003d, 0x2261, 0x2261, 0x2263, 0x2264, 0x2265, 0x2266, 0x2267,
+ 0x2268, 0x2269, 0x226a, 0x226b, 0x226c, 0x224d, 0x003c, 0x003e,
+ 0x2264, 0x2265, 0x2272, 0x2273, 0x2272, 0x2273, 0x2276, 0x2277,
+ 0x2276, 0x2277, 0x227a, 0x227b, 0x227c, 0x227d, 0x227e, 0x227f,
+ };
+
+static const unsigned short gNormalizeTable2280[] = {
+ /* U+2280 */
+ 0x227a, 0x227b, 0x2282, 0x2283, 0x2282, 0x2283, 0x2286, 0x2287,
+ 0x2286, 0x2287, 0x228a, 0x228b, 0x228c, 0x228d, 0x228e, 0x228f,
+ 0x2290, 0x2291, 0x2292, 0x2293, 0x2294, 0x2295, 0x2296, 0x2297,
+ 0x2298, 0x2299, 0x229a, 0x229b, 0x229c, 0x229d, 0x229e, 0x229f,
+ 0x22a0, 0x22a1, 0x22a2, 0x22a3, 0x22a4, 0x22a5, 0x22a6, 0x22a7,
+ 0x22a8, 0x22a9, 0x22aa, 0x22ab, 0x22a2, 0x22a8, 0x22a9, 0x22ab,
+ 0x22b0, 0x22b1, 0x22b2, 0x22b3, 0x22b4, 0x22b5, 0x22b6, 0x22b7,
+ 0x22b8, 0x22b9, 0x22ba, 0x22bb, 0x22bc, 0x22bd, 0x22be, 0x22bf,
+ };
+
+static const unsigned short gNormalizeTable22c0[] = {
+ /* U+22c0 */
+ 0x22c0, 0x22c1, 0x22c2, 0x22c3, 0x22c4, 0x22c5, 0x22c6, 0x22c7,
+ 0x22c8, 0x22c9, 0x22ca, 0x22cb, 0x22cc, 0x22cd, 0x22ce, 0x22cf,
+ 0x22d0, 0x22d1, 0x22d2, 0x22d3, 0x22d4, 0x22d5, 0x22d6, 0x22d7,
+ 0x22d8, 0x22d9, 0x22da, 0x22db, 0x22dc, 0x22dd, 0x22de, 0x22df,
+ 0x227c, 0x227d, 0x2291, 0x2292, 0x22e4, 0x22e5, 0x22e6, 0x22e7,
+ 0x22e8, 0x22e9, 0x22b2, 0x22b3, 0x22b4, 0x22b5, 0x22ee, 0x22ef,
+ 0x22f0, 0x22f1, 0x22f2, 0x22f3, 0x22f4, 0x22f5, 0x22f6, 0x22f7,
+ 0x22f8, 0x22f9, 0x22fa, 0x22fb, 0x22fc, 0x22fd, 0x22fe, 0x22ff,
+ };
+
+static const unsigned short gNormalizeTable2300[] = {
+ /* U+2300 */
+ 0x2300, 0x2301, 0x2302, 0x2303, 0x2304, 0x2305, 0x2306, 0x2307,
+ 0x2308, 0x2309, 0x230a, 0x230b, 0x230c, 0x230d, 0x230e, 0x230f,
+ 0x2310, 0x2311, 0x2312, 0x2313, 0x2314, 0x2315, 0x2316, 0x2317,
+ 0x2318, 0x2319, 0x231a, 0x231b, 0x231c, 0x231d, 0x231e, 0x231f,
+ 0x2320, 0x2321, 0x2322, 0x2323, 0x2324, 0x2325, 0x2326, 0x2327,
+ 0x2328, 0x3008, 0x3009, 0x232b, 0x232c, 0x232d, 0x232e, 0x232f,
+ 0x2330, 0x2331, 0x2332, 0x2333, 0x2334, 0x2335, 0x2336, 0x2337,
+ 0x2338, 0x2339, 0x233a, 0x233b, 0x233c, 0x233d, 0x233e, 0x233f,
+ };
+
+static const unsigned short gNormalizeTable2440[] = {
+ /* U+2440 */
+ 0x2440, 0x2441, 0x2442, 0x2443, 0x2444, 0x2445, 0x2446, 0x2447,
+ 0x2448, 0x2449, 0x244a, 0x244b, 0x244c, 0x244d, 0x244e, 0x244f,
+ 0x2450, 0x2451, 0x2452, 0x2453, 0x2454, 0x2455, 0x2456, 0x2457,
+ 0x2458, 0x2459, 0x245a, 0x245b, 0x245c, 0x245d, 0x245e, 0x245f,
+ 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038,
+ 0x0039, 0x0031, 0x0031, 0x0031, 0x0031, 0x0031, 0x0031, 0x0031,
+ 0x0031, 0x0031, 0x0031, 0x0032, 0x0028, 0x0028, 0x0028, 0x0028,
+ 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028,
+ };
+
+static const unsigned short gNormalizeTable2480[] = {
+ /* U+2480 */
+ 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028,
+ 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038,
+ 0x0039, 0x0031, 0x0031, 0x0031, 0x0031, 0x0031, 0x0031, 0x0031,
+ 0x0031, 0x0031, 0x0031, 0x0032, 0x0028, 0x0028, 0x0028, 0x0028,
+ 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028,
+ 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028,
+ 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0061, 0x0062,
+ 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a,
+ };
+
+static const unsigned short gNormalizeTable24c0[] = {
+ /* U+24c0 */
+ 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072,
+ 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a,
+ 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068,
+ 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070,
+ 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078,
+ 0x0079, 0x007a, 0x0030, 0x24eb, 0x24ec, 0x24ed, 0x24ee, 0x24ef,
+ 0x24f0, 0x24f1, 0x24f2, 0x24f3, 0x24f4, 0x24f5, 0x24f6, 0x24f7,
+ 0x24f8, 0x24f9, 0x24fa, 0x24fb, 0x24fc, 0x24fd, 0x24fe, 0x24ff,
+ };
+
+static const unsigned short gNormalizeTable2a00[] = {
+ /* U+2a00 */
+ 0x2a00, 0x2a01, 0x2a02, 0x2a03, 0x2a04, 0x2a05, 0x2a06, 0x2a07,
+ 0x2a08, 0x2a09, 0x2a0a, 0x2a0b, 0x222b, 0x2a0d, 0x2a0e, 0x2a0f,
+ 0x2a10, 0x2a11, 0x2a12, 0x2a13, 0x2a14, 0x2a15, 0x2a16, 0x2a17,
+ 0x2a18, 0x2a19, 0x2a1a, 0x2a1b, 0x2a1c, 0x2a1d, 0x2a1e, 0x2a1f,
+ 0x2a20, 0x2a21, 0x2a22, 0x2a23, 0x2a24, 0x2a25, 0x2a26, 0x2a27,
+ 0x2a28, 0x2a29, 0x2a2a, 0x2a2b, 0x2a2c, 0x2a2d, 0x2a2e, 0x2a2f,
+ 0x2a30, 0x2a31, 0x2a32, 0x2a33, 0x2a34, 0x2a35, 0x2a36, 0x2a37,
+ 0x2a38, 0x2a39, 0x2a3a, 0x2a3b, 0x2a3c, 0x2a3d, 0x2a3e, 0x2a3f,
+ };
+
+static const unsigned short gNormalizeTable2a40[] = {
+ /* U+2a40 */
+ 0x2a40, 0x2a41, 0x2a42, 0x2a43, 0x2a44, 0x2a45, 0x2a46, 0x2a47,
+ 0x2a48, 0x2a49, 0x2a4a, 0x2a4b, 0x2a4c, 0x2a4d, 0x2a4e, 0x2a4f,
+ 0x2a50, 0x2a51, 0x2a52, 0x2a53, 0x2a54, 0x2a55, 0x2a56, 0x2a57,
+ 0x2a58, 0x2a59, 0x2a5a, 0x2a5b, 0x2a5c, 0x2a5d, 0x2a5e, 0x2a5f,
+ 0x2a60, 0x2a61, 0x2a62, 0x2a63, 0x2a64, 0x2a65, 0x2a66, 0x2a67,
+ 0x2a68, 0x2a69, 0x2a6a, 0x2a6b, 0x2a6c, 0x2a6d, 0x2a6e, 0x2a6f,
+ 0x2a70, 0x2a71, 0x2a72, 0x2a73, 0x003a, 0x003d, 0x003d, 0x2a77,
+ 0x2a78, 0x2a79, 0x2a7a, 0x2a7b, 0x2a7c, 0x2a7d, 0x2a7e, 0x2a7f,
+ };
+
+static const unsigned short gNormalizeTable2ac0[] = {
+ /* U+2ac0 */
+ 0x2ac0, 0x2ac1, 0x2ac2, 0x2ac3, 0x2ac4, 0x2ac5, 0x2ac6, 0x2ac7,
+ 0x2ac8, 0x2ac9, 0x2aca, 0x2acb, 0x2acc, 0x2acd, 0x2ace, 0x2acf,
+ 0x2ad0, 0x2ad1, 0x2ad2, 0x2ad3, 0x2ad4, 0x2ad5, 0x2ad6, 0x2ad7,
+ 0x2ad8, 0x2ad9, 0x2ada, 0x2adb, 0x2add, 0x2add, 0x2ade, 0x2adf,
+ 0x2ae0, 0x2ae1, 0x2ae2, 0x2ae3, 0x2ae4, 0x2ae5, 0x2ae6, 0x2ae7,
+ 0x2ae8, 0x2ae9, 0x2aea, 0x2aeb, 0x2aec, 0x2aed, 0x2aee, 0x2aef,
+ 0x2af0, 0x2af1, 0x2af2, 0x2af3, 0x2af4, 0x2af5, 0x2af6, 0x2af7,
+ 0x2af8, 0x2af9, 0x2afa, 0x2afb, 0x2afc, 0x2afd, 0x2afe, 0x2aff,
+ };
+
+static const unsigned short gNormalizeTable2c00[] = {
+ /* U+2c00 */
+ 0x2c30, 0x2c31, 0x2c32, 0x2c33, 0x2c34, 0x2c35, 0x2c36, 0x2c37,
+ 0x2c38, 0x2c39, 0x2c3a, 0x2c3b, 0x2c3c, 0x2c3d, 0x2c3e, 0x2c3f,
+ 0x2c40, 0x2c41, 0x2c42, 0x2c43, 0x2c44, 0x2c45, 0x2c46, 0x2c47,
+ 0x2c48, 0x2c49, 0x2c4a, 0x2c4b, 0x2c4c, 0x2c4d, 0x2c4e, 0x2c4f,
+ 0x2c50, 0x2c51, 0x2c52, 0x2c53, 0x2c54, 0x2c55, 0x2c56, 0x2c57,
+ 0x2c58, 0x2c59, 0x2c5a, 0x2c5b, 0x2c5c, 0x2c5d, 0x2c5e, 0x2c2f,
+ 0x2c30, 0x2c31, 0x2c32, 0x2c33, 0x2c34, 0x2c35, 0x2c36, 0x2c37,
+ 0x2c38, 0x2c39, 0x2c3a, 0x2c3b, 0x2c3c, 0x2c3d, 0x2c3e, 0x2c3f,
+ };
+
+static const unsigned short gNormalizeTable2c40[] = {
+ /* U+2c40 */
+ 0x2c40, 0x2c41, 0x2c42, 0x2c43, 0x2c44, 0x2c45, 0x2c46, 0x2c47,
+ 0x2c48, 0x2c49, 0x2c4a, 0x2c4b, 0x2c4c, 0x2c4d, 0x2c4e, 0x2c4f,
+ 0x2c50, 0x2c51, 0x2c52, 0x2c53, 0x2c54, 0x2c55, 0x2c56, 0x2c57,
+ 0x2c58, 0x2c59, 0x2c5a, 0x2c5b, 0x2c5c, 0x2c5d, 0x2c5e, 0x2c5f,
+ 0x2c61, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c65, 0x2c66, 0x2c68,
+ 0x2c68, 0x2c6a, 0x2c6a, 0x2c6c, 0x2c6c, 0x0251, 0x0271, 0x0250,
+ 0x0252, 0x2c71, 0x2c73, 0x2c73, 0x2c74, 0x2c76, 0x2c76, 0x2c77,
+ 0x2c78, 0x2c79, 0x2c7a, 0x2c7b, 0x006a, 0x0076, 0x023f, 0x0240,
+ };
+
+static const unsigned short gNormalizeTable2c80[] = {
+ /* U+2c80 */
+ 0x2c81, 0x2c81, 0x2c83, 0x2c83, 0x2c85, 0x2c85, 0x2c87, 0x2c87,
+ 0x2c89, 0x2c89, 0x2c8b, 0x2c8b, 0x2c8d, 0x2c8d, 0x2c8f, 0x2c8f,
+ 0x2c91, 0x2c91, 0x2c93, 0x2c93, 0x2c95, 0x2c95, 0x2c97, 0x2c97,
+ 0x2c99, 0x2c99, 0x2c9b, 0x2c9b, 0x2c9d, 0x2c9d, 0x2c9f, 0x2c9f,
+ 0x2ca1, 0x2ca1, 0x2ca3, 0x2ca3, 0x2ca5, 0x2ca5, 0x2ca7, 0x2ca7,
+ 0x2ca9, 0x2ca9, 0x2cab, 0x2cab, 0x2cad, 0x2cad, 0x2caf, 0x2caf,
+ 0x2cb1, 0x2cb1, 0x2cb3, 0x2cb3, 0x2cb5, 0x2cb5, 0x2cb7, 0x2cb7,
+ 0x2cb9, 0x2cb9, 0x2cbb, 0x2cbb, 0x2cbd, 0x2cbd, 0x2cbf, 0x2cbf,
+ };
+
+static const unsigned short gNormalizeTable2cc0[] = {
+ /* U+2cc0 */
+ 0x2cc1, 0x2cc1, 0x2cc3, 0x2cc3, 0x2cc5, 0x2cc5, 0x2cc7, 0x2cc7,
+ 0x2cc9, 0x2cc9, 0x2ccb, 0x2ccb, 0x2ccd, 0x2ccd, 0x2ccf, 0x2ccf,
+ 0x2cd1, 0x2cd1, 0x2cd3, 0x2cd3, 0x2cd5, 0x2cd5, 0x2cd7, 0x2cd7,
+ 0x2cd9, 0x2cd9, 0x2cdb, 0x2cdb, 0x2cdd, 0x2cdd, 0x2cdf, 0x2cdf,
+ 0x2ce1, 0x2ce1, 0x2ce3, 0x2ce3, 0x2ce4, 0x2ce5, 0x2ce6, 0x2ce7,
+ 0x2ce8, 0x2ce9, 0x2cea, 0x2cec, 0x2cec, 0x2cee, 0x2cee, 0x2cef,
+ 0x2cf0, 0x2cf1, 0x2cf2, 0x2cf3, 0x2cf4, 0x2cf5, 0x2cf6, 0x2cf7,
+ 0x2cf8, 0x2cf9, 0x2cfa, 0x2cfb, 0x2cfc, 0x2cfd, 0x2cfe, 0x2cff,
+ };
+
+static const unsigned short gNormalizeTable2d40[] = {
+ /* U+2d40 */
+ 0x2d40, 0x2d41, 0x2d42, 0x2d43, 0x2d44, 0x2d45, 0x2d46, 0x2d47,
+ 0x2d48, 0x2d49, 0x2d4a, 0x2d4b, 0x2d4c, 0x2d4d, 0x2d4e, 0x2d4f,
+ 0x2d50, 0x2d51, 0x2d52, 0x2d53, 0x2d54, 0x2d55, 0x2d56, 0x2d57,
+ 0x2d58, 0x2d59, 0x2d5a, 0x2d5b, 0x2d5c, 0x2d5d, 0x2d5e, 0x2d5f,
+ 0x2d60, 0x2d61, 0x2d62, 0x2d63, 0x2d64, 0x2d65, 0x2d66, 0x2d67,
+ 0x2d68, 0x2d69, 0x2d6a, 0x2d6b, 0x2d6c, 0x2d6d, 0x2d6e, 0x2d61,
+ 0x2d70, 0x2d71, 0x2d72, 0x2d73, 0x2d74, 0x2d75, 0x2d76, 0x2d77,
+ 0x2d78, 0x2d79, 0x2d7a, 0x2d7b, 0x2d7c, 0x2d7d, 0x2d7e, 0x2d7f,
+ };
+
+static const unsigned short gNormalizeTable2e80[] = {
+ /* U+2e80 */
+ 0x2e80, 0x2e81, 0x2e82, 0x2e83, 0x2e84, 0x2e85, 0x2e86, 0x2e87,
+ 0x2e88, 0x2e89, 0x2e8a, 0x2e8b, 0x2e8c, 0x2e8d, 0x2e8e, 0x2e8f,
+ 0x2e90, 0x2e91, 0x2e92, 0x2e93, 0x2e94, 0x2e95, 0x2e96, 0x2e97,
+ 0x2e98, 0x2e99, 0x2e9a, 0x2e9b, 0x2e9c, 0x2e9d, 0x2e9e, 0x6bcd,
+ 0x2ea0, 0x2ea1, 0x2ea2, 0x2ea3, 0x2ea4, 0x2ea5, 0x2ea6, 0x2ea7,
+ 0x2ea8, 0x2ea9, 0x2eaa, 0x2eab, 0x2eac, 0x2ead, 0x2eae, 0x2eaf,
+ 0x2eb0, 0x2eb1, 0x2eb2, 0x2eb3, 0x2eb4, 0x2eb5, 0x2eb6, 0x2eb7,
+ 0x2eb8, 0x2eb9, 0x2eba, 0x2ebb, 0x2ebc, 0x2ebd, 0x2ebe, 0x2ebf,
+ };
+
+static const unsigned short gNormalizeTable2ec0[] = {
+ /* U+2ec0 */
+ 0x2ec0, 0x2ec1, 0x2ec2, 0x2ec3, 0x2ec4, 0x2ec5, 0x2ec6, 0x2ec7,
+ 0x2ec8, 0x2ec9, 0x2eca, 0x2ecb, 0x2ecc, 0x2ecd, 0x2ece, 0x2ecf,
+ 0x2ed0, 0x2ed1, 0x2ed2, 0x2ed3, 0x2ed4, 0x2ed5, 0x2ed6, 0x2ed7,
+ 0x2ed8, 0x2ed9, 0x2eda, 0x2edb, 0x2edc, 0x2edd, 0x2ede, 0x2edf,
+ 0x2ee0, 0x2ee1, 0x2ee2, 0x2ee3, 0x2ee4, 0x2ee5, 0x2ee6, 0x2ee7,
+ 0x2ee8, 0x2ee9, 0x2eea, 0x2eeb, 0x2eec, 0x2eed, 0x2eee, 0x2eef,
+ 0x2ef0, 0x2ef1, 0x2ef2, 0x9f9f, 0x2ef4, 0x2ef5, 0x2ef6, 0x2ef7,
+ 0x2ef8, 0x2ef9, 0x2efa, 0x2efb, 0x2efc, 0x2efd, 0x2efe, 0x2eff,
+ };
+
+static const unsigned short gNormalizeTable2f00[] = {
+ /* U+2f00 */
+ 0x4e00, 0x4e28, 0x4e36, 0x4e3f, 0x4e59, 0x4e85, 0x4e8c, 0x4ea0,
+ 0x4eba, 0x513f, 0x5165, 0x516b, 0x5182, 0x5196, 0x51ab, 0x51e0,
+ 0x51f5, 0x5200, 0x529b, 0x52f9, 0x5315, 0x531a, 0x5338, 0x5341,
+ 0x535c, 0x5369, 0x5382, 0x53b6, 0x53c8, 0x53e3, 0x56d7, 0x571f,
+ 0x58eb, 0x5902, 0x590a, 0x5915, 0x5927, 0x5973, 0x5b50, 0x5b80,
+ 0x5bf8, 0x5c0f, 0x5c22, 0x5c38, 0x5c6e, 0x5c71, 0x5ddb, 0x5de5,
+ 0x5df1, 0x5dfe, 0x5e72, 0x5e7a, 0x5e7f, 0x5ef4, 0x5efe, 0x5f0b,
+ 0x5f13, 0x5f50, 0x5f61, 0x5f73, 0x5fc3, 0x6208, 0x6236, 0x624b,
+ };
+
+static const unsigned short gNormalizeTable2f40[] = {
+ /* U+2f40 */
+ 0x652f, 0x6534, 0x6587, 0x6597, 0x65a4, 0x65b9, 0x65e0, 0x65e5,
+ 0x66f0, 0x6708, 0x6728, 0x6b20, 0x6b62, 0x6b79, 0x6bb3, 0x6bcb,
+ 0x6bd4, 0x6bdb, 0x6c0f, 0x6c14, 0x6c34, 0x706b, 0x722a, 0x7236,
+ 0x723b, 0x723f, 0x7247, 0x7259, 0x725b, 0x72ac, 0x7384, 0x7389,
+ 0x74dc, 0x74e6, 0x7518, 0x751f, 0x7528, 0x7530, 0x758b, 0x7592,
+ 0x7676, 0x767d, 0x76ae, 0x76bf, 0x76ee, 0x77db, 0x77e2, 0x77f3,
+ 0x793a, 0x79b8, 0x79be, 0x7a74, 0x7acb, 0x7af9, 0x7c73, 0x7cf8,
+ 0x7f36, 0x7f51, 0x7f8a, 0x7fbd, 0x8001, 0x800c, 0x8012, 0x8033,
+ };
+
+static const unsigned short gNormalizeTable2f80[] = {
+ /* U+2f80 */
+ 0x807f, 0x8089, 0x81e3, 0x81ea, 0x81f3, 0x81fc, 0x820c, 0x821b,
+ 0x821f, 0x826e, 0x8272, 0x8278, 0x864d, 0x866b, 0x8840, 0x884c,
+ 0x8863, 0x897e, 0x898b, 0x89d2, 0x8a00, 0x8c37, 0x8c46, 0x8c55,
+ 0x8c78, 0x8c9d, 0x8d64, 0x8d70, 0x8db3, 0x8eab, 0x8eca, 0x8f9b,
+ 0x8fb0, 0x8fb5, 0x9091, 0x9149, 0x91c6, 0x91cc, 0x91d1, 0x9577,
+ 0x9580, 0x961c, 0x96b6, 0x96b9, 0x96e8, 0x9751, 0x975e, 0x9762,
+ 0x9769, 0x97cb, 0x97ed, 0x97f3, 0x9801, 0x98a8, 0x98db, 0x98df,
+ 0x9996, 0x9999, 0x99ac, 0x9aa8, 0x9ad8, 0x9adf, 0x9b25, 0x9b2f,
+ };
+
+static const unsigned short gNormalizeTable2fc0[] = {
+ /* U+2fc0 */
+ 0x9b32, 0x9b3c, 0x9b5a, 0x9ce5, 0x9e75, 0x9e7f, 0x9ea5, 0x9ebb,
+ 0x9ec3, 0x9ecd, 0x9ed1, 0x9ef9, 0x9efd, 0x9f0e, 0x9f13, 0x9f20,
+ 0x9f3b, 0x9f4a, 0x9f52, 0x9f8d, 0x9f9c, 0x9fa0, 0x2fd6, 0x2fd7,
+ 0x2fd8, 0x2fd9, 0x2fda, 0x2fdb, 0x2fdc, 0x2fdd, 0x2fde, 0x2fdf,
+ 0x2fe0, 0x2fe1, 0x2fe2, 0x2fe3, 0x2fe4, 0x2fe5, 0x2fe6, 0x2fe7,
+ 0x2fe8, 0x2fe9, 0x2fea, 0x2feb, 0x2fec, 0x2fed, 0x2fee, 0x2fef,
+ 0x2ff0, 0x2ff1, 0x2ff2, 0x2ff3, 0x2ff4, 0x2ff5, 0x2ff6, 0x2ff7,
+ 0x2ff8, 0x2ff9, 0x2ffa, 0x2ffb, 0x2ffc, 0x2ffd, 0x2ffe, 0x2fff,
+ };
+
+static const unsigned short gNormalizeTable3000[] = {
+ /* U+3000 */
+ 0x0020, 0x3001, 0x3002, 0x3003, 0x3004, 0x3005, 0x3006, 0x3007,
+ 0x3008, 0x3009, 0x300a, 0x300b, 0x300c, 0x300d, 0x300e, 0x300f,
+ 0x3010, 0x3011, 0x3012, 0x3013, 0x3014, 0x3015, 0x3016, 0x3017,
+ 0x3018, 0x3019, 0x301a, 0x301b, 0x301c, 0x301d, 0x301e, 0x301f,
+ 0x3020, 0x3021, 0x3022, 0x3023, 0x3024, 0x3025, 0x3026, 0x3027,
+ 0x3028, 0x3029, 0x302a, 0x302b, 0x302c, 0x302d, 0x302e, 0x302f,
+ 0x3030, 0x3031, 0x3032, 0x3033, 0x3034, 0x3035, 0x3012, 0x3037,
+ 0x5341, 0x5344, 0x5345, 0x303b, 0x303c, 0x303d, 0x303e, 0x303f,
+ };
+
+static const unsigned short gNormalizeTable3040[] = {
+ /* U+3040 */
+ 0x3040, 0x3041, 0x3042, 0x3043, 0x3044, 0x3045, 0x3046, 0x3047,
+ 0x3048, 0x3049, 0x304a, 0x304b, 0x304b, 0x304d, 0x304d, 0x304f,
+ 0x304f, 0x3051, 0x3051, 0x3053, 0x3053, 0x3055, 0x3055, 0x3057,
+ 0x3057, 0x3059, 0x3059, 0x305b, 0x305b, 0x305d, 0x305d, 0x305f,
+ 0x305f, 0x3061, 0x3061, 0x3063, 0x3064, 0x3064, 0x3066, 0x3066,
+ 0x3068, 0x3068, 0x306a, 0x306b, 0x306c, 0x306d, 0x306e, 0x306f,
+ 0x306f, 0x306f, 0x3072, 0x3072, 0x3072, 0x3075, 0x3075, 0x3075,
+ 0x3078, 0x3078, 0x3078, 0x307b, 0x307b, 0x307b, 0x307e, 0x307f,
+ };
+
+static const unsigned short gNormalizeTable3080[] = {
+ /* U+3080 */
+ 0x3080, 0x3081, 0x3082, 0x3083, 0x3084, 0x3085, 0x3086, 0x3087,
+ 0x3088, 0x3089, 0x308a, 0x308b, 0x308c, 0x308d, 0x308e, 0x308f,
+ 0x3090, 0x3091, 0x3092, 0x3093, 0x3046, 0x3095, 0x3096, 0x3097,
+ 0x3098, 0x3099, 0x309a, 0x0020, 0x0020, 0x309d, 0x309d, 0x3088,
+ 0x30a0, 0x30a1, 0x30a2, 0x30a3, 0x30a4, 0x30a5, 0x30a6, 0x30a7,
+ 0x30a8, 0x30a9, 0x30aa, 0x30ab, 0x30ab, 0x30ad, 0x30ad, 0x30af,
+ 0x30af, 0x30b1, 0x30b1, 0x30b3, 0x30b3, 0x30b5, 0x30b5, 0x30b7,
+ 0x30b7, 0x30b9, 0x30b9, 0x30bb, 0x30bb, 0x30bd, 0x30bd, 0x30bf,
+ };
+
+static const unsigned short gNormalizeTable30c0[] = {
+ /* U+30c0 */
+ 0x30bf, 0x30c1, 0x30c1, 0x30c3, 0x30c4, 0x30c4, 0x30c6, 0x30c6,
+ 0x30c8, 0x30c8, 0x30ca, 0x30cb, 0x30cc, 0x30cd, 0x30ce, 0x30cf,
+ 0x30cf, 0x30cf, 0x30d2, 0x30d2, 0x30d2, 0x30d5, 0x30d5, 0x30d5,
+ 0x30d8, 0x30d8, 0x30d8, 0x30db, 0x30db, 0x30db, 0x30de, 0x30df,
+ 0x30e0, 0x30e1, 0x30e2, 0x30e3, 0x30e4, 0x30e5, 0x30e6, 0x30e7,
+ 0x30e8, 0x30e9, 0x30ea, 0x30eb, 0x30ec, 0x30ed, 0x30ee, 0x30ef,
+ 0x30f0, 0x30f1, 0x30f2, 0x30f3, 0x30a6, 0x30f5, 0x30f6, 0x30ef,
+ 0x30f0, 0x30f1, 0x30f2, 0x30fb, 0x30fc, 0x30fd, 0x30fd, 0x30b3,
+ };
+
+static const unsigned short gNormalizeTable3100[] = {
+ /* U+3100 */
+ 0x3100, 0x3101, 0x3102, 0x3103, 0x3104, 0x3105, 0x3106, 0x3107,
+ 0x3108, 0x3109, 0x310a, 0x310b, 0x310c, 0x310d, 0x310e, 0x310f,
+ 0x3110, 0x3111, 0x3112, 0x3113, 0x3114, 0x3115, 0x3116, 0x3117,
+ 0x3118, 0x3119, 0x311a, 0x311b, 0x311c, 0x311d, 0x311e, 0x311f,
+ 0x3120, 0x3121, 0x3122, 0x3123, 0x3124, 0x3125, 0x3126, 0x3127,
+ 0x3128, 0x3129, 0x312a, 0x312b, 0x312c, 0x312d, 0x312e, 0x312f,
+ 0x3130, 0x1100, 0x1101, 0x11aa, 0x1102, 0x11ac, 0x11ad, 0x1103,
+ 0x1104, 0x1105, 0x11b0, 0x11b1, 0x11b2, 0x11b3, 0x11b4, 0x11b5,
+ };
+
+static const unsigned short gNormalizeTable3140[] = {
+ /* U+3140 */
+ 0x111a, 0x1106, 0x1107, 0x1108, 0x1121, 0x1109, 0x110a, 0x110b,
+ 0x110c, 0x110d, 0x110e, 0x110f, 0x1110, 0x1111, 0x1112, 0x1161,
+ 0x1162, 0x1163, 0x1164, 0x1165, 0x1166, 0x1167, 0x1168, 0x1169,
+ 0x116a, 0x116b, 0x116c, 0x116d, 0x116e, 0x116f, 0x1170, 0x1171,
+ 0x1172, 0x1173, 0x1174, 0x1175, 0x0020, 0x1114, 0x1115, 0x11c7,
+ 0x11c8, 0x11cc, 0x11ce, 0x11d3, 0x11d7, 0x11d9, 0x111c, 0x11dd,
+ 0x11df, 0x111d, 0x111e, 0x1120, 0x1122, 0x1123, 0x1127, 0x1129,
+ 0x112b, 0x112c, 0x112d, 0x112e, 0x112f, 0x1132, 0x1136, 0x1140,
+ };
+
+static const unsigned short gNormalizeTable3180[] = {
+ /* U+3180 */
+ 0x1147, 0x114c, 0x11f1, 0x11f2, 0x1157, 0x1158, 0x1159, 0x1184,
+ 0x1185, 0x1188, 0x1191, 0x1192, 0x1194, 0x119e, 0x11a1, 0x318f,
+ 0x3190, 0x3191, 0x4e00, 0x4e8c, 0x4e09, 0x56db, 0x4e0a, 0x4e2d,
+ 0x4e0b, 0x7532, 0x4e59, 0x4e19, 0x4e01, 0x5929, 0x5730, 0x4eba,
+ 0x31a0, 0x31a1, 0x31a2, 0x31a3, 0x31a4, 0x31a5, 0x31a6, 0x31a7,
+ 0x31a8, 0x31a9, 0x31aa, 0x31ab, 0x31ac, 0x31ad, 0x31ae, 0x31af,
+ 0x31b0, 0x31b1, 0x31b2, 0x31b3, 0x31b4, 0x31b5, 0x31b6, 0x31b7,
+ 0x31b8, 0x31b9, 0x31ba, 0x31bb, 0x31bc, 0x31bd, 0x31be, 0x31bf,
+ };
+
+static const unsigned short gNormalizeTable3200[] = {
+ /* U+3200 */
+ 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028,
+ 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028,
+ 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028,
+ 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x321f,
+ 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028,
+ 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028,
+ 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028,
+ 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0028,
+ };
+
+static const unsigned short gNormalizeTable3240[] = {
+ /* U+3240 */
+ 0x0028, 0x0028, 0x0028, 0x0028, 0x554f, 0x5e7c, 0x6587, 0x7b8f,
+ 0x3248, 0x3249, 0x324a, 0x324b, 0x324c, 0x324d, 0x324e, 0x324f,
+ 0x0070, 0x0032, 0x0032, 0x0032, 0x0032, 0x0032, 0x0032, 0x0032,
+ 0x0032, 0x0032, 0x0033, 0x0033, 0x0033, 0x0033, 0x0033, 0x0033,
+ 0x1100, 0x1102, 0x1103, 0x1105, 0x1106, 0x1107, 0x1109, 0x110b,
+ 0x110c, 0x110e, 0x110f, 0x1110, 0x1111, 0x1112, 0x1100, 0x1102,
+ 0x1103, 0x1105, 0x1106, 0x1107, 0x1109, 0x110b, 0x110c, 0x110e,
+ 0x110f, 0x1110, 0x1111, 0x1112, 0x110e, 0x110c, 0x110b, 0x327f,
+ };
+
+static const unsigned short gNormalizeTable3280[] = {
+ /* U+3280 */
+ 0x4e00, 0x4e8c, 0x4e09, 0x56db, 0x4e94, 0x516d, 0x4e03, 0x516b,
+ 0x4e5d, 0x5341, 0x6708, 0x706b, 0x6c34, 0x6728, 0x91d1, 0x571f,
+ 0x65e5, 0x682a, 0x6709, 0x793e, 0x540d, 0x7279, 0x8ca1, 0x795d,
+ 0x52b4, 0x79d8, 0x7537, 0x5973, 0x9069, 0x512a, 0x5370, 0x6ce8,
+ 0x9805, 0x4f11, 0x5199, 0x6b63, 0x4e0a, 0x4e2d, 0x4e0b, 0x5de6,
+ 0x53f3, 0x533b, 0x5b97, 0x5b66, 0x76e3, 0x4f01, 0x8cc7, 0x5354,
+ 0x591c, 0x0033, 0x0033, 0x0033, 0x0033, 0x0034, 0x0034, 0x0034,
+ 0x0034, 0x0034, 0x0034, 0x0034, 0x0034, 0x0034, 0x0034, 0x0035,
+ };
+
+static const unsigned short gNormalizeTable32c0[] = {
+ /* U+32c0 */
+ 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038,
+ 0x0039, 0x0031, 0x0031, 0x0031, 0x0068, 0x0065, 0x0065, 0x006c,
+ 0x30a2, 0x30a4, 0x30a6, 0x30a8, 0x30aa, 0x30ab, 0x30ad, 0x30af,
+ 0x30b1, 0x30b3, 0x30b5, 0x30b7, 0x30b9, 0x30bb, 0x30bd, 0x30bf,
+ 0x30c1, 0x30c4, 0x30c6, 0x30c8, 0x30ca, 0x30cb, 0x30cc, 0x30cd,
+ 0x30ce, 0x30cf, 0x30d2, 0x30d5, 0x30d8, 0x30db, 0x30de, 0x30df,
+ 0x30e0, 0x30e1, 0x30e2, 0x30e4, 0x30e6, 0x30e8, 0x30e9, 0x30ea,
+ 0x30eb, 0x30ec, 0x30ed, 0x30ef, 0x30f0, 0x30f1, 0x30f2, 0x32ff,
+ };
+
+static const unsigned short gNormalizeTable3300[] = {
+ /* U+3300 */
+ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a4, 0x30a4, 0x30a6, 0x30a8,
+ 0x30a8, 0x30aa, 0x30aa, 0x30ab, 0x30ab, 0x30ab, 0x30ab, 0x30ab,
+ 0x30ad, 0x30ad, 0x30ad, 0x30ad, 0x30ad, 0x30ad, 0x30ad, 0x30ad,
+ 0x30af, 0x30af, 0x30af, 0x30af, 0x30b1, 0x30b3, 0x30b3, 0x30b5,
+ 0x30b5, 0x30b7, 0x30bb, 0x30bb, 0x30bf, 0x30c6, 0x30c8, 0x30c8,
+ 0x30ca, 0x30ce, 0x30cf, 0x30cf, 0x30cf, 0x30cf, 0x30d2, 0x30d2,
+ 0x30d2, 0x30d2, 0x30d5, 0x30d5, 0x30d5, 0x30d5, 0x30d8, 0x30d8,
+ 0x30d8, 0x30d8, 0x30d8, 0x30d8, 0x30d8, 0x30db, 0x30db, 0x30db,
+ };
+
+static const unsigned short gNormalizeTable3340[] = {
+ /* U+3340 */
+ 0x30db, 0x30db, 0x30db, 0x30de, 0x30de, 0x30de, 0x30de, 0x30de,
+ 0x30df, 0x30df, 0x30df, 0x30e1, 0x30e1, 0x30e1, 0x30e4, 0x30e4,
+ 0x30e6, 0x30ea, 0x30ea, 0x30eb, 0x30eb, 0x30ec, 0x30ec, 0x30ef,
+ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+ 0x0038, 0x0039, 0x0031, 0x0031, 0x0031, 0x0031, 0x0031, 0x0031,
+ 0x0031, 0x0031, 0x0031, 0x0031, 0x0032, 0x0032, 0x0032, 0x0032,
+ 0x0032, 0x0068, 0x0064, 0x0061, 0x0062, 0x006f, 0x0070, 0x0064,
+ 0x0064, 0x0064, 0x0069, 0x5e73, 0x662d, 0x5927, 0x660e, 0x682a,
+ };
+
+static const unsigned short gNormalizeTable3380[] = {
+ /* U+3380 */
+ 0x0070, 0x006e, 0x03bc, 0x006d, 0x006b, 0x006b, 0x006d, 0x0067,
+ 0x0063, 0x006b, 0x0070, 0x006e, 0x03bc, 0x03bc, 0x006d, 0x006b,
+ 0x0068, 0x006b, 0x006d, 0x0067, 0x0074, 0x03bc, 0x006d, 0x0064,
+ 0x006b, 0x0066, 0x006e, 0x03bc, 0x006d, 0x0063, 0x006b, 0x006d,
+ 0x0063, 0x006d, 0x006b, 0x006d, 0x0063, 0x006d, 0x006b, 0x006d,
+ 0x006d, 0x0070, 0x006b, 0x006d, 0x0067, 0x0072, 0x0072, 0x0072,
+ 0x0070, 0x006e, 0x03bc, 0x006d, 0x0070, 0x006e, 0x03bc, 0x006d,
+ 0x006b, 0x006d, 0x0070, 0x006e, 0x03bc, 0x006d, 0x006b, 0x006d,
+ };
+
+static const unsigned short gNormalizeTable33c0[] = {
+ /* U+33c0 */
+ 0x006b, 0x006d, 0x0061, 0x0062, 0x0063, 0x0063, 0x0063, 0x0063,
+ 0x0064, 0x0067, 0x0068, 0x0068, 0x0069, 0x006b, 0x006b, 0x006b,
+ 0x006c, 0x006c, 0x006c, 0x006c, 0x006d, 0x006d, 0x006d, 0x0070,
+ 0x0070, 0x0070, 0x0070, 0x0073, 0x0073, 0x0077, 0x0076, 0x0061,
+ 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038,
+ 0x0039, 0x0031, 0x0031, 0x0031, 0x0031, 0x0031, 0x0031, 0x0031,
+ 0x0031, 0x0031, 0x0031, 0x0032, 0x0032, 0x0032, 0x0032, 0x0032,
+ 0x0032, 0x0032, 0x0032, 0x0032, 0x0032, 0x0033, 0x0033, 0x0067,
+ };
+
+static const unsigned short gNormalizeTablea640[] = {
+ /* U+a640 */
+ 0xa641, 0xa641, 0xa643, 0xa643, 0xa645, 0xa645, 0xa647, 0xa647,
+ 0xa649, 0xa649, 0xa64b, 0xa64b, 0xa64d, 0xa64d, 0xa64f, 0xa64f,
+ 0xa651, 0xa651, 0xa653, 0xa653, 0xa655, 0xa655, 0xa657, 0xa657,
+ 0xa659, 0xa659, 0xa65b, 0xa65b, 0xa65d, 0xa65d, 0xa65f, 0xa65f,
+ 0xa660, 0xa661, 0xa663, 0xa663, 0xa665, 0xa665, 0xa667, 0xa667,
+ 0xa669, 0xa669, 0xa66b, 0xa66b, 0xa66d, 0xa66d, 0xa66e, 0xa66f,
+ 0xa670, 0xa671, 0xa672, 0xa673, 0xa674, 0xa675, 0xa676, 0xa677,
+ 0xa678, 0xa679, 0xa67a, 0xa67b, 0xa67c, 0xa67d, 0xa67e, 0xa67f,
+ };
+
+static const unsigned short gNormalizeTablea680[] = {
+ /* U+a680 */
+ 0xa681, 0xa681, 0xa683, 0xa683, 0xa685, 0xa685, 0xa687, 0xa687,
+ 0xa689, 0xa689, 0xa68b, 0xa68b, 0xa68d, 0xa68d, 0xa68f, 0xa68f,
+ 0xa691, 0xa691, 0xa693, 0xa693, 0xa695, 0xa695, 0xa697, 0xa697,
+ 0xa698, 0xa699, 0xa69a, 0xa69b, 0xa69c, 0xa69d, 0xa69e, 0xa69f,
+ 0xa6a0, 0xa6a1, 0xa6a2, 0xa6a3, 0xa6a4, 0xa6a5, 0xa6a6, 0xa6a7,
+ 0xa6a8, 0xa6a9, 0xa6aa, 0xa6ab, 0xa6ac, 0xa6ad, 0xa6ae, 0xa6af,
+ 0xa6b0, 0xa6b1, 0xa6b2, 0xa6b3, 0xa6b4, 0xa6b5, 0xa6b6, 0xa6b7,
+ 0xa6b8, 0xa6b9, 0xa6ba, 0xa6bb, 0xa6bc, 0xa6bd, 0xa6be, 0xa6bf,
+ };
+
+static const unsigned short gNormalizeTablea700[] = {
+ /* U+a700 */
+ 0xa700, 0xa701, 0xa702, 0xa703, 0xa704, 0xa705, 0xa706, 0xa707,
+ 0xa708, 0xa709, 0xa70a, 0xa70b, 0xa70c, 0xa70d, 0xa70e, 0xa70f,
+ 0xa710, 0xa711, 0xa712, 0xa713, 0xa714, 0xa715, 0xa716, 0xa717,
+ 0xa718, 0xa719, 0xa71a, 0xa71b, 0xa71c, 0xa71d, 0xa71e, 0xa71f,
+ 0xa720, 0xa721, 0xa723, 0xa723, 0xa725, 0xa725, 0xa727, 0xa727,
+ 0xa729, 0xa729, 0xa72b, 0xa72b, 0xa72d, 0xa72d, 0xa72f, 0xa72f,
+ 0xa730, 0xa731, 0xa733, 0xa733, 0xa735, 0xa735, 0xa737, 0xa737,
+ 0xa739, 0xa739, 0xa73b, 0xa73b, 0xa73d, 0xa73d, 0xa73f, 0xa73f,
+ };
+
+static const unsigned short gNormalizeTablea740[] = {
+ /* U+a740 */
+ 0xa741, 0xa741, 0xa743, 0xa743, 0xa745, 0xa745, 0xa747, 0xa747,
+ 0xa749, 0xa749, 0xa74b, 0xa74b, 0xa74d, 0xa74d, 0xa74f, 0xa74f,
+ 0xa751, 0xa751, 0xa753, 0xa753, 0xa755, 0xa755, 0xa757, 0xa757,
+ 0xa759, 0xa759, 0xa75b, 0xa75b, 0xa75d, 0xa75d, 0xa75f, 0xa75f,
+ 0xa761, 0xa761, 0xa763, 0xa763, 0xa765, 0xa765, 0xa767, 0xa767,
+ 0xa769, 0xa769, 0xa76b, 0xa76b, 0xa76d, 0xa76d, 0xa76f, 0xa76f,
+ 0xa76f, 0xa771, 0xa772, 0xa773, 0xa774, 0xa775, 0xa776, 0xa777,
+ 0xa778, 0xa77a, 0xa77a, 0xa77c, 0xa77c, 0x1d79, 0xa77f, 0xa77f,
+ };
+
+static const unsigned short gNormalizeTablea780[] = {
+ /* U+a780 */
+ 0xa781, 0xa781, 0xa783, 0xa783, 0xa785, 0xa785, 0xa787, 0xa787,
+ 0xa788, 0xa789, 0xa78a, 0xa78c, 0xa78c, 0xa78d, 0xa78e, 0xa78f,
+ 0xa790, 0xa791, 0xa792, 0xa793, 0xa794, 0xa795, 0xa796, 0xa797,
+ 0xa798, 0xa799, 0xa79a, 0xa79b, 0xa79c, 0xa79d, 0xa79e, 0xa79f,
+ 0xa7a0, 0xa7a1, 0xa7a2, 0xa7a3, 0xa7a4, 0xa7a5, 0xa7a6, 0xa7a7,
+ 0xa7a8, 0xa7a9, 0xa7aa, 0xa7ab, 0xa7ac, 0xa7ad, 0xa7ae, 0xa7af,
+ 0xa7b0, 0xa7b1, 0xa7b2, 0xa7b3, 0xa7b4, 0xa7b5, 0xa7b6, 0xa7b7,
+ 0xa7b8, 0xa7b9, 0xa7ba, 0xa7bb, 0xa7bc, 0xa7bd, 0xa7be, 0xa7bf,
+ };
+
+static const unsigned short gNormalizeTablef900[] = {
+ /* U+f900 */
+ 0x8c48, 0x66f4, 0x8eca, 0x8cc8, 0x6ed1, 0x4e32, 0x53e5, 0x9f9c,
+ 0x9f9c, 0x5951, 0x91d1, 0x5587, 0x5948, 0x61f6, 0x7669, 0x7f85,
+ 0x863f, 0x87ba, 0x88f8, 0x908f, 0x6a02, 0x6d1b, 0x70d9, 0x73de,
+ 0x843d, 0x916a, 0x99f1, 0x4e82, 0x5375, 0x6b04, 0x721b, 0x862d,
+ 0x9e1e, 0x5d50, 0x6feb, 0x85cd, 0x8964, 0x62c9, 0x81d8, 0x881f,
+ 0x5eca, 0x6717, 0x6d6a, 0x72fc, 0x90ce, 0x4f86, 0x51b7, 0x52de,
+ 0x64c4, 0x6ad3, 0x7210, 0x76e7, 0x8001, 0x8606, 0x865c, 0x8def,
+ 0x9732, 0x9b6f, 0x9dfa, 0x788c, 0x797f, 0x7da0, 0x83c9, 0x9304,
+ };
+
+static const unsigned short gNormalizeTablef940[] = {
+ /* U+f940 */
+ 0x9e7f, 0x8ad6, 0x58df, 0x5f04, 0x7c60, 0x807e, 0x7262, 0x78ca,
+ 0x8cc2, 0x96f7, 0x58d8, 0x5c62, 0x6a13, 0x6dda, 0x6f0f, 0x7d2f,
+ 0x7e37, 0x964b, 0x52d2, 0x808b, 0x51dc, 0x51cc, 0x7a1c, 0x7dbe,
+ 0x83f1, 0x9675, 0x8b80, 0x62cf, 0x6a02, 0x8afe, 0x4e39, 0x5be7,
+ 0x6012, 0x7387, 0x7570, 0x5317, 0x78fb, 0x4fbf, 0x5fa9, 0x4e0d,
+ 0x6ccc, 0x6578, 0x7d22, 0x53c3, 0x585e, 0x7701, 0x8449, 0x8aaa,
+ 0x6bba, 0x8fb0, 0x6c88, 0x62fe, 0x82e5, 0x63a0, 0x7565, 0x4eae,
+ 0x5169, 0x51c9, 0x6881, 0x7ce7, 0x826f, 0x8ad2, 0x91cf, 0x52f5,
+ };
+
+static const unsigned short gNormalizeTablef980[] = {
+ /* U+f980 */
+ 0x5442, 0x5973, 0x5eec, 0x65c5, 0x6ffe, 0x792a, 0x95ad, 0x9a6a,
+ 0x9e97, 0x9ece, 0x529b, 0x66c6, 0x6b77, 0x8f62, 0x5e74, 0x6190,
+ 0x6200, 0x649a, 0x6f23, 0x7149, 0x7489, 0x79ca, 0x7df4, 0x806f,
+ 0x8f26, 0x84ee, 0x9023, 0x934a, 0x5217, 0x52a3, 0x54bd, 0x70c8,
+ 0x88c2, 0x8aaa, 0x5ec9, 0x5ff5, 0x637b, 0x6bae, 0x7c3e, 0x7375,
+ 0x4ee4, 0x56f9, 0x5be7, 0x5dba, 0x601c, 0x73b2, 0x7469, 0x7f9a,
+ 0x8046, 0x9234, 0x96f6, 0x9748, 0x9818, 0x4f8b, 0x79ae, 0x91b4,
+ 0x96b8, 0x60e1, 0x4e86, 0x50da, 0x5bee, 0x5c3f, 0x6599, 0x6a02,
+ };
+
+static const unsigned short gNormalizeTablef9c0[] = {
+ /* U+f9c0 */
+ 0x71ce, 0x7642, 0x84fc, 0x907c, 0x9f8d, 0x6688, 0x962e, 0x5289,
+ 0x677b, 0x67f3, 0x6d41, 0x6e9c, 0x7409, 0x7559, 0x786b, 0x7d10,
+ 0x985e, 0x516d, 0x622e, 0x9678, 0x502b, 0x5d19, 0x6dea, 0x8f2a,
+ 0x5f8b, 0x6144, 0x6817, 0x7387, 0x9686, 0x5229, 0x540f, 0x5c65,
+ 0x6613, 0x674e, 0x68a8, 0x6ce5, 0x7406, 0x75e2, 0x7f79, 0x88cf,
+ 0x88e1, 0x91cc, 0x96e2, 0x533f, 0x6eba, 0x541d, 0x71d0, 0x7498,
+ 0x85fa, 0x96a3, 0x9c57, 0x9e9f, 0x6797, 0x6dcb, 0x81e8, 0x7acb,
+ 0x7b20, 0x7c92, 0x72c0, 0x7099, 0x8b58, 0x4ec0, 0x8336, 0x523a,
+ };
+
+static const unsigned short gNormalizeTablefa00[] = {
+ /* U+fa00 */
+ 0x5207, 0x5ea6, 0x62d3, 0x7cd6, 0x5b85, 0x6d1e, 0x66b4, 0x8f3b,
+ 0x884c, 0x964d, 0x898b, 0x5ed3, 0x5140, 0x55c0, 0xfa0e, 0xfa0f,
+ 0x585a, 0xfa11, 0x6674, 0xfa13, 0xfa14, 0x51de, 0x732a, 0x76ca,
+ 0x793c, 0x795e, 0x7965, 0x798f, 0x9756, 0x7cbe, 0x7fbd, 0xfa1f,
+ 0x8612, 0xfa21, 0x8af8, 0xfa23, 0xfa24, 0x9038, 0x90fd, 0xfa27,
+ 0xfa28, 0xfa29, 0x98ef, 0x98fc, 0x9928, 0x9db4, 0xfa2e, 0xfa2f,
+ 0x4fae, 0x50e7, 0x514d, 0x52c9, 0x52e4, 0x5351, 0x559d, 0x5606,
+ 0x5668, 0x5840, 0x58a8, 0x5c64, 0x5c6e, 0x6094, 0x6168, 0x618e,
+ };
+
+static const unsigned short gNormalizeTablefa40[] = {
+ /* U+fa40 */
+ 0x61f2, 0x654f, 0x65e2, 0x6691, 0x6885, 0x6d77, 0x6e1a, 0x6f22,
+ 0x716e, 0x722b, 0x7422, 0x7891, 0x793e, 0x7949, 0x7948, 0x7950,
+ 0x7956, 0x795d, 0x798d, 0x798e, 0x7a40, 0x7a81, 0x7bc0, 0x7df4,
+ 0x7e09, 0x7e41, 0x7f72, 0x8005, 0x81ed, 0x8279, 0x8279, 0x8457,
+ 0x8910, 0x8996, 0x8b01, 0x8b39, 0x8cd3, 0x8d08, 0x8fb6, 0x9038,
+ 0x96e3, 0x97ff, 0x983b, 0x6075, 0xfa6c, 0x8218, 0xfa6e, 0xfa6f,
+ 0x4e26, 0x51b5, 0x5168, 0x4f80, 0x5145, 0x5180, 0x52c7, 0x52fa,
+ 0x559d, 0x5555, 0x5599, 0x55e2, 0x585a, 0x58b3, 0x5944, 0x5954,
+ };
+
+static const unsigned short gNormalizeTablefa80[] = {
+ /* U+fa80 */
+ 0x5a62, 0x5b28, 0x5ed2, 0x5ed9, 0x5f69, 0x5fad, 0x60d8, 0x614e,
+ 0x6108, 0x618e, 0x6160, 0x61f2, 0x6234, 0x63c4, 0x641c, 0x6452,
+ 0x6556, 0x6674, 0x6717, 0x671b, 0x6756, 0x6b79, 0x6bba, 0x6d41,
+ 0x6edb, 0x6ecb, 0x6f22, 0x701e, 0x716e, 0x77a7, 0x7235, 0x72af,
+ 0x732a, 0x7471, 0x7506, 0x753b, 0x761d, 0x761f, 0x76ca, 0x76db,
+ 0x76f4, 0x774a, 0x7740, 0x78cc, 0x7ab1, 0x7bc0, 0x7c7b, 0x7d5b,
+ 0x7df4, 0x7f3e, 0x8005, 0x8352, 0x83ef, 0x8779, 0x8941, 0x8986,
+ 0x8996, 0x8abf, 0x8af8, 0x8acb, 0x8b01, 0x8afe, 0x8aed, 0x8b39,
+ };
+
+static const unsigned short gNormalizeTablefac0[] = {
+ /* U+fac0 */
+ 0x8b8a, 0x8d08, 0x8f38, 0x9072, 0x9199, 0x9276, 0x967c, 0x96e3,
+ 0x9756, 0x97db, 0x97ff, 0x980b, 0x983b, 0x9b12, 0x9f9c, 0xfacf,
+ 0xfad0, 0xfad1, 0x3b9d, 0x4018, 0x4039, 0xfad5, 0xfad6, 0xfad7,
+ 0x9f43, 0x9f8e, 0xfada, 0xfadb, 0xfadc, 0xfadd, 0xfade, 0xfadf,
+ 0xfae0, 0xfae1, 0xfae2, 0xfae3, 0xfae4, 0xfae5, 0xfae6, 0xfae7,
+ 0xfae8, 0xfae9, 0xfaea, 0xfaeb, 0xfaec, 0xfaed, 0xfaee, 0xfaef,
+ 0xfaf0, 0xfaf1, 0xfaf2, 0xfaf3, 0xfaf4, 0xfaf5, 0xfaf6, 0xfaf7,
+ 0xfaf8, 0xfaf9, 0xfafa, 0xfafb, 0xfafc, 0xfafd, 0xfafe, 0xfaff,
+ };
+
+static const unsigned short gNormalizeTablefb00[] = {
+ /* U+fb00 */
+ 0x0066, 0x0066, 0x0066, 0x0066, 0x0066, 0x0073, 0x0073, 0xfb07,
+ 0xfb08, 0xfb09, 0xfb0a, 0xfb0b, 0xfb0c, 0xfb0d, 0xfb0e, 0xfb0f,
+ 0xfb10, 0xfb11, 0xfb12, 0x0574, 0x0574, 0x0574, 0x057e, 0x0574,
+ 0xfb18, 0xfb19, 0xfb1a, 0xfb1b, 0xfb1c, 0x05d9, 0xfb1e, 0x05f2,
+ 0x05e2, 0x05d0, 0x05d3, 0x05d4, 0x05db, 0x05dc, 0x05dd, 0x05e8,
+ 0x05ea, 0x002b, 0x05e9, 0x05e9, 0x05e9, 0x05e9, 0x05d0, 0x05d0,
+ 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0xfb37,
+ 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0xfb3d, 0x05de, 0xfb3f,
+ };
+
+static const unsigned short gNormalizeTablefb40[] = {
+ /* U+fb40 */
+ 0x05e0, 0x05e1, 0xfb42, 0x05e3, 0x05e4, 0xfb45, 0x05e6, 0x05e7,
+ 0x05e8, 0x05e9, 0x05ea, 0x05d5, 0x05d1, 0x05db, 0x05e4, 0x05d0,
+ 0x0671, 0x0671, 0x067b, 0x067b, 0x067b, 0x067b, 0x067e, 0x067e,
+ 0x067e, 0x067e, 0x0680, 0x0680, 0x0680, 0x0680, 0x067a, 0x067a,
+ 0x067a, 0x067a, 0x067f, 0x067f, 0x067f, 0x067f, 0x0679, 0x0679,
+ 0x0679, 0x0679, 0x06a4, 0x06a4, 0x06a4, 0x06a4, 0x06a6, 0x06a6,
+ 0x06a6, 0x06a6, 0x0684, 0x0684, 0x0684, 0x0684, 0x0683, 0x0683,
+ 0x0683, 0x0683, 0x0686, 0x0686, 0x0686, 0x0686, 0x0687, 0x0687,
+ };
+
+static const unsigned short gNormalizeTablefb80[] = {
+ /* U+fb80 */
+ 0x0687, 0x0687, 0x068d, 0x068d, 0x068c, 0x068c, 0x068e, 0x068e,
+ 0x0688, 0x0688, 0x0698, 0x0698, 0x0691, 0x0691, 0x06a9, 0x06a9,
+ 0x06a9, 0x06a9, 0x06af, 0x06af, 0x06af, 0x06af, 0x06b3, 0x06b3,
+ 0x06b3, 0x06b3, 0x06b1, 0x06b1, 0x06b1, 0x06b1, 0x06ba, 0x06ba,
+ 0x06bb, 0x06bb, 0x06bb, 0x06bb, 0x06d5, 0x06d5, 0x06c1, 0x06c1,
+ 0x06c1, 0x06c1, 0x06be, 0x06be, 0x06be, 0x06be, 0x06d2, 0x06d2,
+ 0x06d2, 0x06d2, 0xfbb2, 0xfbb3, 0xfbb4, 0xfbb5, 0xfbb6, 0xfbb7,
+ 0xfbb8, 0xfbb9, 0xfbba, 0xfbbb, 0xfbbc, 0xfbbd, 0xfbbe, 0xfbbf,
+ };
+
+static const unsigned short gNormalizeTablefbc0[] = {
+ /* U+fbc0 */
+ 0xfbc0, 0xfbc1, 0xfbc2, 0xfbc3, 0xfbc4, 0xfbc5, 0xfbc6, 0xfbc7,
+ 0xfbc8, 0xfbc9, 0xfbca, 0xfbcb, 0xfbcc, 0xfbcd, 0xfbce, 0xfbcf,
+ 0xfbd0, 0xfbd1, 0xfbd2, 0x06ad, 0x06ad, 0x06ad, 0x06ad, 0x06c7,
+ 0x06c7, 0x06c6, 0x06c6, 0x06c8, 0x06c8, 0x06c7, 0x06cb, 0x06cb,
+ 0x06c5, 0x06c5, 0x06c9, 0x06c9, 0x06d0, 0x06d0, 0x06d0, 0x06d0,
+ 0x0649, 0x0649, 0x064a, 0x064a, 0x064a, 0x064a, 0x064a, 0x064a,
+ 0x064a, 0x064a, 0x064a, 0x064a, 0x064a, 0x064a, 0x064a, 0x064a,
+ 0x064a, 0x064a, 0x064a, 0x064a, 0x06cc, 0x06cc, 0x06cc, 0x06cc,
+ };
+
+static const unsigned short gNormalizeTablefc00[] = {
+ /* U+fc00 */
+ 0x064a, 0x064a, 0x064a, 0x064a, 0x064a, 0x0628, 0x0628, 0x0628,
+ 0x0628, 0x0628, 0x0628, 0x062a, 0x062a, 0x062a, 0x062a, 0x062a,
+ 0x062a, 0x062b, 0x062b, 0x062b, 0x062b, 0x062c, 0x062c, 0x062d,
+ 0x062d, 0x062e, 0x062e, 0x062e, 0x0633, 0x0633, 0x0633, 0x0633,
+ 0x0635, 0x0635, 0x0636, 0x0636, 0x0636, 0x0636, 0x0637, 0x0637,
+ 0x0638, 0x0639, 0x0639, 0x063a, 0x063a, 0x0641, 0x0641, 0x0641,
+ 0x0641, 0x0641, 0x0641, 0x0642, 0x0642, 0x0642, 0x0642, 0x0643,
+ 0x0643, 0x0643, 0x0643, 0x0643, 0x0643, 0x0643, 0x0643, 0x0644,
+ };
+
+static const unsigned short gNormalizeTablefc40[] = {
+ /* U+fc40 */
+ 0x0644, 0x0644, 0x0644, 0x0644, 0x0644, 0x0645, 0x0645, 0x0645,
+ 0x0645, 0x0645, 0x0645, 0x0646, 0x0646, 0x0646, 0x0646, 0x0646,
+ 0x0646, 0x0647, 0x0647, 0x0647, 0x0647, 0x064a, 0x064a, 0x064a,
+ 0x064a, 0x064a, 0x064a, 0x0630, 0x0631, 0x0649, 0x0020, 0x0020,
+ 0x0020, 0x0020, 0x0020, 0x0020, 0x064a, 0x064a, 0x064a, 0x064a,
+ 0x064a, 0x064a, 0x0628, 0x0628, 0x0628, 0x0628, 0x0628, 0x0628,
+ 0x062a, 0x062a, 0x062a, 0x062a, 0x062a, 0x062a, 0x062b, 0x062b,
+ 0x062b, 0x062b, 0x062b, 0x062b, 0x0641, 0x0641, 0x0642, 0x0642,
+ };
+
+static const unsigned short gNormalizeTablefc80[] = {
+ /* U+fc80 */
+ 0x0643, 0x0643, 0x0643, 0x0643, 0x0643, 0x0644, 0x0644, 0x0644,
+ 0x0645, 0x0645, 0x0646, 0x0646, 0x0646, 0x0646, 0x0646, 0x0646,
+ 0x0649, 0x064a, 0x064a, 0x064a, 0x064a, 0x064a, 0x064a, 0x064a,
+ 0x064a, 0x064a, 0x064a, 0x064a, 0x0628, 0x0628, 0x0628, 0x0628,
+ 0x0628, 0x062a, 0x062a, 0x062a, 0x062a, 0x062a, 0x062b, 0x062c,
+ 0x062c, 0x062d, 0x062d, 0x062e, 0x062e, 0x0633, 0x0633, 0x0633,
+ 0x0633, 0x0635, 0x0635, 0x0635, 0x0636, 0x0636, 0x0636, 0x0636,
+ 0x0637, 0x0638, 0x0639, 0x0639, 0x063a, 0x063a, 0x0641, 0x0641,
+ };
+
+static const unsigned short gNormalizeTablefcc0[] = {
+ /* U+fcc0 */
+ 0x0641, 0x0641, 0x0642, 0x0642, 0x0643, 0x0643, 0x0643, 0x0643,
+ 0x0643, 0x0644, 0x0644, 0x0644, 0x0644, 0x0644, 0x0645, 0x0645,
+ 0x0645, 0x0645, 0x0646, 0x0646, 0x0646, 0x0646, 0x0646, 0x0647,
+ 0x0647, 0x0647, 0x064a, 0x064a, 0x064a, 0x064a, 0x064a, 0x064a,
+ 0x064a, 0x0628, 0x0628, 0x062a, 0x062a, 0x062b, 0x062b, 0x0633,
+ 0x0633, 0x0634, 0x0634, 0x0643, 0x0643, 0x0644, 0x0646, 0x0646,
+ 0x064a, 0x064a, 0x0640, 0x0640, 0x0640, 0x0637, 0x0637, 0x0639,
+ 0x0639, 0x063a, 0x063a, 0x0633, 0x0633, 0x0634, 0x0634, 0x062d,
+ };
+
+static const unsigned short gNormalizeTablefd00[] = {
+ /* U+fd00 */
+ 0x062d, 0x062c, 0x062c, 0x062e, 0x062e, 0x0635, 0x0635, 0x0636,
+ 0x0636, 0x0634, 0x0634, 0x0634, 0x0634, 0x0634, 0x0633, 0x0635,
+ 0x0636, 0x0637, 0x0637, 0x0639, 0x0639, 0x063a, 0x063a, 0x0633,
+ 0x0633, 0x0634, 0x0634, 0x062d, 0x062d, 0x062c, 0x062c, 0x062e,
+ 0x062e, 0x0635, 0x0635, 0x0636, 0x0636, 0x0634, 0x0634, 0x0634,
+ 0x0634, 0x0634, 0x0633, 0x0635, 0x0636, 0x0634, 0x0634, 0x0634,
+ 0x0634, 0x0633, 0x0634, 0x0637, 0x0633, 0x0633, 0x0633, 0x0634,
+ 0x0634, 0x0634, 0x0637, 0x0638, 0x0627, 0x0627, 0xfd3e, 0xfd3f,
+ };
+
+static const unsigned short gNormalizeTablefd40[] = {
+ /* U+fd40 */
+ 0xfd40, 0xfd41, 0xfd42, 0xfd43, 0xfd44, 0xfd45, 0xfd46, 0xfd47,
+ 0xfd48, 0xfd49, 0xfd4a, 0xfd4b, 0xfd4c, 0xfd4d, 0xfd4e, 0xfd4f,
+ 0x062a, 0x062a, 0x062a, 0x062a, 0x062a, 0x062a, 0x062a, 0x062a,
+ 0x062c, 0x062c, 0x062d, 0x062d, 0x0633, 0x0633, 0x0633, 0x0633,
+ 0x0633, 0x0633, 0x0633, 0x0633, 0x0635, 0x0635, 0x0635, 0x0634,
+ 0x0634, 0x0634, 0x0634, 0x0634, 0x0634, 0x0634, 0x0636, 0x0636,
+ 0x0636, 0x0637, 0x0637, 0x0637, 0x0637, 0x0639, 0x0639, 0x0639,
+ 0x0639, 0x063a, 0x063a, 0x063a, 0x0641, 0x0641, 0x0642, 0x0642,
+ };
+
+static const unsigned short gNormalizeTablefd80[] = {
+ /* U+fd80 */
+ 0x0644, 0x0644, 0x0644, 0x0644, 0x0644, 0x0644, 0x0644, 0x0644,
+ 0x0644, 0x0645, 0x0645, 0x0645, 0x0645, 0x0645, 0x0645, 0x0645,
+ 0xfd90, 0xfd91, 0x0645, 0x0647, 0x0647, 0x0646, 0x0646, 0x0646,
+ 0x0646, 0x0646, 0x0646, 0x0646, 0x064a, 0x064a, 0x0628, 0x062a,
+ 0x062a, 0x062a, 0x062a, 0x062a, 0x062a, 0x062c, 0x062c, 0x062c,
+ 0x0633, 0x0635, 0x0634, 0x0636, 0x0644, 0x0644, 0x064a, 0x064a,
+ 0x064a, 0x0645, 0x0642, 0x0646, 0x0642, 0x0644, 0x0639, 0x0643,
+ 0x0646, 0x0645, 0x0644, 0x0643, 0x0644, 0x0646, 0x062c, 0x062d,
+ };
+
+static const unsigned short gNormalizeTablefdc0[] = {
+ /* U+fdc0 */
+ 0x0645, 0x0641, 0x0628, 0x0643, 0x0639, 0x0635, 0x0633, 0x0646,
+ 0xfdc8, 0xfdc9, 0xfdca, 0xfdcb, 0xfdcc, 0xfdcd, 0xfdce, 0xfdcf,
+ 0xfdd0, 0xfdd1, 0xfdd2, 0xfdd3, 0xfdd4, 0xfdd5, 0xfdd6, 0xfdd7,
+ 0xfdd8, 0xfdd9, 0xfdda, 0xfddb, 0xfddc, 0xfddd, 0xfdde, 0xfddf,
+ 0xfde0, 0xfde1, 0xfde2, 0xfde3, 0xfde4, 0xfde5, 0xfde6, 0xfde7,
+ 0xfde8, 0xfde9, 0xfdea, 0xfdeb, 0xfdec, 0xfded, 0xfdee, 0xfdef,
+ 0x0635, 0x0642, 0x0627, 0x0627, 0x0645, 0x0635, 0x0631, 0x0639,
+ 0x0648, 0x0635, 0x0635, 0x062c, 0x0631, 0xfdfd, 0xfdfe, 0xfdff,
+ };
+
+static const unsigned short gNormalizeTablefe00[] = {
+ /* U+fe00 */
+ 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020,
+ 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020,
+ 0x002c, 0x3001, 0x3002, 0x003a, 0x003b, 0x0021, 0x003f, 0x3016,
+ 0x3017, 0x002e, 0xfe1a, 0xfe1b, 0xfe1c, 0xfe1d, 0xfe1e, 0xfe1f,
+ 0xfe20, 0xfe21, 0xfe22, 0xfe23, 0xfe24, 0xfe25, 0xfe26, 0xfe27,
+ 0xfe28, 0xfe29, 0xfe2a, 0xfe2b, 0xfe2c, 0xfe2d, 0xfe2e, 0xfe2f,
+ 0x002e, 0x2014, 0x2013, 0x005f, 0x005f, 0x0028, 0x0029, 0x007b,
+ 0x007d, 0x3014, 0x3015, 0x3010, 0x3011, 0x300a, 0x300b, 0x3008,
+ };
+
+static const unsigned short gNormalizeTablefe40[] = {
+ /* U+fe40 */
+ 0x3009, 0x300c, 0x300d, 0x300e, 0x300f, 0xfe45, 0xfe46, 0x005b,
+ 0x005d, 0x0020, 0x0020, 0x0020, 0x0020, 0x005f, 0x005f, 0x005f,
+ 0x002c, 0x3001, 0x002e, 0xfe53, 0x003b, 0x003a, 0x003f, 0x0021,
+ 0x2014, 0x0028, 0x0029, 0x007b, 0x007d, 0x3014, 0x3015, 0x0023,
+ 0x0026, 0x002a, 0x002b, 0x002d, 0x003c, 0x003e, 0x003d, 0xfe67,
+ 0x005c, 0x0024, 0x0025, 0x0040, 0xfe6c, 0xfe6d, 0xfe6e, 0xfe6f,
+ 0x0020, 0x0640, 0x0020, 0xfe73, 0x0020, 0xfe75, 0x0020, 0x0640,
+ 0x0020, 0x0640, 0x0020, 0x0640, 0x0020, 0x0640, 0x0020, 0x0640,
+ };
+
+static const unsigned short gNormalizeTablefe80[] = {
+ /* U+fe80 */
+ 0x0621, 0x0627, 0x0627, 0x0627, 0x0627, 0x0648, 0x0648, 0x0627,
+ 0x0627, 0x064a, 0x064a, 0x064a, 0x064a, 0x0627, 0x0627, 0x0628,
+ 0x0628, 0x0628, 0x0628, 0x0629, 0x0629, 0x062a, 0x062a, 0x062a,
+ 0x062a, 0x062b, 0x062b, 0x062b, 0x062b, 0x062c, 0x062c, 0x062c,
+ 0x062c, 0x062d, 0x062d, 0x062d, 0x062d, 0x062e, 0x062e, 0x062e,
+ 0x062e, 0x062f, 0x062f, 0x0630, 0x0630, 0x0631, 0x0631, 0x0632,
+ 0x0632, 0x0633, 0x0633, 0x0633, 0x0633, 0x0634, 0x0634, 0x0634,
+ 0x0634, 0x0635, 0x0635, 0x0635, 0x0635, 0x0636, 0x0636, 0x0636,
+ };
+
+static const unsigned short gNormalizeTablefec0[] = {
+ /* U+fec0 */
+ 0x0636, 0x0637, 0x0637, 0x0637, 0x0637, 0x0638, 0x0638, 0x0638,
+ 0x0638, 0x0639, 0x0639, 0x0639, 0x0639, 0x063a, 0x063a, 0x063a,
+ 0x063a, 0x0641, 0x0641, 0x0641, 0x0641, 0x0642, 0x0642, 0x0642,
+ 0x0642, 0x0643, 0x0643, 0x0643, 0x0643, 0x0644, 0x0644, 0x0644,
+ 0x0644, 0x0645, 0x0645, 0x0645, 0x0645, 0x0646, 0x0646, 0x0646,
+ 0x0646, 0x0647, 0x0647, 0x0647, 0x0647, 0x0648, 0x0648, 0x0649,
+ 0x0649, 0x064a, 0x064a, 0x064a, 0x064a, 0x0644, 0x0644, 0x0644,
+ 0x0644, 0x0644, 0x0644, 0x0644, 0x0644, 0xfefd, 0xfefe, 0x0020,
+ };
+
+static const unsigned short gNormalizeTableff00[] = {
+ /* U+ff00 */
+ 0xff00, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+ 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
+ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+ 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
+ 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
+ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+ 0x0078, 0x0079, 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
+ };
+
+static const unsigned short gNormalizeTableff40[] = {
+ /* U+ff40 */
+ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
+ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+ 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x2985,
+ 0x2986, 0x3002, 0x300c, 0x300d, 0x3001, 0x30fb, 0x30f2, 0x30a1,
+ 0x30a3, 0x30a5, 0x30a7, 0x30a9, 0x30e3, 0x30e5, 0x30e7, 0x30c3,
+ 0x30fc, 0x30a2, 0x30a4, 0x30a6, 0x30a8, 0x30aa, 0x30ab, 0x30ad,
+ 0x30af, 0x30b1, 0x30b3, 0x30b5, 0x30b7, 0x30b9, 0x30bb, 0x30bd,
+ };
+
+static const unsigned short gNormalizeTableff80[] = {
+ /* U+ff80 */
+ 0x30bf, 0x30c1, 0x30c4, 0x30c6, 0x30c8, 0x30ca, 0x30cb, 0x30cc,
+ 0x30cd, 0x30ce, 0x30cf, 0x30d2, 0x30d5, 0x30d8, 0x30db, 0x30de,
+ 0x30df, 0x30e0, 0x30e1, 0x30e2, 0x30e4, 0x30e6, 0x30e8, 0x30e9,
+ 0x30ea, 0x30eb, 0x30ec, 0x30ed, 0x30ef, 0x30f3, 0x3099, 0x309a,
+ 0x0020, 0x1100, 0x1101, 0x11aa, 0x1102, 0x11ac, 0x11ad, 0x1103,
+ 0x1104, 0x1105, 0x11b0, 0x11b1, 0x11b2, 0x11b3, 0x11b4, 0x11b5,
+ 0x111a, 0x1106, 0x1107, 0x1108, 0x1121, 0x1109, 0x110a, 0x110b,
+ 0x110c, 0x110d, 0x110e, 0x110f, 0x1110, 0x1111, 0x1112, 0xffbf,
+ };
+
+static const unsigned short gNormalizeTableffc0[] = {
+ /* U+ffc0 */
+ 0xffc0, 0xffc1, 0x1161, 0x1162, 0x1163, 0x1164, 0x1165, 0x1166,
+ 0xffc8, 0xffc9, 0x1167, 0x1168, 0x1169, 0x116a, 0x116b, 0x116c,
+ 0xffd0, 0xffd1, 0x116d, 0x116e, 0x116f, 0x1170, 0x1171, 0x1172,
+ 0xffd8, 0xffd9, 0x1173, 0x1174, 0x1175, 0xffdd, 0xffde, 0xffdf,
+ 0x00a2, 0x00a3, 0x00ac, 0x0020, 0x00a6, 0x00a5, 0x20a9, 0xffe7,
+ 0x2502, 0x2190, 0x2191, 0x2192, 0x2193, 0x25a0, 0x25cb, 0xffef,
+ 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020,
+ 0x0020, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff,
+ };
+
+static const unsigned short* gNormalizeTable[] = {
+ 0, gNormalizeTable0040, gNormalizeTable0080, gNormalizeTable00c0,
+ gNormalizeTable0100, gNormalizeTable0140, gNormalizeTable0180, gNormalizeTable01c0,
+ gNormalizeTable0200, gNormalizeTable0240, gNormalizeTable0280, gNormalizeTable02c0,
+ 0, gNormalizeTable0340, gNormalizeTable0380, gNormalizeTable03c0,
+ gNormalizeTable0400, gNormalizeTable0440, gNormalizeTable0480, gNormalizeTable04c0,
+ gNormalizeTable0500, gNormalizeTable0540, gNormalizeTable0580, 0,
+ gNormalizeTable0600, gNormalizeTable0640, 0, gNormalizeTable06c0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ gNormalizeTable0900, gNormalizeTable0940, 0, gNormalizeTable09c0,
+ gNormalizeTable0a00, gNormalizeTable0a40, 0, 0,
+ 0, gNormalizeTable0b40, gNormalizeTable0b80, gNormalizeTable0bc0,
+ 0, gNormalizeTable0c40, 0, gNormalizeTable0cc0,
+ 0, gNormalizeTable0d40, 0, gNormalizeTable0dc0,
+ gNormalizeTable0e00, 0, gNormalizeTable0e80, gNormalizeTable0ec0,
+ gNormalizeTable0f00, gNormalizeTable0f40, gNormalizeTable0f80, 0,
+ gNormalizeTable1000, 0, gNormalizeTable1080, gNormalizeTable10c0,
+ 0, gNormalizeTable1140, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, gNormalizeTable1780, 0,
+ gNormalizeTable1800, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ gNormalizeTable1b00, gNormalizeTable1b40, 0, 0,
+ 0, 0, 0, 0,
+ gNormalizeTable1d00, gNormalizeTable1d40, gNormalizeTable1d80, 0,
+ gNormalizeTable1e00, gNormalizeTable1e40, gNormalizeTable1e80, gNormalizeTable1ec0,
+ gNormalizeTable1f00, gNormalizeTable1f40, gNormalizeTable1f80, gNormalizeTable1fc0,
+ gNormalizeTable2000, gNormalizeTable2040, gNormalizeTable2080, 0,
+ gNormalizeTable2100, gNormalizeTable2140, gNormalizeTable2180, gNormalizeTable21c0,
+ gNormalizeTable2200, gNormalizeTable2240, gNormalizeTable2280, gNormalizeTable22c0,
+ gNormalizeTable2300, 0, 0, 0,
+ 0, gNormalizeTable2440, gNormalizeTable2480, gNormalizeTable24c0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ gNormalizeTable2a00, gNormalizeTable2a40, 0, gNormalizeTable2ac0,
+ 0, 0, 0, 0,
+ gNormalizeTable2c00, gNormalizeTable2c40, gNormalizeTable2c80, gNormalizeTable2cc0,
+ 0, gNormalizeTable2d40, 0, 0,
+ 0, 0, gNormalizeTable2e80, gNormalizeTable2ec0,
+ gNormalizeTable2f00, gNormalizeTable2f40, gNormalizeTable2f80, gNormalizeTable2fc0,
+ gNormalizeTable3000, gNormalizeTable3040, gNormalizeTable3080, gNormalizeTable30c0,
+ gNormalizeTable3100, gNormalizeTable3140, gNormalizeTable3180, 0,
+ gNormalizeTable3200, gNormalizeTable3240, gNormalizeTable3280, gNormalizeTable32c0,
+ gNormalizeTable3300, gNormalizeTable3340, gNormalizeTable3380, gNormalizeTable33c0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, gNormalizeTablea640, gNormalizeTablea680, 0,
+ gNormalizeTablea700, gNormalizeTablea740, gNormalizeTablea780, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ gNormalizeTablef900, gNormalizeTablef940, gNormalizeTablef980, gNormalizeTablef9c0,
+ gNormalizeTablefa00, gNormalizeTablefa40, gNormalizeTablefa80, gNormalizeTablefac0,
+ gNormalizeTablefb00, gNormalizeTablefb40, gNormalizeTablefb80, gNormalizeTablefbc0,
+ gNormalizeTablefc00, gNormalizeTablefc40, gNormalizeTablefc80, gNormalizeTablefcc0,
+ gNormalizeTablefd00, gNormalizeTablefd40, gNormalizeTablefd80, gNormalizeTablefdc0,
+ gNormalizeTablefe00, gNormalizeTablefe40, gNormalizeTablefe80, gNormalizeTablefec0,
+ gNormalizeTableff00, gNormalizeTableff40, gNormalizeTableff80, gNormalizeTableffc0,
+};
+
+unsigned int normalize_character(const unsigned int c)
+{
+ if (c >= 0x10000 || !gNormalizeTable[c >> 6])
+ return c;
+ return gNormalizeTable[c >> 6][c & 0x3f];
+}
+
diff --git a/mailnews/extensions/fts3/src/README.mozilla b/mailnews/extensions/fts3/src/README.mozilla
new file mode 100644
index 000000000..0bfe7deb3
--- /dev/null
+++ b/mailnews/extensions/fts3/src/README.mozilla
@@ -0,0 +1,3 @@
+fts3_porter.c code is from SQLite3.
+
+This customized tokenizer "mozporter" by Mozilla supports CJK indexing using bi-gram. So you have to use bi-gram search string if you wanto to search CJK character.
diff --git a/mailnews/extensions/fts3/src/fts3_porter.c b/mailnews/extensions/fts3/src/fts3_porter.c
new file mode 100644
index 000000000..2276244c1
--- /dev/null
+++ b/mailnews/extensions/fts3/src/fts3_porter.c
@@ -0,0 +1,1150 @@
+/*
+** 2006 September 30
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+*************************************************************************
+** Implementation of the full-text-search tokenizer that implements
+** a Porter stemmer.
+**
+*/
+
+/*
+ * This file is based on the SQLite FTS3 Porter Stemmer implementation.
+ *
+ * This is an attempt to provide some level of full-text search to users of
+ * Thunderbird who use languages that are not space/punctuation delimited.
+ * This is accomplished by performing bi-gram indexing of characters fall
+ * into the unicode space occupied by character sets used in such languages.
+ *
+ * Bi-gram indexing means that given the string "12345" we would index the
+ * pairs "12", "23", "34", and "45" (with position information). We do this
+ * because we are not sure where the word/semantic boundaries are in that
+ * string. Then, when a user searches for "234" the FTS3 engine tokenizes the
+ * search query into "23" and "34". Using special phrase-logic FTS3 requires
+ * the matches to have the tokens "23" and "34" adjacent to each other and in
+ * that order. In theory if the user searched for "2345" we we could just
+ * search for "23 NEAR/2 34". Unfortunately, NEAR does not imply ordering,
+ * so even though that would be more efficient, we would lose correctness
+ * and cannot do it.
+ *
+ * The efficiency and usability of bi-gram search assumes that the character
+ * space is large enough and actually observed bi-grams sufficiently
+ * distributed throughout the potential space so that the search bi-grams
+ * generated when the user issues a query find a 'reasonable' number of
+ * documents for each bi-gram match.
+ *
+ * Mozilla contributors:
+ * Makoto Kato <m_kato@ga2.so-net.ne.jp>
+ * Andrew Sutherland <asutherland@asutherland.org>
+ */
+
+/*
+** The code in this file is only compiled if:
+**
+** * The FTS3 module is being built as an extension
+** (in which case SQLITE_CORE is not defined), or
+**
+** * The FTS3 module is being built into the core of
+** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
+*/
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "fts3_tokenizer.h"
+
+/* need some defined to compile without sqlite3 code */
+
+#define sqlite3_malloc malloc
+#define sqlite3_free free
+#define sqlite3_realloc realloc
+
+static const unsigned char sqlite3Utf8Trans1[] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
+};
+
+typedef unsigned char u8;
+
+/**
+ * SQLite helper macro from sqlite3.c (really utf.c) to encode a unicode
+ * character into utf8.
+ *
+ * @param zOut A pointer to the current write position that is updated by
+ * the routine. At entry it should point to one-past the last valid
+ * encoded byte. The same holds true at exit.
+ * @param c The character to encode; this should be an unsigned int.
+ */
+#define WRITE_UTF8(zOut, c) { \
+ if( c<0x0080 ){ \
+ *zOut++ = (u8)(c&0xff); \
+ } \
+ else if( c<0x0800 ){ \
+ *zOut++ = 0xC0 + (u8)((c>>6) & 0x1F); \
+ *zOut++ = 0x80 + (u8)(c & 0x3F); \
+ } \
+ else if( c<0x10000 ){ \
+ *zOut++ = 0xE0 + (u8)((c>>12) & 0x0F); \
+ *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \
+ *zOut++ = 0x80 + (u8)(c & 0x3F); \
+ }else{ \
+ *zOut++ = 0xf0 + (u8)((c>>18) & 0x07); \
+ *zOut++ = 0x80 + (u8)((c>>12) & 0x3F); \
+ *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \
+ *zOut++ = 0x80 + (u8)(c & 0x3F); \
+ } \
+}
+
+/**
+ * Fudge factor to avoid buffer overwrites when WRITE_UTF8 is involved.
+ *
+ * Our normalization table includes entries that may result in a larger
+ * utf-8 encoding. Namely, 023a maps to 2c65. This is a growth from 2 bytes
+ * as utf-8 encoded to 3 bytes. This is currently the only transition possible
+ * because 1-byte encodings are known to stay 1-byte and our normalization
+ * table is 16-bit and so can't generate a 4-byte encoded output.
+ *
+ * For simplicity, we just multiple by 2 which covers the current case and
+ * potential growth for 2-byte to 4-byte growth. We can afford to do this
+ * because we're not talking about a lot of memory here as a rule.
+ */
+#define MAX_UTF8_GROWTH_FACTOR 2
+
+/**
+ * Helper from sqlite3.c to read a single UTF8 character.
+ *
+ * The clever bit with multi-byte reading is that you keep going until you find
+ * a byte whose top bits are not '10'. A single-byte UTF8 character will have
+ * '00' or '01', and a multi-byte UTF8 character must start with '11'.
+ *
+ * In the event of illegal UTF-8 this macro may read an arbitrary number of
+ * characters but will never read past zTerm. The resulting character value
+ * of illegal UTF-8 can be anything, although efforts are made to return the
+ * illegal character (0xfffd) for UTF-16 surrogates.
+ *
+ * @param zIn A pointer to the current position that is updated by the routine,
+ * pointing at the start of the next character when the routine returns.
+ * @param zTerm A pointer one past the end of the buffer.
+ * @param c The 'unsigned int' to hold the resulting character value. Do not
+ * use a short or a char.
+ */
+#define READ_UTF8(zIn, zTerm, c) { \
+ c = *(zIn++); \
+ if( c>=0xc0 ){ \
+ c = sqlite3Utf8Trans1[c-0xc0]; \
+ while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \
+ c = (c<<6) + (0x3f & *(zIn++)); \
+ } \
+ if( c<0x80 \
+ || (c&0xFFFFF800)==0xD800 \
+ || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \
+ } \
+}
+
+/* end of compatible block to complie codes */
+
+/*
+** Class derived from sqlite3_tokenizer
+*/
+typedef struct porter_tokenizer {
+ sqlite3_tokenizer base; /* Base class */
+} porter_tokenizer;
+
+/*
+** Class derived from sqlit3_tokenizer_cursor
+*/
+typedef struct porter_tokenizer_cursor {
+ sqlite3_tokenizer_cursor base;
+ const char *zInput; /* input we are tokenizing */
+ int nInput; /* size of the input */
+ int iOffset; /* current position in zInput */
+ int iToken; /* index of next token to be returned */
+ unsigned char *zToken; /* storage for current token */
+ int nAllocated; /* space allocated to zToken buffer */
+ /**
+ * Store the offset of the second character in the bi-gram pair that we just
+ * emitted so that we can consider it being the first character in a bi-gram
+ * pair.
+ * The value 0 indicates that there is no previous such character. This is
+ * an acceptable sentinel value because the 0th offset can never be the
+ * offset of the second in a bi-gram pair.
+ *
+ * For example, let us say we are tokenizing a string of 4 CJK characters
+ * represented by the byte-string "11223344" where each repeated digit
+ * indicates 2-bytes of storage used to encode the character in UTF-8.
+ * (It actually takes 3, btw.) Then on the passes to emit each token,
+ * the iOffset and iPrevGigramOffset values at entry will be:
+ *
+ * 1122: iOffset = 0, iPrevBigramOffset = 0
+ * 2233: iOffset = 4, iPrevBigramOffset = 2
+ * 3344: iOffset = 6, iPrevBigramOffset = 4
+ * (nothing will be emitted): iOffset = 8, iPrevBigramOffset = 6
+ */
+ int iPrevBigramOffset; /* previous result was bi-gram */
+} porter_tokenizer_cursor;
+
+
+/* Forward declaration */
+static const sqlite3_tokenizer_module porterTokenizerModule;
+
+/* from normalize.c */
+extern unsigned int normalize_character(const unsigned int c);
+
+/*
+** Create a new tokenizer instance.
+*/
+static int porterCreate(
+ int argc, const char * const *argv,
+ sqlite3_tokenizer **ppTokenizer
+){
+ porter_tokenizer *t;
+ t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t));
+ if( t==NULL ) return SQLITE_NOMEM;
+ memset(t, 0, sizeof(*t));
+ *ppTokenizer = &t->base;
+ return SQLITE_OK;
+}
+
+/*
+** Destroy a tokenizer
+*/
+static int porterDestroy(sqlite3_tokenizer *pTokenizer){
+ sqlite3_free(pTokenizer);
+ return SQLITE_OK;
+}
+
+/*
+** Prepare to begin tokenizing a particular string. The input
+** string to be tokenized is zInput[0..nInput-1]. A cursor
+** used to incrementally tokenize this string is returned in
+** *ppCursor.
+*/
+static int porterOpen(
+ sqlite3_tokenizer *pTokenizer, /* The tokenizer */
+ const char *zInput, int nInput, /* String to be tokenized */
+ sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
+){
+ porter_tokenizer_cursor *c;
+
+ c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
+ if( c==NULL ) return SQLITE_NOMEM;
+
+ c->zInput = zInput;
+ if( zInput==0 ){
+ c->nInput = 0;
+ }else if( nInput<0 ){
+ c->nInput = (int)strlen(zInput);
+ }else{
+ c->nInput = nInput;
+ }
+ c->iOffset = 0; /* start tokenizing at the beginning */
+ c->iToken = 0;
+ c->zToken = NULL; /* no space allocated, yet. */
+ c->nAllocated = 0;
+ c->iPrevBigramOffset = 0;
+
+ *ppCursor = &c->base;
+ return SQLITE_OK;
+}
+
+/*
+** Close a tokenization cursor previously opened by a call to
+** porterOpen() above.
+*/
+static int porterClose(sqlite3_tokenizer_cursor *pCursor){
+ porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
+ sqlite3_free(c->zToken);
+ sqlite3_free(c);
+ return SQLITE_OK;
+}
+/*
+** Vowel or consonant
+*/
+static const char cType[] = {
+ 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
+ 1, 1, 1, 2, 1
+};
+
+/*
+** isConsonant() and isVowel() determine if their first character in
+** the string they point to is a consonant or a vowel, according
+** to Porter ruls.
+**
+** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'.
+** 'Y' is a consonant unless it follows another consonant,
+** in which case it is a vowel.
+**
+** In these routine, the letters are in reverse order. So the 'y' rule
+** is that 'y' is a consonant unless it is followed by another
+** consonent.
+*/
+static int isVowel(const char*);
+static int isConsonant(const char *z){
+ int j;
+ char x = *z;
+ if( x==0 ) return 0;
+ assert( x>='a' && x<='z' );
+ j = cType[x-'a'];
+ if( j<2 ) return j;
+ return z[1]==0 || isVowel(z + 1);
+}
+static int isVowel(const char *z){
+ int j;
+ char x = *z;
+ if( x==0 ) return 0;
+ assert( x>='a' && x<='z' );
+ j = cType[x-'a'];
+ if( j<2 ) return 1-j;
+ return isConsonant(z + 1);
+}
+
+/*
+** Let any sequence of one or more vowels be represented by V and let
+** C be sequence of one or more consonants. Then every word can be
+** represented as:
+**
+** [C] (VC){m} [V]
+**
+** In prose: A word is an optional consonant followed by zero or
+** vowel-consonant pairs followed by an optional vowel. "m" is the
+** number of vowel consonant pairs. This routine computes the value
+** of m for the first i bytes of a word.
+**
+** Return true if the m-value for z is 1 or more. In other words,
+** return true if z contains at least one vowel that is followed
+** by a consonant.
+**
+** In this routine z[] is in reverse order. So we are really looking
+** for an instance of of a consonant followed by a vowel.
+*/
+static int m_gt_0(const char *z){
+ while( isVowel(z) ){ z++; }
+ if( *z==0 ) return 0;
+ while( isConsonant(z) ){ z++; }
+ return *z!=0;
+}
+
+/* Like mgt0 above except we are looking for a value of m which is
+** exactly 1
+*/
+static int m_eq_1(const char *z){
+ while( isVowel(z) ){ z++; }
+ if( *z==0 ) return 0;
+ while( isConsonant(z) ){ z++; }
+ if( *z==0 ) return 0;
+ while( isVowel(z) ){ z++; }
+ if( *z==0 ) return 1;
+ while( isConsonant(z) ){ z++; }
+ return *z==0;
+}
+
+/* Like mgt0 above except we are looking for a value of m>1 instead
+** or m>0
+*/
+static int m_gt_1(const char *z){
+ while( isVowel(z) ){ z++; }
+ if( *z==0 ) return 0;
+ while( isConsonant(z) ){ z++; }
+ if( *z==0 ) return 0;
+ while( isVowel(z) ){ z++; }
+ if( *z==0 ) return 0;
+ while( isConsonant(z) ){ z++; }
+ return *z!=0;
+}
+
+/*
+** Return TRUE if there is a vowel anywhere within z[0..n-1]
+*/
+static int hasVowel(const char *z){
+ while( isConsonant(z) ){ z++; }
+ return *z!=0;
+}
+
+/*
+** Return TRUE if the word ends in a double consonant.
+**
+** The text is reversed here. So we are really looking at
+** the first two characters of z[].
+*/
+static int doubleConsonant(const char *z){
+ return isConsonant(z) && z[0]==z[1] && isConsonant(z+1);
+}
+
+/*
+** Return TRUE if the word ends with three letters which
+** are consonant-vowel-consonent and where the final consonant
+** is not 'w', 'x', or 'y'.
+**
+** The word is reversed here. So we are really checking the
+** first three letters and the first one cannot be in [wxy].
+*/
+static int star_oh(const char *z){
+ return
+ z[0]!=0 && isConsonant(z) &&
+ z[0]!='w' && z[0]!='x' && z[0]!='y' &&
+ z[1]!=0 && isVowel(z+1) &&
+ z[2]!=0 && isConsonant(z+2);
+}
+
+/*
+** If the word ends with zFrom and xCond() is true for the stem
+** of the word that preceeds the zFrom ending, then change the
+** ending to zTo.
+**
+** The input word *pz and zFrom are both in reverse order. zTo
+** is in normal order.
+**
+** Return TRUE if zFrom matches. Return FALSE if zFrom does not
+** match. Not that TRUE is returned even if xCond() fails and
+** no substitution occurs.
+*/
+static int stem(
+ char **pz, /* The word being stemmed (Reversed) */
+ const char *zFrom, /* If the ending matches this... (Reversed) */
+ const char *zTo, /* ... change the ending to this (not reversed) */
+ int (*xCond)(const char*) /* Condition that must be true */
+){
+ char *z = *pz;
+ while( *zFrom && *zFrom==*z ){ z++; zFrom++; }
+ if( *zFrom!=0 ) return 0;
+ if( xCond && !xCond(z) ) return 1;
+ while( *zTo ){
+ *(--z) = *(zTo++);
+ }
+ *pz = z;
+ return 1;
+}
+
+/**
+ * Voiced sound mark is only on Japanese. It is like accent. It combines with
+ * previous character. Example, "サ" (Katakana) with "゛" (voiced sound mark) is
+ * "ザ". Although full-width character mapping has combined character like "ザ",
+ * there is no combined character on half-width Katanaka character mapping.
+ */
+static int isVoicedSoundMark(const unsigned int c)
+{
+ if (c == 0xff9e || c == 0xff9f || c == 0x3099 || c == 0x309a)
+ return 1;
+ return 0;
+}
+
+/**
+ * How many unicode characters to take from the front and back of a term in
+ * |copy_stemmer|.
+ */
+#define COPY_STEMMER_COPY_HALF_LEN 10
+
+/**
+ * Normalizing but non-stemming term copying.
+ *
+ * The original function would take 10 bytes from the front and 10 bytes from
+ * the back if there were no digits in the string and it was more than 20
+ * bytes long. If there were digits involved that would decrease to 3 bytes
+ * from the front and 3 from the back. This would potentially corrupt utf-8
+ * encoded characters, which is fine from the perspective of the FTS3 logic.
+ *
+ * In our revised form we now operate on a unicode character basis rather than
+ * a byte basis. Additionally we use the same length limit even if there are
+ * digits involved because it's not clear digit token-space reduction is saving
+ * us from anything and could be hurting. Specifically, if no one is ever
+ * going to search on things with digits, then we should just remove them.
+ * Right now, the space reduction is going to increase false positives when
+ * people do search on them and increase the number of collisions sufficiently
+ * to make it really expensive. The caveat is there will be some increase in
+ * index size which could be meaningful if people are receiving lots of emails
+ * full of distinct numbers.
+ *
+ * In order to do the copy-from-the-front and copy-from-the-back trick, once
+ * we reach N characters in, we set zFrontEnd to the current value of zOut
+ * (which represents the termination of the first part of the result string)
+ * and set zBackStart to the value of zOutStart. We then advanced zBackStart
+ * along a character at a time as we write more characters. Once we have
+ * traversed the entire string, if zBackStart > zFrontEnd, then we know
+ * the string should be shrunk using the characters in the two ranges.
+ *
+ * (It would be faster to scan from the back with specialized logic but that
+ * particular logic seems easy to screw up and we don't have unit tests in here
+ * to the extent required.)
+ *
+ * @param zIn Input string to normalize and potentially shrink.
+ * @param nBytesIn The number of bytes in zIn, distinct from the number of
+ * unicode characters encoded in zIn.
+ * @param zOut The string to write our output into. This must have at least
+ * nBytesIn * MAX_UTF8_GROWTH_FACTOR in order to compensate for
+ * normalization that results in a larger utf-8 encoding.
+ * @param pnBytesOut Integer to write the number of bytes in zOut into.
+ */
+static void copy_stemmer(const unsigned char *zIn, const int nBytesIn,
+ unsigned char *zOut, int *pnBytesOut){
+ const unsigned char *zInTerm = zIn + nBytesIn;
+ unsigned char *zOutStart = zOut;
+ unsigned int c;
+ unsigned int charCount = 0;
+ unsigned char *zFrontEnd = NULL, *zBackStart = NULL;
+ unsigned int trashC;
+
+ /* copy normalized character */
+ while (zIn < zInTerm) {
+ READ_UTF8(zIn, zInTerm, c);
+ c = normalize_character(c);
+
+ /* ignore voiced/semi-voiced sound mark */
+ if (!isVoicedSoundMark(c)) {
+ /* advance one non-voiced sound mark character. */
+ if (zBackStart)
+ READ_UTF8(zBackStart, zOut, trashC);
+
+ WRITE_UTF8(zOut, c);
+ charCount++;
+ if (charCount == COPY_STEMMER_COPY_HALF_LEN) {
+ zFrontEnd = zOut;
+ zBackStart = zOutStart;
+ }
+ }
+ }
+
+ /* if we need to shrink the string, transplant the back bytes */
+ if (zBackStart > zFrontEnd) { /* this handles when both are null too */
+ size_t backBytes = zOut - zBackStart;
+ memmove(zFrontEnd, zBackStart, backBytes);
+ zOut = zFrontEnd + backBytes;
+ }
+ *zOut = 0;
+ *pnBytesOut = zOut - zOutStart;
+}
+
+
+/*
+** Stem the input word zIn[0..nIn-1]. Store the output in zOut.
+** zOut is at least big enough to hold nIn bytes. Write the actual
+** size of the output word (exclusive of the '\0' terminator) into *pnOut.
+**
+** Any upper-case characters in the US-ASCII character set ([A-Z])
+** are converted to lower case. Upper-case UTF characters are
+** unchanged.
+**
+** Words that are longer than about 20 bytes are stemmed by retaining
+** a few bytes from the beginning and the end of the word. If the
+** word contains digits, 3 bytes are taken from the beginning and
+** 3 bytes from the end. For long words without digits, 10 bytes
+** are taken from each end. US-ASCII case folding still applies.
+**
+** If the input word contains not digits but does characters not
+** in [a-zA-Z] then no stemming is attempted and this routine just
+** copies the input into the input into the output with US-ASCII
+** case folding.
+**
+** Stemming never increases the length of the word. So there is
+** no chance of overflowing the zOut buffer.
+*/
+static void porter_stemmer(
+ const unsigned char *zIn,
+ unsigned int nIn,
+ unsigned char *zOut,
+ int *pnOut
+){
+ unsigned int i, j, c;
+ char zReverse[28];
+ char *z, *z2;
+ const unsigned char *zTerm = zIn + nIn;
+ const unsigned char *zTmp = zIn;
+
+ if( nIn<3 || nIn>=sizeof(zReverse)-7 ){
+ /* The word is too big or too small for the porter stemmer.
+ ** Fallback to the copy stemmer */
+ copy_stemmer(zIn, nIn, zOut, pnOut);
+ return;
+ }
+ for (j = sizeof(zReverse) - 6; zTmp < zTerm; j--) {
+ READ_UTF8(zTmp, zTerm, c);
+ c = normalize_character(c);
+ if( c>='a' && c<='z' ){
+ zReverse[j] = c;
+ }else{
+ /* The use of a character not in [a-zA-Z] means that we fallback
+ ** to the copy stemmer */
+ copy_stemmer(zIn, nIn, zOut, pnOut);
+ return;
+ }
+ }
+ memset(&zReverse[sizeof(zReverse)-5], 0, 5);
+ z = &zReverse[j+1];
+
+
+ /* Step 1a */
+ if( z[0]=='s' ){
+ if(
+ !stem(&z, "sess", "ss", 0) &&
+ !stem(&z, "sei", "i", 0) &&
+ !stem(&z, "ss", "ss", 0)
+ ){
+ z++;
+ }
+ }
+
+ /* Step 1b */
+ z2 = z;
+ if( stem(&z, "dee", "ee", m_gt_0) ){
+ /* Do nothing. The work was all in the test */
+ }else if(
+ (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
+ && z!=z2
+ ){
+ if( stem(&z, "ta", "ate", 0) ||
+ stem(&z, "lb", "ble", 0) ||
+ stem(&z, "zi", "ize", 0) ){
+ /* Do nothing. The work was all in the test */
+ }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
+ z++;
+ }else if( m_eq_1(z) && star_oh(z) ){
+ *(--z) = 'e';
+ }
+ }
+
+ /* Step 1c */
+ if( z[0]=='y' && hasVowel(z+1) ){
+ z[0] = 'i';
+ }
+
+ /* Step 2 */
+ switch( z[1] ){
+ case 'a':
+ (void) (stem(&z, "lanoita", "ate", m_gt_0) ||
+ stem(&z, "lanoit", "tion", m_gt_0));
+ break;
+ case 'c':
+ (void) (stem(&z, "icne", "ence", m_gt_0) ||
+ stem(&z, "icna", "ance", m_gt_0));
+ break;
+ case 'e':
+ (void) (stem(&z, "rezi", "ize", m_gt_0));
+ break;
+ case 'g':
+ (void) (stem(&z, "igol", "log", m_gt_0));
+ break;
+ case 'l':
+ (void) (stem(&z, "ilb", "ble", m_gt_0) ||
+ stem(&z, "illa", "al", m_gt_0) ||
+ stem(&z, "iltne", "ent", m_gt_0) ||
+ stem(&z, "ile", "e", m_gt_0) ||
+ stem(&z, "ilsuo", "ous", m_gt_0));
+ break;
+ case 'o':
+ (void) (stem(&z, "noitazi", "ize", m_gt_0) ||
+ stem(&z, "noita", "ate", m_gt_0) ||
+ stem(&z, "rota", "ate", m_gt_0));
+ break;
+ case 's':
+ (void) (stem(&z, "msila", "al", m_gt_0) ||
+ stem(&z, "ssenevi", "ive", m_gt_0) ||
+ stem(&z, "ssenluf", "ful", m_gt_0) ||
+ stem(&z, "ssensuo", "ous", m_gt_0));
+ break;
+ case 't':
+ (void) (stem(&z, "itila", "al", m_gt_0) ||
+ stem(&z, "itivi", "ive", m_gt_0) ||
+ stem(&z, "itilib", "ble", m_gt_0));
+ break;
+ }
+
+ /* Step 3 */
+ switch( z[0] ){
+ case 'e':
+ (void) (stem(&z, "etaci", "ic", m_gt_0) ||
+ stem(&z, "evita", "", m_gt_0) ||
+ stem(&z, "ezila", "al", m_gt_0));
+ break;
+ case 'i':
+ (void) (stem(&z, "itici", "ic", m_gt_0));
+ break;
+ case 'l':
+ (void) (stem(&z, "laci", "ic", m_gt_0) ||
+ stem(&z, "luf", "", m_gt_0));
+ break;
+ case 's':
+ (void) (stem(&z, "ssen", "", m_gt_0));
+ break;
+ }
+
+ /* Step 4 */
+ switch( z[1] ){
+ case 'a':
+ if( z[0]=='l' && m_gt_1(z+2) ){
+ z += 2;
+ }
+ break;
+ case 'c':
+ if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
+ z += 4;
+ }
+ break;
+ case 'e':
+ if( z[0]=='r' && m_gt_1(z+2) ){
+ z += 2;
+ }
+ break;
+ case 'i':
+ if( z[0]=='c' && m_gt_1(z+2) ){
+ z += 2;
+ }
+ break;
+ case 'l':
+ if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
+ z += 4;
+ }
+ break;
+ case 'n':
+ if( z[0]=='t' ){
+ if( z[2]=='a' ){
+ if( m_gt_1(z+3) ){
+ z += 3;
+ }
+ }else if( z[2]=='e' ){
+ (void) (stem(&z, "tneme", "", m_gt_1) ||
+ stem(&z, "tnem", "", m_gt_1) ||
+ stem(&z, "tne", "", m_gt_1));
+ }
+ }
+ break;
+ case 'o':
+ if( z[0]=='u' ){
+ if( m_gt_1(z+2) ){
+ z += 2;
+ }
+ }else if( z[3]=='s' || z[3]=='t' ){
+ (void) (stem(&z, "noi", "", m_gt_1));
+ }
+ break;
+ case 's':
+ if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
+ z += 3;
+ }
+ break;
+ case 't':
+ (void) (stem(&z, "eta", "", m_gt_1) ||
+ stem(&z, "iti", "", m_gt_1));
+ break;
+ case 'u':
+ if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
+ z += 3;
+ }
+ break;
+ case 'v':
+ case 'z':
+ if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
+ z += 3;
+ }
+ break;
+ }
+
+ /* Step 5a */
+ if( z[0]=='e' ){
+ if( m_gt_1(z+1) ){
+ z++;
+ }else if( m_eq_1(z+1) && !star_oh(z+1) ){
+ z++;
+ }
+ }
+
+ /* Step 5b */
+ if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
+ z++;
+ }
+
+ /* z[] is now the stemmed word in reverse order. Flip it back
+ ** around into forward order and return.
+ */
+ *pnOut = i = strlen(z);
+ zOut[i] = 0;
+ while( *z ){
+ zOut[--i] = *(z++);
+ }
+}
+
+/**
+ * Indicate whether characters in the 0x30 - 0x7f region can be part of a token.
+ * Letters and numbers can; punctuation (and 'del') can't.
+ */
+static const char porterIdChar[] = {
+/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
+};
+
+/**
+ * Test whether a character is a (non-ascii) space character or not. isDelim
+ * uses the existing porter stemmer logic for anything in the ASCII (< 0x80)
+ * space which covers 0x20.
+ *
+ * 0x2000-0x206F is the general punctuation table. 0x2000 - 0x200b are spaces.
+ * The spaces 0x2000 - 0x200a are all defined as roughly equivalent to a
+ * standard 0x20 space. 0x200b is a "zero width space" (ZWSP) and not like an
+ * 0x20 space. 0x202f is a narrow no-break space and roughly equivalent to an
+ * 0x20 space. 0x205f is a "medium mathematical space" and defined as roughly
+ * equivalent to an 0x20 space.
+ */
+#define IS_UNI_SPACE(x) (((x)>=0x2000&&(x)<=0x200a) || (x)==0x202f || (x)==0x205f)
+/**
+ * What we are checking for:
+ * - 0x3001: Ideographic comma (-> 0x2c ',')
+ * - 0x3002: Ideographic full stop (-> 0x2e '.')
+ * - 0xff0c: fullwidth comma (~ wide 0x2c ',')
+ * - 0xff0e: fullwidth full stop (~ wide 0x2e '.')
+ * - 0xff61: halfwidth ideographic full stop (~ narrow 0x3002)
+ * - 0xff64: halfwidth ideographic comma (~ narrow 0x3001)
+ *
+ * It is possible we should be treating other things as delimiters!
+ */
+#define IS_JA_DELIM(x) (((x)==0x3001)||((x)==0xFF64)||((x)==0xFF0E)||((x)==0x3002)||((x)==0xFF61)||((x)==0xFF0C))
+
+/**
+ * The previous character was a delimeter (which includes the start of the
+ * string).
+ */
+#define BIGRAM_RESET 0
+/**
+ * The previous character was a CJK character and we have only seen one of them.
+ * If we had seen more than one in a row it would be the BIGRAM_USE state.
+ */
+#define BIGRAM_UNKNOWN 1
+/**
+ * We have seen two or more CJK characters in a row.
+ */
+#define BIGRAM_USE 2
+/**
+ * The previous character was ASCII or something in the unicode general scripts
+ * area that we do not believe is a delimeter. We call it 'alpha' as in
+ * alphabetic/alphanumeric and something that should be tokenized based on
+ * delimiters rather than on a bi-gram basis.
+ */
+#define BIGRAM_ALPHA 3
+
+static int isDelim(
+ const unsigned char *zCur, /* IN: current pointer of token */
+ const unsigned char *zTerm, /* IN: one character beyond end of token */
+ int *len, /* OUT: analyzed bytes in this token */
+ int *state /* IN/OUT: analyze state */
+){
+ const unsigned char *zIn = zCur;
+ unsigned int c;
+ int delim;
+
+ /* get the unicode character to analyze */
+ READ_UTF8(zIn, zTerm, c);
+ c = normalize_character(c);
+ *len = zIn - zCur;
+
+ /* ASCII character range has rule */
+ if( c < 0x80 ){
+ // This is original porter stemmer isDelim logic.
+ // 0x0 - 0x1f are all control characters, 0x20 is space, 0x21-0x2f are
+ // punctuation.
+ delim = (c < 0x30 || !porterIdChar[c - 0x30]);
+ // cases: "&a", "&."
+ if (*state == BIGRAM_USE || *state == BIGRAM_UNKNOWN ){
+ /* previous maybe CJK and current is ascii */
+ *state = BIGRAM_ALPHA; /*ascii*/
+ delim = 1; /* must break */
+ } else if (delim == 1) {
+ // cases: "a.", ".."
+ /* this is delimiter character */
+ *state = BIGRAM_RESET; /*reset*/
+ } else {
+ // cases: "aa", ".a"
+ *state = BIGRAM_ALPHA; /*ascii*/
+ }
+ return delim;
+ }
+
+ // (at this point we must be a non-ASCII character)
+
+ /* voiced/semi-voiced sound mark is ignore */
+ if (isVoicedSoundMark(c) && *state != BIGRAM_ALPHA) {
+ /* ignore this because it is combined with previous char */
+ return 0;
+ }
+
+ /* this isn't CJK range, so return as no delim */
+ // Anything less than 0x2000 (except to U+0E00-U+0EFF and U+1780-U+17FF)
+ // is the general scripts area and should not be bi-gram indexed.
+ // 0xa000 - 0a4cf is the Yi area. It is apparently a phonetic language whose
+ // usage does not appear to have simple delimeter rules, so we're leaving it
+ // as bigram processed. This is a guess, if you know better, let us know.
+ // (We previously bailed on this range too.)
+ // Addition, U+0E00-U+0E7F is Thai, U+0E80-U+0EFF is Laos,
+ // and U+1780-U+17FF is Khmer. It is no easy way to break each word.
+ // So these should use bi-gram too.
+ // cases: "aa", ".a", "&a"
+ if (c < 0xe00 ||
+ (c >= 0xf00 && c < 0x1780) ||
+ (c >= 0x1800 && c < 0x2000)) {
+ *state = BIGRAM_ALPHA; /* not really ASCII but same idea; tokenize it */
+ return 0;
+ }
+
+ // (at this point we must be a bi-grammable char or delimiter)
+
+ /* this is space character or delim character */
+ // cases: "a.", "..", "&."
+ if( IS_UNI_SPACE(c) || IS_JA_DELIM(c) ){
+ *state = BIGRAM_RESET; /* reset */
+ return 1; /* it actually is a delimiter; report as such */
+ }
+
+ // (at this point we must be a bi-grammable char)
+
+ // cases: "a&"
+ if( *state==BIGRAM_ALPHA ){
+ /* Previous is ascii and current maybe CJK */
+ *state = BIGRAM_UNKNOWN; /* mark as unknown */
+ return 1; /* break to emit the ASCII token*/
+ }
+
+ /* We have no rule for CJK!. use bi-gram */
+ // cases: "&&"
+ if( *state==BIGRAM_UNKNOWN || *state==BIGRAM_USE ){
+ /* previous state is unknown. mark as bi-gram */
+ *state = BIGRAM_USE;
+ return 1; /* break to emit the digram */
+ }
+
+ // cases: ".&" (*state == BIGRAM_RESET)
+ *state = BIGRAM_UNKNOWN; /* mark as unknown */
+ return 0; /* no need to break; nothing to emit */
+}
+
+/**
+ * Generate a new token. There are basically three types of token we can
+ * generate:
+ * - A porter stemmed token. This is a word entirely comprised of ASCII
+ * characters. We run the porter stemmer algorithm against the word.
+ * Because we have no way to know what is and is not an English word
+ * (the only language for which the porter stemmer was designed), this
+ * could theoretically map multiple words that are not variations of the
+ * same word down to the same root, resulting in potentially unexpected
+ * result inclusions in the search results. We accept this result because
+ * there's not a lot we can do about it and false positives are much
+ * better than false negatives.
+ * - A copied token; case/accent-folded but not stemmed. We call the porter
+ * stemmer for all non-CJK cases and it diverts to the copy stemmer if it
+ * sees any non-ASCII characters (after folding) or if the string is too
+ * long. The copy stemmer will shrink the string if it is deemed too long.
+ * - A bi-gram token; two CJK-ish characters. For query reasons we generate a
+ * series of overlapping bi-grams. (We can't require the user to start their
+ * search based on the arbitrary context of the indexed documents.)
+ *
+ * It may be useful to think of this function as operating at the points between
+ * characters. While we are considering the 'current' character (the one after
+ * the 'point'), we are also interested in the 'previous' character (the one
+ * preceding the point).
+ * At any 'point', there are a number of possible situations which I will
+ * illustrate with pairs of characters. 'a' means alphanumeric ASCII or a
+ * non-ASCII character that is not bi-grammable or a delimeter, '.'
+ * means a delimiter (space or punctuation), '&' means a bi-grammable
+ * character.
+ * - aa: We are in the midst of a token. State remains BIGRAM_ALPHA.
+ * - a.: We will generate a porter stemmed or copied token. State was
+ * BIGRAM_ALPHA, gets set to BIGRAM_RESET.
+ * - a&: We will generate a porter stemmed or copied token; we will set our
+ * state to BIGRAM_UNKNOWN to indicate we have seen one bigram character
+ * but that it is not yet time to emit a bigram.
+ * - .a: We are starting a token. State was BIGRAM_RESET, gets set to
+ * BIGRAM_ALPHA.
+ * - ..: We skip/eat the delimeters. State stays BIGRAM_RESET.
+ * - .&: State set to BIGRAM_UNKNOWN to indicate we have seen one bigram char.
+ * - &a: If the state was BIGRAM_USE, we generate a bi-gram token. If the state
+ * was BIGRAM_UNKNOWN we had only seen one CJK character and so don't do
+ * anything. State is set to BIGRAM_ALPHA.
+ * - &.: Same as the "&a" case, but state is set to BIGRAM_RESET.
+ * - &&: We will generate a bi-gram token. State was either BIGRAM_UNKNOWN or
+ * BIGRAM_USE, gets set to BIGRAM_USE.
+ */
+static int porterNext(
+ sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */
+ const char **pzToken, /* OUT: *pzToken is the token text */
+ int *pnBytes, /* OUT: Number of bytes in token */
+ int *piStartOffset, /* OUT: Starting offset of token */
+ int *piEndOffset, /* OUT: Ending offset of token */
+ int *piPosition /* OUT: Position integer of token */
+){
+ porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
+ const unsigned char *z = (unsigned char *) c->zInput;
+ int len = 0;
+ int state;
+
+ while( c->iOffset < c->nInput ){
+ int iStartOffset, numChars;
+
+ /*
+ * This loop basically has two modes of operation:
+ * - general processing (iPrevBigramOffset == 0 here)
+ * - CJK processing (iPrevBigramOffset != 0 here)
+ *
+ * In an general processing pass we skip over all the delimiters, leaving us
+ * at a character that promises to produce a token. This could be a CJK
+ * token (state == BIGRAM_USE) or an ALPHA token (state == BIGRAM_ALPHA).
+ * If it was a CJK token, we transition into CJK state for the next loop.
+ * If it was an alpha token, our current offset is pointing at a delimiter
+ * (which could be a CJK character), so it is good that our next pass
+ * through the function and loop will skip over any delimiters. If the
+ * delimiter we hit was a CJK character, the next time through we will
+ * not treat it as a delimiter though; the entry state for that scan is
+ * BIGRAM_RESET so the transition is not treated as a delimiter!
+ *
+ * The CJK pass always starts with the second character in a bi-gram emitted
+ * as a token in the previous step. No delimiter skipping is required
+ * because we know that first character might produce a token for us. It
+ * only 'might' produce a token because the previous pass performed no
+ * lookahead and cannot be sure it is followed by another CJK character.
+ * This is why
+ */
+
+ // If we have a previous bigram offset
+ if (c->iPrevBigramOffset == 0) {
+ /* Scan past delimiter characters */
+ state = BIGRAM_RESET; /* reset */
+ while (c->iOffset < c->nInput &&
+ isDelim(z + c->iOffset, z + c->nInput, &len, &state)) {
+ c->iOffset += len;
+ }
+
+ } else {
+ /* for bigram indexing, use previous offset */
+ c->iOffset = c->iPrevBigramOffset;
+ }
+
+ /* Count non-delimiter characters. */
+ iStartOffset = c->iOffset;
+ numChars = 0;
+
+ // Start from a reset state. This means the first character we see
+ // (which will not be a delimiter) determines which of ALPHA or CJK modes
+ // we are operating in. (It won't be a delimiter because in a 'general'
+ // pass as defined above, we will have eaten all the delimiters, and in
+ // a CJK pass we are guaranteed that the first character is CJK.)
+ state = BIGRAM_RESET; /* state is reset */
+ // Advance until it is time to emit a token.
+ // For ALPHA characters, this means advancing until we encounter a delimiter
+ // or a CJK character. iOffset will be pointing at the delimiter or CJK
+ // character, aka one beyond the last ALPHA character.
+ // For CJK characters this means advancing until we encounter an ALPHA
+ // character, a delimiter, or we have seen two consecutive CJK
+ // characters. iOffset points at the ALPHA/delimiter in the first 2 cases
+ // and the second of two CJK characters in the last case.
+ // Because of the way this loop is structured, iOffset is only updated
+ // when we don't terminate. However, if we terminate, len still contains
+ // the number of bytes in the character found at iOffset. (This is useful
+ // in the CJK case.)
+ while (c->iOffset < c->nInput &&
+ !isDelim(z + c->iOffset, z + c->nInput, &len, &state)) {
+ c->iOffset += len;
+ numChars++;
+ }
+
+ if (state == BIGRAM_USE) {
+ /* Split word by bigram */
+ // Right now iOffset is pointing at the second character in a pair.
+ // Save this offset so next-time through we start with that as the
+ // first character.
+ c->iPrevBigramOffset = c->iOffset;
+ // And now advance so that iOffset is pointing at the character after
+ // the second character in the bi-gram pair. Also count the char.
+ c->iOffset += len;
+ numChars++;
+ } else {
+ /* Reset bigram offset */
+ c->iPrevBigramOffset = 0;
+ }
+
+ /* We emit a token if:
+ * - there are two ideograms together,
+ * - there are three chars or more,
+ * - we think this is a query and wildcard magic is desired.
+ * We think is a wildcard query when we have a single character, it starts
+ * at the start of the buffer, it's CJK, our current offset is one shy of
+ * nInput and the character at iOffset is '*'. Because the state gets
+ * clobbered by the incidence of '*' our requirement for CJK is that the
+ * implied character length is at least 3 given that it takes at least 3
+ * bytes to encode to 0x2000.
+ */
+ // It is possible we have no token to emit here if iPrevBigramOffset was not
+ // 0 on entry and there was no second CJK character. iPrevBigramOffset
+ // will now be 0 if that is the case (and c->iOffset == iStartOffset).
+ if (// allow two-character words only if in bigram
+ (numChars == 2 && state == BIGRAM_USE) ||
+ // otherwise, drop two-letter words (considered stop-words)
+ (numChars >=3) ||
+ // wildcard case:
+ (numChars == 1 && iStartOffset == 0 &&
+ (c->iOffset >= 3) &&
+ (c->iOffset == c->nInput - 1) &&
+ (z[c->iOffset] == '*'))) {
+ /* figure out the number of bytes to copy/stem */
+ int n = c->iOffset - iStartOffset;
+ /* make sure there is enough buffer space */
+ if (n * MAX_UTF8_GROWTH_FACTOR > c->nAllocated) {
+ c->nAllocated = n * MAX_UTF8_GROWTH_FACTOR + 20;
+ c->zToken = sqlite3_realloc(c->zToken, c->nAllocated);
+ if (c->zToken == NULL)
+ return SQLITE_NOMEM;
+ }
+
+ if (state == BIGRAM_USE) {
+ /* This is by bigram. So it is unnecessary to convert word */
+ copy_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
+ } else {
+ porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
+ }
+ *pzToken = (const char*)c->zToken;
+ *piStartOffset = iStartOffset;
+ *piEndOffset = c->iOffset;
+ *piPosition = c->iToken++;
+ return SQLITE_OK;
+ }
+ }
+ return SQLITE_DONE;
+}
+
+/*
+** The set of routines that implement the porter-stemmer tokenizer
+*/
+static const sqlite3_tokenizer_module porterTokenizerModule = {
+ 0,
+ porterCreate,
+ porterDestroy,
+ porterOpen,
+ porterClose,
+ porterNext,
+};
+
+/*
+** Allocate a new porter tokenizer. Return a pointer to the new
+** tokenizer in *ppModule
+*/
+void sqlite3Fts3PorterTokenizerModule(
+ sqlite3_tokenizer_module const**ppModule
+){
+ *ppModule = &porterTokenizerModule;
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
diff --git a/mailnews/extensions/fts3/src/fts3_tokenizer.h b/mailnews/extensions/fts3/src/fts3_tokenizer.h
new file mode 100644
index 000000000..906303db4
--- /dev/null
+++ b/mailnews/extensions/fts3/src/fts3_tokenizer.h
@@ -0,0 +1,148 @@
+/*
+** 2006 July 10
+**
+** The author disclaims copyright to this source code.
+**
+*************************************************************************
+** Defines the interface to tokenizers used by fulltext-search. There
+** are three basic components:
+**
+** sqlite3_tokenizer_module is a singleton defining the tokenizer
+** interface functions. This is essentially the class structure for
+** tokenizers.
+**
+** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
+** including customization information defined at creation time.
+**
+** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
+** tokens from a particular input.
+*/
+#ifndef _FTS3_TOKENIZER_H_
+#define _FTS3_TOKENIZER_H_
+
+/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
+** If tokenizers are to be allowed to call sqlite3_*() functions, then
+** we will need a way to register the API consistently.
+*/
+#include "sqlite3.h"
+
+/*
+** Structures used by the tokenizer interface. When a new tokenizer
+** implementation is registered, the caller provides a pointer to
+** an sqlite3_tokenizer_module containing pointers to the callback
+** functions that make up an implementation.
+**
+** When an fts3 table is created, it passes any arguments passed to
+** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the
+** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
+** implementation. The xCreate() function in turn returns an
+** sqlite3_tokenizer structure representing the specific tokenizer to
+** be used for the fts3 table (customized by the tokenizer clause arguments).
+**
+** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
+** method is called. It returns an sqlite3_tokenizer_cursor object
+** that may be used to tokenize a specific input buffer based on
+** the tokenization rules supplied by a specific sqlite3_tokenizer
+** object.
+*/
+typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
+typedef struct sqlite3_tokenizer sqlite3_tokenizer;
+typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
+
+struct sqlite3_tokenizer_module {
+
+ /*
+ ** Structure version. Should always be set to 0.
+ */
+ int iVersion;
+
+ /*
+ ** Create a new tokenizer. The values in the argv[] array are the
+ ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
+ ** TABLE statement that created the fts3 table. For example, if
+ ** the following SQL is executed:
+ **
+ ** CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2)
+ **
+ ** then argc is set to 2, and the argv[] array contains pointers
+ ** to the strings "arg1" and "arg2".
+ **
+ ** This method should return either SQLITE_OK (0), or an SQLite error
+ ** code. If SQLITE_OK is returned, then *ppTokenizer should be set
+ ** to point at the newly created tokenizer structure. The generic
+ ** sqlite3_tokenizer.pModule variable should not be initialised by
+ ** this callback. The caller will do so.
+ */
+ int (*xCreate)(
+ int argc, /* Size of argv array */
+ const char *const*argv, /* Tokenizer argument strings */
+ sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
+ );
+
+ /*
+ ** Destroy an existing tokenizer. The fts3 module calls this method
+ ** exactly once for each successful call to xCreate().
+ */
+ int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
+
+ /*
+ ** Create a tokenizer cursor to tokenize an input buffer. The caller
+ ** is responsible for ensuring that the input buffer remains valid
+ ** until the cursor is closed (using the xClose() method).
+ */
+ int (*xOpen)(
+ sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
+ const char *pInput, int nBytes, /* Input buffer */
+ sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
+ );
+
+ /*
+ ** Destroy an existing tokenizer cursor. The fts3 module calls this
+ ** method exactly once for each successful call to xOpen().
+ */
+ int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
+
+ /*
+ ** Retrieve the next token from the tokenizer cursor pCursor. This
+ ** method should either return SQLITE_OK and set the values of the
+ ** "OUT" variables identified below, or SQLITE_DONE to indicate that
+ ** the end of the buffer has been reached, or an SQLite error code.
+ **
+ ** *ppToken should be set to point at a buffer containing the
+ ** normalized version of the token (i.e. after any case-folding and/or
+ ** stemming has been performed). *pnBytes should be set to the length
+ ** of this buffer in bytes. The input text that generated the token is
+ ** identified by the byte offsets returned in *piStartOffset and
+ ** *piEndOffset. *piStartOffset should be set to the index of the first
+ ** byte of the token in the input buffer. *piEndOffset should be set
+ ** to the index of the first byte just past the end of the token in
+ ** the input buffer.
+ **
+ ** The buffer *ppToken is set to point at is managed by the tokenizer
+ ** implementation. It is only required to be valid until the next call
+ ** to xNext() or xClose().
+ */
+ /* TODO(shess) current implementation requires pInput to be
+ ** nul-terminated. This should either be fixed, or pInput/nBytes
+ ** should be converted to zInput.
+ */
+ int (*xNext)(
+ sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */
+ const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */
+ int *piStartOffset, /* OUT: Byte offset of token in input buffer */
+ int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
+ int *piPosition /* OUT: Number of tokens returned before this one */
+ );
+};
+
+struct sqlite3_tokenizer {
+ const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
+ /* Tokenizer implementations will typically add additional fields */
+};
+
+struct sqlite3_tokenizer_cursor {
+ sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
+ /* Tokenizer implementations will typically add additional fields */
+};
+
+#endif /* _FTS3_TOKENIZER_H_ */
diff --git a/mailnews/extensions/fts3/src/moz.build b/mailnews/extensions/fts3/src/moz.build
new file mode 100644
index 000000000..a2b3c60d5
--- /dev/null
+++ b/mailnews/extensions/fts3/src/moz.build
@@ -0,0 +1,18 @@
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+SOURCES += [
+ 'fts3_porter.c',
+ 'Normalize.c',
+]
+
+SOURCES += [
+ 'nsFts3Tokenizer.cpp',
+ 'nsGlodaRankerFunction.cpp',
+]
+
+FINAL_LIBRARY = 'mail'
+
+CXXFLAGS += CONFIG['SQLITE_CFLAGS']
diff --git a/mailnews/extensions/fts3/src/nsFts3Tokenizer.cpp b/mailnews/extensions/fts3/src/nsFts3Tokenizer.cpp
new file mode 100644
index 000000000..12bd70ead
--- /dev/null
+++ b/mailnews/extensions/fts3/src/nsFts3Tokenizer.cpp
@@ -0,0 +1,72 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsFts3Tokenizer.h"
+
+#include "nsGlodaRankerFunction.h"
+
+#include "nsIFts3Tokenizer.h"
+#include "mozIStorageConnection.h"
+#include "mozIStorageStatement.h"
+#include "nsStringGlue.h"
+
+extern "C" void sqlite3Fts3PorterTokenizerModule(
+ sqlite3_tokenizer_module const**ppModule);
+
+extern "C" void glodaRankFunc(sqlite3_context *pCtx,
+ int nVal,
+ sqlite3_value **apVal);
+
+NS_IMPL_ISUPPORTS(nsFts3Tokenizer,nsIFts3Tokenizer)
+
+nsFts3Tokenizer::nsFts3Tokenizer()
+{
+}
+
+nsFts3Tokenizer::~nsFts3Tokenizer()
+{
+}
+
+NS_IMETHODIMP
+nsFts3Tokenizer::RegisterTokenizer(mozIStorageConnection *connection)
+{
+ nsresult rv;
+ nsCOMPtr<mozIStorageStatement> selectStatement;
+
+ // -- register the tokenizer
+ rv = connection->CreateStatement(NS_LITERAL_CSTRING(
+ "SELECT fts3_tokenizer(?1, ?2)"),
+ getter_AddRefs(selectStatement));
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ const sqlite3_tokenizer_module* module = nullptr;
+ sqlite3Fts3PorterTokenizerModule(&module);
+ if (!module)
+ return NS_ERROR_FAILURE;
+
+ rv = selectStatement->BindUTF8StringParameter(
+ 0, NS_LITERAL_CSTRING("mozporter"));
+ NS_ENSURE_SUCCESS(rv, rv);
+ rv = selectStatement->BindBlobParameter(1,
+ (uint8_t*)&module,
+ sizeof(module));
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ bool hasMore;
+ rv = selectStatement->ExecuteStep(&hasMore);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ // -- register the ranking function
+ nsCOMPtr<mozIStorageFunction> func = new nsGlodaRankerFunction();
+ NS_ENSURE_TRUE(func, NS_ERROR_OUT_OF_MEMORY);
+ rv = connection->CreateFunction(
+ NS_LITERAL_CSTRING("glodaRank"),
+ -1, // variable argument support
+ func
+ );
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ return rv;
+}
diff --git a/mailnews/extensions/fts3/src/nsFts3Tokenizer.h b/mailnews/extensions/fts3/src/nsFts3Tokenizer.h
new file mode 100644
index 000000000..eb1e83bd5
--- /dev/null
+++ b/mailnews/extensions/fts3/src/nsFts3Tokenizer.h
@@ -0,0 +1,26 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsFts3Tokenizer_h__
+#define nsFts3Tokenizer_h__
+
+#include "nsCOMPtr.h"
+#include "nsIFts3Tokenizer.h"
+#include "fts3_tokenizer.h"
+
+extern const sqlite3_tokenizer_module* getWindowsTokenizer();
+
+class nsFts3Tokenizer final : public nsIFts3Tokenizer {
+public:
+ NS_DECL_ISUPPORTS
+ NS_DECL_NSIFTS3TOKENIZER
+
+ nsFts3Tokenizer();
+
+private:
+ ~nsFts3Tokenizer();
+};
+
+#endif
diff --git a/mailnews/extensions/fts3/src/nsFts3TokenizerCID.h b/mailnews/extensions/fts3/src/nsFts3TokenizerCID.h
new file mode 100644
index 000000000..1f9c101bc
--- /dev/null
+++ b/mailnews/extensions/fts3/src/nsFts3TokenizerCID.h
@@ -0,0 +1,16 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsFts3TokenizerCID_h__
+#define nsFts3TokenizerCID_h__
+
+#define NS_FTS3TOKENIZER_CONTRACTID \
+ "@mozilla.org/messenger/fts3tokenizer;1"
+#define NS_FTS3TOKENIZER_CID \
+{ /* a67d724d-0015-4e2e-8cad-b84775330924 */ \
+ 0xa67d724d, 0x0015, 0x4e2e, \
+ { 0x8c, 0xad, 0xb8, 0x47, 0x75, 0x33, 0x09, 0x24 }}
+
+#endif /* nsFts3TokenizerCID_h__ */
diff --git a/mailnews/extensions/fts3/src/nsGlodaRankerFunction.cpp b/mailnews/extensions/fts3/src/nsGlodaRankerFunction.cpp
new file mode 100644
index 000000000..fb576685d
--- /dev/null
+++ b/mailnews/extensions/fts3/src/nsGlodaRankerFunction.cpp
@@ -0,0 +1,145 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Thunderbird Global Database.
+ *
+ * The Initial Developer of the Original Code is the Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2010
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Andrew Sutherland <asutherland@asutherland.org>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "nsGlodaRankerFunction.h"
+#include "mozIStorageValueArray.h"
+
+#include "sqlite3.h"
+
+#include "nsCOMPtr.h"
+#include "nsVariant.h"
+#include "nsComponentManagerUtils.h"
+
+#ifndef SQLITE_VERSION_NUMBER
+#error "We need SQLITE_VERSION_NUMBER defined!"
+#endif
+
+NS_IMPL_ISUPPORTS(nsGlodaRankerFunction, mozIStorageFunction)
+
+nsGlodaRankerFunction::nsGlodaRankerFunction()
+{
+}
+
+nsGlodaRankerFunction::~nsGlodaRankerFunction()
+{
+}
+
+static uint32_t COLUMN_SATURATION[] = {10, 1, 1, 1, 1};
+
+/**
+ * Our ranking function basically just multiplies the weight of the column
+ * against the number of (saturating) matches.
+ *
+ * The original code is a SQLite example ranking function, although somewhat
+ * rather modified at this point. All SQLite code is public domain, so we are
+ * subsuming it to MPL1.1/LGPL2/GPL2.
+ */
+NS_IMETHODIMP
+nsGlodaRankerFunction::OnFunctionCall(mozIStorageValueArray *aArguments,
+ nsIVariant **_result)
+{
+ // all argument names are maintained from the original SQLite code.
+ uint32_t nVal;
+ nsresult rv = aArguments->GetNumEntries(&nVal);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ /* Check that the number of arguments passed to this function is correct.
+ * If not, return an error. Set aArgsData to point to the array
+ * of unsigned integer values returned by FTS3 function. Set nPhrase
+ * to contain the number of reportable phrases in the users full-text
+ * query, and nCol to the number of columns in the table.
+ */
+ if (nVal < 1)
+ return NS_ERROR_INVALID_ARG;
+
+ uint32_t lenArgsData;
+ uint32_t *aArgsData = (uint32_t *)aArguments->AsSharedBlob(0, &lenArgsData);
+
+ uint32_t nPhrase = aArgsData[0];
+ uint32_t nCol = aArgsData[1];
+ if (nVal != (1 + nCol))
+ return NS_ERROR_INVALID_ARG;
+
+ double score = 0.0;
+
+ // SQLite 3.6.22 has a different matchinfo layout than SQLite 3.6.23+
+#if SQLITE_VERSION_NUMBER <= 3006022
+ /* Iterate through each phrase in the users query. */
+ for (uint32_t iPhrase = 0; iPhrase < nPhrase; iPhrase++) {
+ // in SQ
+ for (uint32_t iCol = 0; iCol < nCol; iCol++) {
+ uint32_t nHitCount = aArgsData[2 + (iPhrase+1)*nCol + iCol];
+ double weight = aArguments->AsDouble(iCol+1);
+ if (nHitCount > 0) {
+ score += (nHitCount > COLUMN_SATURATION[iCol]) ?
+ (COLUMN_SATURATION[iCol] * weight) :
+ (nHitCount * weight);
+ }
+ }
+ }
+#else
+ /* Iterate through each phrase in the users query. */
+ for (uint32_t iPhrase = 0; iPhrase < nPhrase; iPhrase++) {
+ /* Now iterate through each column in the users query. For each column,
+ ** increment the relevancy score by:
+ **
+ ** (<hit count> / <global hit count>) * <column weight>
+ **
+ ** aPhraseinfo[] points to the start of the data for phrase iPhrase. So
+ ** the hit count and global hit counts for each column are found in
+ ** aPhraseinfo[iCol*3] and aPhraseinfo[iCol*3+1], respectively.
+ */
+ uint32_t *aPhraseinfo = &aArgsData[2 + iPhrase*nCol*3];
+ for (uint32_t iCol = 0; iCol < nCol; iCol++) {
+ uint32_t nHitCount = aPhraseinfo[3 * iCol];
+ double weight = aArguments->AsDouble(iCol+1);
+ if (nHitCount > 0) {
+ score += (nHitCount > COLUMN_SATURATION[iCol]) ?
+ (COLUMN_SATURATION[iCol] * weight) :
+ (nHitCount * weight);
+ }
+ }
+ }
+#endif
+
+ nsCOMPtr<nsIWritableVariant> result = new nsVariant();
+
+ rv = result->SetAsDouble(score);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ NS_ADDREF(*_result = result);
+ return NS_OK;
+}
diff --git a/mailnews/extensions/fts3/src/nsGlodaRankerFunction.h b/mailnews/extensions/fts3/src/nsGlodaRankerFunction.h
new file mode 100644
index 000000000..5c5c920d0
--- /dev/null
+++ b/mailnews/extensions/fts3/src/nsGlodaRankerFunction.h
@@ -0,0 +1,25 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _nsGlodaRankerFunction_h_
+#define _nsGlodaRankerFunction_h_
+
+#include "mozIStorageFunction.h"
+
+/**
+ * Basically a port of the example FTS3 ranking function to mozStorage's
+ * view of the universe. This might get fancier at some point.
+ */
+class nsGlodaRankerFunction final : public mozIStorageFunction
+{
+public:
+ NS_DECL_ISUPPORTS
+ NS_DECL_MOZISTORAGEFUNCTION
+
+ nsGlodaRankerFunction();
+private:
+ ~nsGlodaRankerFunction();
+};
+
+#endif // _nsGlodaRankerFunction_h_