summaryrefslogtreecommitdiffstats
path: root/extensions/universalchardet/src
diff options
context:
space:
mode:
Diffstat (limited to 'extensions/universalchardet/src')
-rw-r--r--extensions/universalchardet/src/base/CharDistribution.cpp46
-rw-r--r--extensions/universalchardet/src/base/CharDistribution.h204
-rw-r--r--extensions/universalchardet/src/base/JISFreq.tab554
-rw-r--r--extensions/universalchardet/src/base/JpCntx.cpp198
-rw-r--r--extensions/universalchardet/src/base/JpCntx.h107
-rw-r--r--extensions/universalchardet/src/base/moz.build22
-rw-r--r--extensions/universalchardet/src/base/nsCharSetProber.cpp92
-rw-r--r--extensions/universalchardet/src/base/nsCharSetProber.h42
-rw-r--r--extensions/universalchardet/src/base/nsCodingStateMachine.h85
-rw-r--r--extensions/universalchardet/src/base/nsEUCJPProber.cpp69
-rw-r--r--extensions/universalchardet/src/base/nsEUCJPProber.h43
-rw-r--r--extensions/universalchardet/src/base/nsEscCharsetProber.cpp46
-rw-r--r--extensions/universalchardet/src/base/nsEscCharsetProber.h32
-rw-r--r--extensions/universalchardet/src/base/nsEscSM.cpp63
-rw-r--r--extensions/universalchardet/src/base/nsLatin1Prober.cpp149
-rw-r--r--extensions/universalchardet/src/base/nsLatin1Prober.h36
-rw-r--r--extensions/universalchardet/src/base/nsMBCSGroupProber.cpp177
-rw-r--r--extensions/universalchardet/src/base/nsMBCSGroupProber.h42
-rw-r--r--extensions/universalchardet/src/base/nsMBCSSM.cpp188
-rw-r--r--extensions/universalchardet/src/base/nsPkgInt.h57
-rw-r--r--extensions/universalchardet/src/base/nsSJISProber.cpp68
-rw-r--r--extensions/universalchardet/src/base/nsSJISProber.h44
-rw-r--r--extensions/universalchardet/src/base/nsUTF8Prober.cpp55
-rw-r--r--extensions/universalchardet/src/base/nsUTF8Prober.h31
-rw-r--r--extensions/universalchardet/src/base/nsUniversalDetector.cpp240
-rw-r--r--extensions/universalchardet/src/base/nsUniversalDetector.h44
-rw-r--r--extensions/universalchardet/src/moz.build8
-rw-r--r--extensions/universalchardet/src/xpcom/moz.build16
-rw-r--r--extensions/universalchardet/src/xpcom/nsUdetXPCOMWrapper.cpp130
-rw-r--r--extensions/universalchardet/src/xpcom/nsUdetXPCOMWrapper.h77
-rw-r--r--extensions/universalchardet/src/xpcom/nsUniversalCharDetDll.h11
-rw-r--r--extensions/universalchardet/src/xpcom/nsUniversalCharDetModule.cpp52
32 files changed, 3028 insertions, 0 deletions
diff --git a/extensions/universalchardet/src/base/CharDistribution.cpp b/extensions/universalchardet/src/base/CharDistribution.cpp
new file mode 100644
index 000000000..7030bd85e
--- /dev/null
+++ b/extensions/universalchardet/src/base/CharDistribution.cpp
@@ -0,0 +1,46 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "CharDistribution.h"
+
+#include "JISFreq.tab"
+#include "mozilla/ArrayUtils.h"
+
+#define SURE_YES 0.99f //upper cap on the confidence GetConfidence() returns
+#define SURE_NO 0.01f //confidence GetConfidence() returns when data is insufficient
+
+//return confidence based on received data, always within [SURE_NO, SURE_YES] unless the ratio itself is smaller
+float CharDistributionAnalysis::GetConfidence(void)
+{
+ //if we didn't receive any character in our consideration range, or the
+ // number of frequent characters does not exceed the minimum threshold,
+ // return negative answer
+ if (mTotalChars <= 0 || mFreqChars <= mDataThreshold)
+ return SURE_NO;
+
+ if (mTotalChars != mFreqChars) { //equal counts would make the divisor below zero
+ float r = mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio); //frequent vs. non-frequent chars, scaled by the typical ratio
+
+ if (r < SURE_YES)
+ return r;
+ }
+ //normalize confidence, (we don't want to be 100% sure)
+ return SURE_YES;
+}
+
+SJISDistributionAnalysis::SJISDistributionAnalysis() //binds the analyser to the Japanese frequency data
+{
+ mCharToFreqOrder = JISCharToFreqOrder; //table defined in the included JISFreq.tab
+ mTableSize = mozilla::ArrayLength(JISCharToFreqOrder); //element count, used as the bound check in HandleOneChar
+ mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO; //scaling constant consumed by GetConfidence()
+}
+
+EUCJPDistributionAnalysis::EUCJPDistributionAnalysis() //uses the same table and ratio as SJISDistributionAnalysis
+{
+ mCharToFreqOrder = JISCharToFreqOrder; //table defined in the included JISFreq.tab
+ mTableSize = mozilla::ArrayLength(JISCharToFreqOrder); //element count, used as the bound check in HandleOneChar
+ mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO; //scaling constant consumed by GetConfidence()
+}
+
diff --git a/extensions/universalchardet/src/base/CharDistribution.h b/extensions/universalchardet/src/base/CharDistribution.h
new file mode 100644
index 000000000..bd5143b8e
--- /dev/null
+++ b/extensions/universalchardet/src/base/CharDistribution.h
@@ -0,0 +1,204 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef CharDistribution_h__
+#define CharDistribution_h__
+
+#include "nscore.h"
+
+#define ENOUGH_DATA_THRESHOLD 1024
+
+class CharDistributionAnalysis
+{
+public:
+ CharDistributionAnalysis() {Reset();} //Reset() does not touch mCharToFreqOrder, mTableSize or mTypicalDistributionRatio
+
+ //feed a block of data for distribution analysis (the body is empty here; counting happens in HandleOneChar)
+ void HandleData(const char* aBuf, uint32_t aLen) {}
+
+ //Feed a single character whose byte length is already known
+ void HandleOneChar(const char* aStr, uint32_t aCharLen)
+ {
+ int32_t order;
+
+ //we only care about 2-byte characters in our distribution analysis
+ order = (aCharLen == 2) ? GetOrder(aStr) : -1;
+
+ if (order >= 0)
+ {
+ mTotalChars++;
+ //only orders that fall inside the table can count as frequent
+ if ((uint32_t)order < mTableSize)
+ {
+ if (512 > mCharToFreqOrder[order]) //frequency order below 512 counts as a frequent character
+ mFreqChars++;
+ }
+ }
+ }
+
+ //return confidence based on existing data
+ float GetConfidence(void);
+
+ //Reset analyser: clears the done flag, the counters and the data threshold
+ void Reset()
+ {
+ mDone = false;
+ mTotalChars = 0;
+ mFreqChars = 0;
+ mDataThreshold = 0;
+ }
+
+ //It is not necessary to receive all data to draw a conclusion. For charset detection,
+ // a certain amount of data (more than ENOUGH_DATA_THRESHOLD counted characters) is enough
+ bool GotEnoughData() {return mTotalChars > ENOUGH_DATA_THRESHOLD;}
+
+protected:
+ //we do not handle a character based on its original encoding string, but
+ //convert this encoding string to a number, here called order.
+ //This allows multiple encodings of a language to share one frequency table
+ virtual int32_t GetOrder(const char* str) {return -1;}
+
+ //If this flag is set to true, detection is done and conclusion has been made
+ bool mDone;
+
+ //The number of characters whose frequency order is less than 512
+ uint32_t mFreqChars;
+
+ //Total characters encountered (those for which GetOrder() returned a non-negative order).
+ uint32_t mTotalChars;
+
+ //GetConfidence() gives a negative answer unless mFreqChars exceeds this; Reset() sets it to 0
+ uint32_t mDataThreshold;
+
+ //Mapping table to get frequency order from char order (get from GetOrder())
+ const int16_t *mCharToFreqOrder;
+
+ //Size of above table
+ uint32_t mTableSize;
+
+ //This is a constant value that varies from language to language; it is used in
+ //calculating confidence. See my paper for further detail.
+ float mTypicalDistributionRatio;
+};
+
+
+class EUCTWDistributionAnalysis: public CharDistributionAnalysis
+{
+public:
+ EUCTWDistributionAnalysis();
+protected:
+
+ //for euc-TW encoding, we are interested
+ // first byte range: 0xc4 -- 0xfe
+ // second byte range: 0xa1 -- 0xfe
+ //no validation needed here. State machine has done that
+ int32_t GetOrder(const char* str)
+ { if ((unsigned char)*str >= (unsigned char)0xc4)
+ return 94*((unsigned char)str[0]-(unsigned char)0xc4) + (unsigned char)str[1] - (unsigned char)0xa1; //94 = count of second bytes 0xa1..0xfe
+ else
+ return -1; //first byte below the range of interest
+ }
+};
+
+
+class EUCKRDistributionAnalysis : public CharDistributionAnalysis
+{
+public:
+ EUCKRDistributionAnalysis();
+protected:
+ //for euc-KR encoding, we are interested
+ // first byte range: 0xb0 -- 0xfe
+ // second byte range: 0xa1 -- 0xfe
+ //no validation needed here. State machine has done that
+ int32_t GetOrder(const char* str)
+ { if ((unsigned char)*str >= (unsigned char)0xb0)
+ return 94*((unsigned char)str[0]-(unsigned char)0xb0) + (unsigned char)str[1] - (unsigned char)0xa1; //94 = count of second bytes 0xa1..0xfe
+ else
+ return -1; //first byte below the range of interest
+ }
+};
+
+class GB2312DistributionAnalysis : public CharDistributionAnalysis
+{
+public:
+ GB2312DistributionAnalysis();
+protected:
+ //for GB2312 encoding, we are interested
+ // first byte range: 0xb0 -- 0xfe
+ // second byte range: 0xa1 -- 0xfe
+ //no validation needed here. State machine has done that (the lower bound of the second byte is still checked below)
+ int32_t GetOrder(const char* str)
+ { if ((unsigned char)*str >= (unsigned char)0xb0 && (unsigned char)str[1] >= (unsigned char)0xa1)
+ return 94*((unsigned char)str[0]-(unsigned char)0xb0) + (unsigned char)str[1] - (unsigned char)0xa1; //94 = count of second bytes 0xa1..0xfe
+ else
+ return -1; //first or second byte below the range of interest
+ }
+};
+
+
+class Big5DistributionAnalysis : public CharDistributionAnalysis
+{
+public:
+ Big5DistributionAnalysis();
+protected:
+ //for big5 encoding, we are interested
+ // first byte range: 0xa4 -- 0xfe
+ // second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
+ //no validation needed here. State machine has done that
+ int32_t GetOrder(const char* str)
+ { if ((unsigned char)*str >= (unsigned char)0xa4)
+ if ((unsigned char)str[1] >= (unsigned char)0xa1)
+ return 157*((unsigned char)str[0]-(unsigned char)0xa4) + (unsigned char)str[1] - (unsigned char)0xa1 +63; //63 = count of second bytes 0x40..0x7e, which come first
+ else
+ return 157*((unsigned char)str[0]-(unsigned char)0xa4) + (unsigned char)str[1] - (unsigned char)0x40; //157 = 63 + 94 second-byte values per first byte
+ else
+ return -1; //first byte below the range of interest
+ }
+};
+
+class SJISDistributionAnalysis : public CharDistributionAnalysis
+{
+public:
+ SJISDistributionAnalysis();
+protected:
+ //for sjis encoding, we are interested
+ // first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe (NOTE(review): the code below accepts only up to 0xef - confirm which is intended)
+ // second byte range: 0x40 -- 0x7e, 0x81 -- 0xfe
+ //no validation needed here. State machine has done that
+ int32_t GetOrder(const char* str)
+ {
+ int32_t order;
+ if ((unsigned char)*str >= (unsigned char)0x81 && (unsigned char)*str <= (unsigned char)0x9f)
+ order = 188 * ((unsigned char)str[0]-(unsigned char)0x81);
+ else if ((unsigned char)*str >= (unsigned char)0xe0 && (unsigned char)*str <= (unsigned char)0xef)
+ order = 188 * ((unsigned char)str[0]-(unsigned char)0xe0 + 31); //31 = count of first bytes 0x81..0x9f, which come first
+ else
+ return -1; //first byte outside both accepted ranges
+ order += (unsigned char)*(str+1) - 0x40;
+ if ((unsigned char)str[1] > (unsigned char)0x7f)
+ order--; //shift down by one so that 0x80 directly follows 0x7e in the ordering
+ return order;
+ }
+};
+
+class EUCJPDistributionAnalysis : public CharDistributionAnalysis
+{
+public:
+ EUCJPDistributionAnalysis();
+protected:
+ //for euc-JP encoding, we are interested
+ // first byte range: 0xa0 -- 0xfe (NOTE(review): the order below is offset from 0xa1, not 0xa0 - confirm the intended lower bound)
+ // second byte range: 0xa1 -- 0xfe
+ //no validation needed here. State machine has done that
+ int32_t GetOrder(const char* str)
+ { if ((unsigned char)*str >= (unsigned char)0xa0)
+ return 94*((unsigned char)str[0]-(unsigned char)0xa1) + (unsigned char)str[1] - (unsigned char)0xa1; //a first byte of 0xa0 with a second byte in 0xa1..0xfe yields a negative order, which HandleOneChar ignores
+ else
+ return -1; //first byte below the range of interest
+ }
+};
+
+#endif //CharDistribution_h__
+
diff --git a/extensions/universalchardet/src/base/JISFreq.tab b/extensions/universalchardet/src/base/JISFreq.tab
new file mode 100644
index 000000000..d3815caa4
--- /dev/null
+++ b/extensions/universalchardet/src/base/JISFreq.tab
@@ -0,0 +1,554 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//Sampled from about 20M of text material, including literature and computer technology
+
+// Japanese frequency table, applied to both S-JIS and EUC-JP
+//They are sorted in order.
+
+/******************************************************************************
+ * 128 --> 0.77094
+ * 256 --> 0.85710
+ * 512 --> 0.92635
+ * 1024 --> 0.97130
+ * 2048 --> 0.99431
+ *
+ * Ideal Distribution Ratio = 0.92635 / (1-0.92635) = 12.58
+ * Random Distribution Ratio = 512 / (2965+62+83+86-512) = 0.191
+ *
+ * Typical Distribution Ratio, 25% of IDR
+ *****************************************************************************/
+
+#define JIS_TYPICAL_DISTRIBUTION_RATIO (float) 3.0
+
+// Char to FreqOrder table
+static const int16_t JISCharToFreqOrder[] =
+{
+ 40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, // 16
+3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, // 32
+1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, // 48
+2042,1061,1062, 48, 49, 44, 45, 433, 434,1040,1041, 996, 787,2997,1255,4305, // 64
+2108,4609,1684,1648,5073,5074,5075,5076,5077,5078,3687,5079,4610,5080,3927,3928, // 80
+5081,3296,3432, 290,2285,1471,2187,5082,2580,2825,1303,2140,1739,1445,2691,3375, // 96
+1691,3297,4306,4307,4611, 452,3376,1182,2713,3688,3069,4308,5083,5084,5085,5086, // 112
+5087,5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102, // 128
+5103,5104,5105,5106,5107,5108,5109,5110,5111,5112,4097,5113,5114,5115,5116,5117, // 144
+5118,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,5130,5131,5132,5133, // 160
+5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,5149, // 176
+5150,5151,5152,4612,5153,5154,5155,5156,5157,5158,5159,5160,5161,5162,5163,5164, // 192
+5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,1472, 598, 618, 820,1205, // 208
+1309,1412,1858,1307,1692,5176,5177,5178,5179,5180,5181,5182,1142,1452,1234,1172, // 224
+1875,2043,2149,1793,1382,2973, 925,2404,1067,1241, 960,1377,2935,1491, 919,1217, // 240
+1865,2030,1406,1499,2749,4098,5183,5184,5185,5186,5187,5188,2561,4099,3117,1804, // 256
+2049,3689,4309,3513,1663,5189,3166,3118,3298,1587,1561,3433,5190,3119,1625,2998, // 272
+3299,4613,1766,3690,2786,4614,5191,5192,5193,5194,2161, 26,3377, 2,3929, 20, // 288
+3691, 47,4100, 50, 17, 16, 35, 268, 27, 243, 42, 155, 24, 154, 29, 184, // 304
+ 4, 91, 14, 92, 53, 396, 33, 289, 9, 37, 64, 620, 21, 39, 321, 5, // 320
+ 12, 11, 52, 13, 3, 208, 138, 0, 7, 60, 526, 141, 151,1069, 181, 275, // 336
+1591, 83, 132,1475, 126, 331, 829, 15, 69, 160, 59, 22, 157, 55,1079, 312, // 352
+ 109, 38, 23, 25, 10, 19, 79,5195, 61, 382,1124, 8, 30,5196,5197,5198, // 368
+5199,5200,5201,5202,5203,5204,5205,5206, 89, 62, 74, 34,2416, 112, 139, 196, // 384
+ 271, 149, 84, 607, 131, 765, 46, 88, 153, 683, 76, 874, 101, 258, 57, 80, // 400
+ 32, 364, 121,1508, 169,1547, 68, 235, 145,2999, 41, 360,3027, 70, 63, 31, // 416
+ 43, 259, 262,1383, 99, 533, 194, 66, 93, 846, 217, 192, 56, 106, 58, 565, // 432
+ 280, 272, 311, 256, 146, 82, 308, 71, 100, 128, 214, 655, 110, 261, 104,1140, // 448
+ 54, 51, 36, 87, 67,3070, 185,2618,2936,2020, 28,1066,2390,2059,5207,5208, // 464
+5209,5210,5211,5212,5213,5214,5215,5216,4615,5217,5218,5219,5220,5221,5222,5223, // 480
+5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,5235,5236,3514,5237,5238, // 496
+5239,5240,5241,5242,5243,5244,2297,2031,4616,4310,3692,5245,3071,5246,3598,5247, // 512
+4617,3231,3515,5248,4101,4311,4618,3808,4312,4102,5249,4103,4104,3599,5250,5251, // 528
+5252,5253,5254,5255,5256,5257,5258,5259,5260,5261,5262,5263,5264,5265,5266,5267, // 544
+5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,5279,5280,5281,5282,5283, // 560
+5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,5294,5295,5296,5297,5298,5299, // 576
+5300,5301,5302,5303,5304,5305,5306,5307,5308,5309,5310,5311,5312,5313,5314,5315, // 592
+5316,5317,5318,5319,5320,5321,5322,5323,5324,5325,5326,5327,5328,5329,5330,5331, // 608
+5332,5333,5334,5335,5336,5337,5338,5339,5340,5341,5342,5343,5344,5345,5346,5347, // 624
+5348,5349,5350,5351,5352,5353,5354,5355,5356,5357,5358,5359,5360,5361,5362,5363, // 640
+5364,5365,5366,5367,5368,5369,5370,5371,5372,5373,5374,5375,5376,5377,5378,5379, // 656
+5380,5381, 363, 642,2787,2878,2788,2789,2316,3232,2317,3434,2011, 165,1942,3930, // 672
+3931,3932,3933,5382,4619,5383,4620,5384,5385,5386,5387,5388,5389,5390,5391,5392, // 688
+5393,5394,5395,5396,5397,5398,5399,5400,5401,5402,5403,5404,5405,5406,5407,5408, // 704
+5409,5410,5411,5412,5413,5414,5415,5416,5417,5418,5419,5420,5421,5422,5423,5424, // 720
+5425,5426,5427,5428,5429,5430,5431,5432,5433,5434,5435,5436,5437,5438,5439,5440, // 736
+5441,5442,5443,5444,5445,5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456, // 752
+5457,5458,5459,5460,5461,5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472, // 768
+5473,5474,5475,5476,5477,5478,5479,5480,5481,5482,5483,5484,5485,5486,5487,5488, // 784
+5489,5490,5491,5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504, // 800
+5505,5506,5507,5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520, // 816
+5521,5522,5523,5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536, // 832
+5537,5538,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548,5549,5550,5551,5552, // 848
+5553,5554,5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568, // 864
+5569,5570,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584, // 880
+5585,5586,5587,5588,5589,5590,5591,5592,5593,5594,5595,5596,5597,5598,5599,5600, // 896
+5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,5615,5616, // 912
+5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,5632, // 928
+5633,5634,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,5647,5648, // 944
+5649,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,5661,5662,5663,5664, // 960
+5665,5666,5667,5668,5669,5670,5671,5672,5673,5674,5675,5676,5677,5678,5679,5680, // 976
+5681,5682,5683,5684,5685,5686,5687,5688,5689,5690,5691,5692,5693,5694,5695,5696, // 992
+5697,5698,5699,5700,5701,5702,5703,5704,5705,5706,5707,5708,5709,5710,5711,5712, // 1008
+5713,5714,5715,5716,5717,5718,5719,5720,5721,5722,5723,5724,5725,5726,5727,5728, // 1024
+5729,5730,5731,5732,5733,5734,5735,5736,5737,5738,5739,5740,5741,5742,5743,5744, // 1040
+5745,5746,5747,5748,5749,5750,5751,5752,5753,5754,5755,5756,5757,5758,5759,5760, // 1056
+5761,5762,5763,5764,5765,5766,5767,5768,5769,5770,5771,5772,5773,5774,5775,5776, // 1072
+5777,5778,5779,5780,5781,5782,5783,5784,5785,5786,5787,5788,5789,5790,5791,5792, // 1088
+5793,5794,5795,5796,5797,5798,5799,5800,5801,5802,5803,5804,5805,5806,5807,5808, // 1104
+5809,5810,5811,5812,5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824, // 1120
+5825,5826,5827,5828,5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840, // 1136
+5841,5842,5843,5844,5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856, // 1152
+5857,5858,5859,5860,5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872, // 1168
+5873,5874,5875,5876,5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888, // 1184
+5889,5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904, // 1200
+5905,5906,5907,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, // 1216
+5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936, // 1232
+5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952, // 1248
+5953,5954,5955,5956,5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968, // 1264
+5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984, // 1280
+5985,5986,5987,5988,5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000, // 1296
+6001,6002,6003,6004,6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016, // 1312
+6017,6018,6019,6020,6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032, // 1328
+6033,6034,6035,6036,6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048, // 1344
+6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064, // 1360
+6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080, // 1376
+6081,6082,6083,6084,6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096, // 1392
+6097,6098,6099,6100,6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112, // 1408
+6113,6114,2044,2060,4621, 997,1235, 473,1186,4622, 920,3378,6115,6116, 379,1108, // 1424
+4313,2657,2735,3934,6117,3809, 636,3233, 573,1026,3693,3435,2974,3300,2298,4105, // 1440
+ 854,2937,2463, 393,2581,2417, 539, 752,1280,2750,2480, 140,1161, 440, 708,1569, // 1456
+ 665,2497,1746,1291,1523,3000, 164,1603, 847,1331, 537,1997, 486, 508,1693,2418, // 1472
+1970,2227, 878,1220, 299,1030, 969, 652,2751, 624,1137,3301,2619, 65,3302,2045, // 1488
+1761,1859,3120,1930,3694,3516, 663,1767, 852, 835,3695, 269, 767,2826,2339,1305, // 1504
+ 896,1150, 770,1616,6118, 506,1502,2075,1012,2519, 775,2520,2975,2340,2938,4314, // 1520
+3028,2086,1224,1943,2286,6119,3072,4315,2240,1273,1987,3935,1557, 175, 597, 985, // 1536
+3517,2419,2521,1416,3029, 585, 938,1931,1007,1052,1932,1685,6120,3379,4316,4623, // 1552
+ 804, 599,3121,1333,2128,2539,1159,1554,2032,3810, 687,2033,2904, 952, 675,1467, // 1568
+3436,6121,2241,1096,1786,2440,1543,1924, 980,1813,2228, 781,2692,1879, 728,1918, // 1584
+3696,4624, 548,1950,4625,1809,1088,1356,3303,2522,1944, 502, 972, 373, 513,2827, // 1600
+ 586,2377,2391,1003,1976,1631,6122,2464,1084, 648,1776,4626,2141, 324, 962,2012, // 1616
+2177,2076,1384, 742,2178,1448,1173,1810, 222, 102, 301, 445, 125,2420, 662,2498, // 1632
+ 277, 200,1476,1165,1068, 224,2562,1378,1446, 450,1880, 659, 791, 582,4627,2939, // 1648
+3936,1516,1274, 555,2099,3697,1020,1389,1526,3380,1762,1723,1787,2229, 412,2114, // 1664
+1900,2392,3518, 512,2597, 427,1925,2341,3122,1653,1686,2465,2499, 697, 330, 273, // 1680
+ 380,2162, 951, 832, 780, 991,1301,3073, 965,2270,3519, 668,2523,2636,1286, 535, // 1696
+1407, 518, 671, 957,2658,2378, 267, 611,2197,3030,6123, 248,2299, 967,1799,2356, // 1712
+ 850,1418,3437,1876,1256,1480,2828,1718,6124,6125,1755,1664,2405,6126,4628,2879, // 1728
+2829, 499,2179, 676,4629, 557,2329,2214,2090, 325,3234, 464, 811,3001, 992,2342, // 1744
+2481,1232,1469, 303,2242, 466,1070,2163, 603,1777,2091,4630,2752,4631,2714, 322, // 1760
+2659,1964,1768, 481,2188,1463,2330,2857,3600,2092,3031,2421,4632,2318,2070,1849, // 1776
+2598,4633,1302,2254,1668,1701,2422,3811,2905,3032,3123,2046,4106,1763,1694,4634, // 1792
+1604, 943,1724,1454, 917, 868,2215,1169,2940, 552,1145,1800,1228,1823,1955, 316, // 1808
+1080,2510, 361,1807,2830,4107,2660,3381,1346,1423,1134,4108,6127, 541,1263,1229, // 1824
+1148,2540, 545, 465,1833,2880,3438,1901,3074,2482, 816,3937, 713,1788,2500, 122, // 1840
+1575, 195,1451,2501,1111,6128, 859, 374,1225,2243,2483,4317, 390,1033,3439,3075, // 1856
+2524,1687, 266, 793,1440,2599, 946, 779, 802, 507, 897,1081, 528,2189,1292, 711, // 1872
+1866,1725,1167,1640, 753, 398,2661,1053, 246, 348,4318, 137,1024,3440,1600,2077, // 1888
+2129, 825,4319, 698, 238, 521, 187,2300,1157,2423,1641,1605,1464,1610,1097,2541, // 1904
+1260,1436, 759,2255,1814,2150, 705,3235, 409,2563,3304, 561,3033,2005,2564, 726, // 1920
+1956,2343,3698,4109, 949,3812,3813,3520,1669, 653,1379,2525, 881,2198, 632,2256, // 1936
+1027, 778,1074, 733,1957, 514,1481,2466, 554,2180, 702,3938,1606,1017,1398,6129, // 1952
+1380,3521, 921, 993,1313, 594, 449,1489,1617,1166, 768,1426,1360, 495,1794,3601, // 1968
+1177,3602,1170,4320,2344, 476, 425,3167,4635,3168,1424, 401,2662,1171,3382,1998, // 1984
+1089,4110, 477,3169, 474,6130,1909, 596,2831,1842, 494, 693,1051,1028,1207,3076, // 2000
+ 606,2115, 727,2790,1473,1115, 743,3522, 630, 805,1532,4321,2021, 366,1057, 838, // 2016
+ 684,1114,2142,4322,2050,1492,1892,1808,2271,3814,2424,1971,1447,1373,3305,1090, // 2032
+1536,3939,3523,3306,1455,2199, 336, 369,2331,1035, 584,2393, 902, 718,2600,6131, // 2048
+2753, 463,2151,1149,1611,2467, 715,1308,3124,1268, 343,1413,3236,1517,1347,2663, // 2064
+2093,3940,2022,1131,1553,2100,2941,1427,3441,2942,1323,2484,6132,1980, 872,2368, // 2080
+2441,2943, 320,2369,2116,1082, 679,1933,3941,2791,3815, 625,1143,2023, 422,2200, // 2096
+3816,6133, 730,1695, 356,2257,1626,2301,2858,2637,1627,1778, 937, 883,2906,2693, // 2112
+3002,1769,1086, 400,1063,1325,3307,2792,4111,3077, 456,2345,1046, 747,6134,1524, // 2128
+ 884,1094,3383,1474,2164,1059, 974,1688,2181,2258,1047, 345,1665,1187, 358, 875, // 2144
+3170, 305, 660,3524,2190,1334,1135,3171,1540,1649,2542,1527, 927, 968,2793, 885, // 2160
+1972,1850, 482, 500,2638,1218,1109,1085,2543,1654,2034, 876, 78,2287,1482,1277, // 2176
+ 861,1675,1083,1779, 724,2754, 454, 397,1132,1612,2332, 893, 672,1237, 257,2259, // 2192
+2370, 135,3384, 337,2244, 547, 352, 340, 709,2485,1400, 788,1138,2511, 540, 772, // 2208
+1682,2260,2272,2544,2013,1843,1902,4636,1999,1562,2288,4637,2201,1403,1533, 407, // 2224
+ 576,3308,1254,2071, 978,3385, 170, 136,1201,3125,2664,3172,2394, 213, 912, 873, // 2240
+3603,1713,2202, 699,3604,3699, 813,3442, 493, 531,1054, 468,2907,1483, 304, 281, // 2256
+4112,1726,1252,2094, 339,2319,2130,2639, 756,1563,2944, 748, 571,2976,1588,2425, // 2272
+2715,1851,1460,2426,1528,1392,1973,3237, 288,3309, 685,3386, 296, 892,2716,2216, // 2288
+1570,2245, 722,1747,2217, 905,3238,1103,6135,1893,1441,1965, 251,1805,2371,3700, // 2304
+2601,1919,1078, 75,2182,1509,1592,1270,2640,4638,2152,6136,3310,3817, 524, 706, // 2320
+1075, 292,3818,1756,2602, 317, 98,3173,3605,3525,1844,2218,3819,2502, 814, 567, // 2336
+ 385,2908,1534,6137, 534,1642,3239, 797,6138,1670,1529, 953,4323, 188,1071, 538, // 2352
+ 178, 729,3240,2109,1226,1374,2000,2357,2977, 731,2468,1116,2014,2051,6139,1261, // 2368
+1593, 803,2859,2736,3443, 556, 682, 823,1541,6140,1369,2289,1706,2794, 845, 462, // 2384
+2603,2665,1361, 387, 162,2358,1740, 739,1770,1720,1304,1401,3241,1049, 627,1571, // 2400
+2427,3526,1877,3942,1852,1500, 431,1910,1503, 677, 297,2795, 286,1433,1038,1198, // 2416
+2290,1133,1596,4113,4639,2469,1510,1484,3943,6141,2442, 108, 712,4640,2372, 866, // 2432
+3701,2755,3242,1348, 834,1945,1408,3527,2395,3243,1811, 824, 994,1179,2110,1548, // 2448
+1453, 790,3003, 690,4324,4325,2832,2909,3820,1860,3821, 225,1748, 310, 346,1780, // 2464
+2470, 821,1993,2717,2796, 828, 877,3528,2860,2471,1702,2165,2910,2486,1789, 453, // 2480
+ 359,2291,1676, 73,1164,1461,1127,3311, 421, 604, 314,1037, 589, 116,2487, 737, // 2496
+ 837,1180, 111, 244, 735,6142,2261,1861,1362, 986, 523, 418, 581,2666,3822, 103, // 2512
+ 855, 503,1414,1867,2488,1091, 657,1597, 979, 605,1316,4641,1021,2443,2078,2001, // 2528
+1209, 96, 587,2166,1032, 260,1072,2153, 173, 94, 226,3244, 819,2006,4642,4114, // 2544
+2203, 231,1744, 782, 97,2667, 786,3387, 887, 391, 442,2219,4326,1425,6143,2694, // 2560
+ 633,1544,1202, 483,2015, 592,2052,1958,2472,1655, 419, 129,4327,3444,3312,1714, // 2576
+1257,3078,4328,1518,1098, 865,1310,1019,1885,1512,1734, 469,2444, 148, 773, 436, // 2592
+1815,1868,1128,1055,4329,1245,2756,3445,2154,1934,1039,4643, 579,1238, 932,2320, // 2608
+ 353, 205, 801, 115,2428, 944,2321,1881, 399,2565,1211, 678, 766,3944, 335,2101, // 2624
+1459,1781,1402,3945,2737,2131,1010, 844, 981,1326,1013, 550,1816,1545,2620,1335, // 2640
+1008, 371,2881, 936,1419,1613,3529,1456,1395,2273,1834,2604,1317,2738,2503, 416, // 2656
+1643,4330, 806,1126, 229, 591,3946,1314,1981,1576,1837,1666, 347,1790, 977,3313, // 2672
+ 764,2861,1853, 688,2429,1920,1462, 77, 595, 415,2002,3034, 798,1192,4115,6144, // 2688
+2978,4331,3035,2695,2582,2072,2566, 430,2430,1727, 842,1396,3947,3702, 613, 377, // 2704
+ 278, 236,1417,3388,3314,3174, 757,1869, 107,3530,6145,1194, 623,2262, 207,1253, // 2720
+2167,3446,3948, 492,1117,1935, 536,1838,2757,1246,4332, 696,2095,2406,1393,1572, // 2736
+3175,1782, 583, 190, 253,1390,2230, 830,3126,3389, 934,3245,1703,1749,2979,1870, // 2752
+2545,1656,2204, 869,2346,4116,3176,1817, 496,1764,4644, 942,1504, 404,1903,1122, // 2768
+1580,3606,2945,1022, 515, 372,1735, 955,2431,3036,6146,2797,1110,2302,2798, 617, // 2784
+6147, 441, 762,1771,3447,3607,3608,1904, 840,3037, 86, 939,1385, 572,1370,2445, // 2800
+1336, 114,3703, 898, 294, 203,3315, 703,1583,2274, 429, 961,4333,1854,1951,3390, // 2816
+2373,3704,4334,1318,1381, 966,1911,2322,1006,1155, 309, 989, 458,2718,1795,1372, // 2832
+1203, 252,1689,1363,3177, 517,1936, 168,1490, 562, 193,3823,1042,4117,1835, 551, // 2848
+ 470,4645, 395, 489,3448,1871,1465,2583,2641, 417,1493, 279,1295, 511,1236,1119, // 2864
+ 72,1231,1982,1812,3004, 871,1564, 984,3449,1667,2696,2096,4646,2347,2833,1673, // 2880
+3609, 695,3246,2668, 807,1183,4647, 890, 388,2333,1801,1457,2911,1765,1477,1031, // 2896
+3316,3317,1278,3391,2799,2292,2526, 163,3450,4335,2669,1404,1802,6148,2323,2407, // 2912
+1584,1728,1494,1824,1269, 298, 909,3318,1034,1632, 375, 776,1683,2061, 291, 210, // 2928
+1123, 809,1249,1002,2642,3038, 206,1011,2132, 144, 975, 882,1565, 342, 667, 754, // 2944
+1442,2143,1299,2303,2062, 447, 626,2205,1221,2739,2912,1144,1214,2206,2584, 760, // 2960
+1715, 614, 950,1281,2670,2621, 810, 577,1287,2546,4648, 242,2168, 250,2643, 691, // 2976
+ 123,2644, 647, 313,1029, 689,1357,2946,1650, 216, 771,1339,1306, 808,2063, 549, // 2992
+ 913,1371,2913,2914,6149,1466,1092,1174,1196,1311,2605,2396,1783,1796,3079, 406, // 3008
+2671,2117,3949,4649, 487,1825,2220,6150,2915, 448,2348,1073,6151,2397,1707, 130, // 3024
+ 900,1598, 329, 176,1959,2527,1620,6152,2275,4336,3319,1983,2191,3705,3610,2155, // 3040
+3706,1912,1513,1614,6153,1988, 646, 392,2304,1589,3320,3039,1826,1239,1352,1340, // 3056
+2916, 505,2567,1709,1437,2408,2547, 906,6154,2672, 384,1458,1594,1100,1329, 710, // 3072
+ 423,3531,2064,2231,2622,1989,2673,1087,1882, 333, 841,3005,1296,2882,2379, 580, // 3088
+1937,1827,1293,2585, 601, 574, 249,1772,4118,2079,1120, 645, 901,1176,1690, 795, // 3104
+2207, 478,1434, 516,1190,1530, 761,2080, 930,1264, 355, 435,1552, 644,1791, 987, // 3120
+ 220,1364,1163,1121,1538, 306,2169,1327,1222, 546,2645, 218, 241, 610,1704,3321, // 3136
+1984,1839,1966,2528, 451,6155,2586,3707,2568, 907,3178, 254,2947, 186,1845,4650, // 3152
+ 745, 432,1757, 428,1633, 888,2246,2221,2489,3611,2118,1258,1265, 956,3127,1784, // 3168
+4337,2490, 319, 510, 119, 457,3612, 274,2035,2007,4651,1409,3128, 970,2758, 590, // 3184
+2800, 661,2247,4652,2008,3950,1420,1549,3080,3322,3951,1651,1375,2111, 485,2491, // 3200
+1429,1156,6156,2548,2183,1495, 831,1840,2529,2446, 501,1657, 307,1894,3247,1341, // 3216
+ 666, 899,2156,1539,2549,1559, 886, 349,2208,3081,2305,1736,3824,2170,2759,1014, // 3232
+1913,1386, 542,1397,2948, 490, 368, 716, 362, 159, 282,2569,1129,1658,1288,1750, // 3248
+2674, 276, 649,2016, 751,1496, 658,1818,1284,1862,2209,2087,2512,3451, 622,2834, // 3264
+ 376, 117,1060,2053,1208,1721,1101,1443, 247,1250,3179,1792,3952,2760,2398,3953, // 3280
+6157,2144,3708, 446,2432,1151,2570,3452,2447,2761,2835,1210,2448,3082, 424,2222, // 3296
+1251,2449,2119,2836, 504,1581,4338, 602, 817, 857,3825,2349,2306, 357,3826,1470, // 3312
+1883,2883, 255, 958, 929,2917,3248, 302,4653,1050,1271,1751,2307,1952,1430,2697, // 3328
+2719,2359, 354,3180, 777, 158,2036,4339,1659,4340,4654,2308,2949,2248,1146,2232, // 3344
+3532,2720,1696,2623,3827,6158,3129,1550,2698,1485,1297,1428, 637, 931,2721,2145, // 3360
+ 914,2550,2587, 81,2450, 612, 827,2646,1242,4655,1118,2884, 472,1855,3181,3533, // 3376
+3534, 569,1353,2699,1244,1758,2588,4119,2009,2762,2171,3709,1312,1531,6159,1152, // 3392
+1938, 134,1830, 471,3710,2276,1112,1535,3323,3453,3535, 982,1337,2950, 488, 826, // 3408
+ 674,1058,1628,4120,2017, 522,2399, 211, 568,1367,3454, 350, 293,1872,1139,3249, // 3424
+1399,1946,3006,1300,2360,3324, 588, 736,6160,2606, 744, 669,3536,3828,6161,1358, // 3440
+ 199, 723, 848, 933, 851,1939,1505,1514,1338,1618,1831,4656,1634,3613, 443,2740, // 3456
+3829, 717,1947, 491,1914,6162,2551,1542,4121,1025,6163,1099,1223, 198,3040,2722, // 3472
+ 370, 410,1905,2589, 998,1248,3182,2380, 519,1449,4122,1710, 947, 928,1153,4341, // 3488
+2277, 344,2624,1511, 615, 105, 161,1212,1076,1960,3130,2054,1926,1175,1906,2473, // 3504
+ 414,1873,2801,6164,2309, 315,1319,3325, 318,2018,2146,2157, 963, 631, 223,4342, // 3520
+4343,2675, 479,3711,1197,2625,3712,2676,2361,6165,4344,4123,6166,2451,3183,1886, // 3536
+2184,1674,1330,1711,1635,1506, 799, 219,3250,3083,3954,1677,3713,3326,2081,3614, // 3552
+1652,2073,4657,1147,3041,1752, 643,1961, 147,1974,3955,6167,1716,2037, 918,3007, // 3568
+1994, 120,1537, 118, 609,3184,4345, 740,3455,1219, 332,1615,3830,6168,1621,2980, // 3584
+1582, 783, 212, 553,2350,3714,1349,2433,2082,4124, 889,6169,2310,1275,1410, 973, // 3600
+ 166,1320,3456,1797,1215,3185,2885,1846,2590,2763,4658, 629, 822,3008, 763, 940, // 3616
+1990,2862, 439,2409,1566,1240,1622, 926,1282,1907,2764, 654,2210,1607, 327,1130, // 3632
+3956,1678,1623,6170,2434,2192, 686, 608,3831,3715, 903,3957,3042,6171,2741,1522, // 3648
+1915,1105,1555,2552,1359, 323,3251,4346,3457, 738,1354,2553,2311,2334,1828,2003, // 3664
+3832,1753,2351,1227,6172,1887,4125,1478,6173,2410,1874,1712,1847, 520,1204,2607, // 3680
+ 264,4659, 836,2677,2102, 600,4660,3833,2278,3084,6174,4347,3615,1342, 640, 532, // 3696
+ 543,2608,1888,2400,2591,1009,4348,1497, 341,1737,3616,2723,1394, 529,3252,1321, // 3712
+ 983,4661,1515,2120, 971,2592, 924, 287,1662,3186,4349,2700,4350,1519, 908,1948, // 3728
+2452, 156, 796,1629,1486,2223,2055, 694,4126,1259,1036,3392,1213,2249,2742,1889, // 3744
+1230,3958,1015, 910, 408, 559,3617,4662, 746, 725, 935,4663,3959,3009,1289, 563, // 3760
+ 867,4664,3960,1567,2981,2038,2626, 988,2263,2381,4351, 143,2374, 704,1895,6175, // 3776
+1188,3716,2088, 673,3085,2362,4352, 484,1608,1921,2765,2918, 215, 904,3618,3537, // 3792
+ 894, 509, 976,3043,2701,3961,4353,2837,2982, 498,6176,6177,1102,3538,1332,3393, // 3808
+1487,1636,1637, 233, 245,3962, 383, 650, 995,3044, 460,1520,1206,2352, 749,3327, // 3824
+ 530, 700, 389,1438,1560,1773,3963,2264, 719,2951,2724,3834, 870,1832,1644,1000, // 3840
+ 839,2474,3717, 197,1630,3394, 365,2886,3964,1285,2133, 734, 922, 818,1106, 732, // 3856
+ 480,2083,1774,3458, 923,2279,1350, 221,3086, 85,2233,2234,3835,1585,3010,2147, // 3872
+1387,1705,2382,1619,2475, 133, 239,2802,1991,1016,2084,2383, 411,2838,1113, 651, // 3888
+1985,1160,3328, 990,1863,3087,1048,1276,2647, 265,2627,1599,3253,2056, 150, 638, // 3904
+2019, 656, 853, 326,1479, 680,1439,4354,1001,1759, 413,3459,3395,2492,1431, 459, // 3920
+4355,1125,3329,2265,1953,1450,2065,2863, 849, 351,2678,3131,3254,3255,1104,1577, // 3936
+ 227,1351,1645,2453,2193,1421,2887, 812,2121, 634, 95,2435, 201,2312,4665,1646, // 3952
+1671,2743,1601,2554,2702,2648,2280,1315,1366,2089,3132,1573,3718,3965,1729,1189, // 3968
+ 328,2679,1077,1940,1136, 558,1283, 964,1195, 621,2074,1199,1743,3460,3619,1896, // 3984
+1916,1890,3836,2952,1154,2112,1064, 862, 378,3011,2066,2113,2803,1568,2839,6178, // 4000
+3088,2919,1941,1660,2004,1992,2194, 142, 707,1590,1708,1624,1922,1023,1836,1233, // 4016
+1004,2313, 789, 741,3620,6179,1609,2411,1200,4127,3719,3720,4666,2057,3721, 593, // 4032
+2840, 367,2920,1878,6180,3461,1521, 628,1168, 692,2211,2649, 300, 720,2067,2571, // 4048
+2953,3396, 959,2504,3966,3539,3462,1977, 701,6181, 954,1043, 800, 681, 183,3722, // 4064
+1803,1730,3540,4128,2103, 815,2314, 174, 467, 230,2454,1093,2134, 755,3541,3397, // 4080
+1141,1162,6182,1738,2039, 270,3256,2513,1005,1647,2185,3837, 858,1679,1897,1719, // 4096
+2954,2324,1806, 402, 670, 167,4129,1498,2158,2104, 750,6183, 915, 189,1680,1551, // 4112
+ 455,4356,1501,2455, 405,1095,2955, 338,1586,1266,1819, 570, 641,1324, 237,1556, // 4128
+2650,1388,3723,6184,1368,2384,1343,1978,3089,2436, 879,3724, 792,1191, 758,3012, // 4144
+1411,2135,1322,4357, 240,4667,1848,3725,1574,6185, 420,3045,1546,1391, 714,4358, // 4160
+1967, 941,1864, 863, 664, 426, 560,1731,2680,1785,2864,1949,2363, 403,3330,1415, // 4176
+1279,2136,1697,2335, 204, 721,2097,3838, 90,6186,2085,2505, 191,3967, 124,2148, // 4192
+1376,1798,1178,1107,1898,1405, 860,4359,1243,1272,2375,2983,1558,2456,1638, 113, // 4208
+3621, 578,1923,2609, 880, 386,4130, 784,2186,2266,1422,2956,2172,1722, 497, 263, // 4224
+2514,1267,2412,2610, 177,2703,3542, 774,1927,1344, 616,1432,1595,1018, 172,4360, // 4240
+2325, 911,4361, 438,1468,3622, 794,3968,2024,2173,1681,1829,2957, 945, 895,3090, // 4256
+ 575,2212,2476, 475,2401,2681, 785,2744,1745,2293,2555,1975,3133,2865, 394,4668, // 4272
+3839, 635,4131, 639, 202,1507,2195,2766,1345,1435,2572,3726,1908,1184,1181,2457, // 4288
+3727,3134,4362, 843,2611, 437, 916,4669, 234, 769,1884,3046,3047,3623, 833,6187, // 4304
+1639,2250,2402,1355,1185,2010,2047, 999, 525,1732,1290,1488,2612, 948,1578,3728, // 4320
+2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, // 4336
+1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, // 4352
+2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, // 4368 //last 512
+
+/***************************************************************************************
+ *Everything below is of no interest for detection purposes *
+ ***************************************************************************************
+
+2138,2122,3730,2888,1995,1820,1044,6190,6191,6192,6193,6194,6195,6196,6197,6198, // 4384
+6199,6200,6201,6202,6203,6204,6205,4670,6206,6207,6208,6209,6210,6211,6212,6213, // 4400
+6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,6224,6225,6226,6227,6228,6229, // 4416
+6230,6231,6232,6233,6234,6235,6236,6237,3187,6238,6239,3969,6240,6241,6242,6243, // 4432
+6244,4671,6245,6246,4672,6247,6248,4133,6249,6250,4364,6251,2923,2556,2613,4673, // 4448
+4365,3970,6252,6253,6254,6255,4674,6256,6257,6258,2768,2353,4366,4675,4676,3188, // 4464
+4367,3463,6259,4134,4677,4678,6260,2267,6261,3842,3332,4368,3543,6262,6263,6264, // 4480
+3013,1954,1928,4135,4679,6265,6266,2478,3091,6267,4680,4369,6268,6269,1699,6270, // 4496
+3544,4136,4681,6271,4137,6272,4370,2804,6273,6274,2593,3971,3972,4682,6275,2236, // 4512
+4683,6276,6277,4684,6278,6279,4138,3973,4685,6280,6281,3258,6282,6283,6284,6285, // 4528
+3974,4686,2841,3975,6286,6287,3545,6288,6289,4139,4687,4140,6290,4141,6291,4142, // 4544
+6292,6293,3333,6294,6295,6296,4371,6297,3399,6298,6299,4372,3976,6300,6301,6302, // 4560
+4373,6303,6304,3843,3731,6305,4688,4374,6306,6307,3259,2294,6308,3732,2530,4143, // 4576
+6309,4689,6310,6311,6312,3048,6313,6314,4690,3733,2237,6315,6316,2282,3334,6317, // 4592
+6318,3844,6319,6320,4691,6321,3400,4692,6322,4693,6323,3049,6324,4375,6325,3977, // 4608
+6326,6327,6328,3546,6329,4694,3335,6330,4695,4696,6331,6332,6333,6334,4376,3978, // 4624
+6335,4697,3979,4144,6336,3980,4698,6337,6338,6339,6340,6341,4699,4700,4701,6342, // 4640
+6343,4702,6344,6345,4703,6346,6347,4704,6348,4705,4706,3135,6349,4707,6350,4708, // 4656
+6351,4377,6352,4709,3734,4145,6353,2506,4710,3189,6354,3050,4711,3981,6355,3547, // 4672
+3014,4146,4378,3735,2651,3845,3260,3136,2224,1986,6356,3401,6357,4712,2594,3627, // 4688
+3137,2573,3736,3982,4713,3628,4714,4715,2682,3629,4716,6358,3630,4379,3631,6359, // 4704
+6360,6361,3983,6362,6363,6364,6365,4147,3846,4717,6366,6367,3737,2842,6368,4718, // 4720
+2628,6369,3261,6370,2386,6371,6372,3738,3984,4719,3464,4720,3402,6373,2924,3336, // 4736
+4148,2866,6374,2805,3262,4380,2704,2069,2531,3138,2806,2984,6375,2769,6376,4721, // 4752
+4722,3403,6377,6378,3548,6379,6380,2705,3092,1979,4149,2629,3337,2889,6381,3338, // 4768
+4150,2557,3339,4381,6382,3190,3263,3739,6383,4151,4723,4152,2558,2574,3404,3191, // 4784
+6384,6385,4153,6386,4724,4382,6387,6388,4383,6389,6390,4154,6391,4725,3985,6392, // 4800
+3847,4155,6393,6394,6395,6396,6397,3465,6398,4384,6399,6400,6401,6402,6403,6404, // 4816
+4156,6405,6406,6407,6408,2123,6409,6410,2326,3192,4726,6411,6412,6413,6414,4385, // 4832
+4157,6415,6416,4158,6417,3093,3848,6418,3986,6419,6420,3849,6421,6422,6423,4159, // 4848
+6424,6425,4160,6426,3740,6427,6428,6429,6430,3987,6431,4727,6432,2238,6433,6434, // 4864
+4386,3988,6435,6436,3632,6437,6438,2843,6439,6440,6441,6442,3633,6443,2958,6444, // 4880
+6445,3466,6446,2364,4387,3850,6447,4388,2959,3340,6448,3851,6449,4728,6450,6451, // 4896
+3264,4729,6452,3193,6453,4389,4390,2706,3341,4730,6454,3139,6455,3194,6456,3051, // 4912
+2124,3852,1602,4391,4161,3853,1158,3854,4162,3989,4392,3990,4731,4732,4393,2040, // 4928
+4163,4394,3265,6457,2807,3467,3855,6458,6459,6460,3991,3468,4733,4734,6461,3140, // 4944
+2960,6462,4735,6463,6464,6465,6466,4736,4737,4738,4739,6467,6468,4164,2403,3856, // 4960
+6469,6470,2770,2844,6471,4740,6472,6473,6474,6475,6476,6477,6478,3195,6479,4741, // 4976
+4395,6480,2867,6481,4742,2808,6482,2493,4165,6483,6484,6485,6486,2295,4743,6487, // 4992
+6488,6489,3634,6490,6491,6492,6493,6494,6495,6496,2985,4744,6497,6498,4745,6499, // 5008
+6500,2925,3141,4166,6501,6502,4746,6503,6504,4747,6505,6506,6507,2890,6508,6509, // 5024
+6510,6511,6512,6513,6514,6515,6516,6517,6518,6519,3469,4167,6520,6521,6522,4748, // 5040
+4396,3741,4397,4749,4398,3342,2125,4750,6523,4751,4752,4753,3052,6524,2961,4168, // 5056
+6525,4754,6526,4755,4399,2926,4169,6527,3857,6528,4400,4170,6529,4171,6530,6531, // 5072
+2595,6532,6533,6534,6535,3635,6536,6537,6538,6539,6540,6541,6542,4756,6543,6544, // 5088
+6545,6546,6547,6548,4401,6549,6550,6551,6552,4402,3405,4757,4403,6553,6554,6555, // 5104
+4172,3742,6556,6557,6558,3992,3636,6559,6560,3053,2726,6561,3549,4173,3054,4404, // 5120
+6562,6563,3993,4405,3266,3550,2809,4406,6564,6565,6566,4758,4759,6567,3743,6568, // 5136
+4760,3744,4761,3470,6569,6570,6571,4407,6572,3745,4174,6573,4175,2810,4176,3196, // 5152
+4762,6574,4177,6575,6576,2494,2891,3551,6577,6578,3471,6579,4408,6580,3015,3197, // 5168
+6581,3343,2532,3994,3858,6582,3094,3406,4409,6583,2892,4178,4763,4410,3016,4411, // 5184
+6584,3995,3142,3017,2683,6585,4179,6586,6587,4764,4412,6588,6589,4413,6590,2986, // 5200
+6591,2962,3552,6592,2963,3472,6593,6594,4180,4765,6595,6596,2225,3267,4414,6597, // 5216
+3407,3637,4766,6598,6599,3198,6600,4415,6601,3859,3199,6602,3473,4767,2811,4416, // 5232
+1856,3268,3200,2575,3996,3997,3201,4417,6603,3095,2927,6604,3143,6605,2268,6606, // 5248
+3998,3860,3096,2771,6607,6608,3638,2495,4768,6609,3861,6610,3269,2745,4769,4181, // 5264
+3553,6611,2845,3270,6612,6613,6614,3862,6615,6616,4770,4771,6617,3474,3999,4418, // 5280
+4419,6618,3639,3344,6619,4772,4182,6620,2126,6621,6622,6623,4420,4773,6624,3018, // 5296
+6625,4774,3554,6626,4183,2025,3746,6627,4184,2707,6628,4421,4422,3097,1775,4185, // 5312
+3555,6629,6630,2868,6631,6632,4423,6633,6634,4424,2414,2533,2928,6635,4186,2387, // 5328
+6636,4775,6637,4187,6638,1891,4425,3202,3203,6639,6640,4776,6641,3345,6642,6643, // 5344
+3640,6644,3475,3346,3641,4000,6645,3144,6646,3098,2812,4188,3642,3204,6647,3863, // 5360
+3476,6648,3864,6649,4426,4001,6650,6651,6652,2576,6653,4189,4777,6654,6655,6656, // 5376
+2846,6657,3477,3205,4002,6658,4003,6659,3347,2252,6660,6661,6662,4778,6663,6664, // 5392
+6665,6666,6667,6668,6669,4779,4780,2048,6670,3478,3099,6671,3556,3747,4004,6672, // 5408
+6673,6674,3145,4005,3748,6675,6676,6677,6678,6679,3408,6680,6681,6682,6683,3206, // 5424
+3207,6684,6685,4781,4427,6686,4782,4783,4784,6687,6688,6689,4190,6690,6691,3479, // 5440
+6692,2746,6693,4428,6694,6695,6696,6697,6698,6699,4785,6700,6701,3208,2727,6702, // 5456
+3146,6703,6704,3409,2196,6705,4429,6706,6707,6708,2534,1996,6709,6710,6711,2747, // 5472
+6712,6713,6714,4786,3643,6715,4430,4431,6716,3557,6717,4432,4433,6718,6719,6720, // 5488
+6721,3749,6722,4006,4787,6723,6724,3644,4788,4434,6725,6726,4789,2772,6727,6728, // 5504
+6729,6730,6731,2708,3865,2813,4435,6732,6733,4790,4791,3480,6734,6735,6736,6737, // 5520
+4436,3348,6738,3410,4007,6739,6740,4008,6741,6742,4792,3411,4191,6743,6744,6745, // 5536
+6746,6747,3866,6748,3750,6749,6750,6751,6752,6753,6754,6755,3867,6756,4009,6757, // 5552
+4793,4794,6758,2814,2987,6759,6760,6761,4437,6762,6763,6764,6765,3645,6766,6767, // 5568
+3481,4192,6768,3751,6769,6770,2174,6771,3868,3752,6772,6773,6774,4193,4795,4438, // 5584
+3558,4796,4439,6775,4797,6776,6777,4798,6778,4799,3559,4800,6779,6780,6781,3482, // 5600
+6782,2893,6783,6784,4194,4801,4010,6785,6786,4440,6787,4011,6788,6789,6790,6791, // 5616
+6792,6793,4802,6794,6795,6796,4012,6797,6798,6799,6800,3349,4803,3483,6801,4804, // 5632
+4195,6802,4013,6803,6804,4196,6805,4014,4015,6806,2847,3271,2848,6807,3484,6808, // 5648
+6809,6810,4441,6811,4442,4197,4443,3272,4805,6812,3412,4016,1579,6813,6814,4017, // 5664
+6815,3869,6816,2964,6817,4806,6818,6819,4018,3646,6820,6821,4807,4019,4020,6822, // 5680
+6823,3560,6824,6825,4021,4444,6826,4198,6827,6828,4445,6829,6830,4199,4808,6831, // 5696
+6832,6833,3870,3019,2458,6834,3753,3413,3350,6835,4809,3871,4810,3561,4446,6836, // 5712
+6837,4447,4811,4812,6838,2459,4448,6839,4449,6840,6841,4022,3872,6842,4813,4814, // 5728
+6843,6844,4815,4200,4201,4202,6845,4023,6846,6847,4450,3562,3873,6848,6849,4816, // 5744
+4817,6850,4451,4818,2139,6851,3563,6852,6853,3351,6854,6855,3352,4024,2709,3414, // 5760
+4203,4452,6856,4204,6857,6858,3874,3875,6859,6860,4819,6861,6862,6863,6864,4453, // 5776
+3647,6865,6866,4820,6867,6868,6869,6870,4454,6871,2869,6872,6873,4821,6874,3754, // 5792
+6875,4822,4205,6876,6877,6878,3648,4206,4455,6879,4823,6880,4824,3876,6881,3055, // 5808
+4207,6882,3415,6883,6884,6885,4208,4209,6886,4210,3353,6887,3354,3564,3209,3485, // 5824
+2652,6888,2728,6889,3210,3755,6890,4025,4456,6891,4825,6892,6893,6894,6895,4211, // 5840
+6896,6897,6898,4826,6899,6900,4212,6901,4827,6902,2773,3565,6903,4828,6904,6905, // 5856
+6906,6907,3649,3650,6908,2849,3566,6909,3567,3100,6910,6911,6912,6913,6914,6915, // 5872
+4026,6916,3355,4829,3056,4457,3756,6917,3651,6918,4213,3652,2870,6919,4458,6920, // 5888
+2438,6921,6922,3757,2774,4830,6923,3356,4831,4832,6924,4833,4459,3653,2507,6925, // 5904
+4834,2535,6926,6927,3273,4027,3147,6928,3568,6929,6930,6931,4460,6932,3877,4461, // 5920
+2729,3654,6933,6934,6935,6936,2175,4835,2630,4214,4028,4462,4836,4215,6937,3148, // 5936
+4216,4463,4837,4838,4217,6938,6939,2850,4839,6940,4464,6941,6942,6943,4840,6944, // 5952
+4218,3274,4465,6945,6946,2710,6947,4841,4466,6948,6949,2894,6950,6951,4842,6952, // 5968
+4219,3057,2871,6953,6954,6955,6956,4467,6957,2711,6958,6959,6960,3275,3101,4843, // 5984
+6961,3357,3569,6962,4844,6963,6964,4468,4845,3570,6965,3102,4846,3758,6966,4847, // 6000
+3878,4848,4849,4029,6967,2929,3879,4850,4851,6968,6969,1733,6970,4220,6971,6972, // 6016
+6973,6974,6975,6976,4852,6977,6978,6979,6980,6981,6982,3759,6983,6984,6985,3486, // 6032
+3487,6986,3488,3416,6987,6988,6989,6990,6991,6992,6993,6994,6995,6996,6997,4853, // 6048
+6998,6999,4030,7000,7001,3211,7002,7003,4221,7004,7005,3571,4031,7006,3572,7007, // 6064
+2614,4854,2577,7008,7009,2965,3655,3656,4855,2775,3489,3880,4222,4856,3881,4032, // 6080
+3882,3657,2730,3490,4857,7010,3149,7011,4469,4858,2496,3491,4859,2283,7012,7013, // 6096
+7014,2365,4860,4470,7015,7016,3760,7017,7018,4223,1917,7019,7020,7021,4471,7022, // 6112
+2776,4472,7023,7024,7025,7026,4033,7027,3573,4224,4861,4034,4862,7028,7029,1929, // 6128
+3883,4035,7030,4473,3058,7031,2536,3761,3884,7032,4036,7033,2966,2895,1968,4474, // 6144
+3276,4225,3417,3492,4226,2105,7034,7035,1754,2596,3762,4227,4863,4475,3763,4864, // 6160
+3764,2615,2777,3103,3765,3658,3418,4865,2296,3766,2815,7036,7037,7038,3574,2872, // 6176
+3277,4476,7039,4037,4477,7040,7041,4038,7042,7043,7044,7045,7046,7047,2537,7048, // 6192
+7049,7050,7051,7052,7053,7054,4478,7055,7056,3767,3659,4228,3575,7057,7058,4229, // 6208
+7059,7060,7061,3660,7062,3212,7063,3885,4039,2460,7064,7065,7066,7067,7068,7069, // 6224
+7070,7071,7072,7073,7074,4866,3768,4867,7075,7076,7077,7078,4868,3358,3278,2653, // 6240
+7079,7080,4479,3886,7081,7082,4869,7083,7084,7085,7086,7087,7088,2538,7089,7090, // 6256
+7091,4040,3150,3769,4870,4041,2896,3359,4230,2930,7092,3279,7093,2967,4480,3213, // 6272
+4481,3661,7094,7095,7096,7097,7098,7099,7100,7101,7102,2461,3770,7103,7104,4231, // 6288
+3151,7105,7106,7107,4042,3662,7108,7109,4871,3663,4872,4043,3059,7110,7111,7112, // 6304
+3493,2988,7113,4873,7114,7115,7116,3771,4874,7117,7118,4232,4875,7119,3576,2336, // 6320
+4876,7120,4233,3419,4044,4877,4878,4482,4483,4879,4484,4234,7121,3772,4880,1045, // 6336
+3280,3664,4881,4882,7122,7123,7124,7125,4883,7126,2778,7127,4485,4486,7128,4884, // 6352
+3214,3887,7129,7130,3215,7131,4885,4045,7132,7133,4046,7134,7135,7136,7137,7138, // 6368
+7139,7140,7141,7142,7143,4235,7144,4886,7145,7146,7147,4887,7148,7149,7150,4487, // 6384
+4047,4488,7151,7152,4888,4048,2989,3888,7153,3665,7154,4049,7155,7156,7157,7158, // 6400
+7159,7160,2931,4889,4890,4489,7161,2631,3889,4236,2779,7162,7163,4891,7164,3060, // 6416
+7165,1672,4892,7166,4893,4237,3281,4894,7167,7168,3666,7169,3494,7170,7171,4050, // 6432
+7172,7173,3104,3360,3420,4490,4051,2684,4052,7174,4053,7175,7176,7177,2253,4054, // 6448
+7178,7179,4895,7180,3152,3890,3153,4491,3216,7181,7182,7183,2968,4238,4492,4055, // 6464
+7184,2990,7185,2479,7186,7187,4493,7188,7189,7190,7191,7192,4896,7193,4897,2969, // 6480
+4494,4898,7194,3495,7195,7196,4899,4495,7197,3105,2731,7198,4900,7199,7200,7201, // 6496
+4056,7202,3361,7203,7204,4496,4901,4902,7205,4497,7206,7207,2315,4903,7208,4904, // 6512
+7209,4905,2851,7210,7211,3577,7212,3578,4906,7213,4057,3667,4907,7214,4058,2354, // 6528
+3891,2376,3217,3773,7215,7216,7217,7218,7219,4498,7220,4908,3282,2685,7221,3496, // 6544
+4909,2632,3154,4910,7222,2337,7223,4911,7224,7225,7226,4912,4913,3283,4239,4499, // 6560
+7227,2816,7228,7229,7230,7231,7232,7233,7234,4914,4500,4501,7235,7236,7237,2686, // 6576
+7238,4915,7239,2897,4502,7240,4503,7241,2516,7242,4504,3362,3218,7243,7244,7245, // 6592
+4916,7246,7247,4505,3363,7248,7249,7250,7251,3774,4506,7252,7253,4917,7254,7255, // 6608
+3284,2991,4918,4919,3219,3892,4920,3106,3497,4921,7256,7257,7258,4922,7259,4923, // 6624
+3364,4507,4508,4059,7260,4240,3498,7261,7262,4924,7263,2992,3893,4060,3220,7264, // 6640
+7265,7266,7267,7268,7269,4509,3775,7270,2817,7271,4061,4925,4510,3776,7272,4241, // 6656
+4511,3285,7273,7274,3499,7275,7276,7277,4062,4512,4926,7278,3107,3894,7279,7280, // 6672
+4927,7281,4513,7282,7283,3668,7284,7285,4242,4514,4243,7286,2058,4515,4928,4929, // 6688
+4516,7287,3286,4244,7288,4517,7289,7290,7291,3669,7292,7293,4930,4931,4932,2355, // 6704
+4933,7294,2633,4518,7295,4245,7296,7297,4519,7298,7299,4520,4521,4934,7300,4246, // 6720
+4522,7301,7302,7303,3579,7304,4247,4935,7305,4936,7306,7307,7308,7309,3777,7310, // 6736
+4523,7311,7312,7313,4248,3580,7314,4524,3778,4249,7315,3581,7316,3287,7317,3221, // 6752
+7318,4937,7319,7320,7321,7322,7323,7324,4938,4939,7325,4525,7326,7327,7328,4063, // 6768
+7329,7330,4940,7331,7332,4941,7333,4526,7334,3500,2780,1741,4942,2026,1742,7335, // 6784
+7336,3582,4527,2388,7337,7338,7339,4528,7340,4250,4943,7341,7342,7343,4944,7344, // 6800
+7345,7346,3020,7347,4945,7348,7349,7350,7351,3895,7352,3896,4064,3897,7353,7354, // 6816
+7355,4251,7356,7357,3898,7358,3779,7359,3780,3288,7360,7361,4529,7362,4946,4530, // 6832
+2027,7363,3899,4531,4947,3222,3583,7364,4948,7365,7366,7367,7368,4949,3501,4950, // 6848
+3781,4951,4532,7369,2517,4952,4252,4953,3155,7370,4954,4955,4253,2518,4533,7371, // 6864
+7372,2712,4254,7373,7374,7375,3670,4956,3671,7376,2389,3502,4065,7377,2338,7378, // 6880
+7379,7380,7381,3061,7382,4957,7383,7384,7385,7386,4958,4534,7387,7388,2993,7389, // 6896
+3062,7390,4959,7391,7392,7393,4960,3108,4961,7394,4535,7395,4962,3421,4536,7396, // 6912
+4963,7397,4964,1857,7398,4965,7399,7400,2176,3584,4966,7401,7402,3422,4537,3900, // 6928
+3585,7403,3782,7404,2852,7405,7406,7407,4538,3783,2654,3423,4967,4539,7408,3784, // 6944
+3586,2853,4540,4541,7409,3901,7410,3902,7411,7412,3785,3109,2327,3903,7413,7414, // 6960
+2970,4066,2932,7415,7416,7417,3904,3672,3424,7418,4542,4543,4544,7419,4968,7420, // 6976
+7421,4255,7422,7423,7424,7425,7426,4067,7427,3673,3365,4545,7428,3110,2559,3674, // 6992
+7429,7430,3156,7431,7432,3503,7433,3425,4546,7434,3063,2873,7435,3223,4969,4547, // 7008
+4548,2898,4256,4068,7436,4069,3587,3786,2933,3787,4257,4970,4971,3788,7437,4972, // 7024
+3064,7438,4549,7439,7440,7441,7442,7443,4973,3905,7444,2874,7445,7446,7447,7448, // 7040
+3021,7449,4550,3906,3588,4974,7450,7451,3789,3675,7452,2578,7453,4070,7454,7455, // 7056
+7456,4258,3676,7457,4975,7458,4976,4259,3790,3504,2634,4977,3677,4551,4260,7459, // 7072
+7460,7461,7462,3907,4261,4978,7463,7464,7465,7466,4979,4980,7467,7468,2213,4262, // 7088
+7469,7470,7471,3678,4981,7472,2439,7473,4263,3224,3289,7474,3908,2415,4982,7475, // 7104
+4264,7476,4983,2655,7477,7478,2732,4552,2854,2875,7479,7480,4265,7481,4553,4984, // 7120
+7482,7483,4266,7484,3679,3366,3680,2818,2781,2782,3367,3589,4554,3065,7485,4071, // 7136
+2899,7486,7487,3157,2462,4072,4555,4073,4985,4986,3111,4267,2687,3368,4556,4074, // 7152
+3791,4268,7488,3909,2783,7489,2656,1962,3158,4557,4987,1963,3159,3160,7490,3112, // 7168
+4988,4989,3022,4990,4991,3792,2855,7491,7492,2971,4558,7493,7494,4992,7495,7496, // 7184
+7497,7498,4993,7499,3426,4559,4994,7500,3681,4560,4269,4270,3910,7501,4075,4995, // 7200
+4271,7502,7503,4076,7504,4996,7505,3225,4997,4272,4077,2819,3023,7506,7507,2733, // 7216
+4561,7508,4562,7509,3369,3793,7510,3590,2508,7511,7512,4273,3113,2994,2616,7513, // 7232
+7514,7515,7516,7517,7518,2820,3911,4078,2748,7519,7520,4563,4998,7521,7522,7523, // 7248
+7524,4999,4274,7525,4564,3682,2239,4079,4565,7526,7527,7528,7529,5000,7530,7531, // 7264
+5001,4275,3794,7532,7533,7534,3066,5002,4566,3161,7535,7536,4080,7537,3162,7538, // 7280
+7539,4567,7540,7541,7542,7543,7544,7545,5003,7546,4568,7547,7548,7549,7550,7551, // 7296
+7552,7553,7554,7555,7556,5004,7557,7558,7559,5005,7560,3795,7561,4569,7562,7563, // 7312
+7564,2821,3796,4276,4277,4081,7565,2876,7566,5006,7567,7568,2900,7569,3797,3912, // 7328
+7570,7571,7572,4278,7573,7574,7575,5007,7576,7577,5008,7578,7579,4279,2934,7580, // 7344
+7581,5009,7582,4570,7583,4280,7584,7585,7586,4571,4572,3913,7587,4573,3505,7588, // 7360
+5010,7589,7590,7591,7592,3798,4574,7593,7594,5011,7595,4281,7596,7597,7598,4282, // 7376
+5012,7599,7600,5013,3163,7601,5014,7602,3914,7603,7604,2734,4575,4576,4577,7605, // 7392
+7606,7607,7608,7609,3506,5015,4578,7610,4082,7611,2822,2901,2579,3683,3024,4579, // 7408
+3507,7612,4580,7613,3226,3799,5016,7614,7615,7616,7617,7618,7619,7620,2995,3290, // 7424
+7621,4083,7622,5017,7623,7624,7625,7626,7627,4581,3915,7628,3291,7629,5018,7630, // 7440
+7631,7632,7633,4084,7634,7635,3427,3800,7636,7637,4582,7638,5019,4583,5020,7639, // 7456
+3916,7640,3801,5021,4584,4283,7641,7642,3428,3591,2269,7643,2617,7644,4585,3592, // 7472
+7645,4586,2902,7646,7647,3227,5022,7648,4587,7649,4284,7650,7651,7652,4588,2284, // 7488
+7653,5023,7654,7655,7656,4589,5024,3802,7657,7658,5025,3508,4590,7659,7660,7661, // 7504
+1969,5026,7662,7663,3684,1821,2688,7664,2028,2509,4285,7665,2823,1841,7666,2689, // 7520
+3114,7667,3917,4085,2160,5027,5028,2972,7668,5029,7669,7670,7671,3593,4086,7672, // 7536
+4591,4087,5030,3803,7673,7674,7675,7676,7677,7678,7679,4286,2366,4592,4593,3067, // 7552
+2328,7680,7681,4594,3594,3918,2029,4287,7682,5031,3919,3370,4288,4595,2856,7683, // 7568
+3509,7684,7685,5032,5033,7686,7687,3804,2784,7688,7689,7690,7691,3371,7692,7693, // 7584
+2877,5034,7694,7695,3920,4289,4088,7696,7697,7698,5035,7699,5036,4290,5037,5038, // 7600
+5039,7700,7701,7702,5040,5041,3228,7703,1760,7704,5042,3229,4596,2106,4089,7705, // 7616
+4597,2824,5043,2107,3372,7706,4291,4090,5044,7707,4091,7708,5045,3025,3805,4598, // 7632
+4292,4293,4294,3373,7709,4599,7710,5046,7711,7712,5047,5048,3806,7713,7714,7715, // 7648
+5049,7716,7717,7718,7719,4600,5050,7720,7721,7722,5051,7723,4295,3429,7724,7725, // 7664
+7726,7727,3921,7728,3292,5052,4092,7729,7730,7731,7732,7733,7734,7735,5053,5054, // 7680
+7736,7737,7738,7739,3922,3685,7740,7741,7742,7743,2635,5055,7744,5056,4601,7745, // 7696
+7746,2560,7747,7748,7749,7750,3923,7751,7752,7753,7754,7755,4296,2903,7756,7757, // 7712
+7758,7759,7760,3924,7761,5057,4297,7762,7763,5058,4298,7764,4093,7765,7766,5059, // 7728
+3925,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,3595,7777,4299,5060,4094, // 7744
+7778,3293,5061,7779,7780,4300,7781,7782,4602,7783,3596,7784,7785,3430,2367,7786, // 7760
+3164,5062,5063,4301,7787,7788,4095,5064,5065,7789,3374,3115,7790,7791,7792,7793, // 7776
+7794,7795,7796,3597,4603,7797,7798,3686,3116,3807,5066,7799,7800,5067,7801,7802, // 7792
+4604,4302,5068,4303,4096,7803,7804,3294,7805,7806,5069,4605,2690,7807,3026,7808, // 7808
+7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824, // 7824
+7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, // 7840
+7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855,7856, // 7856
+7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871,7872, // 7872
+7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887,7888, // 7888
+7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903,7904, // 7904
+7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919,7920, // 7920
+7921,7922,7923,7924,3926,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, // 7936
+7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, // 7952
+7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, // 7968
+7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, // 7984
+7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, // 8000
+8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, // 8016
+8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, // 8032
+8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, // 8048
+8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, // 8064
+8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, // 8080
+8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, // 8096
+8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, // 8112
+8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, // 8128
+8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, // 8144
+8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, // 8160
+8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, // 8176
+8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, // 8192
+8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, // 8208
+8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, // 8224
+8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, // 8240
+8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, // 8256
+8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271, // 8272
+****************************************************************************************/
+
+};
+
diff --git a/extensions/universalchardet/src/base/JpCntx.cpp b/extensions/universalchardet/src/base/JpCntx.cpp
new file mode 100644
index 000000000..28ab64336
--- /dev/null
+++ b/extensions/universalchardet/src/base/JpCntx.cpp
@@ -0,0 +1,198 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nscore.h"
+#include "JpCntx.h"
+
+// Hiragana 2-character sequence table: jp2CharContext[previous][current] holds the frequency category of that pair of hiragana orders.
+const uint8_t jp2CharContext[83][83] =
+{
+{ 0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,},
+{ 2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4,},
+{ 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,},
+{ 0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4,},
+{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,},
+{ 0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4,},
+{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,},
+{ 0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3,},
+{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,},
+{ 0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4,},
+{ 1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4,},
+{ 0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3,},
+{ 0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3,},
+{ 0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3,},
+{ 0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4,},
+{ 0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3,},
+{ 2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4,},
+{ 0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3,},
+{ 0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5,},
+{ 0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3,},
+{ 2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5,},
+{ 0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4,},
+{ 1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4,},
+{ 0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3,},
+{ 0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3,},
+{ 0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3,},
+{ 0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5,},
+{ 0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4,},
+{ 0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5,},
+{ 0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3,},
+{ 0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4,},
+{ 0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4,},
+{ 0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4,},
+{ 0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,},
+{ 0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,},
+{ 1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3,},
+{ 0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0,},
+{ 0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3,},
+{ 0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3,},
+{ 0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5,},
+{ 0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4,},
+{ 2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5,},
+{ 0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3,},
+{ 0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3,},
+{ 0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3,},
+{ 0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3,},
+{ 0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4,},
+{ 0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4,},
+{ 0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2,},
+{ 0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3,},
+{ 0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3,},
+{ 0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3,},
+{ 0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3,},
+{ 0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4,},
+{ 0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3,},
+{ 0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4,},
+{ 0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3,},
+{ 0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3,},
+{ 0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4,},
+{ 0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4,},
+{ 0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3,},
+{ 2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4,},
+{ 0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4,},
+{ 0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3,},
+{ 0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4,},
+{ 0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4,},
+{ 1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4,},
+{ 0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3,},
+{ 0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,},
+{ 0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2,},
+{ 0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3,},
+{ 0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3,},
+{ 0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5,},
+{ 0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3,},
+{ 0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4,},
+{ 1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4,},
+{ 0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4,},
+{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,},
+{ 0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3,},
+{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1,},
+{ 0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2,},
+{ 0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3,},
+{ 0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1,},
+};
+
+#define MINIMUM_DATA_THRESHOLD 4
+
+// Feeds a raw byte buffer to the analyser and counts every pair of adjacent
+// hiragana characters in the category given by jp2CharContext.
+//
+// The buffer is byte oriented, so a character may straddle two consecutive
+// buffers. Such a character is not analysed: the number of its bytes that
+// fall into the next buffer is remembered in mNeedToSkipCharNum and those
+// bytes are skipped on the next call. Dropping one character makes little
+// difference to the statistics and keeps the logic simple.
+//
+// aBuf  bytes to analyse (need not be NUL terminated)
+// aLen  number of bytes available in aBuf
+void JapaneseContextAnalysis::HandleData(const char* aBuf, uint32_t aLen)
+{
+  if (mDone)
+    return;
+
+  // Consume the skip count left over from the previous buffer. It must be
+  // cleared once used, otherwise every later buffer would lose its leading
+  // bytes; if this buffer is shorter than the pending skip, carry the rest.
+  if (mNeedToSkipCharNum >= aLen)
+  {
+    mNeedToSkipCharNum -= aLen;
+    return;
+  }
+  uint32_t i = mNeedToSkipCharNum;
+  mNeedToSkipCharNum = 0;
+
+  while (i < aLen)
+  {
+    uint32_t charLen;
+    int32_t order;
+
+    if (i + 1 < aLen)
+    {
+      order = GetOrder(aBuf + i, &charLen);
+    }
+    else
+    {
+      // Last byte of the buffer: GetOrder() may inspect the trail byte, so
+      // give it a padded copy instead of letting it read aBuf[aLen]. A zero
+      // trail byte is never in the hiragana range, so the order is -1 while
+      // the character length is still derived from the lead byte.
+      const char lastByte[2] = { aBuf[i], '\0' };
+      order = GetOrder(lastByte, &charLen);
+    }
+
+    i += charLen;
+    if (i > aLen)
+    {
+      // The character continues in the next buffer; skip its remainder there.
+      mNeedToSkipCharNum = i - aLen;
+      mLastCharOrder = -1;
+    }
+    else
+    {
+      if (order != -1 && mLastCharOrder != -1)
+      {
+        mTotalRel++;
+        if (mTotalRel > MAX_REL_THRESHOLD)
+        {
+          // Enough samples collected; stop analysing.
+          mDone = true;
+          break;
+        }
+        mRelSample[jp2CharContext[mLastCharOrder][order]]++;
+      }
+      mLastCharOrder = order;
+    }
+  }
+}
+
+// Puts the analyser back into its initial state: all counters zeroed, no
+// previous character remembered, no bytes pending to be skipped.
+void JapaneseContextAnalysis::Reset()
+{
+  mDone = false;
+  mDataThreshold = 0;
+  mLastCharOrder = -1;
+  mNeedToSkipCharNum = 0;
+
+  mTotalRel = 0;
+  for (uint32_t category = 0; category != NUM_OF_CATEGORY; ++category)
+  {
+    mRelSample[category] = 0;
+  }
+}
+#define DONT_KNOW (float)-1
+
+// Returns the share of counted hiragana pairs that did not land in
+// category 0, or DONT_KNOW while no more than mDataThreshold pairs have
+// been counted.
+float JapaneseContextAnalysis::GetConfidence(void)
+{
+  // Not enough pairs seen yet to give an answer.
+  if (mTotalRel <= mDataThreshold)
+    return (float)DONT_KNOW;
+
+  const uint32_t outsideCategoryZero = mTotalRel - mRelSample[0];
+  return ((float)outsideCategoryZero) / mTotalRel;
+}
+
+
+// Determines the byte length of the character starting at str and, when
+// that character is a hiragana, its order (index into jp2CharContext).
+//
+// str      first byte of the character; str[1] is read only when str[0]
+//          is 0x82
+// charLen  receives 2 for lead bytes 0x81-0x9f and 0xe0-0xfc, else 1
+// returns  trail byte - 0x9f for lead 0x82 with trail 0x9f-0xf1, else -1
+int32_t SJISContextAnalysis::GetOrder(const char* str, uint32_t *charLen)
+{
+  const unsigned char lead = (unsigned char)str[0];
+
+  const bool isDoubleByte = (lead >= 0x81 && lead <= 0x9f) ||
+                            (lead >= 0xe0 && lead <= 0xfc);
+  *charLen = isDoubleByte ? 2 : 1;
+
+  // Only lead byte 0x82 ('\202') can introduce a hiragana.
+  if (lead != 0x82)
+    return -1;
+
+  const unsigned char trail = (unsigned char)str[1];
+  if (trail < 0x9f || trail > 0xf1)
+    return -1;
+
+  return trail - 0x9f;
+}
+
+// Determines the byte length of the character starting at str and, when
+// that character is a hiragana, its order (index into jp2CharContext).
+//
+// str      first byte of the character; str[1] is read only when str[0]
+//          is 0xa4
+// charLen  receives 3 for lead byte 0x8f, 2 for 0x8e and 0xa1-0xfe, else 1
+// returns  trail byte - 0xa1 for lead 0xa4 with trail 0xa1-0xf3, else -1
+int32_t EUCJPContextAnalysis::GetOrder(const char* str, uint32_t *charLen)
+{
+  const unsigned char lead = (unsigned char)str[0];
+
+  if (lead == 0x8f)
+    *charLen = 3;
+  else if (lead == 0x8e || (lead >= 0xa1 && lead <= 0xfe))
+    *charLen = 2;
+  else
+    *charLen = 1;
+
+  // Only lead byte 0xa4 can introduce a hiragana.
+  if (lead != 0xa4)
+    return -1;
+
+  const unsigned char trail = (unsigned char)str[1];
+  if (trail < 0xa1 || trail > 0xf3)
+    return -1;
+
+  return trail - 0xa1;
+}
diff --git a/extensions/universalchardet/src/base/JpCntx.h b/extensions/universalchardet/src/base/JpCntx.h
new file mode 100644
index 000000000..88e096432
--- /dev/null
+++ b/extensions/universalchardet/src/base/JpCntx.h
@@ -0,0 +1,107 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __JPCNTX_H__
+#define __JPCNTX_H__
+
+#define NUM_OF_CATEGORY 6
+
+#include "nscore.h"
+
+#define ENOUGH_REL_THRESHOLD 100
+#define MAX_REL_THRESHOLD 1000
+
+//hiragana frequency category table
+extern const uint8_t jp2CharContext[83][83];
+
+// Base class of the hiragana context analysers. It counts how often each
+// pair of adjacent hiragana falls into each jp2CharContext frequency
+// category; subclasses supply the encoding-specific GetOrder() overloads
+// that map a character to its hiragana order (or -1).
+class JapaneseContextAnalysis
+{
+public:
+  JapaneseContextAnalysis() {Reset();}
+
+  // The class is polymorphic (it declares pure virtual functions), so give
+  // it a virtual destructor to make destruction through a base pointer
+  // well defined.
+  virtual ~JapaneseContextAnalysis() {}
+
+  // Analyses a raw byte buffer; defined in JpCntx.cpp.
+  void HandleData(const char* aBuf, uint32_t aLen);
+
+  // Analyses one already delimited character.
+  //   aStr      first byte of the character
+  //   aCharLen  its length in bytes; only 2-byte characters are examined
+  void HandleOneChar(const char* aStr, uint32_t aCharLen)
+  {
+    int32_t order;
+
+    //if we received enough data, stop here
+    if (mTotalRel > MAX_REL_THRESHOLD) mDone = true;
+    if (mDone) return;
+
+    //Only 2-bytes characters are of our interest
+    order = (aCharLen == 2) ? GetOrder(aStr) : -1;
+    if (order != -1 && mLastCharOrder != -1)
+    {
+      mTotalRel++;
+      //count this sequence to its category counter
+      mRelSample[jp2CharContext[mLastCharOrder][order]]++;
+    }
+    mLastCharOrder = order;
+  }
+
+  // Confidence derived from the category counters; defined in JpCntx.cpp.
+  float GetConfidence(void);
+
+  // Restores the initial state; defined in JpCntx.cpp.
+  void Reset();
+
+  // True once more than ENOUGH_REL_THRESHOLD sequences have been counted.
+  bool GotEnoughData() {return mTotalRel > ENOUGH_REL_THRESHOLD;}
+
+protected:
+  // Returns the hiragana order of the character at str (or -1) and stores
+  // the character's byte length in *charLen.
+  virtual int32_t GetOrder(const char* str, uint32_t *charLen) = 0;
+
+  // Returns the hiragana order of the 2-byte character at str, or -1.
+  virtual int32_t GetOrder(const char* str) = 0;
+
+  //category counters, each integer counts sequences in its category
+  uint32_t mRelSample[NUM_OF_CATEGORY];
+
+  //total sequence received
+  uint32_t mTotalRel;
+
+  //Number of sequences needed to trigger detection
+  uint32_t mDataThreshold;
+
+  //The order of previous char
+  int32_t mLastCharOrder;
+
+  //if last byte in current buffer is not the last byte of a character, we
+  //need to know how many byte to skip in next buffer.
+  uint32_t mNeedToSkipCharNum;
+
+  //If this flag is set to true, detection is done and conclusion has been made
+  bool mDone;
+};
+
+
+// Context analyser that recognises hiragana in Shift_JIS byte sequences.
+class SJISContextAnalysis : public JapaneseContextAnalysis
+{
+protected:
+  int32_t GetOrder(const char* str, uint32_t *charLen);
+
+  // Order of the 2-byte character at str: hiragana have lead byte 0x82
+  // ('\202') and a trail byte in 0x9f-0xf1; anything else yields -1.
+  int32_t GetOrder(const char* str)
+  {
+    if ((unsigned char)str[0] != 0x82)
+      return -1;
+
+    const unsigned char trail = (unsigned char)str[1];
+    if (trail < 0x9f || trail > 0xf1)
+      return -1;
+
+    return trail - 0x9f;
+  }
+};
+
+// Context analyser that recognises hiragana in EUC-JP byte sequences.
+class EUCJPContextAnalysis : public JapaneseContextAnalysis
+{
+protected:
+  int32_t GetOrder(const char* str, uint32_t *charLen);
+
+  // Order of the 2-byte character at str: hiragana have lead byte 0xa4
+  // ('\244') and a trail byte in 0xa1-0xf3; anything else yields -1.
+  int32_t GetOrder(const char* str)
+  {
+    if ((unsigned char)str[0] != 0xa4)
+      return -1;
+
+    const unsigned char trail = (unsigned char)str[1];
+    if (trail < 0xa1 || trail > 0xf3)
+      return -1;
+
+    return trail - 0xa1;
+  }
+};
+
+#endif /* __JPCNTX_H__ */
+
diff --git a/extensions/universalchardet/src/base/moz.build b/extensions/universalchardet/src/base/moz.build
new file mode 100644
index 000000000..442579d4a
--- /dev/null
+++ b/extensions/universalchardet/src/base/moz.build
@@ -0,0 +1,22 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+UNIFIED_SOURCES += [
+ 'CharDistribution.cpp',
+ 'JpCntx.cpp',
+ 'nsCharSetProber.cpp',
+ 'nsEscCharsetProber.cpp',
+ 'nsEscSM.cpp',
+ 'nsEUCJPProber.cpp',
+ 'nsLatin1Prober.cpp',
+ 'nsMBCSGroupProber.cpp',
+ 'nsMBCSSM.cpp',
+ 'nsSJISProber.cpp',
+ 'nsUniversalDetector.cpp',
+ 'nsUTF8Prober.cpp',
+]
+
+FINAL_LIBRARY = 'xul'
diff --git a/extensions/universalchardet/src/base/nsCharSetProber.cpp b/extensions/universalchardet/src/base/nsCharSetProber.cpp
new file mode 100644
index 000000000..cd15a10c3
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsCharSetProber.cpp
@@ -0,0 +1,92 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsCharSetProber.h"
+#include "prmem.h"
+
+// Filter for scripts that do not use English letters.
+//
+// The input is cut into segments at every ASCII byte that is not a letter.
+// A segment is copied to the output, followed by one space, only when it
+// contains at least one byte with the high bit set; all other segments are
+// dropped. A trailing segment is copied without the space.
+//
+// aBuf/aLen  input bytes
+// newBuf     receives a PR_Malloc'ed buffer of aLen bytes (null on failure)
+// newLen     receives the number of bytes written to *newBuf
+// returns    false if the allocation failed, true otherwise
+bool nsCharSetProber::FilterWithoutEnglishLetters(const char* aBuf, uint32_t aLen, char** newBuf, uint32_t& newLen)
+{
+  char* out = (char*)PR_Malloc(aLen);
+  *newBuf = out;
+  if (!out)
+    return false;
+
+  const char* const end = aBuf + aLen;
+  const char* segStart = aBuf;
+  const char* cur = aBuf;
+  bool sawHighByte = false;
+
+  for (; cur < end; ++cur)
+  {
+    const char ch = *cur;
+
+    if (ch & 0x80)
+    {
+      sawHighByte = true;
+      continue;
+    }
+
+    const bool isLetter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
+    if (isLetter)
+      continue;
+
+    // ch is a symbol, most likely punctuation: it closes the current segment.
+    if (sawHighByte && cur > segStart)
+    {
+      // The segment has upper-ASCII content, so keep it.
+      while (segStart < cur)
+        *out++ = *segStart++;
+      *out++ = ' ';
+      sawHighByte = false;
+    }
+    // In either case the next segment begins after the delimiter.
+    segStart = cur + 1;
+  }
+
+  if (sawHighByte && cur > segStart)
+  {
+    while (segStart < cur)
+      *out++ = *segStart++;
+  }
+
+  newLen = out - *newBuf;
+
+  return true;
+}
+
+// Filter for scripts that contain both English letters and upper-ASCII
+// characters. It reduces the load on the probers by dropping markup.
+//
+// The input is cut into segments at every ASCII byte that is not a letter.
+// A non-empty segment that lies outside a '<' ... '>' tag is copied to the
+// output followed by one space; segments inside a tag are dropped. A
+// trailing segment outside a tag is copied without the space.
+//
+// aBuf/aLen  input bytes
+// newBuf     receives a PR_Malloc'ed buffer of aLen bytes (null on failure)
+// newLen     receives the number of bytes written to *newBuf
+// returns    false if the allocation failed, true otherwise
+bool nsCharSetProber::FilterWithEnglishLetters(const char* aBuf, uint32_t aLen, char** newBuf, uint32_t& newLen)
+{
+  char *newptr;
+  const char *prevPtr, *curPtr;
+  bool isInTag = false;
+
+  newptr = *newBuf = (char*)PR_Malloc(aLen);
+  if (!newptr)
+    return false;
+
+  for (curPtr = prevPtr = aBuf; curPtr < aBuf+aLen; curPtr++)
+  {
+    // Tag state that applies to the pending segment [prevPtr, curPtr). It
+    // has to be sampled before the current byte changes it: the text right
+    // before '<' is still outside the tag, and the text right before '>'
+    // is still inside it.
+    const bool segmentInTag = isInTag;
+
+    if (*curPtr == '>')
+      isInTag = false;
+    else if (*curPtr == '<')
+      isInTag = true;
+
+    if (!(*curPtr & 0x80) &&
+        (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || *curPtr > 'z') )
+    {
+      if (curPtr > prevPtr && !segmentInTag) // Current segment contains more than just a symbol
+                                             // and it is not inside a tag, keep it.
+      {
+        while (prevPtr < curPtr) *newptr++ = *prevPtr++;
+        *newptr++ = ' ';
+      }
+      // The next segment starts after the delimiter.
+      prevPtr = curPtr+1;
+    }
+  }
+
+  // If the current segment contains more than just a symbol
+  // and it is not inside a tag then keep it.
+  if (!isInTag)
+    while (prevPtr < curPtr)
+      *newptr++ = *prevPtr++;
+
+  newLen = newptr - *newBuf;
+
+  return true;
+}
diff --git a/extensions/universalchardet/src/base/nsCharSetProber.h b/extensions/universalchardet/src/base/nsCharSetProber.h
new file mode 100644
index 000000000..4cb12794f
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsCharSetProber.h
@@ -0,0 +1,42 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef nsCharSetProber_h__
+#define nsCharSetProber_h__
+
+#include "nscore.h"
+
+//#define DEBUG_chardet // Uncomment this for debug dump.
+
+// Result of feeding data to a prober (returned by HandleData/GetState).
+typedef enum {
+ eDetecting = 0, //Still detecting: no definite answer yet, but the caller can ask for a confidence.
+ eFoundIt = 1, //Positive answer
+ eNotMe = 2 //Negative answer
+} nsProbingState;
+
+#define SHORTCUT_THRESHOLD (float)0.95
+
+// Abstract interface of a charset prober, plus two static buffer filters
+// shared by probers.
+class nsCharSetProber {
+public:
+ virtual ~nsCharSetProber() {}
+ // Name of the charset this prober stands for.
+ virtual const char* GetCharSetName() = 0;
+ // Feeds aLen bytes starting at aBuf to the prober and returns its state.
+ virtual nsProbingState HandleData(const char* aBuf, uint32_t aLen) = 0;
+ // Current probing state.
+ virtual nsProbingState GetState(void) = 0;
+ // Restarts detection from scratch.
+ virtual void Reset(void) = 0;
+ // Confidence in the prober's charset; compared against SHORTCUT_THRESHOLD
+ // by callers. NOTE(review): value range is not specified here - confirm.
+ virtual float GetConfidence(void) = 0;
+
+#ifdef DEBUG_chardet
+ // Debug hook; the default implementation does nothing.
+ virtual void DumpStatus() {};
+#endif
+
+ // Helper functions used in the Latin1 and Group probers.
+ // Both functions allocate a new buffer for newBuf. This buffer should be
+ // freed by the caller using PR_FREEIF.
+ // Both functions return false in case of memory allocation failure.
+ // newLen receives the number of bytes written to the new buffer.
+ static bool FilterWithoutEnglishLetters(const char* aBuf, uint32_t aLen, char** newBuf, uint32_t& newLen);
+ static bool FilterWithEnglishLetters(const char* aBuf, uint32_t aLen, char** newBuf, uint32_t& newLen);
+
+};
+
+#endif /* nsCharSetProber_h__ */
diff --git a/extensions/universalchardet/src/base/nsCodingStateMachine.h b/extensions/universalchardet/src/base/nsCodingStateMachine.h
new file mode 100644
index 000000000..53779959c
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsCodingStateMachine.h
@@ -0,0 +1,85 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef nsCodingStateMachine_h__
+#define nsCodingStateMachine_h__
+
+#include "mozilla/ArrayUtils.h"
+
+#include "nsPkgInt.h"
+
+// States reported by nsCodingStateMachine::NextState().
+typedef enum {
+ eStart = 0, // at a character boundary: the next byte is treated as a first byte
+ eError = 1, // presumably an illegal byte sequence for the model - TODO confirm against the state tables
+ eItsMe = 2 // presumably a sequence that identifies the encoding - TODO confirm against the state tables
+} nsSMState;
+
+#define GETCLASS(c) GETFROMPCK(((unsigned char)(c)), mModel->classTable)
+
+//state machine model
+//Describes one encoding for nsCodingStateMachine.
+typedef struct
+{
+ // Packed table giving the class of each byte value (read through GETCLASS).
+ nsPkgInt classTable;
+ // Multiplier applied to the current state when indexing stateTable
+ // (index = state * classFactor + byte class).
+ uint32_t classFactor;
+ // Packed transition table yielding the next state.
+ nsPkgInt stateTable;
+ // Character length, indexed by the class of the character's first byte.
+ const uint32_t* charLenTable;
+#ifdef DEBUG
+ // Number of entries in charLenTable; used only for a bounds assertion.
+ const size_t charLenTableLength;
+#endif
+ // Name reported by nsCodingStateMachine::GetCodingStateMachine().
+ const char* name;
+} SMModel;
+
+// Byte-driven state machine that checks a byte stream against the encoding
+// described by an SMModel. The model is borrowed, not owned.
+class nsCodingStateMachine {
+public:
+  // All members are initialised so that GetCurrentCharLen() is well defined
+  // (it returns 0) even before the first byte has been fed.
+  explicit nsCodingStateMachine(const SMModel* sm)
+    : mCurrentState(eStart)
+    , mCurrentCharLen(0)
+    , mCurrentBytePos(0)
+    , mModel(sm)
+  {
+  }
+
+  // Consumes one byte and returns the state reached.
+  nsSMState NextState(char c){
+    //for each byte we get its class , if it is first byte, we also get byte length
+    uint32_t byteCls = GETCLASS(c);
+    if (mCurrentState == eStart)
+    {
+      mCurrentBytePos = 0;
+      MOZ_ASSERT(byteCls < mModel->charLenTableLength);
+      mCurrentCharLen = mModel->charLenTable[byteCls];
+    }
+    //from byte's class and stateTable, we get its next state
+    mCurrentState=(nsSMState)GETFROMPCK(mCurrentState*(mModel->classFactor)+byteCls,
+                                        mModel->stateTable);
+    mCurrentBytePos++;
+    return mCurrentState;
+  }
+
+  // Length in bytes of the character whose first byte was seen most recently.
+  uint32_t GetCurrentCharLen(void) {return mCurrentCharLen;}
+
+  // Returns to the start state; the next byte is treated as a first byte.
+  void Reset(void) {mCurrentState = eStart;}
+
+  // Name stored in the model.
+  const char * GetCodingStateMachine() {return mModel->name;}
+
+protected:
+  nsSMState mCurrentState;
+  uint32_t mCurrentCharLen;
+  uint32_t mCurrentBytePos;
+
+  const SMModel *mModel;
+};
+
+extern const SMModel UTF8SMModel;
+extern const SMModel Big5SMModel;
+extern const SMModel EUCJPSMModel;
+extern const SMModel EUCKRSMModel;
+extern const SMModel EUCTWSMModel;
+extern const SMModel GB18030SMModel;
+extern const SMModel SJISSMModel;
+
+
+extern const SMModel HZSMModel;
+extern const SMModel ISO2022CNSMModel;
+extern const SMModel ISO2022JPSMModel;
+extern const SMModel ISO2022KRSMModel;
+
+#undef CHAR_LEN_TABLE
+#ifdef DEBUG
+#define CHAR_LEN_TABLE(x) x, mozilla::ArrayLength(x)
+#else
+#define CHAR_LEN_TABLE(x) x
+#endif
+
+#endif /* nsCodingStateMachine_h__ */
+
diff --git a/extensions/universalchardet/src/base/nsEUCJPProber.cpp b/extensions/universalchardet/src/base/nsEUCJPProber.cpp
new file mode 100644
index 000000000..663421f03
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsEUCJPProber.cpp
@@ -0,0 +1,69 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// For Japanese encodings, observe these characteristics:
+// 1, kana characters (or hankaku?) often have a high frequency of appearance
+// 2, kana characters often occur in groups
+// 3, certain combinations of kana are never used in the Japanese language
+
+#include "nsEUCJPProber.h"
+#include "nsDebug.h"
+
+// Restarts detection: the prober goes back to eDetecting and the state
+// machine and both analysers are reset.
+void nsEUCJPProber::Reset(void)
+{
+  mState = eDetecting;
+
+  mCodingSM->Reset();
+
+  mContextAnalyser.Reset();
+  mDistributionAnalyser.Reset();
+}
+
+// Runs aLen bytes from aBuf through the EUC-JP state machine and hands
+// every completed character to the context and distribution analysers.
+//
+// A character whose first byte ended the previous buffer is reassembled in
+// mLastChar: mLastChar[0] holds the last byte of the previous call and
+// mLastChar[1] receives the first byte of this one.
+//
+// Returns the probing state after the data has been processed. An empty
+// buffer leaves the state untouched.
+nsProbingState nsEUCJPProber::HandleData(const char* aBuf, uint32_t aLen)
+{
+  NS_ASSERTION(aLen, "HandleData called with empty buffer");
+  // Do not rely on the assertion alone: with aLen == 0 the aBuf[aLen-1]
+  // access below would read before the start of the buffer.
+  if (aLen == 0)
+    return mState;
+
+  nsSMState codingState;
+
+  for (uint32_t i = 0; i < aLen; i++)
+  {
+    codingState = mCodingSM->NextState(aBuf[i]);
+    if (codingState == eItsMe)
+    {
+      mState = eFoundIt;
+      break;
+    }
+    if (codingState == eStart)
+    {
+      // Back at a character boundary: a whole character has been consumed.
+      uint32_t charLen = mCodingSM->GetCurrentCharLen();
+
+      if (i == 0)
+      {
+        // The character may have started in the previous buffer.
+        mLastChar[1] = aBuf[0];
+        mContextAnalyser.HandleOneChar(mLastChar, charLen);
+        mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
+      }
+      else
+      {
+        mContextAnalyser.HandleOneChar(aBuf+i-1, charLen);
+        mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen);
+      }
+    }
+  }
+
+  // Remember the last byte for a character that continues in the next call.
+  mLastChar[0] = aBuf[aLen-1];
+
+  if (mState == eDetecting)
+    if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
+      mState = eFoundIt;
+
+  return mState;
+}
+
+// Returns the larger of the context analyser's and the distribution
+// analyser's confidence.
+float nsEUCJPProber::GetConfidence(void)
+{
+  const float fromContext = mContextAnalyser.GetConfidence();
+  const float fromDistribution = mDistributionAnalyser.GetConfidence();
+
+  if (fromContext > fromDistribution)
+    return fromContext;
+  return fromDistribution;
+}
+
diff --git a/extensions/universalchardet/src/base/nsEUCJPProber.h b/extensions/universalchardet/src/base/nsEUCJPProber.h
new file mode 100644
index 000000000..4880151e5
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsEUCJPProber.h
@@ -0,0 +1,43 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// For EUC-JP encoding, observe these characteristics:
+// 1. kana characters (or hankaku?) often have a high frequency of appearance
+// 2. kana characters often occur in groups
+// 3. certain combinations of kana are never used in the Japanese language
+
+#ifndef nsEUCJPProber_h__
+#define nsEUCJPProber_h__
+
+#include "nsCharSetProber.h"
+#include "nsCodingStateMachine.h"
+#include "JpCntx.h"
+#include "CharDistribution.h"
+
+// Charset prober for EUC-JP. It combines a coding state machine built from
+// EUCJPSMModel with kana-context and character-distribution analysis.
+class nsEUCJPProber : public nsCharSetProber {
+public:
+  nsEUCJPProber()
+    : mCodingSM(new nsCodingStateMachine(&EUCJPSMModel))
+  {
+    Reset();
+  }
+  virtual ~nsEUCJPProber(void)
+  {
+    delete mCodingSM;
+  }
+
+  // Feeds a chunk of bytes to the prober and returns the resulting state.
+  nsProbingState HandleData(const char* aBuf, uint32_t aLen);
+  const char* GetCharSetName()
+  {
+    return "EUC-JP";
+  }
+  nsProbingState GetState(void)
+  {
+    return mState;
+  }
+  void Reset(void);
+  float GetConfidence(void);
+
+protected:
+  // Owned by this object; released in the destructor.
+  nsCodingStateMachine* mCodingSM;
+  nsProbingState mState;
+
+  EUCJPContextAnalysis mContextAnalyser;
+  EUCJPDistributionAnalysis mDistributionAnalyser;
+
+  // [0] keeps the last byte of the previous buffer and [1] receives the
+  // first byte of the current one (see HandleData).
+  char mLastChar[2];
+};
+
+
+#endif /* nsEUCJPProber_h__ */
+
diff --git a/extensions/universalchardet/src/base/nsEscCharsetProber.cpp b/extensions/universalchardet/src/base/nsEscCharsetProber.cpp
new file mode 100644
index 000000000..b4fbfeb00
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsEscCharsetProber.cpp
@@ -0,0 +1,46 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+#include "nsEscCharsetProber.h"
+#include "nsUniversalDetector.h"
+
+// Creates the prober with an ISO-2022-JP state machine, in the detecting
+// state and with no charset detected yet.
+nsEscCharSetProber::nsEscCharSetProber()
+  : mCodingSM(new nsCodingStateMachine(&ISO2022JPSMModel))
+  , mState(eDetecting)
+  , mDetectedCharset(nullptr)
+{
+}
+
+// Nothing is released by hand here: mCodingSM is held in an nsAutoPtr.
+// NOTE(review): assumes nsAutoPtr deletes its pointee on destruction - confirm.
+nsEscCharSetProber::~nsEscCharSetProber(void)
+{
+}
+
+// Returns the prober to the detecting state, forgets any detected charset
+// and resets the coding state machine.
+void nsEscCharSetProber::Reset(void)
+{
+  mDetectedCharset = nullptr;
+  mCodingSM->Reset();
+  mState = eDetecting;
+}
+
+// Feeds aLen bytes of aBuf to the coding state machine while the prober is
+// still detecting. When the machine reports eItsMe the prober switches to
+// eFoundIt and records the name supplied by the state machine.
+// Returns the prober state.
+nsProbingState nsEscCharSetProber::HandleData(const char* aBuf, uint32_t aLen)
+{
+  // A prober that is no longer detecting ignores further input.
+  if (mState != eDetecting)
+    return mState;
+
+  uint32_t pos = 0;
+  while (pos < aLen)
+  {
+    const nsSMState next = mCodingSM->NextState(aBuf[pos]);
+    ++pos;
+    if (next != eItsMe)
+      continue;
+
+    mState = eFoundIt;
+    mDetectedCharset = mCodingSM->GetCodingStateMachine();
+    break;
+  }
+
+  return mState;
+}
+
diff --git a/extensions/universalchardet/src/base/nsEscCharsetProber.h b/extensions/universalchardet/src/base/nsEscCharsetProber.h
new file mode 100644
index 000000000..4507972d0
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsEscCharsetProber.h
@@ -0,0 +1,32 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsEscCharSetProber_h__
+#define nsEscCharSetProber_h__
+
+#include "nsCharSetProber.h"
+#include "nsCodingStateMachine.h"
+#include "nsAutoPtr.h"
+
+// Prober driven purely by a coding state machine; it reports the charset
+// name that the state machine supplies once a match is found.
+class nsEscCharSetProber : public nsCharSetProber {
+public:
+  nsEscCharSetProber();
+  virtual ~nsEscCharSetProber(void);
+
+  // Feeds a chunk of bytes to the prober and returns the resulting state.
+  nsProbingState HandleData(const char* aBuf, uint32_t aLen);
+
+  // Detected charset name, or nullptr while nothing has been detected.
+  const char* GetCharSetName()
+  {
+    return mDetectedCharset;
+  }
+  nsProbingState GetState(void)
+  {
+    return mState;
+  }
+  void Reset(void);
+
+  // Fixed confidence value.
+  float GetConfidence(void)
+  {
+    return static_cast<float>(0.99);
+  }
+
+protected:
+  // NOTE(review): only declared here; no definition is visible next to the
+  // other member definitions - confirm whether it is still needed.
+  void GetDistribution(uint32_t aCharLen, const char* aStr);
+
+  nsAutoPtr<nsCodingStateMachine> mCodingSM;
+  nsProbingState mState;
+  const char * mDetectedCharset;
+};
+
+#endif /* nsEscCharSetProber_h__ */
+
diff --git a/extensions/universalchardet/src/base/nsEscSM.cpp b/extensions/universalchardet/src/base/nsEscSM.cpp
new file mode 100644
index 000000000..77a223fec
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsEscSM.cpp
@@ -0,0 +1,63 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "nsCodingStateMachine.h"
+
+// Byte-to-class table for the ISO-2022-JP state machine: one 4-bit class per
+// byte value, eight classes packed into each 32-bit word by PCK4BITS (the
+// first argument is the lowest byte value of the row).
+// Classes with a dedicated byte: 1 = 0x1b, 3 = 0x28, 4 = 0x42, 5 = 0x4a,
+// 6 = 0x40, 7 = 0x24, 8 = 0x44, 9 = 0x49. Class 2 covers 0x00, 0x0e, 0x0f
+// and every byte >= 0x80; all remaining bytes are class 0.
+static const uint32_t ISO2022JP_cls [ 256 / 8 ] = {
+PCK4BITS(2,0,0,0,0,0,0,0), // 00 - 07
+PCK4BITS(0,0,0,0,0,0,2,2), // 08 - 0f
+PCK4BITS(0,0,0,0,0,0,0,0), // 10 - 17
+PCK4BITS(0,0,0,1,0,0,0,0), // 18 - 1f
+PCK4BITS(0,0,0,0,7,0,0,0), // 20 - 27
+PCK4BITS(3,0,0,0,0,0,0,0), // 28 - 2f
+PCK4BITS(0,0,0,0,0,0,0,0), // 30 - 37
+PCK4BITS(0,0,0,0,0,0,0,0), // 38 - 3f
+PCK4BITS(6,0,4,0,8,0,0,0), // 40 - 47
+PCK4BITS(0,9,5,0,0,0,0,0), // 48 - 4f
+PCK4BITS(0,0,0,0,0,0,0,0), // 50 - 57
+PCK4BITS(0,0,0,0,0,0,0,0), // 58 - 5f
+PCK4BITS(0,0,0,0,0,0,0,0), // 60 - 67
+PCK4BITS(0,0,0,0,0,0,0,0), // 68 - 6f
+PCK4BITS(0,0,0,0,0,0,0,0), // 70 - 77
+PCK4BITS(0,0,0,0,0,0,0,0), // 78 - 7f
+PCK4BITS(2,2,2,2,2,2,2,2), // 80 - 87
+PCK4BITS(2,2,2,2,2,2,2,2), // 88 - 8f
+PCK4BITS(2,2,2,2,2,2,2,2), // 90 - 97
+PCK4BITS(2,2,2,2,2,2,2,2), // 98 - 9f
+PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7
+PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
+PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
+PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
+PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
+PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
+PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
+PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
+PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7
+PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef
+PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7
+PCK4BITS(2,2,2,2,2,2,2,2) // f8 - ff
+};
+
+
+// State transition table for ISO-2022-JP, one 4-bit entry per (state, class)
+// pair, eight entries packed per word. The trailing //xx-yy comments give
+// the flat entry index in hex.
+// NOTE(review): presumably read as state * class factor (10 in
+// ISO2022JPSMModel) + class by nsCodingStateMachine - confirm there.
+static const uint32_t ISO2022JP_st [ 9] = {
+PCK4BITS(eStart, 3,eError,eStart,eStart,eStart,eStart,eStart),//00-07
+PCK4BITS(eStart,eStart,eError,eError,eError,eError,eError,eError),//08-0f
+PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//10-17
+PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError),//18-1f
+PCK4BITS(eError, 5,eError,eError,eError, 4,eError,eError),//20-27
+PCK4BITS(eError,eError,eError, 6,eItsMe,eError,eItsMe,eError),//28-2f
+PCK4BITS(eError,eError,eError,eError,eError,eError,eItsMe,eItsMe),//30-37
+PCK4BITS(eError,eError,eError,eItsMe,eError,eError,eError,eError),//38-3f
+PCK4BITS(eError,eError,eError,eError,eItsMe,eError,eStart,eStart) //40-47
+};
+
+// Character length per class (10 classes); every entry is 0 for this model.
+static const uint32_t ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+// State machine model for ISO-2022-JP: the packed class table, the class
+// factor (10), the packed state table, the character length table and the
+// charset name. Both packed tables use the 4-bit unit parameters.
+const SMModel ISO2022JPSMModel = {
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls },
+ 10,
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st },
+ CHAR_LEN_TABLE(ISO2022JPCharLenTable),
+ "ISO-2022-JP",
+};
diff --git a/extensions/universalchardet/src/base/nsLatin1Prober.cpp b/extensions/universalchardet/src/base/nsLatin1Prober.cpp
new file mode 100644
index 000000000..e2769486e
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsLatin1Prober.cpp
@@ -0,0 +1,149 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsLatin1Prober.h"
+#include "prmem.h"
+#include <stdio.h>
+
+// Character classes produced by Latin1_CharToClass; they also serve as the
+// row and column indices of Latin1ClassModel.
+#define UDF 0 // undefined
+#define OTH 1 //other
+#define ASC 2 // ascii capital letter
+#define ASS 3 // ascii small letter
+#define ACV 4 // accent capital vowel
+#define ACO 5 // accent capital other
+#define ASV 6 // accent small vowel
+#define ASO 7 // accent small other
+#define CLASS_NUM 8 // total classes
+
+// Maps every byte value (0x00 - 0xFF) to one of the character classes
+// UDF..ASO. The trailing comments give the byte range covered by each row.
+static const unsigned char Latin1_CharToClass[] =
+{
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 00 - 07
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 08 - 0F
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 10 - 17
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 18 - 1F
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 20 - 27
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 28 - 2F
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 30 - 37
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 38 - 3F
+ OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 40 - 47
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 48 - 4F
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 50 - 57
+ ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, // 58 - 5F
+ OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 60 - 67
+ ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 68 - 6F
+ ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 70 - 77
+ ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, // 78 - 7F
+ OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, // 80 - 87
+ OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, // 88 - 8F
+ UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 90 - 97
+ OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, // 98 - 9F
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // A0 - A7
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // A8 - AF
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // B0 - B7
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // B8 - BF
+ ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, // C0 - C7
+ ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, // C8 - CF
+ ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, // D0 - D7
+ ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, // D8 - DF
+ ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, // E0 - E7
+ ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, // E8 - EF
+ ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, // F0 - F7
+ ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, // F8 - FF
+};
+
+
+/* Likelihood of a character class (column) directly following another
+   class (row); looked up as previousClass * CLASS_NUM + currentClass.
+ 0 : illegal
+ 1 : very unlikely
+ 2 : normal
+ 3 : very likely
+*/
+static const unsigned char Latin1ClassModel[] =
+{
+/* UDF OTH ASC ASS ACV ACO ASV ASO */
+/*UDF*/ 0, 0, 0, 0, 0, 0, 0, 0,
+/*OTH*/ 0, 3, 3, 3, 3, 3, 3, 3,
+/*ASC*/ 0, 3, 3, 3, 3, 3, 3, 3,
+/*ASS*/ 0, 3, 3, 3, 1, 1, 3, 3,
+/*ACV*/ 0, 3, 3, 3, 1, 2, 1, 2,
+/*ACO*/ 0, 3, 3, 3, 3, 3, 3, 3,
+/*ASV*/ 0, 3, 1, 3, 1, 1, 1, 3,
+/*ASO*/ 0, 3, 1, 3, 1, 1, 3, 3,
+};
+
+// Restores the initial state: detection in progress, previous character
+// class OTH, and every frequency counter cleared.
+void nsLatin1Prober::Reset(void)
+{
+  for (uint32_t& counter : mFreqCounter)
+    counter = 0;
+  mLastCharClass = OTH;
+  mState = eDetecting;
+}
+
+
+// Classifies each byte of the (filtered) input and counts how likely every
+// class transition is according to Latin1ClassModel. An illegal transition
+// (likelihood 0) marks the prober as eNotMe and stops the scan.
+// Returns the prober state.
+nsProbingState nsLatin1Prober::HandleData(const char* aBuf, uint32_t aLen)
+{
+  char* filtered = nullptr;
+  uint32_t filteredLen = 0;
+
+  // When FilterWithEnglishLetters returns false, scan the raw input instead.
+  if (!FilterWithEnglishLetters(aBuf, aLen, &filtered, filteredLen)) {
+    filtered = (char*)aBuf;
+    filteredLen = aLen;
+  }
+
+  for (uint32_t pos = 0; pos < filteredLen; ++pos)
+  {
+    const unsigned char cls = Latin1_CharToClass[(unsigned char)filtered[pos]];
+    const unsigned char likelihood =
+      Latin1ClassModel[mLastCharClass*CLASS_NUM + cls];
+    if (likelihood == 0) {
+      mState = eNotMe;
+      break;
+    }
+    mFreqCounter[likelihood]++;
+    mLastCharClass = cls;
+  }
+
+  // Free the buffer only when it is not the caller's own.
+  if (filtered != aBuf)
+    PR_FREEIF(filtered);
+
+  return mState;
+}
+
+// Computes the confidence from the transition counters: the share of "very
+// likely" transitions minus a 20x penalty for "very unlikely" ones, clamped
+// at zero and then halved. Returns 0.01 once the prober is in eNotMe.
+float nsLatin1Prober::GetConfidence(void)
+{
+  if (mState == eNotMe)
+    return 0.01f;
+
+  uint32_t total = 0;
+  for (uint32_t count : mFreqCounter)
+    total += count;
+
+  // Nothing counted yet: confidence is zero.
+  if (total == 0)
+    return 0.0f;
+
+  float score = mFreqCounter[3]*1.0f / total;
+  score -= mFreqCounter[1]*20.0f/total;
+  if (score < 0.0f)
+    score = 0.0f;
+
+  // lower the confidence of latin1 so that other more accurate detector
+  // can take priority.
+  score *= 0.50f;
+  return score;
+}
+
+#ifdef DEBUG_chardet
+// Debug helper: prints the current confidence and charset name to stdout.
+void nsLatin1Prober::DumpStatus()
+{
+  const float confidence = GetConfidence();
+  const char* charsetName = GetCharSetName();
+  printf(" Latin1Prober: %1.3f [%s]\r\n", confidence, charsetName);
+}
+#endif
+
+
diff --git a/extensions/universalchardet/src/base/nsLatin1Prober.h b/extensions/universalchardet/src/base/nsLatin1Prober.h
new file mode 100644
index 000000000..df25d083b
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsLatin1Prober.h
@@ -0,0 +1,36 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsLatin1Prober_h__
+#define nsLatin1Prober_h__
+
+#include "nsCharSetProber.h"
+
+// Number of likelihood categories counted in mFreqCounter.
+#define FREQ_CAT_NUM 4
+
+// Prober that reports "windows-1252"; it scores the input by how plausible
+// the transitions between consecutive character classes are.
+class nsLatin1Prober : public nsCharSetProber {
+public:
+  nsLatin1Prober(void)
+  {
+    Reset();
+  }
+  virtual ~nsLatin1Prober(void)
+  {
+  }
+
+  // Feeds a chunk of bytes to the prober and returns the resulting state.
+  nsProbingState HandleData(const char* aBuf, uint32_t aLen);
+  const char* GetCharSetName()
+  {
+    return "windows-1252";
+  }
+  nsProbingState GetState(void)
+  {
+    return mState;
+  }
+  void Reset(void);
+  float GetConfidence(void);
+
+#ifdef DEBUG_chardet
+  virtual void DumpStatus();
+#endif
+
+protected:
+
+  nsProbingState mState;
+  // Class of the most recently processed character.
+  char mLastCharClass;
+  // One counter per likelihood category.
+  uint32_t mFreqCounter[FREQ_CAT_NUM];
+};
+
+
+#endif /* nsLatin1Prober_h__ */
+
diff --git a/extensions/universalchardet/src/base/nsMBCSGroupProber.cpp b/extensions/universalchardet/src/base/nsMBCSGroupProber.cpp
new file mode 100644
index 000000000..b47691443
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsMBCSGroupProber.cpp
@@ -0,0 +1,177 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include <stdio.h>
+
+#include "nsMBCSGroupProber.h"
+#include "nsUniversalDetector.h"
+
+#if defined(DEBUG_chardet) || defined(DEBUG_jgmyers)
+// Debug-only display names, indexed like mProbers in nsMBCSGroupProber
+// (0 = UTF-8, 1 = Shift_JIS, 2 = EUC-JP prober).
+const char *ProberName[] =
+{
+ "UTF8",
+ "SJIS",
+ "EUCJP",
+};
+
+#endif
+
+// Creates the UTF-8, Shift_JIS and EUC-JP probers (in that slot order) and
+// resets the group.
+nsMBCSGroupProber::nsMBCSGroupProber()
+{
+  // Slot order matters: the debug-only ProberName[] uses the same indices.
+  nsCharSetProber* const probers[NUM_OF_PROBERS] = {
+    new nsUTF8Prober(),
+    new nsSJISProber(),
+    new nsEUCJPProber(),
+  };
+  for (uint32_t slot = 0; slot < NUM_OF_PROBERS; ++slot)
+    mProbers[slot] = probers[slot];
+
+  Reset();
+}
+
+// Destroys every prober owned by the group.
+nsMBCSGroupProber::~nsMBCSGroupProber()
+{
+  for (nsCharSetProber* prober : mProbers)
+    delete prober;
+}
+
+// Returns the charset name of the best-guess prober. If no guess has been
+// recorded yet, GetConfidence() is asked to pick one; if that still yields
+// nothing, the first prober is used.
+const char* nsMBCSGroupProber::GetCharSetName()
+{
+  const int32_t kNoGuess = -1;
+
+  // GetConfidence() may record the most confident prober in mBestGuess.
+  if (mBestGuess == kNoGuess)
+    GetConfidence();
+
+  if (mBestGuess == kNoGuess)
+    mBestGuess = 0;
+
+  return mProbers[mBestGuess]->GetCharSetName();
+}
+
+// Resets the group and every existing prober. A slot is active exactly when
+// it holds a prober; mActiveNum counts the active slots.
+void nsMBCSGroupProber::Reset(void)
+{
+  mKeepNext = 0;
+  mState = eDetecting;
+  mBestGuess = -1;
+  mActiveNum = 0;
+
+  for (uint32_t slot = 0; slot < NUM_OF_PROBERS; ++slot)
+  {
+    nsCharSetProber* prober = mProbers[slot];
+    if (!prober)
+    {
+      mIsActive[slot] = false;
+      continue;
+    }
+    prober->Reset();
+    mIsActive[slot] = true;
+    ++mActiveNum;
+  }
+}
+
+// Filters the input down to segments around high-bit bytes and hands each
+// segment to every active prober. A segment begins at the first byte with
+// bit 0x80 set and is flushed once two bytes without that bit have followed
+// the most recent high-bit byte. A segment still open at the end of the
+// buffer is flushed as well, and the pending counter is kept in mKeepNext
+// for the next call.
+// Returns eFoundIt as soon as one prober reports it, otherwise the current
+// group state.
+nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, uint32_t aLen)
+{
+  // Gives one segment to all active probers in slot order. Returns true (and
+  // records the winning slot) when a prober reports eFoundIt.
+  auto feedProbers = [this](const char* aSegment, uint32_t aSegmentLen) -> bool {
+    for (uint32_t slot = 0; slot < NUM_OF_PROBERS; ++slot)
+    {
+      if (!mIsActive[slot])
+        continue;
+      if (mProbers[slot]->HandleData(aSegment, aSegmentLen) == eFoundIt)
+      {
+        mBestGuess = slot;
+        mState = eFoundIt;
+        return true;
+      }
+    }
+    return false;
+  };
+
+  uint32_t segmentStart = 0;
+  uint32_t pending = mKeepNext;
+
+  //do filtering to reduce load to probers
+  for (uint32_t pos = 0; pos < aLen; ++pos)
+  {
+    if (aBuf[pos] & 0x80)
+    {
+      if (pending == 0)
+        segmentStart = pos;
+      pending = 2;
+      continue;
+    }
+
+    // Byte without the high bit: only relevant while a segment is open.
+    if (pending == 0)
+      continue;
+    if (--pending != 0)
+      continue;
+
+    if (feedProbers(aBuf + segmentStart, pos + 1 - segmentStart))
+      return mState;
+  }
+
+  // Flush a segment that is still open at the end of the buffer.
+  if (pending != 0 && feedProbers(aBuf + segmentStart, aLen - segmentStart))
+    return mState;
+
+  mKeepNext = pending;
+  return mState;
+}
+
+// Returns 0.99 once a charset has been found and 0.01 in eNotMe. Otherwise
+// returns the highest confidence among the active probers and records that
+// prober in mBestGuess (mBestGuess is left alone if none scores above 0).
+float nsMBCSGroupProber::GetConfidence(void)
+{
+  if (mState == eFoundIt)
+    return (float)0.99;
+  if (mState == eNotMe)
+    return (float)0.01;
+
+  float best = 0.0;
+  for (uint32_t slot = 0; slot < NUM_OF_PROBERS; ++slot)
+  {
+    if (!mIsActive[slot])
+      continue;
+
+    const float candidate = mProbers[slot]->GetConfidence();
+    if (best < candidate)
+    {
+      best = candidate;
+      mBestGuess = slot;
+    }
+  }
+  return best;
+}
+
+#ifdef DEBUG_chardet
+// Debug helper: prints one line per prober, either its confidence or a note
+// that it is inactive.
+void nsMBCSGroupProber::DumpStatus()
+{
+  // The returned value is not used here.
+  GetConfidence();
+
+  for (uint32_t slot = 0; slot < NUM_OF_PROBERS; ++slot)
+  {
+    if (mIsActive[slot])
+    {
+      const float confidence = mProbers[slot]->GetConfidence();
+      printf(" MBCS %1.3f: [%s]\r\n", confidence, ProberName[slot]);
+      continue;
+    }
+    printf(" MBCS inactive: [%s] (confidence is too low).\r\n", ProberName[slot]);
+  }
+}
+#endif
+
+#ifdef DEBUG_jgmyers
+// Debug helper: writes name, active flag and confidence of every prober into
+// consecutive entries of states, starting at offset; offset is advanced by
+// one per prober. Inactive probers report a confidence of 0.0.
+void nsMBCSGroupProber::GetDetectorState(nsUniversalDetector::DetectorState (&states)[nsUniversalDetector::NumDetectors], uint32_t &offset)
+{
+  for (uint32_t slot = 0; slot < NUM_OF_PROBERS; ++slot, ++offset) {
+    nsUniversalDetector::DetectorState& entry = states[offset];
+    entry.name = ProberName[slot];
+    entry.isActive = mIsActive[slot];
+    entry.confidence = mIsActive[slot] ? mProbers[slot]->GetConfidence() : 0.0;
+  }
+}
+#endif /* DEBUG_jgmyers */
diff --git a/extensions/universalchardet/src/base/nsMBCSGroupProber.h b/extensions/universalchardet/src/base/nsMBCSGroupProber.h
new file mode 100644
index 000000000..444b31d09
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsMBCSGroupProber.h
@@ -0,0 +1,42 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsMBCSGroupProber_h__
+#define nsMBCSGroupProber_h__
+
+#include "nsSJISProber.h"
+#include "nsUTF8Prober.h"
+#include "nsEUCJPProber.h"
+
+// Number of probers managed by nsMBCSGroupProber.
+#define NUM_OF_PROBERS 3
+
+// Group prober that forwards input to a set of multi-byte charset probers
+// and reports the charset of the most confident one.
+class nsMBCSGroupProber: public nsCharSetProber {
+public:
+ nsMBCSGroupProber();
+ virtual ~nsMBCSGroupProber();
+ // Feeds a chunk of bytes to the active probers; returns the group state.
+ nsProbingState HandleData(const char* aBuf, uint32_t aLen);
+ // Charset name of the best-guess prober.
+ const char* GetCharSetName();
+ nsProbingState GetState(void) {return mState;}
+ void Reset(void);
+ float GetConfidence(void);
+
+#ifdef DEBUG_chardet
+ void DumpStatus();
+#endif
+#ifdef DEBUG_jgmyers
+ void GetDetectorState(nsUniversalDetector::DetectorState (&states)[nsUniversalDetector::NumDetectors], uint32_t &offset);
+#endif
+
+protected:
+ nsProbingState mState;
+ // Probers owned by the group; deleted in the destructor.
+ nsCharSetProber* mProbers[NUM_OF_PROBERS];
+ // Whether the prober in the same slot receives data.
+ bool mIsActive[NUM_OF_PROBERS];
+ // Index of the most confident prober, or -1 when there is none yet.
+ int32_t mBestGuess;
+ // Number of active slots counted by Reset().
+ uint32_t mActiveNum;
+ // Filtering counter carried over between HandleData() calls.
+ uint32_t mKeepNext;
+};
+
+#endif /* nsMBCSGroupProber_h__ */
+
diff --git a/extensions/universalchardet/src/base/nsMBCSSM.cpp b/extensions/universalchardet/src/base/nsMBCSSM.cpp
new file mode 100644
index 000000000..6a1adbf86
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsMBCSSM.cpp
@@ -0,0 +1,188 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "nsCodingStateMachine.h"
+
+/*
+Modification from Frank Tang's original work:
+. 0x00 is allowed as a legal character, since some web pages contain this
+  character in their text stream.
+*/
+
+// Byte-to-class table for the EUC-JP state machine: one 4-bit class per byte
+// value, eight classes packed into each 32-bit word by PCK4BITS.
+// Class 4: 0x00-0x7f except 0x0e, 0x0f and 0x1b. Class 5: 0x0e, 0x0f, 0x1b,
+// 0x80-0x8d, 0x90-0xa0 and 0xff. Class 1: 0x8e. Class 3: 0x8f.
+// Class 2: 0xa1-0xdf. Class 0: 0xe0-0xfe.
+static const uint32_t EUCJP_cls [ 256 / 8 ] = {
+//PCK4BITS(5,4,4,4,4,4,4,4), // 00 - 07
+PCK4BITS(4,4,4,4,4,4,4,4), // 00 - 07
+PCK4BITS(4,4,4,4,4,4,5,5), // 08 - 0f
+PCK4BITS(4,4,4,4,4,4,4,4), // 10 - 17
+PCK4BITS(4,4,4,5,4,4,4,4), // 18 - 1f
+PCK4BITS(4,4,4,4,4,4,4,4), // 20 - 27
+PCK4BITS(4,4,4,4,4,4,4,4), // 28 - 2f
+PCK4BITS(4,4,4,4,4,4,4,4), // 30 - 37
+PCK4BITS(4,4,4,4,4,4,4,4), // 38 - 3f
+PCK4BITS(4,4,4,4,4,4,4,4), // 40 - 47
+PCK4BITS(4,4,4,4,4,4,4,4), // 48 - 4f
+PCK4BITS(4,4,4,4,4,4,4,4), // 50 - 57
+PCK4BITS(4,4,4,4,4,4,4,4), // 58 - 5f
+PCK4BITS(4,4,4,4,4,4,4,4), // 60 - 67
+PCK4BITS(4,4,4,4,4,4,4,4), // 68 - 6f
+PCK4BITS(4,4,4,4,4,4,4,4), // 70 - 77
+PCK4BITS(4,4,4,4,4,4,4,4), // 78 - 7f
+PCK4BITS(5,5,5,5,5,5,5,5), // 80 - 87
+PCK4BITS(5,5,5,5,5,5,1,3), // 88 - 8f
+PCK4BITS(5,5,5,5,5,5,5,5), // 90 - 97
+PCK4BITS(5,5,5,5,5,5,5,5), // 98 - 9f
+PCK4BITS(5,2,2,2,2,2,2,2), // a0 - a7
+PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
+PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
+PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
+PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
+PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
+PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
+PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
+PCK4BITS(0,0,0,0,0,0,0,0), // e0 - e7
+PCK4BITS(0,0,0,0,0,0,0,0), // e8 - ef
+PCK4BITS(0,0,0,0,0,0,0,0), // f0 - f7
+PCK4BITS(0,0,0,0,0,0,0,5) // f8 - ff
+};
+
+
+// State transition table for EUC-JP, one 4-bit entry per (state, class)
+// pair, eight entries packed per word. The trailing //xx-yy comments give
+// the flat entry index in hex.
+// NOTE(review): presumably read as state * class factor (6 in EUCJPSMModel)
+// + class by nsCodingStateMachine - confirm there.
+static const uint32_t EUCJP_st [ 5] = {
+PCK4BITS( 3, 4, 3, 5,eStart,eError,eError,eError),//00-07
+PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f
+PCK4BITS(eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError),//10-17
+PCK4BITS(eError,eError,eStart,eError,eError,eError, 3,eError),//18-1f
+PCK4BITS( 3,eError,eError,eError,eStart,eStart,eStart,eStart) //20-27
+};
+
+// Character length per class (classes 0-5).
+static const uint32_t EUCJPCharLenTable[] = {2, 2, 2, 3, 1, 0};
+
+// State machine model for EUC-JP: the packed class table, the class factor
+// (6), the packed state table, the character length table and the charset
+// name. Both packed tables use the 4-bit unit parameters.
+const SMModel EUCJPSMModel = {
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_cls },
+ 6,
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_st },
+ CHAR_LEN_TABLE(EUCJPCharLenTable),
+ "EUC-JP",
+};
+
+// sjis
+
+// Byte-to-class table for the Shift_JIS state machine: one 4-bit class per
+// byte value, eight classes packed into each 32-bit word by PCK4BITS.
+// Class 0: 0x0e, 0x0f, 0x1b and 0xfd-0xff. Class 1: the other bytes in
+// 0x00-0x3f, plus 0x7f. Class 2: 0x40-0x7e and 0xa0-0xdf.
+// Class 3: 0x80-0x9f and 0xe0-0xec. Class 4: 0xed-0xfc.
+static const uint32_t SJIS_cls [ 256 / 8 ] = {
+//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
+PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
+PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
+PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
+PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
+PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
+PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
+PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
+PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
+PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47
+PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f
+PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57
+PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f
+PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67
+PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f
+PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77
+PCK4BITS(2,2,2,2,2,2,2,1), // 78 - 7f
+PCK4BITS(3,3,3,3,3,3,3,3), // 80 - 87
+PCK4BITS(3,3,3,3,3,3,3,3), // 88 - 8f
+PCK4BITS(3,3,3,3,3,3,3,3), // 90 - 97
+PCK4BITS(3,3,3,3,3,3,3,3), // 98 - 9f
+//0xa0 is illegal in sjis encoding, but some pages do
+//contain such a byte. We need to be more forgiving of errors.
+PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7
+PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
+PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
+PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
+PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
+PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
+PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
+PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
+PCK4BITS(3,3,3,3,3,3,3,3), // e0 - e7
+PCK4BITS(3,3,3,3,3,4,4,4), // e8 - ef
+PCK4BITS(4,4,4,4,4,4,4,4), // f0 - f7
+PCK4BITS(4,4,4,4,4,0,0,0) // f8 - ff
+};
+
+
+// State transition table for Shift_JIS, one 4-bit entry per (state, class)
+// pair, eight entries packed per word. The trailing //xx-yy comments give
+// the flat entry index in hex.
+// NOTE(review): presumably read as state * class factor (6 in SJISSMModel)
+// + class by nsCodingStateMachine - confirm there.
+static const uint32_t SJIS_st [ 3] = {
+PCK4BITS(eError,eStart,eStart, 3,eError,eError,eError,eError),//00-07
+PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f
+PCK4BITS(eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart) //10-17
+};
+
+// Character length per class (classes 0-5).
+static const uint32_t SJISCharLenTable[] = {0, 1, 1, 2, 0, 0};
+
+// State machine model for Shift_JIS: the packed class table, the class
+// factor (6), the packed state table, the character length table and the
+// charset name. Both packed tables use the 4-bit unit parameters.
+const SMModel SJISSMModel = {
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_cls },
+ 6,
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_st },
+ CHAR_LEN_TABLE(SJISCharLenTable),
+ "Shift_JIS",
+};
+
+
+// Byte-to-class table for the UTF-8 state machine: one 4-bit class per byte
+// value, eight classes packed into each 32-bit word by PCK4BITS.
+// Class 0: 0x0e, 0x0f, 0x1b, 0xc0, 0xc1 and 0xf5-0xff. Class 1: the other
+// bytes in 0x00-0x7f. Classes 2, 3, 4: 0x80-0x8f, 0x90-0x9f, 0xa0-0xbf.
+// Class 5: 0xc2-0xdf. Class 6: 0xe0. Class 7: 0xe1-0xec, 0xee, 0xef.
+// Class 8: 0xed. Class 9: 0xf0. Class 10: 0xf1-0xf3. Class 11: 0xf4.
+static const uint32_t UTF8_cls [ 256 / 8 ] = {
+PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 00 - 07
+PCK4BITS( 1, 1, 1, 1, 1, 1, 0, 0), // 08 - 0f
+PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 10 - 17
+PCK4BITS( 1, 1, 1, 0, 1, 1, 1, 1), // 18 - 1f
+PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 20 - 27
+PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 28 - 2f
+PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 30 - 37
+PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 38 - 3f
+PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 40 - 47
+PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 48 - 4f
+PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 50 - 57
+PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 58 - 5f
+PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 60 - 67
+PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 68 - 6f
+PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 70 - 77
+PCK4BITS( 1, 1, 1, 1, 1, 1, 1, 1), // 78 - 7f
+PCK4BITS( 2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87
+PCK4BITS( 2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f
+PCK4BITS( 3, 3, 3, 3, 3, 3, 3, 3), // 90 - 97
+PCK4BITS( 3, 3, 3, 3, 3, 3, 3, 3), // 98 - 9f
+PCK4BITS( 4, 4, 4, 4, 4, 4, 4, 4), // a0 - a7
+PCK4BITS( 4, 4, 4, 4, 4, 4, 4, 4), // a8 - af
+PCK4BITS( 4, 4, 4, 4, 4, 4, 4, 4), // b0 - b7
+PCK4BITS( 4, 4, 4, 4, 4, 4, 4, 4), // b8 - bf
+PCK4BITS( 0, 0, 5, 5, 5, 5, 5, 5), // c0 - c7
+PCK4BITS( 5, 5, 5, 5, 5, 5, 5, 5), // c8 - cf
+PCK4BITS( 5, 5, 5, 5, 5, 5, 5, 5), // d0 - d7
+PCK4BITS( 5, 5, 5, 5, 5, 5, 5, 5), // d8 - df
+PCK4BITS( 6, 7, 7, 7, 7, 7, 7, 7), // e0 - e7
+PCK4BITS( 7, 7, 7, 7, 7, 8, 7, 7), // e8 - ef
+PCK4BITS( 9,10,10,10,11, 0, 0, 0), // f0 - f7
+PCK4BITS( 0, 0, 0, 0, 0, 0, 0, 0) // f8 - ff
+};
+
+
+// State transition table for UTF-8, one 4-bit entry per (state, class)
+// pair, eight entries packed per word. The trailing comments give the flat
+// entry index in hex.
+// NOTE(review): presumably read as state * class factor (12 in UTF8SMModel)
+// + class by nsCodingStateMachine - confirm there.
+static const uint32_t UTF8_st [ 15] = {
+PCK4BITS(eError,eStart,eError,eError,eError, 3, 4, 5), // 00 - 07
+PCK4BITS( 6, 7, 8, 9,eError,eError,eError,eError), // 08 - 0f
+PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError), // 10 - 17
+PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe), // 18 - 1f
+PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart), // 20 - 27
+PCK4BITS(eStart,eError,eError,eError,eError,eError,eError,eError), // 28 - 2f
+PCK4BITS(eError,eError,eError,eError, 3,eError,eError,eError), // 30 - 37
+PCK4BITS(eError,eError,eError,eError,eError,eError, 3, 3), // 38 - 3f
+PCK4BITS( 3,eError,eError,eError,eError,eError,eError,eError), // 40 - 47
+PCK4BITS(eError,eError, 3, 3,eError,eError,eError,eError), // 48 - 4f
+PCK4BITS(eError,eError,eError,eError,eError,eError, 5, 5), // 50 - 57
+PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError), // 58 - 5f
+PCK4BITS(eError,eError, 5, 5, 5,eError,eError,eError), // 60 - 67
+PCK4BITS(eError,eError,eError,eError,eError,eError, 5,eError), // 68 - 6f
+PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError) // 70 - 77
+};
+
+// Character length per class (classes 0-11).
+static const uint32_t UTF8CharLenTable[] = {0, 1, 0, 0, 0, 2, 3, 3, 3, 4, 4, 4};
+
+// State machine model for UTF-8: the packed class table, the class factor
+// (12), the packed state table, the character length table and the charset
+// name. Both packed tables use the 4-bit unit parameters.
+const SMModel UTF8SMModel = {
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_cls },
+ 12,
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_st },
+ CHAR_LEN_TABLE(UTF8CharLenTable),
+ "UTF-8",
+};
diff --git a/extensions/universalchardet/src/base/nsPkgInt.h b/extensions/universalchardet/src/base/nsPkgInt.h
new file mode 100644
index 000000000..488b2389d
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsPkgInt.h
@@ -0,0 +1,57 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsPkgInt_h__
+#define nsPkgInt_h__
+#include "nscore.h"
+
+// Right shift that GETFROMPCK applies to a unit index to obtain the index of
+// the 32-bit word holding that unit (8, 4 or 2 units per word).
+typedef enum {
+ eIdxSft4bits = 3,
+ eIdxSft8bits = 2,
+ eIdxSft16bits = 1
+} nsIdxSft;
+
+// Mask that GETFROMPCK applies to a unit index to obtain the unit's position
+// within its 32-bit word.
+typedef enum {
+ eSftMsk4bits = 7,
+ eSftMsk8bits = 3,
+ eSftMsk16bits = 1
+} nsSftMsk;
+
+// Left shift that turns a unit's position within the word into a bit offset
+// (position * 4, * 8 or * 16).
+typedef enum {
+ eBitSft4bits = 2,
+ eBitSft8bits = 3,
+ eBitSft16bits = 4
+} nsBitSft;
+
+// Mask that isolates a single unit of 4, 8 or 16 bits after shifting.
+typedef enum {
+ eUnitMsk4bits = 0x0000000FL,
+ eUnitMsk8bits = 0x000000FFL,
+ eUnitMsk16bits = 0x0000FFFFL
+} nsUnitMsk;
+
+// Describes a table of small integers packed into 32-bit words, together
+// with the shift and mask parameters GETFROMPCK uses to read one unit.
+typedef struct nsPkgInt {
+ // unit index -> word index (right shift)
+ nsIdxSft idxsft;
+ // unit index -> position within the word (mask)
+ nsSftMsk sftmsk;
+ // position within the word -> bit offset (left shift)
+ nsBitSft bitsft;
+ // isolates one unit after shifting
+ nsUnitMsk unitmsk;
+ // the packed words
+ const uint32_t* const data;
+} nsPkgInt;
+
+
+// Packs two 16-bit values into one 32-bit word; a occupies the low half.
+#define PCK16BITS(a,b) ((uint32_t)(((b) << 16) | (a)))
+
+// Packs four 8-bit values into one 32-bit word; a is the lowest byte.
+#define PCK8BITS(a,b,c,d) PCK16BITS( ((uint32_t)(((b) << 8) | (a))), \
+ ((uint32_t)(((d) << 8) | (c))))
+
+// Packs eight 4-bit values into one 32-bit word; a is the lowest nibble.
+#define PCK4BITS(a,b,c,d,e,f,g,h) PCK8BITS( ((uint32_t)(((b) << 4) | (a))), \
+ ((uint32_t)(((d) << 4) | (c))), \
+ ((uint32_t)(((f) << 4) | (e))), \
+ ((uint32_t)(((h) << 4) | (g))) )
+
+// Reads unit i from the packed table c (an nsPkgInt): selects the word with
+// idxsft, shifts the unit down using sftmsk/bitsft and masks it with unitmsk.
+#define GETFROMPCK(i, c) \
+ (((((c).data)[(i)>>(c).idxsft])>>(((i)&(c).sftmsk)<<(c).bitsft))&(c).unitmsk)
+
+#endif /* nsPkgInt_h__ */
+
diff --git a/extensions/universalchardet/src/base/nsSJISProber.cpp b/extensions/universalchardet/src/base/nsSJISProber.cpp
new file mode 100644
index 000000000..73d615bd9
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsSJISProber.cpp
@@ -0,0 +1,68 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// For S-JIS encoding, observe these characteristics:
+// 1. kana characters (or hankaku?) often have a high frequency of appearance
+// 2. kana characters often occur in groups
+// 3. certain combinations of kana are never used in the Japanese language
+
+#include "nsSJISProber.h"
+#include "nsDebug.h"
+
+// Puts the prober back into its initial detecting state: mState, the coding
+// state machine and both analysers are reset.
+void nsSJISProber::Reset(void)
+{
+  mState = eDetecting;
+  mCodingSM->Reset();
+  mDistributionAnalyser.Reset();
+  mContextAnalyser.Reset();
+}
+
+// Runs aLen bytes of aBuf through the Shift_JIS coding state machine. Each
+// time the machine returns to eStart (a character has just been completed)
+// that character is handed to the context and distribution analysers.
+//
+// aBuf - bytes to examine
+// aLen - number of bytes in aBuf; a non-empty buffer is expected, but an
+//        empty one is tolerated and leaves the prober untouched
+//
+// Returns the prober state after the buffer has been consumed.
+nsProbingState nsSJISProber::HandleData(const char* aBuf, uint32_t aLen)
+{
+  NS_ASSERTION(aLen, "HandleData called with empty buffer");
+  // Do not rely on the assertion to stop execution: with aLen == 0 the
+  // aBuf[aLen-1] read below would index far outside the buffer.
+  if (aLen == 0)
+    return mState;
+
+  nsSMState codingState;
+
+  for (uint32_t i = 0; i < aLen; i++)
+  {
+    codingState = mCodingSM->NextState(aBuf[i]);
+    if (codingState == eItsMe)
+    {
+      mState = eFoundIt;
+      break;
+    }
+    if (codingState == eStart)
+    {
+      uint32_t charLen = mCodingSM->GetCurrentCharLen();
+      if (i == 0)
+      {
+        // The character ended on the first byte of this buffer, so pair that
+        // byte with the byte remembered from the previous call.
+        mLastChar[1] = aBuf[0];
+        // The context analyser receives a pointer to the first byte of the
+        // character (the last charLen bytes of mLastChar).
+        mContextAnalyser.HandleOneChar(mLastChar+2-charLen, charLen);
+        mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
+      }
+      else
+      {
+        // Context analyser: pointer to the start of the character that ends
+        // at i. Distribution analyser: the two bytes ending at i.
+        mContextAnalyser.HandleOneChar(aBuf+i+1-charLen, charLen);
+        mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen);
+      }
+    }
+  }
+
+  // Remember the trailing byte for the i == 0 case of the next call.
+  mLastChar[0] = aBuf[aLen-1];
+
+  // Shortcut: once enough data has been seen and confidence is high, stop.
+  if (mState == eDetecting &&
+      mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
+    mState = eFoundIt;
+
+  return mState;
+}
+
+// Returns the larger of the context-analysis and distribution-analysis
+// confidences.
+float nsSJISProber::GetConfidence(void)
+{
+  const float context = mContextAnalyser.GetConfidence();
+  const float distribution = mDistributionAnalyser.GetConfidence();
+
+  if (context > distribution)
+    return context;
+  return distribution;
+}
+
diff --git a/extensions/universalchardet/src/base/nsSJISProber.h b/extensions/universalchardet/src/base/nsSJISProber.h
new file mode 100644
index 000000000..6044fae87
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsSJISProber.h
@@ -0,0 +1,44 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// For S-JIS encoding, observe these characteristics:
+// 1. kana characters (or hankaku?) often have a high frequency of appearance
+// 2. kana characters often occur in groups
+// 3. certain combinations of kana are never used in the Japanese language
+
+#ifndef nsSJISProber_h__
+#define nsSJISProber_h__
+
+#include "nsCharSetProber.h"
+#include "nsCodingStateMachine.h"
+#include "JpCntx.h"
+#include "CharDistribution.h"
+
+
+// Charset prober for Shift_JIS. It combines a coding state machine built
+// from SJISSMModel with kana-context and character-distribution analysis.
+class nsSJISProber : public nsCharSetProber {
+public:
+  nsSJISProber()
+    : mCodingSM(new nsCodingStateMachine(&SJISSMModel))
+  {
+    Reset();
+  }
+  virtual ~nsSJISProber(void)
+  {
+    delete mCodingSM;
+  }
+
+  // Feeds a chunk of bytes to the prober and returns the resulting state.
+  nsProbingState HandleData(const char* aBuf, uint32_t aLen);
+  const char* GetCharSetName()
+  {
+    return "Shift_JIS";
+  }
+  nsProbingState GetState(void)
+  {
+    return mState;
+  }
+  void Reset(void);
+  float GetConfidence(void);
+
+protected:
+  // Owned by this object; released in the destructor.
+  nsCodingStateMachine* mCodingSM;
+  nsProbingState mState;
+
+  SJISContextAnalysis mContextAnalyser;
+  SJISDistributionAnalysis mDistributionAnalyser;
+
+  // [0] keeps the last byte of the previous buffer and [1] receives the
+  // first byte of the current one (see HandleData).
+  char mLastChar[2];
+};
+
+
+#endif /* nsSJISProber_h__ */
+
diff --git a/extensions/universalchardet/src/base/nsUTF8Prober.cpp b/extensions/universalchardet/src/base/nsUTF8Prober.cpp
new file mode 100644
index 000000000..1cba4f3cb
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsUTF8Prober.cpp
@@ -0,0 +1,55 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsUTF8Prober.h"
+
+// Returns the prober to its initial state: detecting, no multi-byte
+// characters counted, and the coding state machine reset.
+void nsUTF8Prober::Reset(void)
+{
+  mState = eDetecting;
+  mNumOfMBChar = 0;
+  mCodingSM->Reset();
+}
+
+// Runs aLen bytes of aBuf through the coding state machine.
+//
+// Stops early and switches to eFoundIt when the state machine reports eItsMe.
+// Every time the machine returns to eStart after a character of two or more
+// bytes, the multi-byte counter is incremented. If the prober is still
+// detecting afterwards and the confidence exceeds SHORTCUT_THRESHOLD, the
+// state is promoted to eFoundIt. Returns the resulting state.
+nsProbingState nsUTF8Prober::HandleData(const char* aBuf, uint32_t aLen)
+{
+  const char* const bufEnd = aBuf + aLen;
+
+  for (const char* cursor = aBuf; cursor != bufEnd; ++cursor)
+  {
+    const nsSMState smState = mCodingSM->NextState(*cursor);
+
+    if (smState == eItsMe)
+    {
+      mState = eFoundIt;
+      break;
+    }
+
+    // A complete character was consumed; count it only if it was multi-byte.
+    if (smState == eStart && mCodingSM->GetCurrentCharLen() >= 2)
+      mNumOfMBChar++;
+  }
+
+  if (mState == eDetecting && GetConfidence() > SHORTCUT_THRESHOLD)
+    mState = eFoundIt;
+
+  return mState;
+}
+
+#define ONE_CHAR_PROB (float)0.50
+
+// Confidence that the input is UTF-8, derived from the number of multi-byte
+// characters seen so far.
+//
+// With six or more multi-byte characters the result is fixed at 0.99.
+// Otherwise the "unlikeliness" starts at 0.99 and is multiplied by
+// ONE_CHAR_PROB once per multi-byte character; the result is 1.0 minus that.
+float nsUTF8Prober::GetConfidence(void)
+{
+  if (mNumOfMBChar >= 6)
+    return (float)0.99;
+
+  float unlike = (float)0.99;
+  for (uint32_t left = mNumOfMBChar; left > 0; --left)
+    unlike *= ONE_CHAR_PROB;
+
+  return (float)1.0 - unlike;
+}
+
diff --git a/extensions/universalchardet/src/base/nsUTF8Prober.h b/extensions/universalchardet/src/base/nsUTF8Prober.h
new file mode 100644
index 000000000..077d3dd74
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsUTF8Prober.h
@@ -0,0 +1,31 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsUTF8Prober_h__
+#define nsUTF8Prober_h__
+
+#include "nsCharSetProber.h"
+#include "nsCodingStateMachine.h"
+
+// Charset prober reporting "UTF-8". It owns a coding state machine built from
+// UTF8SMModel and keeps a count of multi-byte characters.
+class nsUTF8Prober: public nsCharSetProber {
+public:
+  // Zeroes the counter, allocates the state machine, then resets the prober.
+  nsUTF8Prober()
+  {
+    mNumOfMBChar = 0;
+    mCodingSM = new nsCodingStateMachine(&UTF8SMModel);
+    Reset();
+  }
+
+  // Releases the state machine allocated by the constructor.
+  virtual ~nsUTF8Prober()
+  {
+    delete mCodingSM;
+  }
+
+  nsProbingState HandleData(const char* aBuf, uint32_t aLen);
+
+  // Name reported to the detector when this prober wins.
+  const char* GetCharSetName()
+  {
+    return "UTF-8";
+  }
+
+  // Current probing state as last stored in mState.
+  nsProbingState GetState(void)
+  {
+    return mState;
+  }
+
+  void Reset(void);
+  float GetConfidence(void);
+
+protected:
+  nsCodingStateMachine* mCodingSM;  // owned; deleted in the destructor
+  nsProbingState mState;
+  uint32_t mNumOfMBChar;            // multi-byte characters seen so far
+};
+
+#endif /* nsUTF8Prober_h__ */
+
diff --git a/extensions/universalchardet/src/base/nsUniversalDetector.cpp b/extensions/universalchardet/src/base/nsUniversalDetector.cpp
new file mode 100644
index 000000000..d272827b8
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsUniversalDetector.cpp
@@ -0,0 +1,240 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nscore.h"
+
+#include "nsUniversalDetector.h"
+
+#include "nsMBCSGroupProber.h"
+#include "nsEscCharsetProber.h"
+#include "nsLatin1Prober.h"
+
+// Builds a detector in its initial state: nothing detected, no data seen,
+// input classified as pure ASCII, and no probers allocated yet.
+//
+// Every scalar member is initialized here. mLanguageFilter was previously
+// left indeterminate by this constructor; it is now zeroed.
+// NOTE(review): nothing visible in this file reads mLanguageFilter, so 0 is
+// assumed to be a harmless default - confirm against any external users.
+nsUniversalDetector::nsUniversalDetector()
+  : mInputState(ePureAscii)
+  , mDone(false)
+  , mInTag(false)
+  , mStart(true)
+  , mGotData(false)
+  , mLastChar('\0')
+  , mDetectedCharset(nullptr)
+  , mBestGuess(-1) //illegal value as signal
+  , mLanguageFilter(0)
+  , mEscCharSetProber(nullptr)
+{
+  // Prober slots start out empty; no prober is allocated here.
+  for (uint32_t i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
+    mCharSetProbers[i] = nullptr;
+}
+
+// Frees every prober the detector owns. Slots that were never populated hold
+// nullptr, for which delete is a no-op.
+nsUniversalDetector::~nsUniversalDetector()
+{
+  for (nsCharSetProber* prober : mCharSetProbers)
+    delete prober;
+
+  delete mEscCharSetProber;
+}
+
+// Restores the detector to the state it had right after construction while
+// keeping any probers that have already been allocated (they are reset, not
+// freed).
+void
+nsUniversalDetector::Reset()
+{
+  // Existing probers are reused: reset them in place.
+  if (mEscCharSetProber)
+    mEscCharSetProber->Reset();
+
+  for (nsCharSetProber* prober : mCharSetProbers)
+  {
+    if (prober)
+      prober->Reset();
+  }
+
+  // Scalar detection state.
+  mInputState = ePureAscii;
+  mLastChar = '\0';
+  mGotData = false;
+  mStart = true;
+  mInTag = false;
+  mDone = false;
+  mDetectedCharset = nullptr;
+  mBestGuess = -1; //illegal value as signal
+}
+
+//---------------------------------------------------------------------
+#define SHORTCUT_THRESHOLD (float)0.95
+#define MINIMUM_THRESHOLD (float)0.20
+
+// Feeds one chunk of input (aLen bytes at aBuf) to the detector.
+//
+// Steps, in order:
+//  1. If detection already finished (mDone), return NS_OK without looking
+//     at the data.
+//  2. On the first call after construction/Reset (mStart), check for a
+//     UTF-8 / UTF-16BE / UTF-16LE byte order mark; a match sets
+//     mDetectedCharset and mDone and returns.
+//  3. Scan every byte to classify the input: a high-bit byte other than
+//     0xA0 moves the detector to eHighbyte (creating the probers in slots
+//     0 and 2 if needed); an ESC byte seen while still ePureAscii moves it
+//     to eEscAscii.
+//  4. Hand the whole chunk to the prober(s) matching the current input
+//     state and record the charset if one of them answers eFoundIt.
+//
+// Returns NS_OK, or NS_ERROR_OUT_OF_MEMORY if a prober allocation yields
+// nullptr.
+nsresult nsUniversalDetector::HandleData(const char* aBuf, uint32_t aLen)
+{
+ if(mDone)
+ return NS_OK;
+
+ if (aLen > 0)
+ mGotData = true;
+
+ //If the data starts with BOM, we know it is UTF
+ if (mStart)
+ {
+ // The BOM check is attempted only once, on the first chunk.
+ mStart = false;
+ if (aLen >= 2) {
+ switch (aBuf[0]) {
+ case '\xEF':
+ // The UTF-8 BOM is three bytes long, hence the extra length check.
+ if ((aLen > 2) && ('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2])) {
+ // EF BB BF UTF-8 encoded BOM
+ mDetectedCharset = "UTF-8";
+ }
+ break;
+ case '\xFE':
+ if ('\xFF' == aBuf[1]) {
+ // FE FF UTF-16, big endian BOM
+ mDetectedCharset = "UTF-16BE";
+ }
+ break;
+ case '\xFF':
+ if ('\xFE' == aBuf[1]) {
+ // FF FE UTF-16, little endian BOM
+ mDetectedCharset = "UTF-16LE";
+ }
+ break;
+ } // switch
+ }
+
+ if (mDetectedCharset)
+ {
+ mDone = true;
+ return NS_OK;
+ }
+ }
+
+ uint32_t i;
+ for (i = 0; i < aLen; i++)
+ {
+ //other than 0xa0, if every other character is ascii, the page is ascii
+ if (aBuf[i] & '\x80' && aBuf[i] != '\xA0') //Since many Ascii only page contains NBSP
+ {
+ //we got a non-ascii byte (high-byte)
+ if (mInputState != eHighbyte)
+ {
+ //adjust state
+ mInputState = eHighbyte;
+
+ //kill mEscCharSetProber if it is active
+ if (mEscCharSetProber) {
+ delete mEscCharSetProber;
+ mEscCharSetProber = nullptr;
+ }
+
+ //start multibyte and singlebyte charset prober
+ // Only slots 0 and 2 are populated here; slot 1 is left untouched.
+ if (nullptr == mCharSetProbers[0])
+ {
+ mCharSetProbers[0] = new nsMBCSGroupProber();
+ if (nullptr == mCharSetProbers[0])
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ if (nullptr == mCharSetProbers[2])
+ {
+ mCharSetProbers[2] = new nsLatin1Prober;
+ if (nullptr == mCharSetProbers[2])
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ }
+ }
+ else
+ {
+ //ok, just pure ascii so far
+ // ESC only changes the state while no high byte has been seen yet.
+ if ((ePureAscii == mInputState) && (aBuf[i] == '\033'))
+ {
+ //found escape character
+ mInputState = eEscAscii;
+ }
+ mLastChar = aBuf[i];
+ }
+ }
+
+ // Dispatch the whole chunk according to the state reached by the scan above.
+ nsProbingState st;
+ switch (mInputState)
+ {
+ case eEscAscii:
+ // The escape-sequence prober is created on first use.
+ if (nullptr == mEscCharSetProber) {
+ mEscCharSetProber = new nsEscCharSetProber();
+ if (nullptr == mEscCharSetProber)
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ st = mEscCharSetProber->HandleData(aBuf, aLen);
+ if (st == eFoundIt)
+ {
+ mDone = true;
+ mDetectedCharset = mEscCharSetProber->GetCharSetName();
+ }
+ break;
+ case eHighbyte:
+ // Probers are tried in slot order; the first one answering eFoundIt wins.
+ for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
+ {
+ if (mCharSetProbers[i])
+ {
+ st = mCharSetProbers[i]->HandleData(aBuf, aLen);
+ if (st == eFoundIt)
+ {
+ mDone = true;
+ mDetectedCharset = mCharSetProbers[i]->GetCharSetName();
+ return NS_OK;
+ }
+ }
+ }
+ break;
+
+ default: //pure ascii
+ ;//do nothing here
+ }
+ return NS_OK;
+}
+
+
+//---------------------------------------------------------------------
+// Signals the end of input and reports the final answer, if there is one.
+//
+// - No data was ever fed: nothing is reported.
+// - A charset was already pinned down: it is reported and mDone is set.
+// - Otherwise, only in the high-byte state, the allocated prober with the
+//   highest confidence is reported, provided that confidence exceeds
+//   MINIMUM_THRESHOLD. In every other state nothing is reported.
+void nsUniversalDetector::DataEnd()
+{
+  // caller program sometimes call DataEnd before anything has been sent to
+  // detector; in that case return immediately
+  if (!mGotData)
+    return;
+
+  if (mDetectedCharset)
+  {
+    mDone = true;
+    Report(mDetectedCharset);
+    return;
+  }
+
+  // Escape-sequence and pure-ASCII input produce no report at this point.
+  if (mInputState != eHighbyte)
+    return;
+
+  int32_t bestIndex = 0;
+  float bestConfidence = (float)0.0;
+
+  for (int32_t idx = 0; idx < NUM_OF_CHARSET_PROBERS; idx++)
+  {
+    nsCharSetProber* candidate = mCharSetProbers[idx];
+    if (!candidate)
+      continue;
+
+    const float confidence = candidate->GetConfidence();
+    if (confidence > bestConfidence)
+    {
+      bestConfidence = confidence;
+      bestIndex = idx;
+    }
+  }
+
+  //do not report anything because we are not confident of it, that's in fact a negative answer
+  if (bestConfidence > MINIMUM_THRESHOLD)
+    Report(mCharSetProbers[bestIndex]->GetCharSetName());
+}
diff --git a/extensions/universalchardet/src/base/nsUniversalDetector.h b/extensions/universalchardet/src/base/nsUniversalDetector.h
new file mode 100644
index 000000000..345e74f9f
--- /dev/null
+++ b/extensions/universalchardet/src/base/nsUniversalDetector.h
@@ -0,0 +1,44 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsUniversalDetector_h__
+#define nsUniversalDetector_h__
+
+class nsCharSetProber;
+
+#define NUM_OF_CHARSET_PROBERS 3
+
+// Classification of the input seen so far by the detector.
+enum nsInputState {
+  ePureAscii = 0,
+  eEscAscii  = 1,
+  eHighbyte  = 2
+};
+
+// Abstract charset detector. Callers feed data through HandleData() and
+// finish with DataEnd(); subclasses receive the answer through Report().
+class nsUniversalDetector {
+public:
+  nsUniversalDetector();
+  virtual ~nsUniversalDetector();
+
+  virtual nsresult HandleData(const char* aBuf, uint32_t aLen);
+  virtual void DataEnd();
+
+protected:
+  // Implemented by subclasses to receive the detected charset name.
+  virtual void Report(const char* aCharset) = 0;
+  virtual void Reset();
+
+  nsInputState mInputState;
+  bool mDone;
+  bool mInTag;
+  bool mStart;
+  bool mGotData;
+  char mLastChar;
+  const char * mDetectedCharset;
+  int32_t mBestGuess;
+  uint32_t mLanguageFilter;
+
+  // Probers owned by the detector; unpopulated slots are nullptr.
+  nsCharSetProber *mCharSetProbers[NUM_OF_CHARSET_PROBERS];
+  nsCharSetProber *mEscCharSetProber;
+};
+
+#endif
+
diff --git a/extensions/universalchardet/src/moz.build b/extensions/universalchardet/src/moz.build
new file mode 100644
index 000000000..db5b3ff62
--- /dev/null
+++ b/extensions/universalchardet/src/moz.build
@@ -0,0 +1,8 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+DIRS += ['base', 'xpcom']
+
diff --git a/extensions/universalchardet/src/xpcom/moz.build b/extensions/universalchardet/src/xpcom/moz.build
new file mode 100644
index 000000000..5a24a5771
--- /dev/null
+++ b/extensions/universalchardet/src/xpcom/moz.build
@@ -0,0 +1,16 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+UNIFIED_SOURCES += [
+ 'nsUdetXPCOMWrapper.cpp',
+ 'nsUniversalCharDetModule.cpp',
+]
+
+FINAL_LIBRARY = 'xul'
+
+LOCAL_INCLUDES += [
+ '../base',
+]
diff --git a/extensions/universalchardet/src/xpcom/nsUdetXPCOMWrapper.cpp b/extensions/universalchardet/src/xpcom/nsUdetXPCOMWrapper.cpp
new file mode 100644
index 000000000..750b1fd07
--- /dev/null
+++ b/extensions/universalchardet/src/xpcom/nsUdetXPCOMWrapper.cpp
@@ -0,0 +1,130 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nscore.h"
+
+#include "nsUniversalDetector.h"
+#include "nsUdetXPCOMWrapper.h"
+#include "nsCharSetProber.h" // for DumpStatus
+
+#include "nsUniversalCharDetDll.h"
+//---- for XPCOM
+#include "nsIFactory.h"
+#include "nsISupports.h"
+#include "nsCOMPtr.h"
+
+//---------------------------------------------------------------------
+// Default construction; the nsUniversalDetector base is default-constructed
+// implicitly.
+nsXPCOMDetector::nsXPCOMDetector()
+{
+}
+//---------------------------------------------------------------------
+// Nothing to release explicitly here.
+nsXPCOMDetector::~nsXPCOMDetector()
+{
+}
+//---------------------------------------------------------------------
+
+NS_IMPL_ISUPPORTS(nsXPCOMDetector, nsICharsetDetector)
+
+//---------------------------------------------------------------------
+// Stores the observer that will be notified of the detection result.
+// Returns NS_ERROR_ILLEGAL_VALUE for a null observer, NS_OK otherwise.
+NS_IMETHODIMP nsXPCOMDetector::Init(
+  nsICharsetDetectionObserver* aObserver)
+{
+  NS_ASSERTION(mObserver == nullptr , "Init twice");
+
+  if (aObserver)
+  {
+    mObserver = aObserver;
+    return NS_OK;
+  }
+
+  return NS_ERROR_ILLEGAL_VALUE;
+}
+//----------------------------------------------------------
+// Resets the detector, runs detection over one buffer and tells the caller
+// whether further data is wanted.
+//
+// aBuf / aLen   bytes to analyse.
+// oDontFeedMe   out: true once detection has finished (mDone), false while
+//               more data could still be used.
+//
+// Returns NS_ERROR_ILLEGAL_VALUE when aBuf or oDontFeedMe is null, the
+// failure code of HandleData() if it fails (oDontFeedMe is then left
+// untouched), and NS_OK otherwise. When detection finished with a known
+// charset, it is reported through Report() before returning.
+NS_IMETHODIMP nsXPCOMDetector::DoIt(const char* aBuf,
+                                    uint32_t aLen, bool* oDontFeedMe)
+{
+  NS_ASSERTION(mObserver != nullptr , "have not init yet");
+
+  if((nullptr == aBuf) || (nullptr == oDontFeedMe))
+    return NS_ERROR_ILLEGAL_VALUE;
+
+  this->Reset();
+  nsresult rv = this->HandleData(aBuf, aLen);
+  if (NS_FAILED(rv))
+    return rv;
+
+  if (mDone && mDetectedCharset)
+    Report(mDetectedCharset);
+
+  // Bug fix: the flag used to be set to true inside the mDone branch and was
+  // then unconditionally overwritten with false, so callers were never told
+  // to stop feeding. It now mirrors mDone.
+  *oDontFeedMe = mDone;
+  return NS_OK;
+}
+//----------------------------------------------------------
+// Signals the end of input: forwards to DataEnd() and returns NS_OK.
+// In DEBUG_chardet builds the status of every allocated prober is dumped
+// first.
+NS_IMETHODIMP nsXPCOMDetector::Done()
+{
+  NS_ASSERTION(mObserver != nullptr , "have not init yet");
+#ifdef DEBUG_chardet
+  // If no data was received the array might stay filled with nulls
+  // the way it was initialized in the constructor.
+  for (nsCharSetProber* prober : mCharSetProbers)
+  {
+    if (!prober)
+      continue;
+    prober->DumpStatus();
+  }
+#endif
+
+  this->DataEnd();
+  return NS_OK;
+}
+//----------------------------------------------------------
+// Delivers the detected charset to the observer as eBestAnswer.
+// DEBUG_chardet builds also print the charset name.
+void nsXPCOMDetector::Report(const char* aCharset)
+{
+  NS_ASSERTION(mObserver != nullptr , "have not init yet");
+
+#ifdef DEBUG_chardet
+  printf("Universal Charset Detector report charset %s . \r\n", aCharset);
+#endif
+
+  mObserver->Notify(aCharset, eBestAnswer);
+}
+
+
+//---------------------------------------------------------------------
+// Constructs the string detector with no result recorded yet.
+// mResult is a raw pointer that the constructor previously left
+// indeterminate; it is now initialized to nullptr.
+nsXPCOMStringDetector:: nsXPCOMStringDetector()
+  : nsUniversalDetector()
+  , mResult(nullptr)
+{
+}
+//---------------------------------------------------------------------
+// mResult is only assigned from Report()'s argument and is not freed here.
+nsXPCOMStringDetector::~nsXPCOMStringDetector()
+{
+}
+//---------------------------------------------------------------------
+NS_IMPL_ISUPPORTS(nsXPCOMStringDetector, nsIStringCharsetDetector)
+//---------------------------------------------------------------------
+// Remembers the reported charset name in mResult so that DoIt() can hand it
+// back to its caller. DEBUG_chardet builds also print the name.
+void nsXPCOMStringDetector::Report(const char *aCharset)
+{
+#ifdef DEBUG_chardet
+  printf("New Charset Prober report charset %s . \r\n", aCharset);
+#endif
+  mResult = aCharset;
+}
+//---------------------------------------------------------------------
+// One-shot detection over a complete buffer.
+//
+// Clears any previous result, resets the detector, feeds the buffer through
+// HandleData() and finishes with DataEnd(). If a charset was reported,
+// *oCharset receives its name and oConf is set to eBestAnswer.
+//
+// Returns the failure code of HandleData() if it fails, NS_OK otherwise.
+//
+// NOTE(review): when nothing is reported, neither *oCharset nor oConf is
+// written, so callers see whatever values they passed in.
+NS_IMETHODIMP nsXPCOMStringDetector::DoIt(const char* aBuf,
+                                          uint32_t aLen, const char** oCharset,
+                                          nsDetectionConfident &oConf)
+{
+  mResult = nullptr;
+  this->Reset();
+
+  const nsresult status = this->HandleData(aBuf, aLen);
+  if (NS_FAILED(status))
+    return status;
+
+  this->DataEnd();
+
+  if (!mResult)
+    return NS_OK;
+
+  *oCharset = mResult;
+  oConf = eBestAnswer;
+  return NS_OK;
+}
diff --git a/extensions/universalchardet/src/xpcom/nsUdetXPCOMWrapper.h b/extensions/universalchardet/src/xpcom/nsUdetXPCOMWrapper.h
new file mode 100644
index 000000000..11a0bd10a
--- /dev/null
+++ b/extensions/universalchardet/src/xpcom/nsUdetXPCOMWrapper.h
@@ -0,0 +1,77 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _nsUdetXPCOMWrapper_h__
+#define _nsUdetXPCOMWrapper_h__
+#include "nsISupports.h"
+#include "nsICharsetDetector.h"
+#include "nsIStringCharsetDetector.h"
+#include "nsICharsetDetectionObserver.h"
+#include "nsCOMPtr.h"
+
+#include "nsIFactory.h"
+
+// {12BB8F1B-2389-11d3-B3BF-00805F8A6670}
+#define NS_JA_PSMDETECTOR_CID \
+{ 0x12bb8f1b, 0x2389, 0x11d3, { 0xb3, 0xbf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
+
+// {12BB8F1C-2389-11d3-B3BF-00805F8A6670}
+#define NS_JA_STRING_PSMDETECTOR_CID \
+{ 0x12bb8f1c, 0x2389, 0x11d3, { 0xb3, 0xbf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
+
+//=====================================================================
+// Streaming detector: exposes nsUniversalDetector through the
+// nsICharsetDetector interface and forwards results to an observer.
+class nsXPCOMDetector :
+ public nsUniversalDetector,
+ public nsICharsetDetector
+{
+ NS_DECL_ISUPPORTS
+ public:
+ nsXPCOMDetector();
+ // Stores the observer that Report() notifies.
+ NS_IMETHOD Init(nsICharsetDetectionObserver* aObserver) override;
+ // Resets the detector and analyses one buffer.
+ NS_IMETHOD DoIt(const char* aBuf, uint32_t aLen, bool *oDontFeedMe) override;
+ // Signals the end of input; forwards to DataEnd().
+ NS_IMETHOD Done() override;
+ protected:
+ virtual ~nsXPCOMDetector();
+ // Notifies mObserver of the detected charset with eBestAnswer.
+ virtual void Report(const char* aCharset) override;
+ private:
+ // Set by Init().
+ nsCOMPtr<nsICharsetDetectionObserver> mObserver;
+};
+
+
+//=====================================================================
+// One-shot detector: exposes nsUniversalDetector through the
+// nsIStringCharsetDetector interface and returns the result directly from
+// DoIt() instead of notifying an observer.
+class nsXPCOMStringDetector :
+ public nsUniversalDetector,
+ public nsIStringCharsetDetector
+{
+ NS_DECL_ISUPPORTS
+ public:
+ nsXPCOMStringDetector();
+ // Analyses a complete buffer; on success with a result, writes the charset
+ // name to *oCharset and eBestAnswer to oConf.
+ NS_IMETHOD DoIt(const char* aBuf, uint32_t aLen,
+ const char** oCharset, nsDetectionConfident &oConf) override;
+ protected:
+ virtual ~nsXPCOMStringDetector();
+ // Records the reported charset name in mResult.
+ virtual void Report(const char* aCharset) override;
+ private:
+ // NOTE(review): not referenced by any nsXPCOMStringDetector method visible
+ // in the accompanying .cpp - possibly vestigial; confirm before removing.
+ nsCOMPtr<nsICharsetDetectionObserver> mObserver;
+ // Charset name most recently passed to Report(); cleared at the start of DoIt().
+ const char* mResult;
+};
+
+//=====================================================================
+
+// Concrete, default-constructible streaming detector.
+class nsJAPSMDetector : public nsXPCOMDetector
+{
+public:
+  nsJAPSMDetector()
+  {
+  }
+};
+
+// Concrete, default-constructible one-shot string detector.
+class nsJAStringPSMDetector : public nsXPCOMStringDetector
+{
+public:
+  nsJAStringPSMDetector()
+  {
+  }
+};
+
+#endif //_nsUdetXPCOMWrapper_h__
diff --git a/extensions/universalchardet/src/xpcom/nsUniversalCharDetDll.h b/extensions/universalchardet/src/xpcom/nsUniversalCharDetDll.h
new file mode 100644
index 000000000..404cb4518
--- /dev/null
+++ b/extensions/universalchardet/src/xpcom/nsUniversalCharDetDll.h
@@ -0,0 +1,11 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsCharDetDll_h__
+#define nsCharDetDll_h__
+
+#include "prtypes.h"
+
+#endif /* nsCharDetDll_h__ */
diff --git a/extensions/universalchardet/src/xpcom/nsUniversalCharDetModule.cpp b/extensions/universalchardet/src/xpcom/nsUniversalCharDetModule.cpp
new file mode 100644
index 000000000..38e2e60bd
--- /dev/null
+++ b/extensions/universalchardet/src/xpcom/nsUniversalCharDetModule.cpp
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/ModuleUtils.h"
+
+#include "nsCOMPtr.h"
+
+#include "nspr.h"
+#include "nsString.h"
+#include "nsUniversalCharDetDll.h"
+#include "nsISupports.h"
+#include "nsICategoryManager.h"
+#include "nsIComponentManager.h"
+#include "nsIServiceManager.h"
+#include "nsICharsetDetector.h"
+#include "nsIStringCharsetDetector.h"
+
+#include "nsUniversalDetector.h"
+#include "nsUdetXPCOMWrapper.h"
+
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsJAPSMDetector)
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsJAStringPSMDetector)
+NS_DEFINE_NAMED_CID(NS_JA_PSMDETECTOR_CID);
+NS_DEFINE_NAMED_CID(NS_JA_STRING_PSMDETECTOR_CID);
+
+// Class-ID table: pairs each detector CID with its generated factory
+// constructor. The list is terminated by a { nullptr } entry.
+static const mozilla::Module::CIDEntry kChardetCIDs[] = {
+ { &kNS_JA_PSMDETECTOR_CID, false, nullptr, nsJAPSMDetectorConstructor },
+ { &kNS_JA_STRING_PSMDETECTOR_CID, false, nullptr, nsJAStringPSMDetectorConstructor },
+ { nullptr }
+};
+
+// Contract-ID table: pairs the "ja_parallel_state_machine" contract IDs
+// (streaming and string variants) with the CIDs above.
+static const mozilla::Module::ContractIDEntry kChardetContracts[] = {
+ { NS_CHARSET_DETECTOR_CONTRACTID_BASE "ja_parallel_state_machine", &kNS_JA_PSMDETECTOR_CID },
+ { NS_STRCDETECTOR_CONTRACTID_BASE "ja_parallel_state_machine", &kNS_JA_STRING_PSMDETECTOR_CID },
+ { nullptr }
+};
+
+// Category table: lists the streaming detector under
+// NS_CHARSET_DETECTOR_CATEGORY. Only the streaming variant has an entry.
+static const mozilla::Module::CategoryEntry kChardetCategories[] = {
+ { NS_CHARSET_DETECTOR_CATEGORY, "ja_parallel_state_machine", NS_CHARSET_DETECTOR_CONTRACTID_BASE "ja_parallel_state_machine" },
+ { nullptr }
+};
+
+// Module descriptor tying the three tables together.
+static const mozilla::Module kChardetModule = {
+ mozilla::Module::kVersion,
+ kChardetCIDs,
+ kChardetContracts,
+ kChardetCategories
+};
+
+NSMODULE_DEFN(nsUniversalCharDetModule) = &kChardetModule;