summaryrefslogtreecommitdiffstats
path: root/browser/components/translation/cld2/post.js
diff options
context:
space:
mode:
Diffstat (limited to 'browser/components/translation/cld2/post.js')
-rw-r--r--browser/components/translation/cld2/post.js171
1 files changed, 171 insertions, 0 deletions
diff --git a/browser/components/translation/cld2/post.js b/browser/components/translation/cld2/post.js
new file mode 100644
index 000000000..a3e8b8522
--- /dev/null
+++ b/browser/components/translation/cld2/post.js
@@ -0,0 +1,171 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// The WebIDL binder places static methods on the prototype, rather than
+// on the constructor, which is a bit clumsy, and is definitely not
+// idiomatic.
+LanguageInfo.detectLanguage = LanguageInfo.prototype.detectLanguage;
+
+// Closure is overzealous in its function call optimization, and tries
+// to turn these singleton methods into unbound function calls.
+ensureCache.alloc = ensureCache.alloc.bind(ensureCache);
+ensureCache.prepare = ensureCache.prepare.bind(ensureCache);
+
+// From public/encodings.h. Unfortunately, the WebIDL binder doesn't
+// allow us to define or automatically derive these in the IDL.
+var Encodings = {
+ 'ISO_8859_1' : 0,
+ 'ISO_8859_2' : 1,
+ 'ISO_8859_3' : 2,
+ 'ISO_8859_4' : 3,
+ 'ISO_8859_5' : 4,
+ 'ISO_8859_6' : 5,
+ 'ISO_8859_7' : 6,
+ 'ISO_8859_8' : 7,
+ 'ISO_8859_9' : 8,
+ 'ISO_8859_10' : 9,
+ 'JAPANESE_EUC_JP' : 10,
+ 'EUC_JP' : 10,
+ 'JAPANESE_SHIFT_JIS' : 11,
+ 'SHIFT_JIS' : 11,
+ 'JAPANESE_JIS' : 12,
+ 'JIS' : 12,
+ 'CHINESE_BIG5' : 13,
+ 'BIG5' : 13,
+ 'CHINESE_GB' : 14,
+ 'CHINESE_EUC_CN' : 15,
+ 'EUC_CN' : 15,
+ 'KOREAN_EUC_KR' : 16,
+ 'EUC_KR' : 16,
+ 'UNICODE_UNUSED' : 17,
+ 'CHINESE_EUC_DEC' : 18,
+ 'EUC_DEC' : 18,
+ 'CHINESE_CNS' : 19,
+ 'CNS' : 19,
+ 'CHINESE_BIG5_CP950' : 20,
+ 'BIG5_CP950' : 20,
+ 'JAPANESE_CP932' : 21,
+ 'CP932' : 21,
+ 'UTF8' : 22,
+ 'UNKNOWN_ENCODING' : 23,
+ 'ASCII_7BIT' : 24,
+ 'RUSSIAN_KOI8_R' : 25,
+ 'KOI8_R' : 25,
+ 'RUSSIAN_CP1251' : 26,
+ 'CP1251' : 26,
+ 'MSFT_CP1252' : 27,
+ 'CP1252' : 27,
+ 'RUSSIAN_KOI8_RU' : 28,
+ 'KOI8_RU' : 28,
+ 'MSFT_CP1250' : 29,
+ 'CP1250' : 29,
+ 'ISO_8859_15' : 30,
+ 'MSFT_CP1254' : 31,
+ 'CP1254' : 31,
+ 'MSFT_CP1257' : 32,
+ 'CP1257' : 32,
+ 'ISO_8859_11' : 33,
+ 'MSFT_CP874' : 34,
+ 'CP874' : 34,
+ 'MSFT_CP1256' : 35,
+ 'CP1256' : 35,
+ 'MSFT_CP1255' : 36,
+ 'CP1255' : 36,
+ 'ISO_8859_8_I' : 37,
+ 'HEBREW_VISUAL' : 38,
+ 'CZECH_CP852' : 39,
+ 'CP852' : 39,
+ 'CZECH_CSN_369103' : 40,
+ 'CSN_369103' : 40,
+ 'MSFT_CP1253' : 41,
+ 'CP1253' : 41,
+ 'RUSSIAN_CP866' : 42,
+ 'CP866' : 42,
+ 'ISO_8859_13' : 43,
+ 'ISO_2022_KR' : 44,
+ 'GBK' : 45,
+ 'GB18030' : 46,
+ 'BIG5_HKSCS' : 47,
+ 'ISO_2022_CN' : 48,
+ 'TSCII' : 49,
+ 'TAMIL_MONO' : 50,
+ 'TAMIL_BI' : 51,
+ 'JAGRAN' : 52,
+ 'MACINTOSH_ROMAN' : 53,
+ 'UTF7' : 54,
+ 'BHASKAR' : 55,
+ 'HTCHANAKYA' : 56,
+ 'UTF16BE' : 57,
+ 'UTF16LE' : 58,
+ 'UTF32BE' : 59,
+ 'UTF32LE' : 60,
+ 'BINARYENC' : 61,
+ 'HZ_GB_2312' : 62,
+ 'UTF8UTF8' : 63,
+ 'TAM_ELANGO' : 64,
+ 'TAM_LTTMBARANI' : 65,
+ 'TAM_SHREE' : 66,
+ 'TAM_TBOOMIS' : 67,
+ 'TAM_TMNEWS' : 68,
+ 'TAM_WEBTAMIL' : 69,
+ 'KDDI_SHIFT_JIS' : 70,
+ 'DOCOMO_SHIFT_JIS' : 71,
+ 'SOFTBANK_SHIFT_JIS' : 72,
+ 'KDDI_ISO_2022_JP' : 73,
+ 'ISO_2022_JP' : 73,
+ 'SOFTBANK_ISO_2022_JP' : 74,
+};
+
+// Accept forms both with and without underscores/hypens.
+for (let code of Object.keys(Encodings)) {
+ if (code['includes']("_"))
+ Encodings[code.replace(/_/g, "")] = Encodings[code];
+}
+
+addOnPreMain(function() {
+
+ onmessage = function(aMsg) {
+ let data = aMsg['data'];
+
+ let langInfo;
+ if (data['tld'] == undefined && data['encoding'] == undefined && data['language'] == undefined) {
+ langInfo = LanguageInfo.detectLanguage(data['text'], !data['isHTML']);
+ } else {
+ // Do our best to find the given encoding in the encodings table.
+ // Otherwise, just fall back to unknown.
+ let enc = String(data['encoding']).toUpperCase().replace(/[_-]/g, "");
+
+ let encoding;
+ if (Encodings.hasOwnProperty(enc))
+ encoding = Encodings[enc];
+ else
+ encoding = Encodings['UNKNOWN_ENCODING'];
+
+ langInfo = LanguageInfo.detectLanguage(data['text'], !data['isHTML'],
+ data['tld'] || null,
+ encoding,
+ data['language'] || null);
+ }
+
+ postMessage({
+ 'language': langInfo.getLanguageCode(),
+ 'confident': langInfo.getIsReliable(),
+
+ 'languages': new Array(3).fill(0).map((_, index) => {
+ let lang = langInfo.get_languages(index);
+ return {
+ 'languageCode': lang.getLanguageCode(),
+ 'percent': lang.getPercent(),
+ };
+ }).filter(lang => {
+ // Ignore empty results.
+ return lang['languageCode'] != "un" || lang['percent'] > 0;
+ }),
+ });
+
+ Module.destroy(langInfo);
+ };
+
+ postMessage("ready");
+});