/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at http://mozilla.org/MPL/2.0/. */ // The WebIDL binder places static methods on the prototype, rather than // on the constructor, which is a bit clumsy, and is definitely not // idiomatic. LanguageInfo.detectLanguage = LanguageInfo.prototype.detectLanguage; // Closure is overzealous in its function call optimization, and tries // to turn these singleton methods into unbound function calls. ensureCache.alloc = ensureCache.alloc.bind(ensureCache); ensureCache.prepare = ensureCache.prepare.bind(ensureCache); // From public/encodings.h. Unfortunately, the WebIDL binder doesn't // allow us to define or automatically derive these in the IDL. var Encodings = { 'ISO_8859_1' : 0, 'ISO_8859_2' : 1, 'ISO_8859_3' : 2, 'ISO_8859_4' : 3, 'ISO_8859_5' : 4, 'ISO_8859_6' : 5, 'ISO_8859_7' : 6, 'ISO_8859_8' : 7, 'ISO_8859_9' : 8, 'ISO_8859_10' : 9, 'JAPANESE_EUC_JP' : 10, 'EUC_JP' : 10, 'JAPANESE_SHIFT_JIS' : 11, 'SHIFT_JIS' : 11, 'JAPANESE_JIS' : 12, 'JIS' : 12, 'CHINESE_BIG5' : 13, 'BIG5' : 13, 'CHINESE_GB' : 14, 'CHINESE_EUC_CN' : 15, 'EUC_CN' : 15, 'KOREAN_EUC_KR' : 16, 'EUC_KR' : 16, 'UNICODE_UNUSED' : 17, 'CHINESE_EUC_DEC' : 18, 'EUC_DEC' : 18, 'CHINESE_CNS' : 19, 'CNS' : 19, 'CHINESE_BIG5_CP950' : 20, 'BIG5_CP950' : 20, 'JAPANESE_CP932' : 21, 'CP932' : 21, 'UTF8' : 22, 'UNKNOWN_ENCODING' : 23, 'ASCII_7BIT' : 24, 'RUSSIAN_KOI8_R' : 25, 'KOI8_R' : 25, 'RUSSIAN_CP1251' : 26, 'CP1251' : 26, 'MSFT_CP1252' : 27, 'CP1252' : 27, 'RUSSIAN_KOI8_RU' : 28, 'KOI8_RU' : 28, 'MSFT_CP1250' : 29, 'CP1250' : 29, 'ISO_8859_15' : 30, 'MSFT_CP1254' : 31, 'CP1254' : 31, 'MSFT_CP1257' : 32, 'CP1257' : 32, 'ISO_8859_11' : 33, 'MSFT_CP874' : 34, 'CP874' : 34, 'MSFT_CP1256' : 35, 'CP1256' : 35, 'MSFT_CP1255' : 36, 'CP1255' : 36, 'ISO_8859_8_I' : 37, 'HEBREW_VISUAL' : 38, 'CZECH_CP852' : 39, 'CP852' : 39, 'CZECH_CSN_369103' : 40, 'CSN_369103' : 40, 'MSFT_CP1253' : 41, 'CP1253' : 41, 'RUSSIAN_CP866' : 42, 'CP866' : 42, 'ISO_8859_13' : 43, 'ISO_2022_KR' : 44, 'GBK' : 45, 'GB18030' : 46, 'BIG5_HKSCS' : 47, 'ISO_2022_CN' : 48, 'TSCII' : 49, 'TAMIL_MONO' : 50, 'TAMIL_BI' : 51, 'JAGRAN' : 52, 'MACINTOSH_ROMAN' : 53, 'UTF7' : 54, 'BHASKAR' : 55, 'HTCHANAKYA' : 56, 'UTF16BE' : 57, 'UTF16LE' : 58, 'UTF32BE' : 59, 'UTF32LE' : 60, 'BINARYENC' : 61, 'HZ_GB_2312' : 62, 'UTF8UTF8' : 63, 'TAM_ELANGO' : 64, 'TAM_LTTMBARANI' : 65, 'TAM_SHREE' : 66, 'TAM_TBOOMIS' : 67, 'TAM_TMNEWS' : 68, 'TAM_WEBTAMIL' : 69, 'KDDI_SHIFT_JIS' : 70, 'DOCOMO_SHIFT_JIS' : 71, 'SOFTBANK_SHIFT_JIS' : 72, 'KDDI_ISO_2022_JP' : 73, 'ISO_2022_JP' : 73, 'SOFTBANK_ISO_2022_JP' : 74, }; // Accept forms both with and without underscores/hypens. for (let code of Object.keys(Encodings)) { if (code['includes']("_")) Encodings[code.replace(/_/g, "")] = Encodings[code]; } addOnPreMain(function() { onmessage = function(aMsg) { let data = aMsg['data']; let langInfo; if (data['tld'] == undefined && data['encoding'] == undefined && data['language'] == undefined) { langInfo = LanguageInfo.detectLanguage(data['text'], !data['isHTML']); } else { // Do our best to find the given encoding in the encodings table. // Otherwise, just fall back to unknown. let enc = String(data['encoding']).toUpperCase().replace(/[_-]/g, ""); let encoding; if (Encodings.hasOwnProperty(enc)) encoding = Encodings[enc]; else encoding = Encodings['UNKNOWN_ENCODING']; langInfo = LanguageInfo.detectLanguage(data['text'], !data['isHTML'], data['tld'] || null, encoding, data['language'] || null); } postMessage({ 'language': langInfo.getLanguageCode(), 'confident': langInfo.getIsReliable(), 'languages': new Array(3).fill(0).map((_, index) => { let lang = langInfo.get_languages(index); return { 'languageCode': lang.getLanguageCode(), 'percent': lang.getPercent(), }; }).filter(lang => { // Ignore empty results. return lang['languageCode'] != "un" || lang['percent'] > 0; }), }); Module.destroy(langInfo); }; postMessage("ready"); });