diff options
Diffstat (limited to 'browser/components/translation/cld2/internal/compact_lang_det.cc')
-rw-r--r-- | browser/components/translation/cld2/internal/compact_lang_det.cc | 322 |
1 files changed, 0 insertions, 322 deletions
diff --git a/browser/components/translation/cld2/internal/compact_lang_det.cc b/browser/components/translation/cld2/internal/compact_lang_det.cc deleted file mode 100644 index ccc6588fc..000000000 --- a/browser/components/translation/cld2/internal/compact_lang_det.cc +++ /dev/null @@ -1,322 +0,0 @@ -// Copyright 2013 Google Inc. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// -// Author: dsites@google.com (Dick Sites) -// - -#include <stdio.h> -#include <stdlib.h> - -#include "../public/compact_lang_det.h" -#include "../public/encodings.h" -#include "compact_lang_det_impl.h" -#include "integral_types.h" -#include "lang_script.h" - -namespace CLD2 { - -// String is "code_version - data_scrape_date" -//static const char* kDetectLanguageVersion = "V2.0 - 20130715"; - - -// Large-table version for all ~160 languages -// Small-table version for all ~60 languages - -// Scan interchange-valid UTF-8 bytes and detect most likely language -Language DetectLanguage( - const char* buffer, - int buffer_length, - bool is_plain_text, - bool* is_reliable) { - bool allow_extended_lang = false; - Language language3[3]; - int percent3[3]; - double normalized_score3[3]; - int text_bytes; - int flags = 0; - Language plus_one = UNKNOWN_LANGUAGE; - const char* tld_hint = ""; - int encoding_hint = UNKNOWN_ENCODING; - Language language_hint = UNKNOWN_LANGUAGE; - CLDHints cldhints = {NULL, tld_hint, encoding_hint, language_hint}; - - Language lang = DetectLanguageSummaryV2( - buffer, - buffer_length, - is_plain_text, - &cldhints, - allow_extended_lang, - flags, - plus_one, - language3, - percent3, - normalized_score3, - NULL, - &text_bytes, - is_reliable); - // Default to English - if (lang == UNKNOWN_LANGUAGE) { - lang = ENGLISH; - } - return lang; -} - -// Scan interchange-valid UTF-8 bytes and detect list of top 3 languages. -Language DetectLanguageSummary( - const char* buffer, - int buffer_length, - bool is_plain_text, - Language* language3, - int* percent3, - int* text_bytes, - bool* is_reliable) { - double normalized_score3[3]; - bool allow_extended_lang = false; - int flags = 0; - Language plus_one = UNKNOWN_LANGUAGE; - const char* tld_hint = ""; - int encoding_hint = UNKNOWN_ENCODING; - Language language_hint = UNKNOWN_LANGUAGE; - CLDHints cldhints = {NULL, tld_hint, encoding_hint, language_hint}; - - Language lang = DetectLanguageSummaryV2( - buffer, - buffer_length, - is_plain_text, - &cldhints, - allow_extended_lang, - flags, - plus_one, - language3, - percent3, - normalized_score3, - NULL, - text_bytes, - is_reliable); - // Default to English - if (lang == UNKNOWN_LANGUAGE) { - lang = ENGLISH; - } - return lang; -} - -// Same as above, with hints supplied -// Scan interchange-valid UTF-8 bytes and detect list of top 3 languages. -Language DetectLanguageSummary( - const char* buffer, - int buffer_length, - bool is_plain_text, - const char* tld_hint, // "id" boosts Indonesian - int encoding_hint, // SJS boosts Japanese - Language language_hint, // ITALIAN boosts it - Language* language3, - int* percent3, - int* text_bytes, - bool* is_reliable) { - double normalized_score3[3]; - bool allow_extended_lang = false; - int flags = 0; - Language plus_one = UNKNOWN_LANGUAGE; - CLDHints cldhints = {NULL, tld_hint, encoding_hint, language_hint}; - - Language lang = DetectLanguageSummaryV2( - buffer, - buffer_length, - is_plain_text, - &cldhints, - allow_extended_lang, - flags, - plus_one, - language3, - percent3, - normalized_score3, - NULL, - text_bytes, - is_reliable); - // Default to English - if (lang == UNKNOWN_LANGUAGE) { - lang = ENGLISH; - } - return lang; -} - - -// Scan interchange-valid UTF-8 bytes and detect list of top 3 extended -// languages. -// Extended languages are additional Google interface languages and Unicode -// single-language scripts, from ext_lang_enc.h -Language ExtDetectLanguageSummary( - const char* buffer, - int buffer_length, - bool is_plain_text, - Language* language3, - int* percent3, - int* text_bytes, - bool* is_reliable) { - double normalized_score3[3]; - bool allow_extended_lang = true; - int flags = 0; - Language plus_one = UNKNOWN_LANGUAGE; - const char* tld_hint = ""; - int encoding_hint = UNKNOWN_ENCODING; - Language language_hint = UNKNOWN_LANGUAGE; - CLDHints cldhints = {NULL, tld_hint, encoding_hint, language_hint}; - - Language lang = DetectLanguageSummaryV2( - buffer, - buffer_length, - is_plain_text, - &cldhints, - allow_extended_lang, - flags, - plus_one, - language3, - percent3, - normalized_score3, - NULL, - text_bytes, - is_reliable); - // Do not default to English - return lang; -} - -// Same as above, with hints supplied -// Scan interchange-valid UTF-8 bytes and detect list of top 3 extended -// languages. -// Extended languages are additional Google interface languages and Unicode -// single-language scripts, from ext_lang_enc.h -Language ExtDetectLanguageSummary( - const char* buffer, - int buffer_length, - bool is_plain_text, - const char* tld_hint, // "id" boosts Indonesian - int encoding_hint, // SJS boosts Japanese - Language language_hint, // ITALIAN boosts it - Language* language3, - int* percent3, - int* text_bytes, - bool* is_reliable) { - double normalized_score3[3]; - bool allow_extended_lang = true; - int flags = 0; - Language plus_one = UNKNOWN_LANGUAGE; - CLDHints cldhints = {NULL, tld_hint, encoding_hint, language_hint}; - - Language lang = DetectLanguageSummaryV2( - buffer, - buffer_length, - is_plain_text, - &cldhints, - allow_extended_lang, - flags, - plus_one, - language3, - percent3, - normalized_score3, - NULL, - text_bytes, - is_reliable); - // Do not default to English - return lang; -} - -// Same as above, and also returns internal language scores as a ratio to -// normal score for real text in that language. Scores close to 1.0 indicate -// normal text, while scores far away from 1.0 indicate badly-skewed text or -// gibberish -// -Language ExtDetectLanguageSummary( - const char* buffer, - int buffer_length, - bool is_plain_text, - const char* tld_hint, // "id" boosts Indonesian - int encoding_hint, // SJS boosts Japanese - Language language_hint, // ITALIAN boosts it - Language* language3, - int* percent3, - double* normalized_score3, - int* text_bytes, - bool* is_reliable) { - bool allow_extended_lang = true; - int flags = 0; - Language plus_one = UNKNOWN_LANGUAGE; - CLDHints cldhints = {NULL, tld_hint, encoding_hint, language_hint}; - - Language lang = DetectLanguageSummaryV2( - buffer, - buffer_length, - is_plain_text, - &cldhints, - allow_extended_lang, - flags, - plus_one, - language3, - percent3, - normalized_score3, - NULL, - text_bytes, - is_reliable); - // Do not default to English - return lang; -} - -// Use this one. -// Hints are collected into a struct. -// Flags are passed in (normally zero). -// -// Also returns 3 internal language scores as a ratio to -// normal score for real text in that language. Scores close to 1.0 indicate -// normal text, while scores far away from 1.0 indicate badly-skewed text or -// gibberish -// -// Returns a vector of chunks in different languages, so that caller may -// spell-check, translate, or otherwaise process different parts of the input -// buffer in language-dependant ways. -// -Language ExtDetectLanguageSummary( - const char* buffer, - int buffer_length, - bool is_plain_text, - const CLDHints* cld_hints, - int flags, - Language* language3, - int* percent3, - double* normalized_score3, - ResultChunkVector* resultchunkvector, - int* text_bytes, - bool* is_reliable) { - bool allow_extended_lang = true; - Language plus_one = UNKNOWN_LANGUAGE; - - Language lang = DetectLanguageSummaryV2( - buffer, - buffer_length, - is_plain_text, - cld_hints, - allow_extended_lang, - flags, - plus_one, - language3, - percent3, - normalized_score3, - resultchunkvector, - text_bytes, - is_reliable); - // Do not default to English - return lang; -} - -} // End namespace CLD2 - |