summaryrefslogtreecommitdiffstats
path: root/browser/components/translation/cld2/internal/compact_lang_det.cc
diff options
context:
space:
mode:
Diffstat (limited to 'browser/components/translation/cld2/internal/compact_lang_det.cc')
-rw-r--r--browser/components/translation/cld2/internal/compact_lang_det.cc322
1 files changed, 0 insertions, 322 deletions
diff --git a/browser/components/translation/cld2/internal/compact_lang_det.cc b/browser/components/translation/cld2/internal/compact_lang_det.cc
deleted file mode 100644
index ccc6588fc..000000000
--- a/browser/components/translation/cld2/internal/compact_lang_det.cc
+++ /dev/null
@@ -1,322 +0,0 @@
-// Copyright 2013 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//
-// Author: dsites@google.com (Dick Sites)
-//
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "../public/compact_lang_det.h"
-#include "../public/encodings.h"
-#include "compact_lang_det_impl.h"
-#include "integral_types.h"
-#include "lang_script.h"
-
-namespace CLD2 {
-
-// String is "code_version - data_scrape_date"
-//static const char* kDetectLanguageVersion = "V2.0 - 20130715";
-
-
-// Large-table version for all ~160 languages
-// Small-table version for all ~60 languages
-
-// Scan interchange-valid UTF-8 bytes and detect most likely language
-Language DetectLanguage(
- const char* buffer,
- int buffer_length,
- bool is_plain_text,
- bool* is_reliable) {
- bool allow_extended_lang = false;
- Language language3[3];
- int percent3[3];
- double normalized_score3[3];
- int text_bytes;
- int flags = 0;
- Language plus_one = UNKNOWN_LANGUAGE;
- const char* tld_hint = "";
- int encoding_hint = UNKNOWN_ENCODING;
- Language language_hint = UNKNOWN_LANGUAGE;
- CLDHints cldhints = {NULL, tld_hint, encoding_hint, language_hint};
-
- Language lang = DetectLanguageSummaryV2(
- buffer,
- buffer_length,
- is_plain_text,
- &cldhints,
- allow_extended_lang,
- flags,
- plus_one,
- language3,
- percent3,
- normalized_score3,
- NULL,
- &text_bytes,
- is_reliable);
- // Default to English
- if (lang == UNKNOWN_LANGUAGE) {
- lang = ENGLISH;
- }
- return lang;
-}
-
-// Scan interchange-valid UTF-8 bytes and detect list of top 3 languages.
-Language DetectLanguageSummary(
- const char* buffer,
- int buffer_length,
- bool is_plain_text,
- Language* language3,
- int* percent3,
- int* text_bytes,
- bool* is_reliable) {
- double normalized_score3[3];
- bool allow_extended_lang = false;
- int flags = 0;
- Language plus_one = UNKNOWN_LANGUAGE;
- const char* tld_hint = "";
- int encoding_hint = UNKNOWN_ENCODING;
- Language language_hint = UNKNOWN_LANGUAGE;
- CLDHints cldhints = {NULL, tld_hint, encoding_hint, language_hint};
-
- Language lang = DetectLanguageSummaryV2(
- buffer,
- buffer_length,
- is_plain_text,
- &cldhints,
- allow_extended_lang,
- flags,
- plus_one,
- language3,
- percent3,
- normalized_score3,
- NULL,
- text_bytes,
- is_reliable);
- // Default to English
- if (lang == UNKNOWN_LANGUAGE) {
- lang = ENGLISH;
- }
- return lang;
-}
-
-// Same as above, with hints supplied
-// Scan interchange-valid UTF-8 bytes and detect list of top 3 languages.
-Language DetectLanguageSummary(
- const char* buffer,
- int buffer_length,
- bool is_plain_text,
- const char* tld_hint, // "id" boosts Indonesian
- int encoding_hint, // SJS boosts Japanese
- Language language_hint, // ITALIAN boosts it
- Language* language3,
- int* percent3,
- int* text_bytes,
- bool* is_reliable) {
- double normalized_score3[3];
- bool allow_extended_lang = false;
- int flags = 0;
- Language plus_one = UNKNOWN_LANGUAGE;
- CLDHints cldhints = {NULL, tld_hint, encoding_hint, language_hint};
-
- Language lang = DetectLanguageSummaryV2(
- buffer,
- buffer_length,
- is_plain_text,
- &cldhints,
- allow_extended_lang,
- flags,
- plus_one,
- language3,
- percent3,
- normalized_score3,
- NULL,
- text_bytes,
- is_reliable);
- // Default to English
- if (lang == UNKNOWN_LANGUAGE) {
- lang = ENGLISH;
- }
- return lang;
-}
-
-
-// Scan interchange-valid UTF-8 bytes and detect list of top 3 extended
-// languages.
-// Extended languages are additional Google interface languages and Unicode
-// single-language scripts, from ext_lang_enc.h
-Language ExtDetectLanguageSummary(
- const char* buffer,
- int buffer_length,
- bool is_plain_text,
- Language* language3,
- int* percent3,
- int* text_bytes,
- bool* is_reliable) {
- double normalized_score3[3];
- bool allow_extended_lang = true;
- int flags = 0;
- Language plus_one = UNKNOWN_LANGUAGE;
- const char* tld_hint = "";
- int encoding_hint = UNKNOWN_ENCODING;
- Language language_hint = UNKNOWN_LANGUAGE;
- CLDHints cldhints = {NULL, tld_hint, encoding_hint, language_hint};
-
- Language lang = DetectLanguageSummaryV2(
- buffer,
- buffer_length,
- is_plain_text,
- &cldhints,
- allow_extended_lang,
- flags,
- plus_one,
- language3,
- percent3,
- normalized_score3,
- NULL,
- text_bytes,
- is_reliable);
- // Do not default to English
- return lang;
-}
-
-// Same as above, with hints supplied
-// Scan interchange-valid UTF-8 bytes and detect list of top 3 extended
-// languages.
-// Extended languages are additional Google interface languages and Unicode
-// single-language scripts, from ext_lang_enc.h
-Language ExtDetectLanguageSummary(
- const char* buffer,
- int buffer_length,
- bool is_plain_text,
- const char* tld_hint, // "id" boosts Indonesian
- int encoding_hint, // SJS boosts Japanese
- Language language_hint, // ITALIAN boosts it
- Language* language3,
- int* percent3,
- int* text_bytes,
- bool* is_reliable) {
- double normalized_score3[3];
- bool allow_extended_lang = true;
- int flags = 0;
- Language plus_one = UNKNOWN_LANGUAGE;
- CLDHints cldhints = {NULL, tld_hint, encoding_hint, language_hint};
-
- Language lang = DetectLanguageSummaryV2(
- buffer,
- buffer_length,
- is_plain_text,
- &cldhints,
- allow_extended_lang,
- flags,
- plus_one,
- language3,
- percent3,
- normalized_score3,
- NULL,
- text_bytes,
- is_reliable);
- // Do not default to English
- return lang;
-}
-
-// Same as above, and also returns internal language scores as a ratio to
-// normal score for real text in that language. Scores close to 1.0 indicate
-// normal text, while scores far away from 1.0 indicate badly-skewed text or
-// gibberish
-//
-Language ExtDetectLanguageSummary(
- const char* buffer,
- int buffer_length,
- bool is_plain_text,
- const char* tld_hint, // "id" boosts Indonesian
- int encoding_hint, // SJS boosts Japanese
- Language language_hint, // ITALIAN boosts it
- Language* language3,
- int* percent3,
- double* normalized_score3,
- int* text_bytes,
- bool* is_reliable) {
- bool allow_extended_lang = true;
- int flags = 0;
- Language plus_one = UNKNOWN_LANGUAGE;
- CLDHints cldhints = {NULL, tld_hint, encoding_hint, language_hint};
-
- Language lang = DetectLanguageSummaryV2(
- buffer,
- buffer_length,
- is_plain_text,
- &cldhints,
- allow_extended_lang,
- flags,
- plus_one,
- language3,
- percent3,
- normalized_score3,
- NULL,
- text_bytes,
- is_reliable);
- // Do not default to English
- return lang;
-}
-
-// Use this one.
-// Hints are collected into a struct.
-// Flags are passed in (normally zero).
-//
-// Also returns 3 internal language scores as a ratio to
-// normal score for real text in that language. Scores close to 1.0 indicate
-// normal text, while scores far away from 1.0 indicate badly-skewed text or
-// gibberish
-//
-// Returns a vector of chunks in different languages, so that caller may
-// spell-check, translate, or otherwaise process different parts of the input
-// buffer in language-dependant ways.
-//
-Language ExtDetectLanguageSummary(
- const char* buffer,
- int buffer_length,
- bool is_plain_text,
- const CLDHints* cld_hints,
- int flags,
- Language* language3,
- int* percent3,
- double* normalized_score3,
- ResultChunkVector* resultchunkvector,
- int* text_bytes,
- bool* is_reliable) {
- bool allow_extended_lang = true;
- Language plus_one = UNKNOWN_LANGUAGE;
-
- Language lang = DetectLanguageSummaryV2(
- buffer,
- buffer_length,
- is_plain_text,
- cld_hints,
- allow_extended_lang,
- flags,
- plus_one,
- language3,
- percent3,
- normalized_score3,
- resultchunkvector,
- text_bytes,
- is_reliable);
- // Do not default to English
- return lang;
-}
-
-} // End namespace CLD2
-