// Source path: root/application/basilisk/components/translation/cld2/cldapp.cc
// Blob: 4750cc54bb1ae3138d0937094dbc869070ecd374
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "public/compact_lang_det.h"

// Number of language guesses handled per detection. It sizes the result
// arrays handed to the CLD2 summary calls and LanguageInfo::languages.
// NOTE(review): presumably must stay at 3 to match the three-result arrays
// the CLD2 summary API fills in -- confirm before changing.
#define MAX_RESULTS 3

// Immutable wrapper around a single CLD2 language identifier.
class Language {
  // Identifier captured at construction; never changes afterwards.
  const CLD2::Language mLang;

public:
  // Wraps `lang`. The constructor is not marked explicit, so a
  // CLD2::Language value converts to Language implicitly.
  Language(CLD2::Language lang)
    : mLang(lang)
  {
  }

  // Returns the string that CLD2::LanguageCode() produces for the wrapped
  // identifier. The pointer is passed through unchanged.
  const char* getLanguageCode() const
  {
    const char* const code = CLD2::LanguageCode(mLang);
    return code;
  }
};

// One candidate language together with the percentage value stored for it.
class LanguageGuess : public Language {
  // Value supplied at construction; held in a single char.
  // NOTE(review): presumably a 0-100 share of the analysed text -- confirm.
  const char mPercent;

public:
  // `lang` is forwarded to the Language base; `percent` is stored as given.
  LanguageGuess(CLD2::Language lang, char percent)
    : Language(lang)
    , mPercent(percent)
  {
  }

  // Returns the percentage value passed to the constructor.
  char getPercent() const { return mPercent; }
};


class LanguageInfo : public Language {
public:
  static LanguageInfo* detectLanguage(const char* buffer, bool isPlainText)
  {
    CLD2::Language languages[MAX_RESULTS] = {};
    int percentages[MAX_RESULTS] = {};
    bool isReliable = false;

    // This is ignored.
    int textBytes;

    CLD2::Language bestGuess = DetectLanguageSummary(
      buffer, strlen(buffer), isPlainText,
      languages, percentages, &textBytes,
      &isReliable);

    return new LanguageInfo(isReliable, bestGuess, languages, percentages);
  }

  static LanguageInfo* detectLanguage(const char* buffer, bool isPlainText,
                                      const char* tldHint, int encodingHint,
                                      const char* languageHint)
  {
    CLD2::CLDHints hints = {languageHint, tldHint, encodingHint, CLD2::UNKNOWN_LANGUAGE};

    CLD2::Language languages[MAX_RESULTS] = {};
    int percentages[MAX_RESULTS] = {};
    bool isReliable = false;

    // These are ignored.
    double scores[MAX_RESULTS];
    int textBytes;

    CLD2::Language bestGuess = ExtDetectLanguageSummary(
      buffer, strlen(buffer), isPlainText,
      &hints, 0,
      languages, percentages, scores,
      nullptr, &textBytes, &isReliable);

    return new LanguageInfo(isReliable, bestGuess, languages, percentages);
  }

  ~LanguageInfo()
  {
    for (int i = 0; i < MAX_RESULTS; i++) {
      delete languages[i];
    }
  }

  bool getIsReliable() const
  {
    return mIsReliable;
  }

  const LanguageGuess* languages[MAX_RESULTS];

private:
  LanguageInfo(bool isReliable, CLD2::Language bestGuess,
               CLD2::Language languageIDs[MAX_RESULTS],
               int percentages[MAX_RESULTS]) :
    Language(bestGuess), mIsReliable(isReliable)
  {
    for (int i = 0; i < MAX_RESULTS; i++) {
      languages[i] = new LanguageGuess(languageIDs[i], percentages[i]);
    }
  }

  const bool mIsReliable;
};

#include "cld.cpp"