summaryrefslogtreecommitdiffstats
path: root/application/basilisk/components/translation/cld2/internal/cldutil.h
blob: 9712b30a96afccc90fdc766130d492035c4afac7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//
// Author: dsites@google.com (Dick Sites)
//
// Stuff used only by online detector, not used offline
//

#ifndef I18N_ENCODINGS_CLD2_INTERNAL_NEW_CLDUTIL_H__
#define I18N_ENCODINGS_CLD2_INTERNAL_NEW_CLDUTIL_H__

#include "cldutil_shared.h"
#include "scoreonescriptspan.h"
#include "tote.h"

namespace CLD2 {

// Score up to 64KB of a single script span in one pass
// Make a dummy entry off the end to calc length of last span
// Return offset of first unused input byte
int GetUniHits(const char* text,
                     int letter_offset, int letter_limit,
                     ScoringContext* scoringcontext,
                     ScoringHitBuffer* hitbuffer);

// Score up to 64KB of a single script span, doing both delta-bi and
// distinct bis in one pass
void GetBiHits(const char* text,
                     int letter_offset, int letter_limit,
                     ScoringContext* scoringcontext,
                     ScoringHitBuffer* hitbuffer);

// Score up to 64KB of a single script span in one pass
// Make a dummy entry off the end to calc length of last span
// Return offset of first unused input byte
int GetQuadHits(const char* text,
                     int letter_offset, int letter_limit,
                     ScoringContext* scoringcontext,
                     ScoringHitBuffer* hitbuffer);

// Score up to 64KB of a single script span, doing both delta-octa and
// distinct words in one pass
void GetOctaHits(const char* text,
                     int letter_offset, int letter_limit,
                     ScoringContext* scoringcontext,
                     ScoringHitBuffer* hitbuffer);

// Not sure if these belong here or in scoreonescriptspan.cc
int ReliabilityDelta(int value1, int value2, int gramcount);
int ReliabilityExpected(int actual_score_1kb, int expected_score_1kb);

// Create a langprob packed value from its parts.
uint32 MakeLangProb(Language lang, int qprob);


void ProcessProbV2Tote(uint32 probs, Tote* tote);

// Return score for a particular per-script language, or zero
int GetLangScore(uint32 probs, uint8 pslang);

static inline int minint(int a, int b) {return (a < b) ? a: b;}
static inline int maxint(int a, int b) {return (a > b) ? a: b;}

}       // End namespace CLD2

#endif  // I18N_ENCODINGS_CLD2_INTERNAL_NEW_CLDUTIL_H__