1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsSemanticUnitScanner.h"
// XPCOM nsISupports boilerplate for nsSemanticUnitScanner: the class is
// declared as inheriting from nsSampleWordBreaker and additionally exposing
// the nsISemanticUnitScanner interface.
NS_IMPL_ISUPPORTS_INHERITED(nsSemanticUnitScanner, nsSampleWordBreaker, nsISemanticUnitScanner)
// Constructs a scanner. There is nothing to set up here beyond explicitly
// initializing the nsSampleWordBreaker base.
nsSemanticUnitScanner::nsSemanticUnitScanner()
  : nsSampleWordBreaker()
{
}
// Nothing to release here: the destructor body is empty.
nsSemanticUnitScanner::~nsSemanticUnitScanner() = default;
// Begins a scan. This is currently a no-op: |characterSet| is ignored and
// the call always succeeds with NS_OK.
NS_IMETHODIMP
nsSemanticUnitScanner::Start(const char *characterSet)
{
  return NS_OK;
}
// Finds the next semantic unit in |text| starting at offset |pos|.
//
// Parameters:
//   text         - buffer of UTF-16 code units to scan.
//   length       - number of code units in |text|.
//   pos          - offset at which to start scanning; must not be negative.
//   isLastBuffer - true when no further text will follow this buffer.
//   begin, end   - out: half-open range [*begin, *end) of the unit found.
//   _retval      - out: true when a unit was found, false otherwise.
//
// Results:
//   * pos >= length: *begin == *end == pos, *_retval == false.
//   * Han letter at pos: a one-code-unit range [pos, pos + 1).
//   * The word breaker needs more text: *begin is the current position;
//     if isLastBuffer, *end == length and *_retval == true, otherwise
//     *end == *begin and *_retval == false.
//   * Space or punctuation at pos: the run is skipped and scanning resumes
//     at the following break.
//   * Anything else: the range [pos, next word break).
//
// Returns NS_ERROR_INVALID_POINTER when an out-parameter is null (or |text|
// is null although it would have to be read), NS_ERROR_INVALID_ARG when
// |pos| is negative, and NS_OK otherwise.
NS_IMETHODIMP
nsSemanticUnitScanner::Next(const char16_t *text, int32_t length, int32_t pos,
                            bool isLastBuffer, int32_t *begin, int32_t *end,
                            bool *_retval)
{
  // Every successful path writes through the out-parameters, so reject null
  // ones up front instead of crashing on the first store.
  if (!begin || !end || !_retval) {
    return NS_ERROR_INVALID_POINTER;
  }

  // A negative offset would pass the end-of-buffer test below and then index
  // before the start of |text|.
  if (pos < 0) {
    return NS_ERROR_INVALID_ARG;
  }

  // |text| is only read while pos < length; a null buffer is still accepted
  // for the "already at the end" case, as before.
  if (!text && pos < length) {
    return NS_ERROR_INVALID_POINTER;
  }

  // Skipping a run of spaces/punctuation used to be done by calling Next()
  // recursively, one stack frame per run. Loop instead so that a long buffer
  // of alternating spaces and punctuation cannot exhaust the stack.
  for (;;) {
    // If we reached the end, there is nothing more to return.
    if (pos >= length) {
      *begin = pos;
      *end = pos;
      *_retval = false;
      return NS_OK;
    }

    const uint8_t charClass = nsSampleWordBreaker::GetClass(text[pos]);

    // Han letters are returned one at a time. No language mode is consulted
    // here; the original note says this is meant for Chinese and should not
    // be done for Japanese or Korean.
    if (kWbClassHanLetter == charClass) {
      *begin = pos;
      *end = pos + 1;
      *_retval = true;
      return NS_OK;
    }

    // Find the next "word" break.
    const int32_t next = NextWord(text, static_cast<uint32_t>(length),
                                  static_cast<uint32_t>(pos));

    // Not enough text to make a decision: on the last buffer the remainder is
    // the final unit, otherwise report that nothing was found yet.
    if (next == NS_WORDBREAKER_NEED_MORE_TEXT) {
      *begin = pos;
      *end = isLastBuffer ? length : pos;
      *_retval = isLastBuffer;
      return NS_OK;
    }

    // Spaces and punctuation are not units themselves; continue scanning
    // from the next break.
    if ((charClass == kWbClassSpace) || (charClass == kWbClassPunct)) {
      pos = next;
      continue;
    }

    // Everything else is returned as one unit.
    *begin = pos;
    *end = next;
    *_retval = true;
    return NS_OK;
  }
}
|