1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsISupports.idl"
%{C++
// Class ID (CID) for the semantic unit scanner. The initializer fields of
// the macro below spell out this UUID:
// {ADF42751-1CEF-4ad2-AA8E-BCB849D8D31F}
#define NS_SEMANTICUNITSCANNER_CID { 0xadf42751, 0x1cef, 0x4ad2, { 0xaa, 0x8e, 0xbc, 0xb8, 0x49, 0xd8, 0xd3, 0x1f}}
// Contract ID string for the semantic unit scanner.
#define NS_SEMANTICUNITSCANNER_CONTRACTID "@mozilla.org/intl/semanticunitscanner;1"
%}
/**
* Provides a language-independent way to break Unicode
* text into meaningful semantic units (e.g. words).
*/
[scriptable, uuid(9f620be4-e535-11d6-b254-00039310a47a)]
interface nsISemanticUnitScanner : nsISupports {
/**
* start()
*
* Starts up the semantic unit scanner with an optional
* character set, which acts as a hint to optimize the heuristics
* used to determine the language(s) of the processed text.
*
* @param characterSet the character set the text was originally
* encoded in (can be NULL)
*/
void start(in string characterSet);
/**
* next()
*
* Gets the begin / end offsets of the next unit in the current text.
*
* @param text the text to be scanned
* @param length the number of characters in the text to be processed
* @param pos the current position
* @param isLastBuffer whether this buffer is the last one
* @param begin the begin offset of the next unit
* @param end the end offset of the next unit
* @return whether there are more units in the current text
*/
boolean next(in wstring text, in long length, in long pos,
in boolean isLastBuffer,
out long begin, out long end );
};
|