summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/rbt_set.h
blob: 573c079e65febb571f3f4c71c240ec48042ad2d3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (C) 1999-2007, International Business Machines Corporation
* and others. All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/17/99    aliu        Creation.
**********************************************************************
*/
#ifndef RBT_SET_H
#define RBT_SET_H

#include "unicode/utypes.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/uobject.h"
#include "unicode/utrans.h"
#include "uvector.h"

U_NAMESPACE_BEGIN

class Replaceable;
class TransliterationRule;
class TransliterationRuleData;
class UnicodeFilter;
class UnicodeString;
class UnicodeSet;

/**
 * A set of rules for a <code>RuleBasedTransliterator</code>.
 * @author Alan Liu
 */
class TransliterationRuleSet : public UMemory {
    /**
     * Vector of rules, in the order added.  This is used while the
     * rule set is getting built.  After that, freeze() reorders and
     * indexes the rules into rules[].  Any given rule is stored once
     * in ruleVector, and one or more times in rules[].  ruleVector
     * owns and deletes the rules.
     */
    UVector* ruleVector;

    /**
     * Sorted and indexed table of rules.  This is created by freeze()
     * from the rules in ruleVector.  It contains alias pointers to
     * the rules in ruleVector.  It is zero before freeze() is called
     * and non-zero thereafter.
     */
    TransliterationRule** rules;

    /**
     * Index table.  For text having a first character c, compute x = c&0xFF.
     * Now use rules[index[x]..index[x+1]-1].  This index table is created by
     * freeze().  Before freeze() is called it contains garbage.
     */
    int32_t index[257];

    /**
     * Length of the longest preceding context
     */
    int32_t maxContextLength;

public:

    /**
     * Construct a new empty rule set.
     * @param status    Output parameter filled in with success or failure status.
     */
    TransliterationRuleSet(UErrorCode& status);

    /**
     * Copy constructor.
     */
    TransliterationRuleSet(const TransliterationRuleSet&);

    /**
     * Destructor.
     */
    virtual ~TransliterationRuleSet();

    /**
     * Change the data object that this rule belongs to.  Used
     * internally by the TransliterationRuleData copy constructor.
     * @param data    the new data value to be set.
     */
    void setData(const TransliterationRuleData* data);

    /**
     * Return the maximum context length.
     * @return the length of the longest preceding context.
     */
    virtual int32_t getMaximumContextLength(void) const;

    /**
     * Add a rule to this set.  Rules are added in order, and order is
     * significant.  The last call to this method must be followed by
     * a call to <code>freeze()</code> before the rule set is used.
     * This method must <em>not</em> be called after freeze() has been
     * called.
     *
     * @param adoptedRule the rule to add
     */
    virtual void addRule(TransliterationRule* adoptedRule,
                         UErrorCode& status);

    /**
     * Check this for masked rules and index it to optimize performance.
     * The sequence of operations is: (1) add rules to a set using
     * <code>addRule()</code>; (2) freeze the set using
     * <code>freeze()</code>; (3) use the rule set.  If
     * <code>addRule()</code> is called after calling this method, it
     * invalidates this object, and this method must be called again.
     * That is, <code>freeze()</code> may be called multiple times,
     * although for optimal performance it shouldn't be.
     * @param parseError A pointer to UParseError to receive information about errors
     *                   occurred.
     * @param status     Output parameter filled in with success or failure status.
     */
    virtual void freeze(UParseError& parseError, UErrorCode& status);
    
    /**
     * Transliterate the given text with the given UTransPosition
     * indices.  Return TRUE if the transliteration should continue
     * or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
     * Note that FALSE is only ever returned if isIncremental is TRUE.
     * @param text the text to be transliterated
     * @param index the position indices, which will be updated
     * @param isIncremental if TRUE, assume new text may be inserted
     * at index.limit, and return FALSE if thre is a partial match.
     * @return TRUE unless a U_PARTIAL_MATCH has been obtained,
     * indicating that transliteration should stop until more text
     * arrives.
     */
    UBool transliterate(Replaceable& text,
                        UTransPosition& index,
                        UBool isIncremental);

    /**
     * Create rule strings that represents this rule set.
     * @param result string to receive the rule strings.  Current
     * contents will be deleted.
     * @param escapeUnprintable  True, will escape the unprintable characters
     * @return    A reference to 'result'.
     */
    virtual UnicodeString& toRules(UnicodeString& result,
                                   UBool escapeUnprintable) const;

    /**
     * Return the set of all characters that may be modified
     * (getTarget=false) or emitted (getTarget=true) by this set.
     */
    UnicodeSet& getSourceTargetSet(UnicodeSet& result,
                   UBool getTarget) const;

private:

    TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid copying of this class
};

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_TRANSLITERATION */

#endif