diff options
Diffstat (limited to 'intl/icu/source/i18n/rbt_set.h')
-rw-r--r-- | intl/icu/source/i18n/rbt_set.h | 167 |
1 files changed, 167 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/rbt_set.h b/intl/icu/source/i18n/rbt_set.h new file mode 100644 index 000000000..573c079e6 --- /dev/null +++ b/intl/icu/source/i18n/rbt_set.h @@ -0,0 +1,167 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2007, International Business Machines Corporation +* and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 11/17/99 aliu Creation. +********************************************************************** +*/ +#ifndef RBT_SET_H +#define RBT_SET_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_TRANSLITERATION + +#include "unicode/uobject.h" +#include "unicode/utrans.h" +#include "uvector.h" + +U_NAMESPACE_BEGIN + +class Replaceable; +class TransliterationRule; +class TransliterationRuleData; +class UnicodeFilter; +class UnicodeString; +class UnicodeSet; + +/** + * A set of rules for a <code>RuleBasedTransliterator</code>. + * @author Alan Liu + */ +class TransliterationRuleSet : public UMemory { + /** + * Vector of rules, in the order added. This is used while the + * rule set is getting built. After that, freeze() reorders and + * indexes the rules into rules[]. Any given rule is stored once + * in ruleVector, and one or more times in rules[]. ruleVector + * owns and deletes the rules. + */ + UVector* ruleVector; + + /** + * Sorted and indexed table of rules. This is created by freeze() + * from the rules in ruleVector. It contains alias pointers to + * the rules in ruleVector. It is zero before freeze() is called + * and non-zero thereafter. + */ + TransliterationRule** rules; + + /** + * Index table. For text having a first character c, compute x = c&0xFF. + * Now use rules[index[x]..index[x+1]-1]. This index table is created by + * freeze(). Before freeze() is called it contains garbage. + */ + int32_t index[257]; + + /** + * Length of the longest preceding context + */ + int32_t maxContextLength; + +public: + + /** + * Construct a new empty rule set. + * @param status Output parameter filled in with success or failure status. + */ + TransliterationRuleSet(UErrorCode& status); + + /** + * Copy constructor. + */ + TransliterationRuleSet(const TransliterationRuleSet&); + + /** + * Destructor. + */ + virtual ~TransliterationRuleSet(); + + /** + * Change the data object that this rule belongs to. Used + * internally by the TransliterationRuleData copy constructor. + * @param data the new data value to be set. + */ + void setData(const TransliterationRuleData* data); + + /** + * Return the maximum context length. + * @return the length of the longest preceding context. + */ + virtual int32_t getMaximumContextLength(void) const; + + /** + * Add a rule to this set. Rules are added in order, and order is + * significant. The last call to this method must be followed by + * a call to <code>freeze()</code> before the rule set is used. + * This method must <em>not</em> be called after freeze() has been + * called. + * + * @param adoptedRule the rule to add + */ + virtual void addRule(TransliterationRule* adoptedRule, + UErrorCode& status); + + /** + * Check this for masked rules and index it to optimize performance. + * The sequence of operations is: (1) add rules to a set using + * <code>addRule()</code>; (2) freeze the set using + * <code>freeze()</code>; (3) use the rule set. If + * <code>addRule()</code> is called after calling this method, it + * invalidates this object, and this method must be called again. + * That is, <code>freeze()</code> may be called multiple times, + * although for optimal performance it shouldn't be. + * @param parseError A pointer to UParseError to receive information about errors + * occurred. + * @param status Output parameter filled in with success or failure status. + */ + virtual void freeze(UParseError& parseError, UErrorCode& status); + + /** + * Transliterate the given text with the given UTransPosition + * indices. Return TRUE if the transliteration should continue + * or FALSE if it should halt (because of a U_PARTIAL_MATCH match). + * Note that FALSE is only ever returned if isIncremental is TRUE. + * @param text the text to be transliterated + * @param index the position indices, which will be updated + * @param isIncremental if TRUE, assume new text may be inserted + * at index.limit, and return FALSE if thre is a partial match. + * @return TRUE unless a U_PARTIAL_MATCH has been obtained, + * indicating that transliteration should stop until more text + * arrives. + */ + UBool transliterate(Replaceable& text, + UTransPosition& index, + UBool isIncremental); + + /** + * Create rule strings that represents this rule set. + * @param result string to receive the rule strings. Current + * contents will be deleted. + * @param escapeUnprintable True, will escape the unprintable characters + * @return A reference to 'result'. + */ + virtual UnicodeString& toRules(UnicodeString& result, + UBool escapeUnprintable) const; + + /** + * Return the set of all characters that may be modified + * (getTarget=false) or emitted (getTarget=true) by this set. + */ + UnicodeSet& getSourceTargetSet(UnicodeSet& result, + UBool getTarget) const; + +private: + + TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid copying of this class +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_TRANSLITERATION */ + +#endif |