summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/common/rbbiscan.h
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/common/rbbiscan.h')
-rw-r--r--intl/icu/source/common/rbbiscan.h165
1 files changed, 165 insertions, 0 deletions
diff --git a/intl/icu/source/common/rbbiscan.h b/intl/icu/source/common/rbbiscan.h
new file mode 100644
index 000000000..6f3267e21
--- /dev/null
+++ b/intl/icu/source/common/rbbiscan.h
@@ -0,0 +1,165 @@
+// Copyright (C) 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+//
+// rbbiscan.h
+//
+// Copyright (C) 2002-2016, International Business Machines Corporation and others.
+// All Rights Reserved.
+//
+// This file contains declarations for class RBBIRuleScanner
+//
+
+
+#ifndef RBBISCAN_H
+#define RBBISCAN_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/rbbi.h"
+#include "unicode/uniset.h"
+#include "unicode/parseerr.h"
+#include "uhash.h"
+#include "uvector.h"
+#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
+ // looks up references to $variables within a set.
+#include "rbbinode.h"
+#include "rbbirpt.h"
+
+U_NAMESPACE_BEGIN
+
+class RBBIRuleBuilder;
+class RBBISymbolTable;
+
+
+//--------------------------------------------------------------------------------
+//
+// class RBBIRuleScanner does the lowest level, character-at-a-time
+// scanning of break iterator rules.
+//
+// The output of the scanner is parse trees for
+// the rule expressions and a list of all Unicode Sets
+// encountered.
+//
+//--------------------------------------------------------------------------------
+
+class RBBIRuleScanner : public UMemory {
+public:
+
+ enum {
+ kStackSize = 100 // Capacity of the two parse stacks declared
+ }; // below (fStack and fNodeStack). Corresponds
+ // roughly to the depth of parentheses nesting
+ // that is allowed in the rules.
+
+ struct RBBIRuleChar { // One rule character as passed between scanner functions.
+ UChar32 fChar; // The character's code point.
+ UBool fEscaped; // NOTE(review): presumably TRUE when the char was escaped or quoted in the rule text - confirm.
+ RBBIRuleChar() : fChar(0), fEscaped(FALSE) {}; // Default state: code point 0, not escaped.
+ };
+
+ RBBIRuleScanner(RBBIRuleBuilder *rb); // rb: presumably the owning builder kept in fRB - confirm.
+
+
+ virtual ~RBBIRuleScanner();
+
+ void nextChar(RBBIRuleChar &c); // Get the next char from the input stream.
+ // Returns void; the char is delivered through c. NOTE(review): how end of input is signalled is not visible here.
+
+ UBool push(const RBBIRuleChar &c); // Push (unget) one character.
+ // Only a single character may be pushed.
+
+ void parse(); // Parse the rules, generating two parse
+ // trees, one each for the forward and
+ // reverse rules,
+ // and a list of UnicodeSets encountered.
+
+ /**
+ * Return a rules string without unnecessary
+ * characters.
+ */
+ static UnicodeString stripRules(const UnicodeString &rules);
+private:
+
+ UBool doParseActions(int32_t a); // NOTE(review): presumably runs parse action number a (see rbbirpt.h) - confirm.
+ void error(UErrorCode e); // Error reporting convenience function.
+ void fixOpStack(RBBINode::OpPrecedence p);
+ // NOTE(review): presumably resolves operators pending on the node stack by precedence p - confirm in rbbiscan.cpp.
+ void findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL);
+ // setToAdopt is optional (defaults to NULL). NOTE(review): the name suggests ownership transfer - confirm.
+ UChar32 nextCharLL(); // NOTE(review): presumably the low-level char fetch underneath nextChar() - confirm.
+#ifdef RBBI_DEBUG
+ void printNodeStack(const char *title); // Declared only in RBBI_DEBUG builds.
+#endif
+ RBBINode *pushNewNode(RBBINode::NodeType t);
+ void scanSet();
+
+
+ RBBIRuleBuilder *fRB; // The rule builder that we are part of.
+
+ int32_t fScanIndex; // Index of current character being processed
+ // in the rule input string.
+ int32_t fNextIndex; // Index of the next character, which
+ // is the first character not yet scanned.
+ UBool fQuoteMode; // Scan is in a 'quoted region'
+ int32_t fLineNum; // Line number in input file.
+ int32_t fCharNum; // Char position within the line.
+ UChar32 fLastChar; // Previous char, needed to count CR-LF
+ // as a single line, not two.
+
+ RBBIRuleChar fC; // Current char for parse state machine
+ // processing.
+ UnicodeString fVarName; // $variableName, valid when we've just
+ // scanned one.
+
+ RBBIRuleTableEl **fStateTable; // State Transition Table for RBBI Rule
+ // parsing. Indexed by p[state][char-class]
+
+ uint16_t fStack[kStackSize]; // State stack, holds state pushes
+ int32_t fStackPtr; // and pops as specified in the state
+ // transition rules.
+
+ RBBINode *fNodeStack[kStackSize]; // Node stack, holds nodes created
+ // during the parse of a rule
+ int32_t fNodeStackPtr; // NOTE(review): presumably the index of the top of fNodeStack - confirm.
+
+
+ UBool fReverseRule; // True if the rule currently being scanned
+ // is a reverse direction rule (if it
+ // starts with a '!')
+
+ UBool fLookAheadRule; // True if the rule includes a '/'
+ // somewhere within it.
+
+ UBool fNoChainInRule; // True if the current rule starts with a '^'.
+
+ RBBISymbolTable *fSymbolTable; // Symbol table, holds definitions of
+ // $variable symbols.
+
+ UHashtable *fSetTable; // UnicodeSet hash table, holds indexes to
+ // the sets created while parsing rules.
+ // The key is the string used for creating
+ // the set.
+
+ UnicodeSet fRuleSets[10]; // Unicode Sets that are needed during
+ // the scanning of RBBI rules. The
+ // indices for these are assigned by the
+ // perl script that builds the state tables.
+ // See rbbirpt.h.
+
+ int32_t fRuleNum; // Counts each rule as it is scanned.
+
+ int32_t fOptionStart; // Input index of start of a !!option
+ // keyword, while being scanned.
+ // NOTE(review): the four pointers below carry a 'g' prefix but are per-instance members; ownership is not visible here.
+ UnicodeSet *gRuleSet_rule_char;
+ UnicodeSet *gRuleSet_white_space;
+ UnicodeSet *gRuleSet_name_char;
+ UnicodeSet *gRuleSet_name_start_char;
+
+ RBBIRuleScanner(const RBBIRuleScanner &other); // Declared private to forbid copying of this class
+ RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // Declared private to forbid copying of this class
+};
+
+U_NAMESPACE_END
+
+#endif