diff options
Diffstat (limited to 'dom/xslt/xpath/txExprLexer.h')
-rw-r--r-- | dom/xslt/xpath/txExprLexer.h | 226 |
1 files changed, 226 insertions, 0 deletions
diff --git a/dom/xslt/xpath/txExprLexer.h b/dom/xslt/xpath/txExprLexer.h new file mode 100644 index 000000000..7497ca505 --- /dev/null +++ b/dom/xslt/xpath/txExprLexer.h @@ -0,0 +1,226 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + + +#ifndef MITREXSL_EXPRLEXER_H +#define MITREXSL_EXPRLEXER_H + +#include "txCore.h" +#include "nsString.h" + +/** + * A Token class for the ExprLexer. + * + * This class was ported from XSL:P, an open source Java based + * XSLT processor, written by yours truly. + */ +class Token +{ +public: + + /** + * Token types + */ + enum Type { + //-- Trivial Tokens + NULL_TOKEN = 1, + LITERAL, + NUMBER, + CNAME, + VAR_REFERENCE, + PARENT_NODE, + SELF_NODE, + R_PAREN, + R_BRACKET, // 9 + /** + * start of tokens for 3.7, bullet 1 + * ExprLexer::nextIsOperatorToken bails if the tokens aren't + * consecutive. + */ + COMMA, + AT_SIGN, + L_PAREN, + L_BRACKET, + AXIS_IDENTIFIER, + + // These tokens include their following left parenthesis + FUNCTION_NAME_AND_PAREN, // 15 + COMMENT_AND_PAREN, + NODE_AND_PAREN, + PROC_INST_AND_PAREN, + TEXT_AND_PAREN, + + /** + * operators + */ + //-- boolean ops + AND_OP, // 20 + OR_OP, + + //-- relational + EQUAL_OP, // 22 + NOT_EQUAL_OP, + LESS_THAN_OP, + GREATER_THAN_OP, + LESS_OR_EQUAL_OP, + GREATER_OR_EQUAL_OP, + //-- additive operators + ADDITION_OP, // 28 + SUBTRACTION_OP, + //-- multiplicative + DIVIDE_OP, // 30 + MULTIPLY_OP, + MODULUS_OP, + //-- path operators + PARENT_OP, // 33 + ANCESTOR_OP, + UNION_OP, + /** + * end of tokens for 3.7, bullet 1 -/ + */ + //-- Special endtoken + END // 36 + }; + + + /** + * Constructors + */ + typedef nsASingleFragmentString::const_char_iterator iterator; + + Token(iterator aStart, iterator aEnd, Type aType) + : mStart(aStart), + mEnd(aEnd), + mType(aType), + mNext(nullptr) + { + } + Token(iterator aChar, Type aType) + : mStart(aChar), + mEnd(aChar + 1), + mType(aType), + mNext(nullptr) + { + } + + const nsDependentSubstring Value() + { + return Substring(mStart, mEnd); + } + + iterator mStart, mEnd; + Type mType; + Token* mNext; +}; + +/** + * A class for splitting an "Expr" String into tokens and + * performing basic Lexical Analysis. + * + * This class was ported from XSL:P, an open source Java based XSL processor + */ + +class txExprLexer +{ +public: + + txExprLexer(); + ~txExprLexer(); + + /** + * Parse the given string. + * returns an error result if lexing failed. + * The given string must outlive the use of the lexer, as the + * generated Tokens point to Substrings of it. + * mPosition points to the offending location in case of an error. + */ + nsresult parse(const nsASingleFragmentString& aPattern); + + typedef nsASingleFragmentString::const_char_iterator iterator; + iterator mPosition; + + /** + * Functions for iterating over the TokenList + */ + + Token* nextToken(); + Token* peek() + { + NS_ASSERTION(mCurrentItem, "peek called uninitialized lexer"); + return mCurrentItem; + } + Token* peekAhead() + { + NS_ASSERTION(mCurrentItem, "peekAhead called on uninitialized lexer"); + // Don't peek past the end node + return (mCurrentItem && mCurrentItem->mNext) ? mCurrentItem->mNext : mCurrentItem; + } + bool hasMoreTokens() + { + NS_ASSERTION(mCurrentItem, "HasMoreTokens called on uninitialized lexer"); + return (mCurrentItem && mCurrentItem->mType != Token::END); + } + + /** + * Trivial Tokens + */ + //-- LF, changed to enum + enum _TrivialTokens { + D_QUOTE = '\"', + S_QUOTE = '\'', + L_PAREN = '(', + R_PAREN = ')', + L_BRACKET = '[', + R_BRACKET = ']', + L_ANGLE = '<', + R_ANGLE = '>', + COMMA = ',', + PERIOD = '.', + ASTERISK = '*', + FORWARD_SLASH = '/', + EQUAL = '=', + BANG = '!', + VERT_BAR = '|', + AT_SIGN = '@', + DOLLAR_SIGN = '$', + PLUS = '+', + HYPHEN = '-', + COLON = ':', + //-- whitespace tokens + SPACE = ' ', + TX_TAB = '\t', + TX_CR = '\n', + TX_LF = '\r' + }; + +private: + + Token* mCurrentItem; + Token* mFirstItem; + Token* mLastItem; + + int mTokenCount; + + void addToken(Token* aToken); + + /** + * Returns true if the following Token should be an operator. + * This is a helper for the first bullet of [XPath 3.7] + * Lexical Structure + */ + bool nextIsOperatorToken(Token* aToken); + + /** + * Returns true if the given character represents a numeric letter (digit) + * Implemented in ExprLexerChars.cpp + */ + static bool isXPathDigit(char16_t ch) + { + return (ch >= '0' && ch <= '9'); + } +}; + +#endif + |