diff options
Diffstat (limited to 'dom/xslt/xpath/txExprLexer.cpp')
-rw-r--r-- | dom/xslt/xpath/txExprLexer.cpp | 367 |
1 files changed, 367 insertions, 0 deletions
diff --git a/dom/xslt/xpath/txExprLexer.cpp b/dom/xslt/xpath/txExprLexer.cpp new file mode 100644 index 000000000..dc1616af0 --- /dev/null +++ b/dom/xslt/xpath/txExprLexer.cpp @@ -0,0 +1,367 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Lexical analyzer for XPath expressions + */ + +#include "txExprLexer.h" +#include "nsGkAtoms.h" +#include "nsString.h" +#include "nsError.h" +#include "txXMLUtils.h" + +/** + * Creates a new ExprLexer + */ +txExprLexer::txExprLexer() + : mCurrentItem(nullptr), + mFirstItem(nullptr), + mLastItem(nullptr), + mTokenCount(0) +{ +} + +/** + * Destroys this instance of an txExprLexer + */ +txExprLexer::~txExprLexer() +{ + //-- delete tokens + Token* tok = mFirstItem; + while (tok) { + Token* temp = tok->mNext; + delete tok; + tok = temp; + } + mCurrentItem = nullptr; +} + +Token* +txExprLexer::nextToken() +{ + if (!mCurrentItem) { + NS_NOTREACHED("nextToken called on uninitialized lexer"); + return nullptr; + } + + if (mCurrentItem->mType == Token::END) { + // Do not progress beyond the end token + return mCurrentItem; + } + + Token* token = mCurrentItem; + mCurrentItem = mCurrentItem->mNext; + return token; +} + +void +txExprLexer::addToken(Token* aToken) +{ + if (mLastItem) { + mLastItem->mNext = aToken; + } + if (!mFirstItem) { + mFirstItem = aToken; + mCurrentItem = aToken; + } + mLastItem = aToken; + ++mTokenCount; +} + +/** + * Returns true if the following Token should be an operator. + * This is a helper for the first bullet of [XPath 3.7] + * Lexical Structure + */ +bool +txExprLexer::nextIsOperatorToken(Token* aToken) +{ + if (!aToken || aToken->mType == Token::NULL_TOKEN) { + return false; + } + /* This relies on the tokens having the right order in txExprLexer.h */ + return aToken->mType < Token::COMMA || + aToken->mType > Token::UNION_OP; + +} + +/** + * Parses the given string into a sequence of Tokens + */ +nsresult +txExprLexer::parse(const nsASingleFragmentString& aPattern) +{ + iterator start, end; + start = aPattern.BeginReading(mPosition); + aPattern.EndReading(end); + + //-- initialize previous token, this will automatically get + //-- deleted when it goes out of scope + Token nullToken(nullptr, nullptr, Token::NULL_TOKEN); + + Token::Type defType; + Token* newToken = nullptr; + Token* prevToken = &nullToken; + bool isToken; + + while (mPosition < end) { + + defType = Token::CNAME; + isToken = true; + + if (*mPosition == DOLLAR_SIGN) { + if (++mPosition == end || !XMLUtils::isLetter(*mPosition)) { + return NS_ERROR_XPATH_INVALID_VAR_NAME; + } + defType = Token::VAR_REFERENCE; + } + // just reuse the QName parsing, which will use defType + // the token to construct + + if (XMLUtils::isLetter(*mPosition)) { + // NCName, can get QName or OperatorName; + // FunctionName, NodeName, and AxisSpecifier may want whitespace, + // and are dealt with below + start = mPosition; + while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) { + /* just go */ + } + if (mPosition < end && *mPosition == COLON) { + // try QName or wildcard, might need to step back for axis + if (++mPosition == end) { + return NS_ERROR_XPATH_UNEXPECTED_END; + } + if (XMLUtils::isLetter(*mPosition)) { + while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) { + /* just go */ + } + } + else if (*mPosition == '*' && defType != Token::VAR_REFERENCE) { + // eat wildcard for NameTest, bail for var ref at COLON + ++mPosition; + } + else { + --mPosition; // step back + } + } + if (nextIsOperatorToken(prevToken)) { + nsDependentSubstring op(Substring(start, mPosition)); + if (nsGkAtoms::_and->Equals(op)) { + defType = Token::AND_OP; + } + else if (nsGkAtoms::_or->Equals(op)) { + defType = Token::OR_OP; + } + else if (nsGkAtoms::mod->Equals(op)) { + defType = Token::MODULUS_OP; + } + else if (nsGkAtoms::div->Equals(op)) { + defType = Token::DIVIDE_OP; + } + else { + // XXX QUESTION: spec is not too precise + // badops is sure an error, but is bad:ops, too? We say yes! + return NS_ERROR_XPATH_OPERATOR_EXPECTED; + } + } + newToken = new Token(start, mPosition, defType); + } + else if (isXPathDigit(*mPosition)) { + start = mPosition; + while (++mPosition < end && isXPathDigit(*mPosition)) { + /* just go */ + } + if (mPosition < end && *mPosition == '.') { + while (++mPosition < end && isXPathDigit(*mPosition)) { + /* just go */ + } + } + newToken = new Token(start, mPosition, Token::NUMBER); + } + else { + switch (*mPosition) { + //-- ignore whitespace + case SPACE: + case TX_TAB: + case TX_CR: + case TX_LF: + ++mPosition; + isToken = false; + break; + case S_QUOTE : + case D_QUOTE : + start = mPosition; + while (++mPosition < end && *mPosition != *start) { + // eat literal + } + if (mPosition == end) { + mPosition = start; + return NS_ERROR_XPATH_UNCLOSED_LITERAL; + } + newToken = new Token(start + 1, mPosition, Token::LITERAL); + ++mPosition; + break; + case PERIOD: + // period can be .., .(DIGITS)+ or ., check next + if (++mPosition == end) { + newToken = new Token(mPosition - 1, Token::SELF_NODE); + } + else if (isXPathDigit(*mPosition)) { + start = mPosition - 1; + while (++mPosition < end && isXPathDigit(*mPosition)) { + /* just go */ + } + newToken = new Token(start, mPosition, Token::NUMBER); + } + else if (*mPosition == PERIOD) { + ++mPosition; + newToken = new Token(mPosition - 2, mPosition, Token::PARENT_NODE); + } + else { + newToken = new Token(mPosition - 1, Token::SELF_NODE); + } + break; + case COLON: // QNames are dealt above, must be axis ident + if (++mPosition >= end || *mPosition != COLON || + prevToken->mType != Token::CNAME) { + return NS_ERROR_XPATH_BAD_COLON; + } + prevToken->mType = Token::AXIS_IDENTIFIER; + ++mPosition; + isToken = false; + break; + case FORWARD_SLASH : + if (++mPosition < end && *mPosition == FORWARD_SLASH) { + ++mPosition; + newToken = new Token(mPosition - 2, mPosition, Token::ANCESTOR_OP); + } + else { + newToken = new Token(mPosition - 1, Token::PARENT_OP); + } + break; + case BANG : // can only be != + if (++mPosition < end && *mPosition == EQUAL) { + ++mPosition; + newToken = new Token(mPosition - 2, mPosition, Token::NOT_EQUAL_OP); + break; + } + // Error ! is not not() + return NS_ERROR_XPATH_BAD_BANG; + case EQUAL: + newToken = new Token(mPosition, Token::EQUAL_OP); + ++mPosition; + break; + case L_ANGLE: + if (++mPosition == end) { + return NS_ERROR_XPATH_UNEXPECTED_END; + } + if (*mPosition == EQUAL) { + ++mPosition; + newToken = new Token(mPosition - 2, mPosition, + Token::LESS_OR_EQUAL_OP); + } + else { + newToken = new Token(mPosition - 1, Token::LESS_THAN_OP); + } + break; + case R_ANGLE: + if (++mPosition == end) { + return NS_ERROR_XPATH_UNEXPECTED_END; + } + if (*mPosition == EQUAL) { + ++mPosition; + newToken = new Token(mPosition - 2, mPosition, + Token::GREATER_OR_EQUAL_OP); + } + else { + newToken = new Token(mPosition - 1, Token::GREATER_THAN_OP); + } + break; + case HYPHEN : + newToken = new Token(mPosition, Token::SUBTRACTION_OP); + ++mPosition; + break; + case ASTERISK: + if (nextIsOperatorToken(prevToken)) { + newToken = new Token(mPosition, Token::MULTIPLY_OP); + } + else { + newToken = new Token(mPosition, Token::CNAME); + } + ++mPosition; + break; + case L_PAREN: + if (prevToken->mType == Token::CNAME) { + const nsDependentSubstring& val = prevToken->Value(); + if (val.EqualsLiteral("comment")) { + prevToken->mType = Token::COMMENT_AND_PAREN; + } + else if (val.EqualsLiteral("node")) { + prevToken->mType = Token::NODE_AND_PAREN; + } + else if (val.EqualsLiteral("processing-instruction")) { + prevToken->mType = Token::PROC_INST_AND_PAREN; + } + else if (val.EqualsLiteral("text")) { + prevToken->mType = Token::TEXT_AND_PAREN; + } + else { + prevToken->mType = Token::FUNCTION_NAME_AND_PAREN; + } + isToken = false; + } + else { + newToken = new Token(mPosition, Token::L_PAREN); + } + ++mPosition; + break; + case R_PAREN: + newToken = new Token(mPosition, Token::R_PAREN); + ++mPosition; + break; + case L_BRACKET: + newToken = new Token(mPosition, Token::L_BRACKET); + ++mPosition; + break; + case R_BRACKET: + newToken = new Token(mPosition, Token::R_BRACKET); + ++mPosition; + break; + case COMMA: + newToken = new Token(mPosition, Token::COMMA); + ++mPosition; + break; + case AT_SIGN : + newToken = new Token(mPosition, Token::AT_SIGN); + ++mPosition; + break; + case PLUS: + newToken = new Token(mPosition, Token::ADDITION_OP); + ++mPosition; + break; + case VERT_BAR: + newToken = new Token(mPosition, Token::UNION_OP); + ++mPosition; + break; + default: + // Error, don't grok character :-( + return NS_ERROR_XPATH_ILLEGAL_CHAR; + } + } + if (isToken) { + NS_ENSURE_TRUE(newToken, NS_ERROR_OUT_OF_MEMORY); + NS_ENSURE_TRUE(newToken != mLastItem, NS_ERROR_FAILURE); + prevToken = newToken; + addToken(newToken); + } + } + + // add a endToken to the list + newToken = new Token(end, end, Token::END); + addToken(newToken); + + return NS_OK; +} |