summaryrefslogtreecommitdiffstats
path: root/dom/xslt/xpath/txExprLexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'dom/xslt/xpath/txExprLexer.cpp')
-rw-r--r--dom/xslt/xpath/txExprLexer.cpp367
1 files changed, 367 insertions, 0 deletions
diff --git a/dom/xslt/xpath/txExprLexer.cpp b/dom/xslt/xpath/txExprLexer.cpp
new file mode 100644
index 000000000..dc1616af0
--- /dev/null
+++ b/dom/xslt/xpath/txExprLexer.cpp
@@ -0,0 +1,367 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Lexical analyzer for XPath expressions
+ */
+
+#include "txExprLexer.h"
+#include "nsGkAtoms.h"
+#include "nsString.h"
+#include "nsError.h"
+#include "txXMLUtils.h"
+
+/**
+ * Creates a new ExprLexer
+ */
+txExprLexer::txExprLexer()
+ : mCurrentItem(nullptr),
+ mFirstItem(nullptr),
+ mLastItem(nullptr),
+ mTokenCount(0)
+{
+}
+
+/**
+ * Destroys this instance of an txExprLexer
+ */
+txExprLexer::~txExprLexer()
+{
+ //-- delete tokens
+ Token* tok = mFirstItem;
+ while (tok) {
+ Token* temp = tok->mNext;
+ delete tok;
+ tok = temp;
+ }
+ mCurrentItem = nullptr;
+}
+
+Token*
+txExprLexer::nextToken()
+{
+ if (!mCurrentItem) {
+ NS_NOTREACHED("nextToken called on uninitialized lexer");
+ return nullptr;
+ }
+
+ if (mCurrentItem->mType == Token::END) {
+ // Do not progress beyond the end token
+ return mCurrentItem;
+ }
+
+ Token* token = mCurrentItem;
+ mCurrentItem = mCurrentItem->mNext;
+ return token;
+}
+
+void
+txExprLexer::addToken(Token* aToken)
+{
+ if (mLastItem) {
+ mLastItem->mNext = aToken;
+ }
+ if (!mFirstItem) {
+ mFirstItem = aToken;
+ mCurrentItem = aToken;
+ }
+ mLastItem = aToken;
+ ++mTokenCount;
+}
+
+/**
+ * Returns true if the following Token should be an operator.
+ * This is a helper for the first bullet of [XPath 3.7]
+ * Lexical Structure
+ */
+bool
+txExprLexer::nextIsOperatorToken(Token* aToken)
+{
+ if (!aToken || aToken->mType == Token::NULL_TOKEN) {
+ return false;
+ }
+ /* This relies on the tokens having the right order in txExprLexer.h */
+ return aToken->mType < Token::COMMA ||
+ aToken->mType > Token::UNION_OP;
+
+}
+
+/**
+ * Parses the given string into a sequence of Tokens
+ */
+nsresult
+txExprLexer::parse(const nsASingleFragmentString& aPattern)
+{
+ iterator start, end;
+ start = aPattern.BeginReading(mPosition);
+ aPattern.EndReading(end);
+
+ //-- initialize previous token, this will automatically get
+ //-- deleted when it goes out of scope
+ Token nullToken(nullptr, nullptr, Token::NULL_TOKEN);
+
+ Token::Type defType;
+ Token* newToken = nullptr;
+ Token* prevToken = &nullToken;
+ bool isToken;
+
+ while (mPosition < end) {
+
+ defType = Token::CNAME;
+ isToken = true;
+
+ if (*mPosition == DOLLAR_SIGN) {
+ if (++mPosition == end || !XMLUtils::isLetter(*mPosition)) {
+ return NS_ERROR_XPATH_INVALID_VAR_NAME;
+ }
+ defType = Token::VAR_REFERENCE;
+ }
+ // just reuse the QName parsing, which will use defType
+ // the token to construct
+
+ if (XMLUtils::isLetter(*mPosition)) {
+ // NCName, can get QName or OperatorName;
+ // FunctionName, NodeName, and AxisSpecifier may want whitespace,
+ // and are dealt with below
+ start = mPosition;
+ while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
+ /* just go */
+ }
+ if (mPosition < end && *mPosition == COLON) {
+ // try QName or wildcard, might need to step back for axis
+ if (++mPosition == end) {
+ return NS_ERROR_XPATH_UNEXPECTED_END;
+ }
+ if (XMLUtils::isLetter(*mPosition)) {
+ while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
+ /* just go */
+ }
+ }
+ else if (*mPosition == '*' && defType != Token::VAR_REFERENCE) {
+ // eat wildcard for NameTest, bail for var ref at COLON
+ ++mPosition;
+ }
+ else {
+ --mPosition; // step back
+ }
+ }
+ if (nextIsOperatorToken(prevToken)) {
+ nsDependentSubstring op(Substring(start, mPosition));
+ if (nsGkAtoms::_and->Equals(op)) {
+ defType = Token::AND_OP;
+ }
+ else if (nsGkAtoms::_or->Equals(op)) {
+ defType = Token::OR_OP;
+ }
+ else if (nsGkAtoms::mod->Equals(op)) {
+ defType = Token::MODULUS_OP;
+ }
+ else if (nsGkAtoms::div->Equals(op)) {
+ defType = Token::DIVIDE_OP;
+ }
+ else {
+ // XXX QUESTION: spec is not too precise
+ // badops is sure an error, but is bad:ops, too? We say yes!
+ return NS_ERROR_XPATH_OPERATOR_EXPECTED;
+ }
+ }
+ newToken = new Token(start, mPosition, defType);
+ }
+ else if (isXPathDigit(*mPosition)) {
+ start = mPosition;
+ while (++mPosition < end && isXPathDigit(*mPosition)) {
+ /* just go */
+ }
+ if (mPosition < end && *mPosition == '.') {
+ while (++mPosition < end && isXPathDigit(*mPosition)) {
+ /* just go */
+ }
+ }
+ newToken = new Token(start, mPosition, Token::NUMBER);
+ }
+ else {
+ switch (*mPosition) {
+ //-- ignore whitespace
+ case SPACE:
+ case TX_TAB:
+ case TX_CR:
+ case TX_LF:
+ ++mPosition;
+ isToken = false;
+ break;
+ case S_QUOTE :
+ case D_QUOTE :
+ start = mPosition;
+ while (++mPosition < end && *mPosition != *start) {
+ // eat literal
+ }
+ if (mPosition == end) {
+ mPosition = start;
+ return NS_ERROR_XPATH_UNCLOSED_LITERAL;
+ }
+ newToken = new Token(start + 1, mPosition, Token::LITERAL);
+ ++mPosition;
+ break;
+ case PERIOD:
+ // period can be .., .(DIGITS)+ or ., check next
+ if (++mPosition == end) {
+ newToken = new Token(mPosition - 1, Token::SELF_NODE);
+ }
+ else if (isXPathDigit(*mPosition)) {
+ start = mPosition - 1;
+ while (++mPosition < end && isXPathDigit(*mPosition)) {
+ /* just go */
+ }
+ newToken = new Token(start, mPosition, Token::NUMBER);
+ }
+ else if (*mPosition == PERIOD) {
+ ++mPosition;
+ newToken = new Token(mPosition - 2, mPosition, Token::PARENT_NODE);
+ }
+ else {
+ newToken = new Token(mPosition - 1, Token::SELF_NODE);
+ }
+ break;
+ case COLON: // QNames are dealt above, must be axis ident
+ if (++mPosition >= end || *mPosition != COLON ||
+ prevToken->mType != Token::CNAME) {
+ return NS_ERROR_XPATH_BAD_COLON;
+ }
+ prevToken->mType = Token::AXIS_IDENTIFIER;
+ ++mPosition;
+ isToken = false;
+ break;
+ case FORWARD_SLASH :
+ if (++mPosition < end && *mPosition == FORWARD_SLASH) {
+ ++mPosition;
+ newToken = new Token(mPosition - 2, mPosition, Token::ANCESTOR_OP);
+ }
+ else {
+ newToken = new Token(mPosition - 1, Token::PARENT_OP);
+ }
+ break;
+ case BANG : // can only be !=
+ if (++mPosition < end && *mPosition == EQUAL) {
+ ++mPosition;
+ newToken = new Token(mPosition - 2, mPosition, Token::NOT_EQUAL_OP);
+ break;
+ }
+ // Error ! is not not()
+ return NS_ERROR_XPATH_BAD_BANG;
+ case EQUAL:
+ newToken = new Token(mPosition, Token::EQUAL_OP);
+ ++mPosition;
+ break;
+ case L_ANGLE:
+ if (++mPosition == end) {
+ return NS_ERROR_XPATH_UNEXPECTED_END;
+ }
+ if (*mPosition == EQUAL) {
+ ++mPosition;
+ newToken = new Token(mPosition - 2, mPosition,
+ Token::LESS_OR_EQUAL_OP);
+ }
+ else {
+ newToken = new Token(mPosition - 1, Token::LESS_THAN_OP);
+ }
+ break;
+ case R_ANGLE:
+ if (++mPosition == end) {
+ return NS_ERROR_XPATH_UNEXPECTED_END;
+ }
+ if (*mPosition == EQUAL) {
+ ++mPosition;
+ newToken = new Token(mPosition - 2, mPosition,
+ Token::GREATER_OR_EQUAL_OP);
+ }
+ else {
+ newToken = new Token(mPosition - 1, Token::GREATER_THAN_OP);
+ }
+ break;
+ case HYPHEN :
+ newToken = new Token(mPosition, Token::SUBTRACTION_OP);
+ ++mPosition;
+ break;
+ case ASTERISK:
+ if (nextIsOperatorToken(prevToken)) {
+ newToken = new Token(mPosition, Token::MULTIPLY_OP);
+ }
+ else {
+ newToken = new Token(mPosition, Token::CNAME);
+ }
+ ++mPosition;
+ break;
+ case L_PAREN:
+ if (prevToken->mType == Token::CNAME) {
+ const nsDependentSubstring& val = prevToken->Value();
+ if (val.EqualsLiteral("comment")) {
+ prevToken->mType = Token::COMMENT_AND_PAREN;
+ }
+ else if (val.EqualsLiteral("node")) {
+ prevToken->mType = Token::NODE_AND_PAREN;
+ }
+ else if (val.EqualsLiteral("processing-instruction")) {
+ prevToken->mType = Token::PROC_INST_AND_PAREN;
+ }
+ else if (val.EqualsLiteral("text")) {
+ prevToken->mType = Token::TEXT_AND_PAREN;
+ }
+ else {
+ prevToken->mType = Token::FUNCTION_NAME_AND_PAREN;
+ }
+ isToken = false;
+ }
+ else {
+ newToken = new Token(mPosition, Token::L_PAREN);
+ }
+ ++mPosition;
+ break;
+ case R_PAREN:
+ newToken = new Token(mPosition, Token::R_PAREN);
+ ++mPosition;
+ break;
+ case L_BRACKET:
+ newToken = new Token(mPosition, Token::L_BRACKET);
+ ++mPosition;
+ break;
+ case R_BRACKET:
+ newToken = new Token(mPosition, Token::R_BRACKET);
+ ++mPosition;
+ break;
+ case COMMA:
+ newToken = new Token(mPosition, Token::COMMA);
+ ++mPosition;
+ break;
+ case AT_SIGN :
+ newToken = new Token(mPosition, Token::AT_SIGN);
+ ++mPosition;
+ break;
+ case PLUS:
+ newToken = new Token(mPosition, Token::ADDITION_OP);
+ ++mPosition;
+ break;
+ case VERT_BAR:
+ newToken = new Token(mPosition, Token::UNION_OP);
+ ++mPosition;
+ break;
+ default:
+ // Error, don't grok character :-(
+ return NS_ERROR_XPATH_ILLEGAL_CHAR;
+ }
+ }
+ if (isToken) {
+ NS_ENSURE_TRUE(newToken, NS_ERROR_OUT_OF_MEMORY);
+ NS_ENSURE_TRUE(newToken != mLastItem, NS_ERROR_FAILURE);
+ prevToken = newToken;
+ addToken(newToken);
+ }
+ }
+
+ // add a endToken to the list
+ newToken = new Token(end, end, Token::END);
+ addToken(newToken);
+
+ return NS_OK;
+}