summaryrefslogtreecommitdiffstats
path: root/testing/web-platform/harness/wptrunner/wptmanifest/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'testing/web-platform/harness/wptrunner/wptmanifest/parser.py')
-rw-r--r--testing/web-platform/harness/wptrunner/wptmanifest/parser.py744
1 file changed, 744 insertions, 0 deletions
diff --git a/testing/web-platform/harness/wptrunner/wptmanifest/parser.py b/testing/web-platform/harness/wptrunner/wptmanifest/parser.py
new file mode 100644
index 000000000..eeac66d31
--- /dev/null
+++ b/testing/web-platform/harness/wptrunner/wptmanifest/parser.py
@@ -0,0 +1,744 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#default_value:foo
+#include: other.manifest
+#
+#[test_name.js]
+# expected: ERROR
+#
+# [subtest 1]
+# expected:
+# os == win: FAIL #This is a comment
+# PASS
+#
+
+# TODO: keep comments in the tree
+
+import types
+from cStringIO import StringIO
+
+from node import *
+
+
class ParseError(Exception):
    """Raised when the manifest tokenizer or parser encounters invalid input."""

    def __init__(self, filename, line, detail):
        self.filename = filename
        self.line = line
        self.detail = detail
        # Human-readable summary, e.g. "EOL in heading: test.ini line 3"
        self.message = "%s: %s line %s" % (detail, filename, line)
        Exception.__init__(self, self.message)
+
# Sentinel objects used by the tokenizer.  Each must be a distinct
# *instance* (``object()``); the original code bound the ``object`` type
# itself, which made all three sentinels the very same object.
eol = object()
group_start = object()
group_end = object()

digits = "0123456789"
open_parens = "[("
close_parens = "])"
parens = open_parens + close_parens
operator_chars = "=!"

unary_operators = ["not"]
binary_operators = ["==", "!=", "and", "or"]

# Ordered from tightest-binding to loosest; precedence() derives an
# operator's precedence from its position in this list.
operators = ["==", "!=", "not", "and", "or"]

atoms = {"True": True,
         "False": False,
         "Reset": object()}
+
def decode(byte_str):
    """Decode a UTF-8 encoded byte string into text."""
    return byte_str.decode("utf8")
+
+
def precedence(operator_node):
    """Return the binding strength of *operator_node*.

    Earlier entries in the module-level ``operators`` list bind tighter,
    so a smaller index maps to a larger precedence value.
    """
    position = operators.index(operator_node.data)
    return len(operators) - position
+
+
class TokenTypes(object):
    """Namespace of token-type tags; each attribute's value is its own name."""

    def __init__(self):
        names = ("group_start", "group_end", "paren", "list_start",
                 "list_end", "separator", "ident", "string", "number",
                 "atom", "eof")
        for name in names:
            setattr(self, name, name)

token_types = TokenTypes()
+
+
+class Tokenizer(object):
+ def __init__(self):
+ self.reset()
+
+ def reset(self):
+ self.indent_levels = [0]
+ self.state = self.line_start_state
+ self.next_state = self.data_line_state
+ self.line_number = 0
+
+ def tokenize(self, stream):
+ self.reset()
+ if type(stream) in types.StringTypes:
+ stream = StringIO(stream)
+ if not hasattr(stream, "name"):
+ self.filename = ""
+ else:
+ self.filename = stream.name
+
+ self.next_line_state = self.line_start_state
+ for i, line in enumerate(stream):
+ self.state = self.next_line_state
+ assert self.state is not None
+ states = []
+ self.next_line_state = None
+ self.line_number = i + 1
+ self.index = 0
+ self.line = line.rstrip()
+ while self.state != self.eol_state:
+ states.append(self.state)
+ tokens = self.state()
+ if tokens:
+ for token in tokens:
+ yield token
+ self.state()
+ while True:
+ yield (token_types.eof, None)
+
+ def char(self):
+ if self.index == len(self.line):
+ return eol
+ return self.line[self.index]
+
+ def consume(self):
+ if self.index < len(self.line):
+ self.index += 1
+
+ def peek(self, length):
+ return self.line[self.index:self.index + length]
+
+ def skip_whitespace(self):
+ while self.char() == " ":
+ self.consume()
+
+ def eol_state(self):
+ if self.next_line_state is None:
+ self.next_line_state = self.line_start_state
+
+ def line_start_state(self):
+ self.skip_whitespace()
+ if self.char() == eol:
+ self.state = self.eol_state
+ return
+ if self.index > self.indent_levels[-1]:
+ self.indent_levels.append(self.index)
+ yield (token_types.group_start, None)
+ else:
+ while self.index < self.indent_levels[-1]:
+ self.indent_levels.pop()
+ yield (token_types.group_end, None)
+ # This is terrible; if we were parsing an expression
+ # then the next_state will be expr_or_value but when we deindent
+ # it must always be a heading or key next so we go back to data_line_state
+ self.next_state = self.data_line_state
+ if self.index != self.indent_levels[-1]:
+ raise ParseError(self.filename, self.line_number, "Unexpected indent")
+
+ self.state = self.next_state
+
+ def data_line_state(self):
+ if self.char() == "[":
+ yield (token_types.paren, self.char())
+ self.consume()
+ self.state = self.heading_state
+ else:
+ self.state = self.key_state
+
+ def heading_state(self):
+ rv = ""
+ while True:
+ c = self.char()
+ if c == "\\":
+ rv += self.consume_escape()
+ elif c == "]":
+ break
+ elif c == eol:
+ raise ParseError(self.filename, self.line_number, "EOL in heading")
+ else:
+ rv += c
+ self.consume()
+
+ yield (token_types.string, decode(rv))
+ yield (token_types.paren, "]")
+ self.consume()
+ self.state = self.line_end_state
+ self.next_state = self.data_line_state
+
+ def key_state(self):
+ rv = ""
+ while True:
+ c = self.char()
+ if c == " ":
+ self.skip_whitespace()
+ if self.char() != ":":
+ raise ParseError(self.filename, self.line_number, "Space in key name")
+ break
+ elif c == ":":
+ break
+ elif c == eol:
+ raise ParseError(self.filename, self.line_number, "EOL in key name (missing ':'?)")
+ elif c == "\\":
+ rv += self.consume_escape()
+ else:
+ rv += c
+ self.consume()
+ yield (token_types.string, decode(rv))
+ yield (token_types.separator, ":")
+ self.consume()
+ self.state = self.after_key_state
+
+ def after_key_state(self):
+ self.skip_whitespace()
+ c = self.char()
+ if c == "#":
+ self.next_state = self.expr_or_value_state
+ self.state = self.comment_state
+ elif c == eol:
+ self.next_state = self.expr_or_value_state
+ self.state = self.eol_state
+ elif c == "[":
+ self.state = self.list_start_state
+ else:
+ self.state = self.value_state
+
+ def list_start_state(self):
+ yield (token_types.list_start, "[")
+ self.consume()
+ self.state = self.list_value_start_state
+
+ def list_value_start_state(self):
+ self.skip_whitespace()
+ if self.char() == "]":
+ self.state = self.list_end_state
+ elif self.char() in ("'", '"'):
+ quote_char = self.char()
+ self.consume()
+ yield (token_types.string, self.consume_string(quote_char))
+ self.skip_whitespace()
+ if self.char() == "]":
+ self.state = self.list_end_state
+ elif self.char() != ",":
+ raise ParseError(self.filename, self.line_number, "Junk after quoted string")
+ self.consume()
+ elif self.char() == "#":
+ self.state = self.comment_state
+ self.next_line_state = self.list_value_start_state
+ elif self.char() == eol:
+ self.next_line_state = self.list_value_start_state
+ self.state = self.eol_state
+ elif self.char() == ",":
+ raise ParseError(self.filename, self.line_number, "List item started with separator")
+ elif self.char() == "@":
+ self.state = self.list_value_atom_state
+ else:
+ self.state = self.list_value_state
+
+ def list_value_state(self):
+ rv = ""
+ spaces = 0
+ while True:
+ c = self.char()
+ if c == "\\":
+ escape = self.consume_escape()
+ rv += escape
+ elif c == eol:
+ raise ParseError(self.filename, self.line_number, "EOL in list value")
+ elif c == "#":
+ raise ParseError(self.filename, self.line_number, "EOL in list value (comment)")
+ elif c == ",":
+ self.state = self.list_value_start_state
+ self.consume()
+ break
+ elif c == " ":
+ spaces += 1
+ self.consume()
+ elif c == "]":
+ self.state = self.list_end_state
+ self.consume()
+ break
+ else:
+ rv += " " * spaces
+ spaces = 0
+ rv += c
+ self.consume()
+
+ if rv:
+ yield (token_types.string, decode(rv))
+
+ def list_value_atom_state(self):
+ self.consume()
+ for _, value in self.list_value_state():
+ yield token_types.atom, value
+
+ def list_end_state(self):
+ self.consume()
+ yield (token_types.list_end, "]")
+ self.state = self.line_end_state
+
+ def value_state(self):
+ self.skip_whitespace()
+ if self.char() in ("'", '"'):
+ quote_char = self.char()
+ self.consume()
+ yield (token_types.string, self.consume_string(quote_char))
+ if self.char() == "#":
+ self.state = self.comment_state
+ else:
+ self.state = self.line_end_state
+ elif self.char() == "@":
+ self.consume()
+ for _, value in self.value_inner_state():
+ yield token_types.atom, value
+ else:
+ self.state = self.value_inner_state
+
+ def value_inner_state(self):
+ rv = ""
+ spaces = 0
+ while True:
+ c = self.char()
+ if c == "\\":
+ rv += self.consume_escape()
+ elif c == "#":
+ self.state = self.comment_state
+ break
+ elif c == " ":
+ # prevent whitespace before comments from being included in the value
+ spaces += 1
+ self.consume()
+ elif c == eol:
+ self.state = self.line_end_state
+ break
+ else:
+ rv += " " * spaces
+ spaces = 0
+ rv += c
+ self.consume()
+ yield (token_types.string, decode(rv))
+
+ def comment_state(self):
+ while self.char() is not eol:
+ self.consume()
+ self.state = self.eol_state
+
+ def line_end_state(self):
+ self.skip_whitespace()
+ c = self.char()
+ if c == "#":
+ self.state = self.comment_state
+ elif c == eol:
+ self.state = self.eol_state
+ else:
+ raise ParseError(self.filename, self.line_number, "Junk before EOL %s" % c)
+
+ def consume_string(self, quote_char):
+ rv = ""
+ while True:
+ c = self.char()
+ if c == "\\":
+ rv += self.consume_escape()
+ elif c == quote_char:
+ self.consume()
+ break
+ elif c == eol:
+ raise ParseError(self.filename, self.line_number, "EOL in quoted string")
+ else:
+ rv += c
+ self.consume()
+
+ return decode(rv)
+
+ def expr_or_value_state(self):
+ if self.peek(3) == "if ":
+ self.state = self.expr_state
+ else:
+ self.state = self.value_state
+
+ def expr_state(self):
+ self.skip_whitespace()
+ c = self.char()
+ if c == eol:
+ raise ParseError(self.filename, self.line_number, "EOL in expression")
+ elif c in "'\"":
+ self.consume()
+ yield (token_types.string, self.consume_string(c))
+ elif c == "#":
+ raise ParseError(self.filename, self.line_number, "Comment before end of expression")
+ elif c == ":":
+ yield (token_types.separator, c)
+ self.consume()
+ self.state = self.value_state
+ elif c in parens:
+ self.consume()
+ yield (token_types.paren, c)
+ elif c in ("!", "="):
+ self.state = self.operator_state
+ elif c in digits:
+ self.state = self.digit_state
+ else:
+ self.state = self.ident_state
+
+ def operator_state(self):
+ # Only symbolic operators
+ index_0 = self.index
+ while True:
+ c = self.char()
+ if c == eol:
+ break
+ elif c in operator_chars:
+ self.consume()
+ else:
+ self.state = self.expr_state
+ break
+ yield (token_types.ident, self.line[index_0:self.index])
+
+ def digit_state(self):
+ index_0 = self.index
+ seen_dot = False
+ while True:
+ c = self.char()
+ if c == eol:
+ break
+ elif c in digits:
+ self.consume()
+ elif c == ".":
+ if seen_dot:
+ raise ParseError(self.filename, self.line_number, "Invalid number")
+ self.consume()
+ seen_dot = True
+ elif c in parens:
+ break
+ elif c in operator_chars:
+ break
+ elif c == " ":
+ break
+ elif c == ":":
+ break
+ else:
+ raise ParseError(self.filename, self.line_number, "Invalid character in number")
+
+ self.state = self.expr_state
+ yield (token_types.number, self.line[index_0:self.index])
+
+ def ident_state(self):
+ index_0 = self.index
+ while True:
+ c = self.char()
+ if c == eol:
+ break
+ elif c == ".":
+ break
+ elif c in parens:
+ break
+ elif c in operator_chars:
+ break
+ elif c == " ":
+ break
+ elif c == ":":
+ break
+ else:
+ self.consume()
+ self.state = self.expr_state
+ yield (token_types.ident, self.line[index_0:self.index])
+
+ def consume_escape(self):
+ assert self.char() == "\\"
+ self.consume()
+ c = self.char()
+ self.consume()
+ if c == "x":
+ return self.decode_escape(2)
+ elif c == "u":
+ return self.decode_escape(4)
+ elif c == "U":
+ return self.decode_escape(6)
+ elif c in ["a", "b", "f", "n", "r", "t", "v"]:
+ return eval("'\%s'" % c)
+ elif c is eol:
+ raise ParseError(self.filename, self.line_number, "EOL in escape")
+ else:
+ return c
+
+ def decode_escape(self, length):
+ value = 0
+ for i in xrange(length):
+ c = self.char()
+ value *= 16
+ value += self.escape_value(c)
+ self.consume()
+
+ return unichr(value).encode("utf8")
+
+ def escape_value(self, c):
+ if '0' <= c <= '9':
+ return ord(c) - ord('0')
+ elif 'a' <= c <= 'f':
+ return ord(c) - ord('a') + 10
+ elif 'A' <= c <= 'F':
+ return ord(c) - ord('A') + 10
+ else:
+ raise ParseError(self.filename, self.line_number, "Invalid character escape")
+
+
class Parser(object):
    """Recursive-descent parser for the manifest format.

    Consumes the token stream produced by Tokenizer and builds a tree of
    node objects (DataNode, KeyValueNode, ...) via a Treebuilder.
    Conditional expressions are assembled with an ExpressionBuilder.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Reset all parser state before a new parse."""
        self.token = None
        # NOTE(review): the next two attributes look vestigial -- the
        # expression methods below use the module-level unary_operators /
        # binary_operators lists, whose contents differ from these values.
        # Kept to avoid changing the object's visible interface.
        self.unary_operators = "!"
        self.binary_operators = frozenset(["&&", "||", "=="])
        self.tokenizer = Tokenizer()
        self.token_generator = None
        self.tree = Treebuilder(DataNode(None))
        self.expr_builder = None
        self.expr_builders = []

    def parse(self, input):
        """Parse *input* (a string or stream); return the root DataNode."""
        self.reset()
        self.token_generator = self.tokenizer.tokenize(input)
        self.consume()
        self.manifest()
        return self.tree.node

    def consume(self):
        # Advance to the next token; the tokenizer yields eof forever, so
        # this never raises StopIteration.  next() works on Python 2.6+.
        self.token = next(self.token_generator)

    def expect(self, type, value=None):
        """Consume the current token, requiring the given type (and value).

        Raises ParseError (with file/line context) on a mismatch; the
        original code raised the bare class, which produced a TypeError.
        """
        if self.token[0] != type:
            raise ParseError(self.tokenizer.filename, self.tokenizer.line_number,
                             "Expected token of type %s, got %s" % (type, self.token[0]))
        if value is not None:
            if self.token[1] != value:
                raise ParseError(self.tokenizer.filename, self.tokenizer.line_number,
                                 "Expected %r, got %r" % (value, self.token[1]))

        self.consume()

    def manifest(self):
        # manifest := data_block EOF
        self.data_block()
        self.expect(token_types.eof)

    def data_block(self):
        """Parse key/value entries followed by nested [heading] sections."""
        while self.token[0] == token_types.string:
            self.tree.append(KeyValueNode(self.token[1]))
            self.consume()
            self.expect(token_types.separator)
            self.value_block()
            self.tree.pop()

        while self.token == (token_types.paren, "["):
            self.consume()
            if self.token[0] != token_types.string:
                raise ParseError(self.tokenizer.filename, self.tokenizer.line_number,
                                 "Expected section heading")
            self.tree.append(DataNode(self.token[1]))
            self.consume()
            self.expect(token_types.paren, "]")
            if self.token[0] == token_types.group_start:
                self.consume()
                self.data_block()
                self.eof_or_end_group()
            self.tree.pop()

    def eof_or_end_group(self):
        # At end of input the tokenizer emits eof without closing groups.
        if self.token[0] != token_types.eof:
            self.expect(token_types.group_end)

    def value_block(self):
        """Parse the value of a key: list, scalar, atom, or conditional group."""
        if self.token[0] == token_types.list_start:
            self.consume()
            self.list_value()
        elif self.token[0] == token_types.string:
            self.value()
        elif self.token[0] == token_types.group_start:
            self.consume()
            self.expression_values()
            # Optional unconditional fallback value after the conditionals.
            if self.token[0] == token_types.string:
                self.value()
            self.eof_or_end_group()
        elif self.token[0] == token_types.atom:
            self.atom()
        else:
            raise ParseError(self.tokenizer.filename, self.tokenizer.line_number,
                             "Expected value")

    def list_value(self):
        """Parse a ``[a, b, ...]`` list of strings and atoms."""
        self.tree.append(ListNode())
        while self.token[0] in (token_types.atom, token_types.string):
            if self.token[0] == token_types.atom:
                self.atom()
            else:
                self.value()
        self.expect(token_types.list_end)
        self.tree.pop()

    def expression_values(self):
        """Parse zero or more ``if <expr>: <value>`` conditional entries."""
        while self.token == (token_types.ident, "if"):
            self.consume()
            self.tree.append(ConditionalNode())
            self.expr_start()
            self.expect(token_types.separator)
            if self.token[0] == token_types.string:
                self.value()
            else:
                raise ParseError(self.tokenizer.filename, self.tokenizer.line_number,
                                 "Expected value after conditional expression")
            self.tree.pop()

    def value(self):
        """Append a leaf ValueNode for the current string token."""
        self.tree.append(ValueNode(self.token[1]))
        self.consume()
        self.tree.pop()

    def atom(self):
        """Append an AtomNode for a recognised ``@Atom`` token."""
        if self.token[1] not in atoms:
            raise ParseError(self.tokenizer.filename, self.tokenizer.line_number, "Unrecognised symbol @%s" % self.token[1])
        self.tree.append(AtomNode(atoms[self.token[1]]))
        self.consume()
        self.tree.pop()

    def expr_start(self):
        """Parse a complete expression, supporting nested index expressions."""
        self.expr_builder = ExpressionBuilder(self.tokenizer)
        self.expr_builders.append(self.expr_builder)
        self.expr()
        expression = self.expr_builder.finish()
        self.expr_builders.pop()
        self.expr_builder = self.expr_builders[-1] if self.expr_builders else None
        if self.expr_builder:
            # Nested expression (e.g. an index): attach to the outer builder.
            self.expr_builder.operands[-1].children[-1].append(expression)
        else:
            self.tree.append(expression)
            self.tree.pop()

    def expr(self):
        # expr := operand (bin_op operand)*
        self.expr_operand()
        while (self.token[0] == token_types.ident and self.token[1] in binary_operators):
            self.expr_bin_op()
            self.expr_operand()

    def expr_operand(self):
        """Parse one operand: parenthesised expr, unary op, value, or number."""
        if self.token == (token_types.paren, "("):
            self.consume()
            self.expr_builder.left_paren()
            self.expr()
            self.expect(token_types.paren, ")")
            self.expr_builder.right_paren()
        elif self.token[0] == token_types.ident and self.token[1] in unary_operators:
            self.expr_unary_op()
            self.expr_operand()
        elif self.token[0] in [token_types.string, token_types.ident]:
            self.expr_value()
        elif self.token[0] == token_types.number:
            self.expr_number()
        else:
            raise ParseError(self.tokenizer.filename, self.tokenizer.line_number, "Unrecognised operand")

    def expr_unary_op(self):
        if self.token[1] in unary_operators:
            self.expr_builder.push_operator(UnaryOperatorNode(self.token[1]))
            self.consume()
        else:
            raise ParseError(self.tokenizer.filename, self.tokenizer.line_number, "Expected unary operator")

    def expr_bin_op(self):
        if self.token[1] in binary_operators:
            self.expr_builder.push_operator(BinaryOperatorNode(self.token[1]))
            self.consume()
        else:
            raise ParseError(self.tokenizer.filename, self.tokenizer.line_number, "Expected binary operator")

    def expr_value(self):
        """Parse a string or variable operand, with an optional ``[index]``."""
        node_type = {token_types.string: StringNode,
                     token_types.ident: VariableNode}[self.token[0]]
        self.expr_builder.push_operand(node_type(self.token[1]))
        self.consume()
        if self.token == (token_types.paren, "["):
            self.consume()
            self.expr_builder.operands[-1].append(IndexNode())
            self.expr_start()
            self.expect(token_types.paren, "]")

    def expr_number(self):
        self.expr_builder.push_operand(NumberNode(self.token[1]))
        self.consume()
+
+
class Treebuilder(object):
    """Tracks the insertion point while the parser builds the node tree."""

    def __init__(self, root):
        self.root = root
        self.node = root

    def append(self, node):
        """Attach *node* under the current node, descend into it, return it."""
        current = self.node
        current.append(node)
        self.node = node
        return node

    def pop(self):
        """Move the insertion point back to the parent; return the old node."""
        finished = self.node
        self.node = finished.parent
        return finished
+
+
class ExpressionBuilder(object):
    """Shunting-yard style builder for conditional expressions.

    Operator nodes are pushed with their precedence; a ``None`` entry on
    the operator stack marks the bottom of the current (parenthesised)
    group, so reductions never cross a paren boundary.
    """

    def __init__(self, tokenizer):
        self.operands = []
        self.operators = [None]
        self.tokenizer = tokenizer

    def finish(self):
        """Reduce everything remaining and return the root expression node."""
        while self.operators[-1] is not None:
            self.pop_operator()
        rv = self.pop_operand()
        assert self.is_empty()
        return rv

    def left_paren(self):
        # Open a group; operators inside cannot reduce past this marker.
        self.operators.append(None)

    def right_paren(self):
        while self.operators[-1] is not None:
            self.pop_operator()
            if not self.operators:
                # Pass the line *number* to ParseError; the original passed
                # self.tokenizer.line (the line's text) by mistake.
                raise ParseError(self.tokenizer.filename,
                                 self.tokenizer.line_number,
                                 "Unbalanced parens")

        assert self.operators.pop() is None

    def push_operator(self, operator):
        """Push *operator*, first reducing any tighter-binding operators."""
        assert operator is not None
        while self.precedence(self.operators[-1]) > self.precedence(operator):
            self.pop_operator()

        self.operators.append(operator)

    def pop_operator(self):
        """Reduce the top operator and its operand(s) into one expression node."""
        operator = self.operators.pop()
        if isinstance(operator, BinaryOperatorNode):
            operand_1 = self.operands.pop()
            operand_0 = self.operands.pop()
            self.operands.append(BinaryExpressionNode(operator, operand_0, operand_1))
        else:
            operand_0 = self.operands.pop()
            self.operands.append(UnaryExpressionNode(operator, operand_0))

    def push_operand(self, node):
        self.operands.append(node)

    def pop_operand(self):
        return self.operands.pop()

    def is_empty(self):
        # True when no operands remain and only group markers are stacked.
        return len(self.operands) == 0 and all(item is None for item in self.operators)

    def precedence(self, operator):
        """Precedence of *operator*; the group marker ``None`` binds loosest."""
        if operator is None:
            return 0
        return precedence(operator)
+
+
def parse(stream):
    """Parse *stream* (a string or file-like object) into a manifest node tree."""
    return Parser().parse(stream)