Diffstat (limited to 'testing/web-platform/tests/tools/lint/lint.py')
-rw-r--r-- | testing/web-platform/tests/tools/lint/lint.py | 426
1 file changed, 426 insertions, 0 deletions
diff --git a/testing/web-platform/tests/tools/lint/lint.py b/testing/web-platform/tests/tools/lint/lint.py
new file mode 100644
index 000000000..2aee3da1c
--- /dev/null
+++ b/testing/web-platform/tests/tools/lint/lint.py
@@ -0,0 +1,426 @@

from __future__ import print_function, unicode_literals

import abc
import argparse
import ast
import fnmatch
import json
import os
import re
import subprocess
import sys

from collections import defaultdict

from ..localpaths import repo_root

from manifest.sourcefile import SourceFile
from six import iteritems, itervalues
from six.moves import range

here = os.path.abspath(os.path.split(__file__)[0])

ERROR_MSG = """You must fix all errors; for details on how to fix them, see
https://github.com/w3c/web-platform-tests/blob/master/docs/lint-tool.md

However, instead of fixing a particular error, it's sometimes
OK to add a line to the lint.whitelist file in the root of the
web-platform-tests directory to make the lint tool ignore it.

For example, to make the lint tool ignore all '%s'
errors in the %s file,
you could add the following line to the lint.whitelist file.

%s:%s"""

def all_git_paths(repo_root):
    command_line = ["git", "ls-tree", "-r", "--name-only", "HEAD"]
    output = subprocess.check_output(command_line, cwd=repo_root)
    for item in output.split("\n"):
        yield item


def check_path_length(repo_root, path):
    if len(path) + 1 > 150:
        # Error tuples are (error type, description, path, line number)
        # everywhere else, so include the path element here too.
        return [("PATH LENGTH", "/%s longer than maximum path length (%d > 150)" % (path, len(path) + 1), path, None)]
    return []


def parse_whitelist(f):
    """
    Parse the whitelist file given by `f`, and return the parsed structure.
    """

    data = defaultdict(lambda:defaultdict(set))
    ignored_files = set()

    for line in f:
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        parts = [item.strip() for item in line.split(":")]
        if len(parts) == 2:
            parts.append(None)
        else:
            parts[-1] = int(parts[-1])

        error_type, file_match, line_number = parts

        if error_type == "*":
            ignored_files.add(file_match)
        else:
            data[file_match][error_type].add(line_number)

    return data, ignored_files
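The whitelist format is only implied by the parser above, so a short sketch may help. The rules and paths below are hypothetical, and this assumes parse_whitelist() is in scope:

import io

# Hypothetical whitelist contents; the three accepted line shapes are
# "ERROR TYPE: path-glob"     (ignore this error anywhere in matching files),
# "ERROR TYPE: path-glob: N"  (ignore it on line N only), and
# "*: path-glob"              (ignore every error in matching files).
example = io.StringIO(u"""
# Comments and blank lines are skipped.
TRAILING WHITESPACE: example/flaky.html
CONSOLE: example/debug.js: 12
*: example/vendor/*
""")

data, ignored_files = parse_whitelist(example)
print(sorted(ignored_files))           # ['example/vendor/*']
print(dict(data["example/debug.js"]))  # {'CONSOLE': {12}}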
+ """ + + whitelisted = [False for item in range(len(errors))] + + for file_match, whitelist_errors in iteritems(data): + if fnmatch.fnmatch(path, file_match): + for i, (error_type, msg, path, line) in enumerate(errors): + if error_type in whitelist_errors: + allowed_lines = whitelist_errors[error_type] + if None in allowed_lines or line in allowed_lines: + whitelisted[i] = True + + return [item for i, item in enumerate(errors) if not whitelisted[i]] + +class Regexp(object): + pattern = None + file_extensions = None + error = None + _re = None + + def __init__(self): + self._re = re.compile(self.pattern) + + def applies(self, path): + return (self.file_extensions is None or + os.path.splitext(path)[1] in self.file_extensions) + + def search(self, line): + return self._re.search(line) + +class TrailingWhitespaceRegexp(Regexp): + pattern = b"[ \t\f\v]$" + error = "TRAILING WHITESPACE" + description = "Whitespace at EOL" + +class TabsRegexp(Regexp): + pattern = b"^\t" + error = "INDENT TABS" + description = "Tabs used for indentation" + +class CRRegexp(Regexp): + pattern = b"\r$" + error = "CR AT EOL" + description = "CR character in line separator" + +class W3CTestOrgRegexp(Regexp): + pattern = b"w3c\-test\.org" + error = "W3C-TEST.ORG" + description = "External w3c-test.org domain used" + +class Webidl2Regexp(Regexp): + pattern = b"webidl2\.js" + error = "WEBIDL2.JS" + description = "Legacy webidl2.js script used" + +class ConsoleRegexp(Regexp): + pattern = b"console\.[a-zA-Z]+\s*\(" + error = "CONSOLE" + file_extensions = [".html", ".htm", ".js", ".xht", ".xhtml", ".svg"] + description = "Console logging API used" + +class PrintRegexp(Regexp): + pattern = b"print(?:\s|\s*\()" + error = "PRINT STATEMENT" + file_extensions = [".py"] + description = "Print function used" + +regexps = [item() for item in + [TrailingWhitespaceRegexp, + TabsRegexp, + CRRegexp, + W3CTestOrgRegexp, + Webidl2Regexp, + ConsoleRegexp, + PrintRegexp]] + +def check_regexp_line(repo_root, path, f): + errors = [] + + applicable_regexps = [regexp for regexp in regexps if regexp.applies(path)] + + for i, line in enumerate(f): + for regexp in applicable_regexps: + if regexp.search(line): + errors.append((regexp.error, regexp.description, path, i+1)) + + return errors + +def check_parsed(repo_root, path, f): + source_file = SourceFile(repo_root, path, "/", contents=f.read()) + + errors = [] + + if source_file.name_is_non_test or source_file.name_is_manual: + return [] + + if source_file.markup_type is None: + return [] + + if source_file.root is None: + return [("PARSE-FAILED", "Unable to parse file", path, None)] + + if len(source_file.timeout_nodes) > 1: + errors.append(("MULTIPLE-TIMEOUT", "More than one meta name='timeout'", path, None)) + + for timeout_node in source_file.timeout_nodes: + timeout_value = timeout_node.attrib.get("content", "").lower() + if timeout_value != "long": + errors.append(("INVALID-TIMEOUT", "Invalid timeout value %s" % timeout_value, path, None)) + + if source_file.testharness_nodes: + if len(source_file.testharness_nodes) > 1: + errors.append(("MULTIPLE-TESTHARNESS", + "More than one <script src='/resources/testharness.js'>", path, None)) + + testharnessreport_nodes = source_file.root.findall(".//{http://www.w3.org/1999/xhtml}script[@src='/resources/testharnessreport.js']") + if not testharnessreport_nodes: + errors.append(("MISSING-TESTHARNESSREPORT", + "Missing <script src='/resources/testharnessreport.js'>", path, None)) + else: + if len(testharnessreport_nodes) > 1: + 
errors.append(("MULTIPLE-TESTHARNESSREPORT", + "More than one <script src='/resources/testharnessreport.js'>", path, None)) + + testharnesscss_nodes = source_file.root.findall(".//{http://www.w3.org/1999/xhtml}link[@href='/resources/testharness.css']") + if testharnesscss_nodes: + errors.append(("PRESENT-TESTHARNESSCSS", + "Explicit link to testharness.css present", path, None)) + + for element in source_file.variant_nodes: + if "content" not in element.attrib: + errors.append(("VARIANT-MISSING", + "<meta name=variant> missing 'content' attribute", path, None)) + else: + variant = element.attrib["content"] + if variant != "" and variant[0] not in ("?", "#"): + errors.append(("MALFORMED-VARIANT", + "%s <meta name=variant> 'content' attribute must be the empty string or start with '?' or '#'" % path, None)) + + seen_elements = {"timeout": False, + "testharness": False, + "testharnessreport": False} + required_elements = [key for key, value in {"testharness": True, + "testharnessreport": len(testharnessreport_nodes) > 0, + "timeout": len(source_file.timeout_nodes) > 0}.items() + if value] + + for elem in source_file.root.iter(): + if source_file.timeout_nodes and elem == source_file.timeout_nodes[0]: + seen_elements["timeout"] = True + if seen_elements["testharness"]: + errors.append(("LATE-TIMEOUT", + "<meta name=timeout> seen after testharness.js script", path, None)) + + elif elem == source_file.testharness_nodes[0]: + seen_elements["testharness"] = True + + elif testharnessreport_nodes and elem == testharnessreport_nodes[0]: + seen_elements["testharnessreport"] = True + if not seen_elements["testharness"]: + errors.append(("EARLY-TESTHARNESSREPORT", + "testharnessreport.js script seen before testharness.js script", path, None)) + + if all(seen_elements[name] for name in required_elements): + break + + + for element in source_file.root.findall(".//{http://www.w3.org/1999/xhtml}script[@src]"): + src = element.attrib["src"] + for name in ["testharness", "testharnessreport"]: + if "%s.js" % name == src or ("/%s.js" % name in src and src != "/resources/%s.js" % name): + errors.append(("%s-PATH" % name.upper(), "%s.js script seen with incorrect path" % name, path, None)) + + + return errors + +class ASTCheck(object): + __metaclass__ = abc.ABCMeta + error = None + description = None + + @abc.abstractmethod + def check(self, root): + pass + +class OpenModeCheck(ASTCheck): + error = "OPEN-NO-MODE" + description = "File opened without providing an explicit mode (note: binary files must be read with 'b' in the mode flags)" + + def check(self, root): + errors = [] + for node in ast.walk(root): + if isinstance(node, ast.Call): + if hasattr(node.func, "id") and node.func.id in ("open", "file"): + if (len(node.args) < 2 and + all(item.arg != "mode" for item in node.keywords)): + errors.append(node.lineno) + return errors + +ast_checkers = [item() for item in [OpenModeCheck]] + +def check_python_ast(repo_root, path, f): + if not path.endswith(".py"): + return [] + + try: + root = ast.parse(f.read()) + except SyntaxError as e: + return [("PARSE-FAILED", "Unable to parse file", path, e.lineno)] + + errors = [] + for checker in ast_checkers: + for lineno in checker.check(root): + errors.append((checker.error, checker.description, path, lineno)) + return errors + + +def check_path(repo_root, path): + """ + Runs lints that check the file path. 
def check_path(repo_root, path):
    """
    Runs lints that check the file path.

    :param repo_root: the repository root
    :param path: the path of the file within the repository
    :returns: a list of errors found in ``path``
    """

    errors = []
    for path_fn in path_lints:
        errors.extend(path_fn(repo_root, path))
    return errors


def check_file_contents(repo_root, path, f):
    """
    Runs lints that check the file contents.

    :param repo_root: the repository root
    :param path: the path of the file within the repository
    :param f: a file-like object with the file contents
    :returns: a list of errors found in ``f``
    """

    errors = []
    for file_fn in file_lints:
        errors.extend(file_fn(repo_root, path, f))
        f.seek(0)
    return errors


def output_errors_text(errors):
    for error_type, description, path, line_number in errors:
        pos_string = path
        if line_number:
            pos_string += " %s" % line_number
        print("%s: %s %s" % (error_type, pos_string, description))

def output_errors_json(errors):
    for error_type, error, path, line_number in errors:
        print(json.dumps({"path": path, "lineno": line_number,
                          "rule": error_type, "message": error}))

def output_error_count(error_count):
    if not error_count:
        return

    by_type = " ".join("%s: %d" % item for item in error_count.items())
    count = sum(error_count.values())
    if count == 1:
        print("There was 1 error (%s)" % (by_type,))
    else:
        print("There were %d errors (%s)" % (count, by_type))

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("paths", nargs="*",
                        help="List of paths to lint")
    parser.add_argument("--json", action="store_true",
                        help="Output machine-readable JSON format")
    return parser.parse_args()

def main():
    args = parse_args()
    paths = args.paths if args.paths else all_git_paths(repo_root)
    return lint(repo_root, paths, args.json)

def lint(repo_root, paths, output_json):
    error_count = defaultdict(int)
    last = None

    with open(os.path.join(repo_root, "lint.whitelist")) as f:
        whitelist, ignored_files = parse_whitelist(f)

    if output_json:
        output_errors = output_errors_json
    else:
        output_errors = output_errors_text

    def process_errors(path, errors):
        """
        Filters and prints the errors, and updates the ``error_count`` object.

        :param path: the path of the file that contains the errors
        :param errors: a list of error tuples (error type, message, path, line number)
        :returns: ``None`` if there were no errors, or
                  a tuple of the error type and the path otherwise
        """

        errors = filter_whitelist_errors(whitelist, path, errors)

        if not errors:
            return None

        output_errors(errors)
        for error_type, error, path, line in errors:
            error_count[error_type] += 1

        return (errors[-1][0], path)

    for path in paths:
        abs_path = os.path.join(repo_root, path)
        if not os.path.exists(abs_path):
            continue

        if any(fnmatch.fnmatch(path, file_match) for file_match in ignored_files):
            continue

        errors = check_path(repo_root, path)
        last = process_errors(path, errors) or last

        if not os.path.isdir(abs_path):
            with open(abs_path, 'rb') as f:
                errors = check_file_contents(repo_root, path, f)
                last = process_errors(path, errors) or last

    if not output_json:
        output_error_count(error_count)
        if error_count:
            print(ERROR_MSG % (last[0], last[1], last[0], last[1]))
    return sum(itervalues(error_count))

path_lints = [check_path_length]
file_lints = [check_regexp_line, check_parsed, check_python_ast]

if __name__ == "__main__":
    error_count = main()
    if error_count > 0:
        sys.exit(1)
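As a usage sketch, the content lints can also be driven directly against an in-memory file, bypassing git and the whitelist. The path and contents here are hypothetical, and this assumes the module's imports (notably manifest.sourcefile) resolve:

import io

# Run only the file-content lints on one in-memory file, the same way
# lint() does for each path it visits.
contents = io.BytesIO(b"console.log('hi');\t\n")
for error_type, description, path, line in check_file_contents("/repo", "tests/example.html", contents):
    print("%s: %s line %s -- %s" % (error_type, path, line, description))
# Expect TRAILING WHITESPACE and CONSOLE errors, both on line 1.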