1 files changed, 57 insertions, 0 deletions
diff --git a/testing/web-platform/tests/2dcontext/tools/specextract.py b/testing/web-platform/tests/2dcontext/tools/specextract.py
new file mode 100644
index 000000000..042c0bd84
--- /dev/null
+++ b/testing/web-platform/tests/2dcontext/tools/specextract.py
@@ -0,0 +1,57 @@
+import html5lib
+import html5lib.treebuilders.dom
+
+# Expected use:
+#   curl --compressed http://www.whatwg.org/specs/web-apps/current-work/ >current-work
+#   python specextract.py
+#
+# Generates current-work-canvas.xhtml, for use by gentest.py to create the annotated spec document
+
+def extract():
+    parser = html5lib.html5parser.HTMLParser(tree=html5lib.treebuilders.dom.TreeBuilder)
+    doc = parser.parse(open('current-work', "r"), encoding='utf-8')
+
+    head = doc.getElementsByTagName('head')[0]
+    for n in head.childNodes:
+        if n.tagName == 'script':
+            head.removeChild(n)
+
+    header = doc.getElementsByTagName('header')[0]
+    #thecanvas = doc.getElementById('the-canvas') # doesn't work (?!)
+    thecanvas = [ n for n in doc.getElementsByTagName('h4') if n.getAttribute('id') == 'the-canvas-element' ][0]
+
+    keep = [header, thecanvas]
+    node = thecanvas.nextSibling
+    while node.nodeName != 'h4':
+        keep.append(node)
+        node = node.nextSibling
+    p = thecanvas.parentNode
+    for n in p.childNodes[:]:
+        if n not in keep:
+            p.removeChild(n)
+
+    for n in header.childNodes[3:-4]:
+        header.removeChild(n)
+
+    def make_absolute(uri):
+        if uri.startswith('data:'):
+            return uri
+        elif uri[0] == '/':
+            return 'http://www.whatwg.org' + uri
+        else:
+            return 'http://www.whatwg.org/specs/web-apps/current-work/' + uri
+
+    # Fix the stylesheet, icon and image references
+    for e in doc.getElementsByTagName('link'):
+        e.setAttribute('href', make_absolute(e.getAttribute('href')))
+    for img in doc.getElementsByTagName('img'):
+        img.setAttribute('src', make_absolute(img.getAttribute('src')))
+
+    # Convert to XHTML, because it's quicker to re-parse than HTML5
+    doc.documentElement.setAttribute('xmlns', 'http://www.w3.org/1999/xhtml')
+    doc.documentElement.setAttribute('xml:lang', doc.documentElement.getAttribute('lang'))
+    doc.removeChild(doc.firstChild) # remove the DOCTYPE
+
+    open('current-work-canvas.xhtml', 'w').write(doc.toxml(encoding = 'UTF-8'))
+
+extract()