diff options
Diffstat (limited to 'testing/web-platform/tests/2dcontext/tools/specextract.py')
-rw-r--r-- | testing/web-platform/tests/2dcontext/tools/specextract.py | 57 |
1 files changed, 57 insertions, 0 deletions
# Recovered from the diff that adds
# testing/web-platform/tests/2dcontext/tools/specextract.py
import html5lib
import html5lib.treebuilders.dom

try:
    from urllib.parse import urljoin  # Python 3
except ImportError:
    from urlparse import urljoin  # Python 2

# Expected use:
#   curl --compressed http://www.whatwg.org/specs/web-apps/current-work/ >current-work
#   python specextract.py
#
# Generates current-work-canvas.xhtml, for use by gentest.py to create the
# annotated spec document

# Location the spec was downloaded from; relative references in the document
# are resolved against it.
SPEC_BASE_URL = 'http://www.whatwg.org/specs/web-apps/current-work/'


def extract():
    """Cut the canvas section out of the downloaded spec and save it as XHTML.

    Reads the file ``current-work`` from the current directory, parses it with
    html5lib into a DOM tree, strips <script> elements from <head>, keeps only
    the <header> and the section that starts at the <h4> whose id is
    ``the-canvas-element`` (up to, but excluding, the next <h4> sibling),
    makes <link href> and <img src> references absolute, and writes the
    result to ``current-work-canvas.xhtml``.

    Raises:
        IOError/OSError: if ``current-work`` cannot be read or the output
            file cannot be written.
        IndexError: if the document has no <head>, no <header>, or no <h4>
            with id ``the-canvas-element``.
    """
    parser = html5lib.html5parser.HTMLParser(
        tree=html5lib.treebuilders.dom.TreeBuilder)
    # Binary mode: the parser is told the encoding explicitly, so it should
    # receive raw bytes rather than already-decoded text.
    with open('current-work', 'rb') as spec_file:
        doc = parser.parse(spec_file, encoding='utf-8')

    head = doc.getElementsByTagName('head')[0]
    # Iterate over a snapshot, because removeChild mutates head.childNodes.
    # nodeName is used instead of tagName since text/comment nodes have no
    # tagName attribute.
    for n in list(head.childNodes):
        if n.nodeName == 'script':
            head.removeChild(n)

    header = doc.getElementsByTagName('header')[0]
    # doc.getElementById('the-canvas') was reported not to work here by the
    # original author, so the heading is located by scanning the <h4>s.
    thecanvas = [
        n for n in doc.getElementsByTagName('h4')
        if n.getAttribute('id') == 'the-canvas-element'
    ][0]

    # Collect the heading and every following sibling up to the next <h4>
    # (or the end of the parent, if there is no later <h4>).
    keep = [header, thecanvas]
    node = thecanvas.nextSibling
    while node is not None and node.nodeName != 'h4':
        keep.append(node)
        node = node.nextSibling

    # Identity-based lookup avoids a linear scan of `keep` per child.
    keep_ids = set(id(n) for n in keep)
    p = thecanvas.parentNode
    for n in list(p.childNodes):
        if id(n) not in keep_ids:
            p.removeChild(n)

    # Trim the header down to its first three and last four child nodes.
    # NOTE(review): the 3/-4 offsets depend on the spec's header layout at
    # the time this was written -- confirm against the current document.
    for n in header.childNodes[3:-4]:
        header.removeChild(n)

    def make_absolute(uri):
        """Resolve *uri* against the spec's base URL.

        data: URIs and already-absolute URLs are returned unchanged; paths
        starting with '/' resolve against the host root.
        """
        return urljoin(SPEC_BASE_URL, uri)

    # Fix the stylesheet, icon and image references.  Elements that lack the
    # attribute are left alone instead of gaining a bogus reference.
    for e in doc.getElementsByTagName('link'):
        if e.hasAttribute('href'):
            e.setAttribute('href', make_absolute(e.getAttribute('href')))
    for img in doc.getElementsByTagName('img'):
        if img.hasAttribute('src'):
            img.setAttribute('src', make_absolute(img.getAttribute('src')))

    # Convert to XHTML, because it's quicker to re-parse than HTML5
    root = doc.documentElement
    root.setAttribute('xmlns', 'http://www.w3.org/1999/xhtml')
    root.setAttribute('xml:lang', root.getAttribute('lang'))
    # Remove the DOCTYPE, but only if the first child really is one, so the
    # root element is never dropped by mistake.
    first = doc.firstChild
    if first is not None and first.nodeType == first.DOCUMENT_TYPE_NODE:
        doc.removeChild(first)

    # toxml(encoding=...) yields an encoded byte string, hence binary mode.
    with open('current-work-canvas.xhtml', 'wb') as out_file:
        out_file.write(doc.toxml(encoding='UTF-8'))


extract()