diff options
author | Matt A. Tobin <email@mattatobin.com> | 2020-01-15 14:56:04 -0500 |
---|---|---|
committer | Matt A. Tobin <email@mattatobin.com> | 2020-01-15 14:56:04 -0500 |
commit | 6168dbe21f5f83b906e562ea0ab232d499b275a6 (patch) | |
tree | 658a4b27554c85ebcaad655fc83f2c2bb99e8e80 /parser/html/java/htmlparser/gwt-src/nu | |
parent | 09314667a692fedff8564fc347c8a3663474faa6 (diff) | |
download | UXP-6168dbe21f5f83b906e562ea0ab232d499b275a6.tar UXP-6168dbe21f5f83b906e562ea0ab232d499b275a6.tar.gz UXP-6168dbe21f5f83b906e562ea0ab232d499b275a6.tar.lz UXP-6168dbe21f5f83b906e562ea0ab232d499b275a6.tar.xz UXP-6168dbe21f5f83b906e562ea0ab232d499b275a6.zip |
Add java htmlparser sources that match the original 52-level state
https://hg.mozilla.org/projects/htmlparser/
Commit: abe62ab2a9b69ccb3b5d8a231ec1ae11154c571d
Diffstat (limited to 'parser/html/java/htmlparser/gwt-src/nu')
8 files changed, 1139 insertions, 0 deletions
diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml new file mode 100644 index 000000000..1eab09c21 --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml @@ -0,0 +1,12 @@ +<module> + <inherits name="com.google.gwt.core.Core"/> + <inherits name="com.google.gwt.user.User"/> + <super-source path="translatable"/> + <source path="annotation"/> + <source path="common"/> + <source path="impl"/> + <source path="gwt"/> + <set-property name="user.agent" value="gecko1_8"/> + <entry-point class="nu.validator.htmlparser.gwt.HtmlParserModule"/> + <add-linker name="sso"/> +</module> diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java new file mode 100644 index 000000000..29ef2a43a --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java @@ -0,0 +1,477 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2009 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.gwt; + +import java.util.LinkedList; + +import nu.validator.htmlparser.common.DocumentMode; +import nu.validator.htmlparser.impl.CoalescingTreeBuilder; +import nu.validator.htmlparser.impl.HtmlAttributes; + +import org.xml.sax.SAXException; + +import com.google.gwt.core.client.JavaScriptException; +import com.google.gwt.core.client.JavaScriptObject; + +class BrowserTreeBuilder extends CoalescingTreeBuilder<JavaScriptObject> { + + private JavaScriptObject document; + + private JavaScriptObject script; + + private JavaScriptObject placeholder; + + private boolean readyToRun; + + private final LinkedList<ScriptHolder> scriptStack = new LinkedList<ScriptHolder>(); + + private class ScriptHolder { + private final JavaScriptObject script; + + private final JavaScriptObject placeholder; + + /** + * @param script + * @param placeholder + */ + public ScriptHolder(JavaScriptObject script, + JavaScriptObject placeholder) { + this.script = script; + this.placeholder = placeholder; + } + + /** + * Returns the script. + * + * @return the script + */ + public JavaScriptObject getScript() { + return script; + } + + /** + * Returns the placeholder. + * + * @return the placeholder + */ + public JavaScriptObject getPlaceholder() { + return placeholder; + } + } + + protected BrowserTreeBuilder(JavaScriptObject document) { + super(); + this.document = document; + installExplorerCreateElementNS(document); + } + + private static native boolean installExplorerCreateElementNS( + JavaScriptObject doc) /*-{ + if (!doc.createElementNS) { + doc.createElementNS = function (uri, local) { + if ("http://www.w3.org/1999/xhtml" == uri) { + return doc.createElement(local); + } else if ("http://www.w3.org/1998/Math/MathML" == uri) { + if (!doc.mathplayerinitialized) { + var obj = document.createElement("object"); + obj.setAttribute("id", "mathplayer"); + obj.setAttribute("classid", "clsid:32F66A20-7614-11D4-BD11-00104BD3F987"); + document.getElementsByTagName("head")[0].appendChild(obj); + document.namespaces.add("m", "http://www.w3.org/1998/Math/MathML", "#mathplayer"); + doc.mathplayerinitialized = true; + } + return doc.createElement("m:" + local); + } else if ("http://www.w3.org/2000/svg" == uri) { + if (!doc.renesisinitialized) { + var obj = document.createElement("object"); + obj.setAttribute("id", "renesis"); + obj.setAttribute("classid", "clsid:AC159093-1683-4BA2-9DCF-0C350141D7F2"); + document.getElementsByTagName("head")[0].appendChild(obj); + document.namespaces.add("s", "http://www.w3.org/2000/svg", "#renesis"); + doc.renesisinitialized = true; + } + return doc.createElement("s:" + local); + } else { + // throw + } + } + } + }-*/; + + private static native boolean hasAttributeNS(JavaScriptObject element, + String uri, String localName) /*-{ + return element.hasAttributeNS(uri, localName); + }-*/; + + private static native void setAttributeNS(JavaScriptObject element, + String uri, String localName, String value) /*-{ + element.setAttributeNS(uri, localName, value); + }-*/; + + @Override protected void addAttributesToElement(JavaScriptObject element, + HtmlAttributes attributes) throws SAXException { + try { + for (int i = 0; i < attributes.getLength(); i++) { + String localName = attributes.getLocalNameNoBoundsCheck(i); + String uri = attributes.getURINoBoundsCheck(i); + if (!hasAttributeNS(element, uri, localName)) { + setAttributeNS(element, uri, localName, + attributes.getValueNoBoundsCheck(i)); + } + } + } catch (JavaScriptException e) { + fatal(e); + } + } + + private static native void appendChild(JavaScriptObject parent, + JavaScriptObject child) /*-{ + parent.appendChild(child); + }-*/; + + private static native JavaScriptObject createTextNode(JavaScriptObject doc, + String text) /*-{ + return doc.createTextNode(text); + }-*/; + + private static native JavaScriptObject getLastChild(JavaScriptObject node) /*-{ + return node.lastChild; + }-*/; + + private static native void extendTextNode(JavaScriptObject node, String text) /*-{ + node.data += text; + }-*/; + + @Override protected void appendCharacters(JavaScriptObject parent, + String text) throws SAXException { + try { + if (parent == placeholder) { + appendChild(script, createTextNode(document, text)); + + } + JavaScriptObject lastChild = getLastChild(parent); + if (lastChild != null && getNodeType(lastChild) == 3) { + extendTextNode(lastChild, text); + return; + } + appendChild(parent, createTextNode(document, text)); + } catch (JavaScriptException e) { + fatal(e); + } + } + + private static native boolean hasChildNodes(JavaScriptObject element) /*-{ + return element.hasChildNodes(); + }-*/; + + private static native JavaScriptObject getFirstChild( + JavaScriptObject element) /*-{ + return element.firstChild; + }-*/; + + @Override protected void appendChildrenToNewParent( + JavaScriptObject oldParent, JavaScriptObject newParent) + throws SAXException { + try { + while (hasChildNodes(oldParent)) { + appendChild(newParent, getFirstChild(oldParent)); + } + } catch (JavaScriptException e) { + fatal(e); + } + } + + private static native JavaScriptObject createComment(JavaScriptObject doc, + String text) /*-{ + return doc.createComment(text); + }-*/; + + @Override protected void appendComment(JavaScriptObject parent, + String comment) throws SAXException { + try { + if (parent == placeholder) { + appendChild(script, createComment(document, comment)); + } + appendChild(parent, createComment(document, comment)); + } catch (JavaScriptException e) { + fatal(e); + } + } + + @Override protected void appendCommentToDocument(String comment) + throws SAXException { + try { + appendChild(document, createComment(document, comment)); + } catch (JavaScriptException e) { + fatal(e); + } + } + + private static native JavaScriptObject createElementNS( + JavaScriptObject doc, String ns, String local) /*-{ + return doc.createElementNS(ns, local); + }-*/; + + @Override protected JavaScriptObject createElement(String ns, String name, + HtmlAttributes attributes) throws SAXException { + try { + JavaScriptObject rv = createElementNS(document, ns, name); + for (int i = 0; i < attributes.getLength(); i++) { + setAttributeNS(rv, attributes.getURINoBoundsCheck(i), + attributes.getLocalNameNoBoundsCheck(i), + attributes.getValueNoBoundsCheck(i)); + } + + if ("script" == name) { + if (placeholder != null) { + scriptStack.addLast(new ScriptHolder(script, placeholder)); + } + script = rv; + placeholder = createElementNS(document, + "http://n.validator.nu/placeholder/", "script"); + rv = placeholder; + for (int i = 0; i < attributes.getLength(); i++) { + setAttributeNS(rv, attributes.getURINoBoundsCheck(i), + attributes.getLocalNameNoBoundsCheck(i), + attributes.getValueNoBoundsCheck(i)); + } + } + + return rv; + } catch (JavaScriptException e) { + fatal(e); + throw new RuntimeException("Unreachable"); + } + } + + @Override protected JavaScriptObject createHtmlElementSetAsRoot( + HtmlAttributes attributes) throws SAXException { + try { + JavaScriptObject rv = createElementNS(document, + "http://www.w3.org/1999/xhtml", "html"); + for (int i = 0; i < attributes.getLength(); i++) { + setAttributeNS(rv, attributes.getURINoBoundsCheck(i), + attributes.getLocalNameNoBoundsCheck(i), + attributes.getValueNoBoundsCheck(i)); + } + appendChild(document, rv); + return rv; + } catch (JavaScriptException e) { + fatal(e); + throw new RuntimeException("Unreachable"); + } + } + + private static native JavaScriptObject getParentNode( + JavaScriptObject element) /*-{ + return element.parentNode; + }-*/; + + @Override protected void appendElement(JavaScriptObject child, + JavaScriptObject newParent) throws SAXException { + try { + if (newParent == placeholder) { + appendChild(script, cloneNodeDeep(child)); + } + appendChild(newParent, child); + } catch (JavaScriptException e) { + fatal(e); + } + } + + @Override protected boolean hasChildren(JavaScriptObject element) + throws SAXException { + try { + return hasChildNodes(element); + } catch (JavaScriptException e) { + fatal(e); + throw new RuntimeException("Unreachable"); + } + } + + private static native void insertBeforeNative(JavaScriptObject parent, + JavaScriptObject child, JavaScriptObject sibling) /*-{ + parent.insertBefore(child, sibling); + }-*/; + + private static native int getNodeType(JavaScriptObject node) /*-{ + return node.nodeType; + }-*/; + + private static native JavaScriptObject cloneNodeDeep(JavaScriptObject node) /*-{ + return node.cloneNode(true); + }-*/; + + /** + * Returns the document. + * + * @return the document + */ + JavaScriptObject getDocument() { + JavaScriptObject rv = document; + document = null; + return rv; + } + + private static native JavaScriptObject createDocumentFragment( + JavaScriptObject doc) /*-{ + return doc.createDocumentFragment(); + }-*/; + + JavaScriptObject getDocumentFragment() { + JavaScriptObject rv = createDocumentFragment(document); + JavaScriptObject rootElt = getFirstChild(document); + while (hasChildNodes(rootElt)) { + appendChild(rv, getFirstChild(rootElt)); + } + document = null; + return rv; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#createJavaScriptObject(String, + * java.lang.String, org.xml.sax.Attributes, java.lang.Object) + */ + @Override protected JavaScriptObject createElement(String ns, String name, + HtmlAttributes attributes, JavaScriptObject form) + throws SAXException { + try { + JavaScriptObject rv = createElement(ns, name, attributes); + // rv.setUserData("nu.validator.form-pointer", form, null); + return rv; + } catch (JavaScriptException e) { + fatal(e); + return null; + } + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#start() + */ + @Override protected void start(boolean fragment) throws SAXException { + script = null; + placeholder = null; + readyToRun = false; + } + + protected void documentMode(DocumentMode mode, String publicIdentifier, + String systemIdentifier, boolean html4SpecificAdditionalErrorChecks) + throws SAXException { + // document.setUserData("nu.validator.document-mode", mode, null); + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#elementPopped(java.lang.String, + * java.lang.String, java.lang.Object) + */ + @Override protected void elementPopped(String ns, String name, + JavaScriptObject node) throws SAXException { + if (node == placeholder) { + readyToRun = true; + requestSuspension(); + } + } + + private static native void replace(JavaScriptObject oldNode, + JavaScriptObject newNode) /*-{ + oldNode.parentNode.replaceChild(newNode, oldNode); + }-*/; + + private static native JavaScriptObject getPreviousSibling(JavaScriptObject node) /*-{ + return node.previousSibling; + }-*/; + + void maybeRunScript() { + if (readyToRun) { + readyToRun = false; + replace(placeholder, script); + if (scriptStack.isEmpty()) { + script = null; + placeholder = null; + } else { + ScriptHolder scriptHolder = scriptStack.removeLast(); + script = scriptHolder.getScript(); + placeholder = scriptHolder.getPlaceholder(); + } + } + } + + @Override protected void insertFosterParentedCharacters(String text, + JavaScriptObject table, JavaScriptObject stackParent) + throws SAXException { + try { + JavaScriptObject parent = getParentNode(table); + if (parent != null) { // always an element if not null + JavaScriptObject previousSibling = getPreviousSibling(table); + if (previousSibling != null + && getNodeType(previousSibling) == 3) { + extendTextNode(previousSibling, text); + return; + } + insertBeforeNative(parent, createTextNode(document, text), table); + return; + } + JavaScriptObject lastChild = getLastChild(stackParent); + if (lastChild != null && getNodeType(lastChild) == 3) { + extendTextNode(lastChild, text); + return; + } + appendChild(stackParent, createTextNode(document, text)); + } catch (JavaScriptException e) { + fatal(e); + } + } + + @Override protected void insertFosterParentedChild(JavaScriptObject child, + JavaScriptObject table, JavaScriptObject stackParent) + throws SAXException { + JavaScriptObject parent = getParentNode(table); + try { + if (parent != null && getNodeType(parent) == 1) { + insertBeforeNative(parent, child, table); + } else { + appendChild(stackParent, child); + } + } catch (JavaScriptException e) { + fatal(e); + } + } + + private static native void removeChild(JavaScriptObject parent, + JavaScriptObject child) /*-{ + parent.removeChild(child); + }-*/; + + @Override protected void detachFromParent(JavaScriptObject element) + throws SAXException { + try { + JavaScriptObject parent = getParentNode(element); + if (parent != null) { + removeChild(parent, element); + } + } catch (JavaScriptException e) { + fatal(e); + } + } +} diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java new file mode 100644 index 000000000..1d71cdfd6 --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2007-2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.gwt; + +import java.util.LinkedList; + +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.impl.ErrorReportingTokenizer; +import nu.validator.htmlparser.impl.Tokenizer; +import nu.validator.htmlparser.impl.UTF16Buffer; + +import org.xml.sax.ErrorHandler; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +import com.google.gwt.core.client.JavaScriptObject; +import com.google.gwt.user.client.Timer; + +/** + * This class implements an HTML5 parser that exposes data through the DOM + * interface. + * + * <p>By default, when using the constructor without arguments, the + * this parser treats XML 1.0-incompatible infosets as fatal errors. + * This corresponds to + * <code>FATAL</code> as the general XML violation policy. To make the parser + * support non-conforming HTML fully per the HTML 5 spec while on the other + * hand potentially violating the DOM API contract, set the general XML + * violation policy to <code>ALLOW</code>. This does not work with a standard + * DOM implementation. Handling all input without fatal errors and without + * violating the DOM API contract is possible by setting + * the general XML violation policy to <code>ALTER_INFOSET</code>. <em>This + * makes the parser non-conforming</em> but is probably the most useful + * setting for most applications. + * + * <p>The doctype is not represented in the tree. + * + * <p>The document mode is represented as user data <code>DocumentMode</code> + * object with the key <code>nu.validator.document-mode</code> on the document + * node. + * + * <p>The form pointer is also stored as user data with the key + * <code>nu.validator.form-pointer</code>. + * + * @version $Id: HtmlDocumentBuilder.java 255 2008-05-29 08:57:38Z hsivonen $ + * @author hsivonen + */ +public class HtmlParser { + + private static final int CHUNK_SIZE = 512; + + private final Tokenizer tokenizer; + + private final BrowserTreeBuilder domTreeBuilder; + + private final StringBuilder documentWriteBuffer = new StringBuilder(); + + private ErrorHandler errorHandler; + + private UTF16Buffer stream; + + private int streamLength; + + private boolean lastWasCR; + + private boolean ending; + + private ParseEndListener parseEndListener; + + private final LinkedList<UTF16Buffer> bufferStack = new LinkedList<UTF16Buffer>(); + + /** + * Instantiates the parser + * + * @param implementation + * the DOM implementation + * @param xmlPolicy the policy + */ + public HtmlParser(JavaScriptObject document) { + this.domTreeBuilder = new BrowserTreeBuilder(document); + this.tokenizer = new ErrorReportingTokenizer(domTreeBuilder); + this.domTreeBuilder.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET); + this.tokenizer.setCommentPolicy(XmlViolationPolicy.ALTER_INFOSET); + this.tokenizer.setContentNonXmlCharPolicy(XmlViolationPolicy.ALTER_INFOSET); + this.tokenizer.setContentSpacePolicy(XmlViolationPolicy.ALTER_INFOSET); + this.tokenizer.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET); + this.tokenizer.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET); + } + + /** + * Parses a document from a SAX <code>InputSource</code>. + * @param is the source + * @return the doc + * @see javax.xml.parsers.DocumentBuilder#parse(org.xml.sax.InputSource) + */ + public void parse(String source, ParseEndListener callback) throws SAXException { + parseEndListener = callback; + domTreeBuilder.setFragmentContext(null); + tokenize(source, null); + } + + /** + * @param is + * @throws SAXException + * @throws IOException + * @throws MalformedURLException + */ + private void tokenize(String source, String context) throws SAXException { + lastWasCR = false; + ending = false; + documentWriteBuffer.setLength(0); + streamLength = source.length(); + stream = new UTF16Buffer(source.toCharArray(), 0, + (streamLength < CHUNK_SIZE ? streamLength : CHUNK_SIZE)); + bufferStack.clear(); + push(stream); + domTreeBuilder.setFragmentContext(context == null ? null : context.intern()); + tokenizer.start(); + pump(); + } + + private void pump() throws SAXException { + if (ending) { + tokenizer.end(); + domTreeBuilder.getDocument(); // drops the internal reference + parseEndListener.parseComplete(); + // Don't schedule timeout + return; + } + + int docWriteLen = documentWriteBuffer.length(); + if (docWriteLen > 0) { + char[] newBuf = new char[docWriteLen]; + documentWriteBuffer.getChars(0, docWriteLen, newBuf, 0); + push(new UTF16Buffer(newBuf, 0, docWriteLen)); + documentWriteBuffer.setLength(0); + } + + for (;;) { + UTF16Buffer buffer = peek(); + if (!buffer.hasMore()) { + if (buffer == stream) { + if (buffer.getEnd() == streamLength) { + // Stop parsing + tokenizer.eof(); + ending = true; + break; + } else { + int newEnd = buffer.getStart() + CHUNK_SIZE; + buffer.setEnd(newEnd < streamLength ? newEnd + : streamLength); + continue; + } + } else { + pop(); + continue; + } + } + // now we have a non-empty buffer + buffer.adjust(lastWasCR); + lastWasCR = false; + if (buffer.hasMore()) { + lastWasCR = tokenizer.tokenizeBuffer(buffer); + domTreeBuilder.maybeRunScript(); + break; + } else { + continue; + } + } + + // schedule + Timer timer = new Timer() { + + @Override public void run() { + try { + pump(); + } catch (SAXException e) { + ending = true; + if (errorHandler != null) { + try { + errorHandler.fatalError(new SAXParseException( + e.getMessage(), null, null, -1, -1, e)); + } catch (SAXException e1) { + } + } + } + } + + }; + timer.schedule(1); + } + + private void push(UTF16Buffer buffer) { + bufferStack.addLast(buffer); + } + + private UTF16Buffer peek() { + return bufferStack.getLast(); + } + + private void pop() { + bufferStack.removeLast(); + } + + public void documentWrite(String text) throws SAXException { + UTF16Buffer buffer = new UTF16Buffer(text.toCharArray(), 0, text.length()); + while (buffer.hasMore()) { + buffer.adjust(lastWasCR); + lastWasCR = false; + if (buffer.hasMore()) { + lastWasCR = tokenizer.tokenizeBuffer(buffer); + domTreeBuilder.maybeRunScript(); + } + } + } + + /** + * @see javax.xml.parsers.DocumentBuilder#setErrorHandler(org.xml.sax.ErrorHandler) + */ + public void setErrorHandler(ErrorHandler errorHandler) { + this.errorHandler = errorHandler; + domTreeBuilder.setErrorHandler(errorHandler); + tokenizer.setErrorHandler(errorHandler); + } + + /** + * Sets whether comment nodes appear in the tree. + * @param ignoreComments <code>true</code> to ignore comments + * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean) + */ + public void setIgnoringComments(boolean ignoreComments) { + domTreeBuilder.setIgnoringComments(ignoreComments); + } + + /** + * Sets whether the parser considers scripting to be enabled for noscript treatment. + * @param scriptingEnabled <code>true</code> to enable + * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean) + */ + public void setScriptingEnabled(boolean scriptingEnabled) { + domTreeBuilder.setScriptingEnabled(scriptingEnabled); + } + +} diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java new file mode 100644 index 000000000..255a02d13 --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.gwt; + +import org.xml.sax.SAXException; + +import com.google.gwt.core.client.EntryPoint; +import com.google.gwt.core.client.JavaScriptObject; + +public class HtmlParserModule implements EntryPoint { + + private static native void zapChildren(JavaScriptObject node) /*-{ + while (node.hasChildNodes()) { + node.removeChild(node.lastChild); + } + }-*/; + + private static native void installDocWrite(JavaScriptObject doc, HtmlParser parser) /*-{ + doc.write = function() { + if (arguments.length == 0) { + return; + } + var text = arguments[0]; + for (var i = 1; i < arguments.length; i++) { + text += arguments[i]; + } + parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)(text); + } + doc.writeln = function() { + if (arguments.length == 0) { + parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)("\n"); + return; + } + var text = arguments[0]; + for (var i = 1; i < arguments.length; i++) { + text += arguments[i]; + } + text += "\n"; + parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)(text); + } + }-*/; + + @SuppressWarnings("unused") + private static void parseHtmlDocument(String source, JavaScriptObject document, JavaScriptObject readyCallback, JavaScriptObject errorHandler) throws SAXException { + if (readyCallback == null) { + readyCallback = JavaScriptObject.createFunction(); + } + zapChildren(document); + HtmlParser parser = new HtmlParser(document); + parser.setScriptingEnabled(true); + // XXX error handler + + installDocWrite(document, parser); + + parser.parse(source, new ParseEndListener(readyCallback)); + } + + private static native void exportEntryPoints() /*-{ + $wnd.parseHtmlDocument = @nu.validator.htmlparser.gwt.HtmlParserModule::parseHtmlDocument(Ljava/lang/String;Lcom/google/gwt/core/client/JavaScriptObject;Lcom/google/gwt/core/client/JavaScriptObject;Lcom/google/gwt/core/client/JavaScriptObject;); + }-*/; + + + public void onModuleLoad() { + exportEntryPoints(); + } + +} diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java new file mode 100644 index 000000000..43235c5be --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.gwt; + +import com.google.gwt.core.client.JavaScriptObject; + +public class ParseEndListener { + + private final JavaScriptObject callback; + + /** + * @param callback + */ + public ParseEndListener(JavaScriptObject callback) { + this.callback = callback; + } + + public void parseComplete() { + call(callback); + } + + private static native void call(JavaScriptObject callback) /*-{ + callback(); + }-*/; + +} diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/HtmlParser.html b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/HtmlParser.html new file mode 100644 index 000000000..4d9cde81c --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/HtmlParser.html @@ -0,0 +1,225 @@ +<!DOCTYPE HTML> +<html> + <head> + <title>Live DOM Viewer</title> + <script type="text/javascript" language="javascript" src="nu.validator.htmlparser.HtmlParser.nocache.js"></script> + <style> + h1 { margin: 0; } + h2 { font-size: small; margin: 1em 0 0; } + p, ul, pre { margin: 0; } + p { border: inset thin; } + textarea { width: 100%; -width: 99%; height: 8em; border: 0; } + iframe { width: 100%; height: 12em; border: 0; } +/* iframe.large { height: 24em; } */ + pre { border: inset thin; padding: 0.5em; color: gray; } + pre samp { color: black; } + #dom { border: inset thin; padding: 0.5em 0.5em 0.5em 1em; color: black; min-height: 5em; font-family: monospace; background: white; } + #dom ul { padding: 0 0 0 1em; margin: 0; } + #dom li { padding: 0; margin: 0; list-style: none; position: relative; } + #dom li li { list-style: disc; } + #dom .t1 code { color: purple; font-weight: bold; } + #dom .t2 { font-style: normal; font-family: monospace; } + #dom .t2 .name { color: black; font-weight: bold; } + #dom .t2 .value { color: blue; font-weight: normal; } + #dom .t3 code, #dom .t4 code, #dom .t5 code { color: gray; } + #dom .t7 code, #dom .t8 code { color: green; } + #dom span { font-style: italic; font-family: serif; } + #dom .t10 code { color: teal; } + #dom .misparented, #dom .misparented code { color: red; font-weight: bold; } + #dom.hidden, .hidden { visibility: hidden; margin: 0.5em 0; padding: 0; height: 0; min-height: 0; } + pre#log { color: black; font: small monospace; } + script + p { border: none; font-size: smaller; margin: 0.8em 0.3em; } + </style> + <style title="Tree View"> + #dom li li { list-style: none; } + #dom li:first-child::before { position: absolute; top: 0; height: 0.6em; left: -0.75em; width: 0.5em; border-style: none none solid solid; content: ''; border-width: 0.1em; } + #dom li:not(:last-child)::after { position: absolute; top: 0; bottom: -0.6em; left: -0.75em; width: 0.5em; border-style: none none solid solid; content: ''; border-width: 0.1em; } + </style> + <script> + if (navigator.userAgent.match('Gecko/(\\d+)') && RegExp.$1 == '20060217' && RegExp.$1 != '00000000') { + var style = document.getElementsByTagName('style')[1]; + style.parentNode.removeChild(style); + } + </script> + </head> + <body onload="init()"> + <h1>Live DOM Viewer</h1> + <h2>Markup to test (<a href="data:," id="permalink" rel="bookmark">permalink</a>, <a href="javascript:up()">upload</a>, <a href="javascript:down()">download</a>, <a href="#" onclick="toggleVisibility(this); return false">hide</a>): <span id="updown-status"></span></h2> + <p><textarea oninput="updateInput(event)" onkeydown="updateInput(event)"><!DOCTYPE html> +...</textarea></p> + <h2><a href="data:," id="domview">DOM view</a> (<a href="#" onclick="toggleVisibility(this); return false;">hide</a>, <a href="#" onclick="updateDOM()">refresh</a>):</h2> + <ul id="dom"></ul> + <h2><a href="data:," id="link">Rendered view</a>: (<a href="#" onclick="toggleVisibility(this); return false;">hide</a><!--, <a href="#" onclick="grow(this)">grow</a>-->):</h2> + <p><iframe src="blank.html"></iframe></p> <!-- data:, --> + <h2>innerHTML view: (<a href="#" onclick="toggleVisibility(this); return false;">show</a>, <a href="#" onclick="updateDOM()">refresh</a>):</h2> + <pre class="hidden"><!DOCTYPE HTML><html><samp></samp></html></pre> + <h2>Log: (<a href="#" onclick="toggleVisibility(this); return false;">hide</a>):</h2> + <pre id="log">Script not loaded.</pre> + <script> + var iframe = document.getElementsByTagName('iframe')[0]; + var textarea = document.getElementsByTagName('textarea')[0]; + var pre = document.getElementsByTagName('samp')[0]; + var dom = document.getElementsByTagName('ul')[0]; + var log = document.getElementById('log'); + var updownStatus = document.getElementById('updown-status'); + var delayedUpdater = 0; + var lastString = ''; + var logBuffer = ''; + var logBuffering = false; + function updateInput(event) { + if (delayedUpdater) { + clearTimeout(delayedUpdater); + delayedUpdater = 0; + } + delayedUpdater = setTimeout(update, 100); + } + function afterParse() { + lastString = textarea.value; + setTimeout(updateDOM, 100); + updown(''); + } + function update() { + if (lastString != textarea.value) { + logBuffering = true; + document.getElementById('link').href = 'data:text/html;charset=utf-8,' + encodeURIComponent(textarea.value); + iframe.contentWindow.onerror = function (a, b, c) { + record('error: ' + a + ' on line ' + c); + } + iframe.contentWindow.w = function (s) { + record('log: ' + s); + } + window.parseHtmlDocument(textarea.value, iframe.contentWindow.document, afterParse, null); + } + } + function updateDOM() { + while (pre.firstChild) pre.removeChild(pre.firstChild); + pre.appendChild(document.createTextNode(iframe.contentWindow.document.documentElement.innerHTML)); + printDOM(dom, iframe.contentWindow.document); + document.getElementById('domview').href = 'data:text/plain;charset=utf-8,<ul class="domTree">' + encodeURIComponent(dom.innerHTML + '</ul>'); + document.getElementById('permalink').href = '?' + encodeURIComponent(textarea.value); + record('rendering mode: ' + iframe.contentWindow.document.compatMode); + if (iframe.contentWindow.document.title) + record('document.title: ' + iframe.contentWindow.document.title); + else + record('document has no title'); + while (log.firstChild != log.lastChild) + log.removeChild(log.lastChild); + log.firstChild.data = logBuffer; + logBuffering = false; + logBuffer = ''; + } + function printDOM(ul, node) { + while (ul.firstChild) ul.removeChild(ul.firstChild); + for (var i = 0; i < node.childNodes.length; i += 1) { + var li = document.createElement('li'); + li.className = 't' + node.childNodes[i].nodeType; + if (node.childNodes[i].nodeType == 10) { + li.appendChild(document.createTextNode('DOCTYPE: ')); + } + var code = document.createElement('code'); + code.appendChild(document.createTextNode(node.childNodes[i].nodeName)); + li.appendChild(code); + if (node.childNodes[i].nodeValue) { + var span = document.createElement('span'); + span.appendChild(document.createTextNode(node.childNodes[i].nodeValue)); + li.appendChild(document.createTextNode(': ')); + li.appendChild(span); + } + if (node.childNodes[i].attributes) + for (var j = 0; j < node.childNodes[i].attributes.length; j += 1) { + if (node.childNodes[i].attributes[j].specified) { + var attName = document.createElement('code'); + attName.appendChild(document.createTextNode(node.childNodes[i].attributes[j].nodeName)); + attName.className = 'attribute name'; + var attValue = document.createElement('code'); + attValue.appendChild(document.createTextNode(node.childNodes[i].attributes[j].nodeValue)); + attValue.className = 'attribute value'; + var att = document.createElement('span'); + att.className = 't2'; + att.appendChild(attName); + att.appendChild(document.createTextNode('="')); + att.appendChild(attValue); + att.appendChild(document.createTextNode('"')); + li.appendChild(document.createTextNode(' ')); + li.appendChild(att); + } + } + if (node.childNodes[i].parentNode == node) { + if (node.childNodes[i].childNodes.length) { + var ul2 = document.createElement('ul'); + li.appendChild(ul2); + printDOM(ul2, node.childNodes[i]); + } + } else { + li.className += ' misparented'; + } + ul.appendChild(li); + } + } + function toggleVisibility(link) { + var n = link.parentNode.nextSibling; + if (n.nodeType == 3 /* text node */) n = n.nextSibling; // we should always do this but in IE, text nodes vanish + n.className = (n.className == "hidden") ? '' : 'hidden'; + link.firstChild.data = n.className == "hidden" ? "show" : "hide"; + } +/* + function grow(link) { + var n = link.parentNode.nextSibling; + if (n.nodeType == 3 /-* text node *-/) n = n.nextSibling; // we should always do this but in IE, text nodes vanish + n.className = (n.className == "large") ? '' : 'large'; + link.firstChild.data = n.className == "grow" ? "shrink" : "grow"; + } +*/ + function down() { + updown('downloading...'); + var request = window.XMLHttpRequest ? new XMLHttpRequest() : new ActiveXObject("Microsoft.XMLHTTP"); + request.onreadystatechange = function () { + updown('downloading... ' + request.readyState + '/4'); + if (request.readyState == 4) { + textarea.value = request.responseText; + update(); + updown('downloaded'); + } + }; + request.open('GET', 'clipboard.cgi', true); + request.send(null); + } + function up() { + updown('uploading...'); + var request = window.XMLHttpRequest ? new XMLHttpRequest() : new ActiveXObject("Microsoft.XMLHTTP"); + request.onreadystatechange = function () { + updown('uploading... ' + request.readyState + '/4'); + if (request.readyState == 4) { + updown('uploaded'); + } + }; + request.open('POST', 'clipboard.cgi', true); + request.setRequestHeader('Content-Type', 'text/plain'); + request.send(textarea.value); + } + function init() { + var uri = location.search; + if (uri) + textarea.value = decodeURIComponent(uri.substring(1, uri.length)); + update(); + } + function record(s) { + if (logBuffering) + logBuffer += s + '\r\n'; + else + log.appendChild(document.createTextNode(s + '\r\n')); + } + function updown(s) { + while (updownStatus.firstChild) updownStatus.removeChild(updownStatus.firstChild); + updownStatus.appendChild(document.createTextNode(s)); + } + </script> + <p>This script puts a function <code>w(<var>s</var>)</code> into the + global scope of the test page, where <var>s</vaR> is a string to + output to the log. Also, five files are accessible in the current + directory for test purposes: <code>image</code> (a GIF image), + <code>flash</code> (a Flash file), <code>script</code> (a JS file), + <code>style</code> (a CSS file), and <code>document</code> (an HTML + file).</p> + </body> +</html>
\ No newline at end of file diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt new file mode 100644 index 000000000..bd2f4fcf1 --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt @@ -0,0 +1,25 @@ +From: +http://software.hixie.ch/utilities/js/live-dom-viewer/LICENSE +regarding the upstream of HtmlParser.html: + +The MIT License + +Copyright (c) 2000, 2006, 2008 Ian Hickson and various contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/blank.html b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/blank.html new file mode 100644 index 000000000..a8756c9f7 --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/blank.html @@ -0,0 +1,2 @@ +<!DOCTYPE html> +<title></title>
\ No newline at end of file |