From 6168dbe21f5f83b906e562ea0ab232d499b275a6 Mon Sep 17 00:00:00 2001 From: "Matt A. Tobin" Date: Wed, 15 Jan 2020 14:56:04 -0500 Subject: Add java htmlparser sources that match the original 52-level state https://hg.mozilla.org/projects/htmlparser/ Commit: abe62ab2a9b69ccb3b5d8a231ec1ae11154c571d --- .../nu/validator/htmlparser/HtmlParser.gwt.xml | 12 + .../htmlparser/gwt/BrowserTreeBuilder.java | 477 +++++++++++++++++++++ .../nu/validator/htmlparser/gwt/HtmlParser.java | 265 ++++++++++++ .../validator/htmlparser/gwt/HtmlParserModule.java | 87 ++++ .../validator/htmlparser/gwt/ParseEndListener.java | 46 ++ .../nu/validator/htmlparser/public/HtmlParser.html | 225 ++++++++++ .../htmlparser/public/LICENSE.Live-DOM-viewer.txt | 25 ++ .../nu/validator/htmlparser/public/blank.html | 2 + 8 files changed, 1139 insertions(+) create mode 100644 parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml create mode 100644 parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java create mode 100644 parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java create mode 100644 parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java create mode 100644 parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java create mode 100644 parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/HtmlParser.html create mode 100644 parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt create mode 100644 parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/blank.html (limited to 'parser/html/java/htmlparser/gwt-src/nu') diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml new file mode 100644 index 000000000..1eab09c21 --- /dev/null +++ 
b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java new file mode 100644 index 000000000..29ef2a43a --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java @@ -0,0 +1,477 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2009 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.gwt; + +import java.util.LinkedList; + +import nu.validator.htmlparser.common.DocumentMode; +import nu.validator.htmlparser.impl.CoalescingTreeBuilder; +import nu.validator.htmlparser.impl.HtmlAttributes; + +import org.xml.sax.SAXException; + +import com.google.gwt.core.client.JavaScriptException; +import com.google.gwt.core.client.JavaScriptObject; + +class BrowserTreeBuilder extends CoalescingTreeBuilder { + + private JavaScriptObject document; + + private JavaScriptObject script; + + private JavaScriptObject placeholder; + + private boolean readyToRun; + + private final LinkedList scriptStack = new LinkedList(); + + private class ScriptHolder { + private final JavaScriptObject script; + + private final JavaScriptObject placeholder; + + /** + * @param script + * @param placeholder + */ + public ScriptHolder(JavaScriptObject script, + JavaScriptObject placeholder) { + this.script = script; + this.placeholder = placeholder; + } + + /** + * Returns the script. + * + * @return the script + */ + public JavaScriptObject getScript() { + return script; + } + + /** + * Returns the placeholder. 
+ * + * @return the placeholder + */ + public JavaScriptObject getPlaceholder() { + return placeholder; + } + } + + protected BrowserTreeBuilder(JavaScriptObject document) { + super(); + this.document = document; + installExplorerCreateElementNS(document); + } + + private static native boolean installExplorerCreateElementNS( + JavaScriptObject doc) /*-{ + if (!doc.createElementNS) { + doc.createElementNS = function (uri, local) { + if ("http://www.w3.org/1999/xhtml" == uri) { + return doc.createElement(local); + } else if ("http://www.w3.org/1998/Math/MathML" == uri) { + if (!doc.mathplayerinitialized) { + var obj = document.createElement("object"); + obj.setAttribute("id", "mathplayer"); + obj.setAttribute("classid", "clsid:32F66A20-7614-11D4-BD11-00104BD3F987"); + document.getElementsByTagName("head")[0].appendChild(obj); + document.namespaces.add("m", "http://www.w3.org/1998/Math/MathML", "#mathplayer"); + doc.mathplayerinitialized = true; + } + return doc.createElement("m:" + local); + } else if ("http://www.w3.org/2000/svg" == uri) { + if (!doc.renesisinitialized) { + var obj = document.createElement("object"); + obj.setAttribute("id", "renesis"); + obj.setAttribute("classid", "clsid:AC159093-1683-4BA2-9DCF-0C350141D7F2"); + document.getElementsByTagName("head")[0].appendChild(obj); + document.namespaces.add("s", "http://www.w3.org/2000/svg", "#renesis"); + doc.renesisinitialized = true; + } + return doc.createElement("s:" + local); + } else { + // throw + } + } + } + }-*/; + + private static native boolean hasAttributeNS(JavaScriptObject element, + String uri, String localName) /*-{ + return element.hasAttributeNS(uri, localName); + }-*/; + + private static native void setAttributeNS(JavaScriptObject element, + String uri, String localName, String value) /*-{ + element.setAttributeNS(uri, localName, value); + }-*/; + + @Override protected void addAttributesToElement(JavaScriptObject element, + HtmlAttributes attributes) throws SAXException { + try { + for 
(int i = 0; i < attributes.getLength(); i++) { + String localName = attributes.getLocalNameNoBoundsCheck(i); + String uri = attributes.getURINoBoundsCheck(i); + if (!hasAttributeNS(element, uri, localName)) { + setAttributeNS(element, uri, localName, + attributes.getValueNoBoundsCheck(i)); + } + } + } catch (JavaScriptException e) { + fatal(e); + } + } + + private static native void appendChild(JavaScriptObject parent, + JavaScriptObject child) /*-{ + parent.appendChild(child); + }-*/; + + private static native JavaScriptObject createTextNode(JavaScriptObject doc, + String text) /*-{ + return doc.createTextNode(text); + }-*/; + + private static native JavaScriptObject getLastChild(JavaScriptObject node) /*-{ + return node.lastChild; + }-*/; + + private static native void extendTextNode(JavaScriptObject node, String text) /*-{ + node.data += text; + }-*/; + + @Override protected void appendCharacters(JavaScriptObject parent, + String text) throws SAXException { + try { + if (parent == placeholder) { + appendChild(script, createTextNode(document, text)); + + } + JavaScriptObject lastChild = getLastChild(parent); + if (lastChild != null && getNodeType(lastChild) == 3) { + extendTextNode(lastChild, text); + return; + } + appendChild(parent, createTextNode(document, text)); + } catch (JavaScriptException e) { + fatal(e); + } + } + + private static native boolean hasChildNodes(JavaScriptObject element) /*-{ + return element.hasChildNodes(); + }-*/; + + private static native JavaScriptObject getFirstChild( + JavaScriptObject element) /*-{ + return element.firstChild; + }-*/; + + @Override protected void appendChildrenToNewParent( + JavaScriptObject oldParent, JavaScriptObject newParent) + throws SAXException { + try { + while (hasChildNodes(oldParent)) { + appendChild(newParent, getFirstChild(oldParent)); + } + } catch (JavaScriptException e) { + fatal(e); + } + } + + private static native JavaScriptObject createComment(JavaScriptObject doc, + String text) /*-{ + return 
doc.createComment(text); + }-*/; + + @Override protected void appendComment(JavaScriptObject parent, + String comment) throws SAXException { + try { + if (parent == placeholder) { + appendChild(script, createComment(document, comment)); + } + appendChild(parent, createComment(document, comment)); + } catch (JavaScriptException e) { + fatal(e); + } + } + + @Override protected void appendCommentToDocument(String comment) + throws SAXException { + try { + appendChild(document, createComment(document, comment)); + } catch (JavaScriptException e) { + fatal(e); + } + } + + private static native JavaScriptObject createElementNS( + JavaScriptObject doc, String ns, String local) /*-{ + return doc.createElementNS(ns, local); + }-*/; + + @Override protected JavaScriptObject createElement(String ns, String name, + HtmlAttributes attributes) throws SAXException { + try { + JavaScriptObject rv = createElementNS(document, ns, name); + for (int i = 0; i < attributes.getLength(); i++) { + setAttributeNS(rv, attributes.getURINoBoundsCheck(i), + attributes.getLocalNameNoBoundsCheck(i), + attributes.getValueNoBoundsCheck(i)); + } + + if ("script" == name) { + if (placeholder != null) { + scriptStack.addLast(new ScriptHolder(script, placeholder)); + } + script = rv; + placeholder = createElementNS(document, + "http://n.validator.nu/placeholder/", "script"); + rv = placeholder; + for (int i = 0; i < attributes.getLength(); i++) { + setAttributeNS(rv, attributes.getURINoBoundsCheck(i), + attributes.getLocalNameNoBoundsCheck(i), + attributes.getValueNoBoundsCheck(i)); + } + } + + return rv; + } catch (JavaScriptException e) { + fatal(e); + throw new RuntimeException("Unreachable"); + } + } + + @Override protected JavaScriptObject createHtmlElementSetAsRoot( + HtmlAttributes attributes) throws SAXException { + try { + JavaScriptObject rv = createElementNS(document, + "http://www.w3.org/1999/xhtml", "html"); + for (int i = 0; i < attributes.getLength(); i++) { + setAttributeNS(rv, 
attributes.getURINoBoundsCheck(i), + attributes.getLocalNameNoBoundsCheck(i), + attributes.getValueNoBoundsCheck(i)); + } + appendChild(document, rv); + return rv; + } catch (JavaScriptException e) { + fatal(e); + throw new RuntimeException("Unreachable"); + } + } + + private static native JavaScriptObject getParentNode( + JavaScriptObject element) /*-{ + return element.parentNode; + }-*/; + + @Override protected void appendElement(JavaScriptObject child, + JavaScriptObject newParent) throws SAXException { + try { + if (newParent == placeholder) { + appendChild(script, cloneNodeDeep(child)); + } + appendChild(newParent, child); + } catch (JavaScriptException e) { + fatal(e); + } + } + + @Override protected boolean hasChildren(JavaScriptObject element) + throws SAXException { + try { + return hasChildNodes(element); + } catch (JavaScriptException e) { + fatal(e); + throw new RuntimeException("Unreachable"); + } + } + + private static native void insertBeforeNative(JavaScriptObject parent, + JavaScriptObject child, JavaScriptObject sibling) /*-{ + parent.insertBefore(child, sibling); + }-*/; + + private static native int getNodeType(JavaScriptObject node) /*-{ + return node.nodeType; + }-*/; + + private static native JavaScriptObject cloneNodeDeep(JavaScriptObject node) /*-{ + return node.cloneNode(true); + }-*/; + + /** + * Returns the document. 
+ * + * @return the document + */ + JavaScriptObject getDocument() { + JavaScriptObject rv = document; + document = null; + return rv; + } + + private static native JavaScriptObject createDocumentFragment( + JavaScriptObject doc) /*-{ + return doc.createDocumentFragment(); + }-*/; + + JavaScriptObject getDocumentFragment() { + JavaScriptObject rv = createDocumentFragment(document); + JavaScriptObject rootElt = getFirstChild(document); + while (hasChildNodes(rootElt)) { + appendChild(rv, getFirstChild(rootElt)); + } + document = null; + return rv; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#createJavaScriptObject(String, + * java.lang.String, org.xml.sax.Attributes, java.lang.Object) + */ + @Override protected JavaScriptObject createElement(String ns, String name, + HtmlAttributes attributes, JavaScriptObject form) + throws SAXException { + try { + JavaScriptObject rv = createElement(ns, name, attributes); + // rv.setUserData("nu.validator.form-pointer", form, null); + return rv; + } catch (JavaScriptException e) { + fatal(e); + return null; + } + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#start() + */ + @Override protected void start(boolean fragment) throws SAXException { + script = null; + placeholder = null; + readyToRun = false; + } + + protected void documentMode(DocumentMode mode, String publicIdentifier, + String systemIdentifier, boolean html4SpecificAdditionalErrorChecks) + throws SAXException { + // document.setUserData("nu.validator.document-mode", mode, null); + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#elementPopped(java.lang.String, + * java.lang.String, java.lang.Object) + */ + @Override protected void elementPopped(String ns, String name, + JavaScriptObject node) throws SAXException { + if (node == placeholder) { + readyToRun = true; + requestSuspension(); + } + } + + private static native void replace(JavaScriptObject oldNode, + JavaScriptObject newNode) /*-{ + 
oldNode.parentNode.replaceChild(newNode, oldNode); + }-*/; + + private static native JavaScriptObject getPreviousSibling(JavaScriptObject node) /*-{ + return node.previousSibling; + }-*/; + + void maybeRunScript() { + if (readyToRun) { + readyToRun = false; + replace(placeholder, script); + if (scriptStack.isEmpty()) { + script = null; + placeholder = null; + } else { + ScriptHolder scriptHolder = scriptStack.removeLast(); + script = scriptHolder.getScript(); + placeholder = scriptHolder.getPlaceholder(); + } + } + } + + @Override protected void insertFosterParentedCharacters(String text, + JavaScriptObject table, JavaScriptObject stackParent) + throws SAXException { + try { + JavaScriptObject parent = getParentNode(table); + if (parent != null) { // always an element if not null + JavaScriptObject previousSibling = getPreviousSibling(table); + if (previousSibling != null + && getNodeType(previousSibling) == 3) { + extendTextNode(previousSibling, text); + return; + } + insertBeforeNative(parent, createTextNode(document, text), table); + return; + } + JavaScriptObject lastChild = getLastChild(stackParent); + if (lastChild != null && getNodeType(lastChild) == 3) { + extendTextNode(lastChild, text); + return; + } + appendChild(stackParent, createTextNode(document, text)); + } catch (JavaScriptException e) { + fatal(e); + } + } + + @Override protected void insertFosterParentedChild(JavaScriptObject child, + JavaScriptObject table, JavaScriptObject stackParent) + throws SAXException { + JavaScriptObject parent = getParentNode(table); + try { + if (parent != null && getNodeType(parent) == 1) { + insertBeforeNative(parent, child, table); + } else { + appendChild(stackParent, child); + } + } catch (JavaScriptException e) { + fatal(e); + } + } + + private static native void removeChild(JavaScriptObject parent, + JavaScriptObject child) /*-{ + parent.removeChild(child); + }-*/; + + @Override protected void detachFromParent(JavaScriptObject element) + throws SAXException { + 
try { + JavaScriptObject parent = getParentNode(element); + if (parent != null) { + removeChild(parent, element); + } + } catch (JavaScriptException e) { + fatal(e); + } + } +} diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java new file mode 100644 index 000000000..1d71cdfd6 --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2007-2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.gwt; + +import java.util.LinkedList; + +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.impl.ErrorReportingTokenizer; +import nu.validator.htmlparser.impl.Tokenizer; +import nu.validator.htmlparser.impl.UTF16Buffer; + +import org.xml.sax.ErrorHandler; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +import com.google.gwt.core.client.JavaScriptObject; +import com.google.gwt.user.client.Timer; + +/** + * This class implements an HTML5 parser that exposes data through the DOM + * interface. + * + *

By default, when using the constructor without arguments, + * this parser treats XML 1.0-incompatible infosets as fatal errors. + * This corresponds to + * FATAL as the general XML violation policy. To make the parser + * support non-conforming HTML fully per the HTML 5 spec while on the other + * hand potentially violating the DOM API contract, set the general XML + * violation policy to ALLOW. This does not work with a standard + * DOM implementation. Handling all input without fatal errors and without + * violating the DOM API contract is possible by setting + * the general XML violation policy to ALTER_INFOSET. This + * makes the parser non-conforming but is probably the most useful + * setting for most applications. + * + *

The doctype is not represented in the tree. + * + *

The document mode is represented as user data DocumentMode + * object with the key nu.validator.document-mode on the document + * node. + * + *

The form pointer is also stored as user data with the key + * nu.validator.form-pointer. + * + * @version $Id: HtmlDocumentBuilder.java 255 2008-05-29 08:57:38Z hsivonen $ + * @author hsivonen + */ +public class HtmlParser { + + private static final int CHUNK_SIZE = 512; + + private final Tokenizer tokenizer; + + private final BrowserTreeBuilder domTreeBuilder; + + private final StringBuilder documentWriteBuffer = new StringBuilder(); + + private ErrorHandler errorHandler; + + private UTF16Buffer stream; + + private int streamLength; + + private boolean lastWasCR; + + private boolean ending; + + private ParseEndListener parseEndListener; + + private final LinkedList bufferStack = new LinkedList(); + + /** + * Instantiates the parser + * + * @param implementation + * the DOM implementation + * @param xmlPolicy the policy + */ + public HtmlParser(JavaScriptObject document) { + this.domTreeBuilder = new BrowserTreeBuilder(document); + this.tokenizer = new ErrorReportingTokenizer(domTreeBuilder); + this.domTreeBuilder.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET); + this.tokenizer.setCommentPolicy(XmlViolationPolicy.ALTER_INFOSET); + this.tokenizer.setContentNonXmlCharPolicy(XmlViolationPolicy.ALTER_INFOSET); + this.tokenizer.setContentSpacePolicy(XmlViolationPolicy.ALTER_INFOSET); + this.tokenizer.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET); + this.tokenizer.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET); + } + + /** + * Parses a document from a SAX InputSource. 
+ * @param is the source + * @return the doc + * @see javax.xml.parsers.DocumentBuilder#parse(org.xml.sax.InputSource) + */ + public void parse(String source, ParseEndListener callback) throws SAXException { + parseEndListener = callback; + domTreeBuilder.setFragmentContext(null); + tokenize(source, null); + } + + /** + * @param is + * @throws SAXException + * @throws IOException + * @throws MalformedURLException + */ + private void tokenize(String source, String context) throws SAXException { + lastWasCR = false; + ending = false; + documentWriteBuffer.setLength(0); + streamLength = source.length(); + stream = new UTF16Buffer(source.toCharArray(), 0, + (streamLength < CHUNK_SIZE ? streamLength : CHUNK_SIZE)); + bufferStack.clear(); + push(stream); + domTreeBuilder.setFragmentContext(context == null ? null : context.intern()); + tokenizer.start(); + pump(); + } + + private void pump() throws SAXException { + if (ending) { + tokenizer.end(); + domTreeBuilder.getDocument(); // drops the internal reference + parseEndListener.parseComplete(); + // Don't schedule timeout + return; + } + + int docWriteLen = documentWriteBuffer.length(); + if (docWriteLen > 0) { + char[] newBuf = new char[docWriteLen]; + documentWriteBuffer.getChars(0, docWriteLen, newBuf, 0); + push(new UTF16Buffer(newBuf, 0, docWriteLen)); + documentWriteBuffer.setLength(0); + } + + for (;;) { + UTF16Buffer buffer = peek(); + if (!buffer.hasMore()) { + if (buffer == stream) { + if (buffer.getEnd() == streamLength) { + // Stop parsing + tokenizer.eof(); + ending = true; + break; + } else { + int newEnd = buffer.getStart() + CHUNK_SIZE; + buffer.setEnd(newEnd < streamLength ? 
newEnd + : streamLength); + continue; + } + } else { + pop(); + continue; + } + } + // now we have a non-empty buffer + buffer.adjust(lastWasCR); + lastWasCR = false; + if (buffer.hasMore()) { + lastWasCR = tokenizer.tokenizeBuffer(buffer); + domTreeBuilder.maybeRunScript(); + break; + } else { + continue; + } + } + + // schedule + Timer timer = new Timer() { + + @Override public void run() { + try { + pump(); + } catch (SAXException e) { + ending = true; + if (errorHandler != null) { + try { + errorHandler.fatalError(new SAXParseException( + e.getMessage(), null, null, -1, -1, e)); + } catch (SAXException e1) { + } + } + } + } + + }; + timer.schedule(1); + } + + private void push(UTF16Buffer buffer) { + bufferStack.addLast(buffer); + } + + private UTF16Buffer peek() { + return bufferStack.getLast(); + } + + private void pop() { + bufferStack.removeLast(); + } + + public void documentWrite(String text) throws SAXException { + UTF16Buffer buffer = new UTF16Buffer(text.toCharArray(), 0, text.length()); + while (buffer.hasMore()) { + buffer.adjust(lastWasCR); + lastWasCR = false; + if (buffer.hasMore()) { + lastWasCR = tokenizer.tokenizeBuffer(buffer); + domTreeBuilder.maybeRunScript(); + } + } + } + + /** + * @see javax.xml.parsers.DocumentBuilder#setErrorHandler(org.xml.sax.ErrorHandler) + */ + public void setErrorHandler(ErrorHandler errorHandler) { + this.errorHandler = errorHandler; + domTreeBuilder.setErrorHandler(errorHandler); + tokenizer.setErrorHandler(errorHandler); + } + + /** + * Sets whether comment nodes appear in the tree. + * @param ignoreComments true to ignore comments + * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean) + */ + public void setIgnoringComments(boolean ignoreComments) { + domTreeBuilder.setIgnoringComments(ignoreComments); + } + + /** + * Sets whether the parser considers scripting to be enabled for noscript treatment. 
+ * @param scriptingEnabled true to enable + * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean) + */ + public void setScriptingEnabled(boolean scriptingEnabled) { + domTreeBuilder.setScriptingEnabled(scriptingEnabled); + } + +} diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java new file mode 100644 index 000000000..255a02d13 --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.gwt; + +import org.xml.sax.SAXException; + +import com.google.gwt.core.client.EntryPoint; +import com.google.gwt.core.client.JavaScriptObject; + +public class HtmlParserModule implements EntryPoint { + + private static native void zapChildren(JavaScriptObject node) /*-{ + while (node.hasChildNodes()) { + node.removeChild(node.lastChild); + } + }-*/; + + private static native void installDocWrite(JavaScriptObject doc, HtmlParser parser) /*-{ + doc.write = function() { + if (arguments.length == 0) { + return; + } + var text = arguments[0]; + for (var i = 1; i < arguments.length; i++) { + text += arguments[i]; + } + parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)(text); + } + doc.writeln = function() { + if (arguments.length == 0) { + parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)("\n"); + return; + } + var text = arguments[0]; + for (var i = 1; i < arguments.length; i++) { + text += arguments[i]; + } + text += "\n"; + parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)(text); + } + }-*/; + + @SuppressWarnings("unused") + private static void parseHtmlDocument(String source, JavaScriptObject document, JavaScriptObject readyCallback, JavaScriptObject errorHandler) throws SAXException { + if (readyCallback == null) { + readyCallback = JavaScriptObject.createFunction(); + } + zapChildren(document); + HtmlParser parser = new HtmlParser(document); + parser.setScriptingEnabled(true); + // XXX error handler + + installDocWrite(document, parser); + + parser.parse(source, new ParseEndListener(readyCallback)); + } + + private static native void exportEntryPoints() /*-{ + $wnd.parseHtmlDocument = @nu.validator.htmlparser.gwt.HtmlParserModule::parseHtmlDocument(Ljava/lang/String;Lcom/google/gwt/core/client/JavaScriptObject;Lcom/google/gwt/core/client/JavaScriptObject;Lcom/google/gwt/core/client/JavaScriptObject;); + }-*/; + + + public void 
onModuleLoad() { + exportEntryPoints(); + } + +} diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java new file mode 100644 index 000000000..43235c5be --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.gwt; + +import com.google.gwt.core.client.JavaScriptObject; + +public class ParseEndListener { + + private final JavaScriptObject callback; + + /** + * @param callback + */ + public ParseEndListener(JavaScriptObject callback) { + this.callback = callback; + } + + public void parseComplete() { + call(callback); + } + + private static native void call(JavaScriptObject callback) /*-{ + callback(); + }-*/; + +} diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/HtmlParser.html b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/HtmlParser.html new file mode 100644 index 000000000..4d9cde81c --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/HtmlParser.html @@ -0,0 +1,225 @@ + + + + Live DOM Viewer + + + + + + +

Live DOM Viewer

+

Markup to test (, upload, download, hide):

+

+

DOM view (hide, refresh):

+
    +

    Rendered view: (hide):

    +

    +

    innerHTML view: (show, refresh):

    + +

    Log: (hide):

    +
    Script not loaded.
    + +

    This script puts a function w(s) into the + global scope of the test page, where s is a string to + output to the log. Also, five files are accessible in the current + directory for test purposes: image (a GIF image), + flash (a Flash file), script (a JS file), + style (a CSS file), and document (an HTML + file).

    + + \ No newline at end of file diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt new file mode 100644 index 000000000..bd2f4fcf1 --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt @@ -0,0 +1,25 @@ +From: +http://software.hixie.ch/utilities/js/live-dom-viewer/LICENSE +regarding the upstream of HtmlParser.html: + +The MIT License + +Copyright (c) 2000, 2006, 2008 Ian Hickson and various contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/blank.html b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/blank.html new file mode 100644 index 000000000..a8756c9f7 --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/blank.html @@ -0,0 +1,2 @@ + + \ No newline at end of file -- cgit v1.2.3