summaryrefslogtreecommitdiffstats
path: root/parser/html/java/htmlparser/test-src/nu
diff options
context:
space:
mode:
authorMatt A. Tobin <email@mattatobin.com>2020-01-15 14:56:04 -0500
committerMatt A. Tobin <email@mattatobin.com>2020-01-15 14:56:04 -0500
commit6168dbe21f5f83b906e562ea0ab232d499b275a6 (patch)
tree658a4b27554c85ebcaad655fc83f2c2bb99e8e80 /parser/html/java/htmlparser/test-src/nu
parent09314667a692fedff8564fc347c8a3663474faa6 (diff)
downloadUXP-6168dbe21f5f83b906e562ea0ab232d499b275a6.tar
UXP-6168dbe21f5f83b906e562ea0ab232d499b275a6.tar.gz
UXP-6168dbe21f5f83b906e562ea0ab232d499b275a6.tar.lz
UXP-6168dbe21f5f83b906e562ea0ab232d499b275a6.tar.xz
UXP-6168dbe21f5f83b906e562ea0ab232d499b275a6.zip
Add java htmlparser sources that match the original 52-level state
https://hg.mozilla.org/projects/htmlparser/ Commit: abe62ab2a9b69ccb3b5d8a231ec1ae11154c571d
Diffstat (limited to 'parser/html/java/htmlparser/test-src/nu')
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/encoding/test/Big5Tester.java96
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/encoding/test/EncodingTester.java491
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java115
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomIdTester.java49
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomTest.java40
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/EncodingTester.java123
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java185
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/ListErrorHandler.java66
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/SystemErrErrorHandler.java201
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenPrinter.java210
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenizerTester.java211
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeDumpContentHandler.java239
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreePrinter.java50
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeTester.java246
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/UntilHashInputStream.java97
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XmlSerializerTester.java63
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XomTest.java33
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/package.html29
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2HTML.java87
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2XML.java86
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2HTML.java89
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2XML.java89
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java237
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java162
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XmlnsDropper.java169
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/package.html29
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/PassThruPrinter.java67
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/package.html29
28 files changed, 3588 insertions, 0 deletions
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/encoding/test/Big5Tester.java b/parser/html/java/htmlparser/test-src/nu/validator/encoding/test/Big5Tester.java
new file mode 100644
index 000000000..395f9eb15
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/encoding/test/Big5Tester.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding.test;
+
+import nu.validator.encoding.Encoding;
+
+public class Big5Tester extends EncodingTester {
+ // Runs a fixed table of Big5 vectors through the decode()/encode() checks inherited from EncodingTester.
+ public static void main(String[] args) {
+ new Big5Tester().test();
+ }
+
+ private void test() { // decoder vectors first: input bytes are packed two per char, high byte first
+ // ASCII
+ decodeBig5("\u6162", "\u0061\u0062");
+ // Edge cases
+ decodeBig5("\u8740", "\u43F0");
+ decodeBig5("\uFEFE", "\u79D4");
+ decodeBig5("\uFEFD", "\uD864\uDD0D");
+ decodeBig5("\u8862", "\u00CA\u0304");
+ decodeBig5("\u8864", "\u00CA\u030C");
+ decodeBig5("\u8866", "\u00CA");
+ decodeBig5("\u88A3", "\u00EA\u0304");
+ decodeBig5("\u88A5", "\u00EA\u030C");
+ decodeBig5("\u88A7", "\u00EA");
+ decodeBig5("\u99D4", "\u8991");
+ decodeBig5("\u99D5", "\uD85E\uDD67");
+ decodeBig5("\u99D6", "\u8A29");
+ // Edge cases surrounded with ASCII
+ decodeBig5("\u6187\u4062", "\u0061\u43F0\u0062");
+ decodeBig5("\u61FE\uFE62", "\u0061\u79D4\u0062");
+ decodeBig5("\u61FE\uFD62", "\u0061\uD864\uDD0D\u0062");
+ decodeBig5("\u6188\u6262", "\u0061\u00CA\u0304\u0062");
+ decodeBig5("\u6188\u6462", "\u0061\u00CA\u030C\u0062");
+ decodeBig5("\u6188\u6662", "\u0061\u00CA\u0062");
+ decodeBig5("\u6188\uA362", "\u0061\u00EA\u0304\u0062");
+ decodeBig5("\u6188\uA562", "\u0061\u00EA\u030C\u0062");
+ decodeBig5("\u6188\uA762", "\u0061\u00EA\u0062");
+ decodeBig5("\u6199\uD462", "\u0061\u8991\u0062");
+ decodeBig5("\u6199\uD562", "\u0061\uD85E\uDD67\u0062");
+ decodeBig5("\u6199\uD662", "\u0061\u8A29\u0062");
+ // Bad sequences (every expected output contains U+FFFD)
+ decodeBig5("\u8061", "\uFFFD\u0061");
+ decodeBig5("\uFF61", "\uFFFD\u0061");
+ decodeBig5("\uFE39", "\uFFFD\u0039");
+ decodeBig5("\u8766", "\uFFFD\u0066");
+ decodeBig5("\u8140", "\uFFFD\u0040");
+ decodeBig5("\u6181", "\u0061\uFFFD");
+ // Encoder vectors: here the second argument holds the expected bytes, packed two per char
+ // ASCII
+ encodeBig5("\u0061\u0062", "\u6162");
+ // Edge cases
+ encodeBig5("\u9EA6\u0061", "\u3F61");
+ encodeBig5("\uD858\uDE6B\u0061", "\u3F61");
+ encodeBig5("\u3000", "\uA140");
+ encodeBig5("\u20AC", "\uA3E1");
+ encodeBig5("\u4E00", "\uA440");
+ encodeBig5("\uD85D\uDE07", "\uC8A4");
+ encodeBig5("\uFFE2", "\uC8CD");
+ encodeBig5("\u79D4", "\uFEFE");
+ // Not in index (the expected bytes are 3F 61)
+ encodeBig5("\u2603\u0061", "\u3F61");
+ // duplicate low bits
+ encodeBig5("\uD840\uDFB5", "\uFD6A");
+ // prefer last
+ encodeBig5("\u2550", "\uF9F9");
+ }
+
+ private void decodeBig5(String input, String expectation) { // inherited decode() check, pinned to Encoding.BIG5
+ decode(input, expectation, Encoding.BIG5);
+ }
+
+ private void encodeBig5(String input, String expectation) { // inherited encode() check, pinned to Encoding.BIG5
+ encode(input, expectation, Encoding.BIG5);
+ }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/encoding/test/EncodingTester.java b/parser/html/java/htmlparser/test-src/nu/validator/encoding/test/EncodingTester.java
new file mode 100644
index 000000000..a910a01e9
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/encoding/test/EncodingTester.java
@@ -0,0 +1,491 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding.test;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+
+import nu.validator.encoding.Encoding;
+
+public class EncodingTester {
+
+ protected byte[] stringToBytes(String str) {
+ final byte[] packed = new byte[str.length() << 1]; // two bytes per UTF-16 code unit, high byte first
+ int out = 0;
+ for (char unit : str.toCharArray()) {
+ packed[out++] = (byte) (unit >>> 8);
+ packed[out++] = (byte) unit;
+ }
+ return packed;
+ }
+
+ protected void decode(String input, String expectation, Encoding encoding) {
+ // Decodes the bytes packed in input in three passes and reports any mismatch with expectation through err().
+ // Pass 1: the whole input at once, using the convenience method from Charset
+ byte[] bytes = stringToBytes(input);
+ ByteBuffer byteBuf = ByteBuffer.wrap(bytes);
+ CharBuffer charBuf = encoding.decode(byteBuf);
+
+ if (charBuf.remaining() != expectation.length()) {
+ err("When decoding from a single long buffer, the output length was wrong. Expected: "
+ + expectation.length() + ", got: " + charBuf.remaining(),
+ bytes, expectation);
+ return;
+ }
+
+ for (int i = 0; i < expectation.length(); i++) {
+ char expect = expectation.charAt(i);
+ char actual = charBuf.get();
+ if (actual != expect) {
+ err("When decoding from a single long buffer, failed at position "
+ + i
+ + ", expected: "
+ + charToHex(expect)
+ + ", got: "
+ + charToHex(actual), bytes, expectation);
+ return;
+ }
+ }
+
+ // Pass 2: decode with a 1-byte input buffer, one input byte per decode() call
+
+ byteBuf = ByteBuffer.allocate(1);
+ charBuf = CharBuffer.allocate(expectation.length() + 2);
+ CharsetDecoder decoder = encoding.newDecoder();
+ decoder.onMalformedInput(CodingErrorAction.REPLACE);
+ for (int i = 0; i < bytes.length; i++) {
+ byteBuf.position(0);
+ byteBuf.put(bytes[i]);
+ byteBuf.position(0);
+ CoderResult result = decoder.decode(byteBuf, charBuf,
+ (i + 1) == bytes.length);
+ if (result.isMalformed()) {
+ err("Decoder reported a malformed sequence when asked to replace at index: "
+ + i, bytes, expectation);
+ return;
+ } else if (result.isUnmappable()) {
+ err("Decoder claimed unmappable sequence, which none of these decoders should do.",
+ bytes, expectation);
+ return;
+ } else if (result.isOverflow()) {
+ err("Decoder claimed overflow when the output buffer is known to be large enough.",
+ bytes, expectation);
+ } else if (!result.isUnderflow()) {
+ err("Bogus coder result, expected underflow.", bytes,
+ expectation);
+ }
+ }
+ CoderResult result = decoder.flush(charBuf);
+ if (result.isMalformed()) {
+ err("Decoder reported a malformed sequence when asked to replace when flushing.",
+ bytes, expectation);
+ return;
+ } else if (result.isUnmappable()) {
+ err("Decoder claimed unmappable sequence when flushing, which none of these decoders should do.",
+ bytes, expectation);
+ return;
+ } else if (result.isOverflow()) {
+ err("Decoder claimed overflow when flushing when the output buffer is known to be large enough.",
+ bytes, expectation);
+ } else if (!result.isUnderflow()) {
+ err("Bogus coder result when flushing, expected underflow.", bytes,
+ expectation);
+ }
+
+ charBuf.limit(charBuf.position());
+ charBuf.position(0);
+ // NOTE(review): this pass never compares charBuf.remaining() with expectation.length(); get() throws if the output is short.
+ for (int i = 0; i < expectation.length(); i++) {
+ char expect = expectation.charAt(i);
+ char actual = charBuf.get();
+ if (actual != expect) {
+ err("When decoding one byte at a time in REPLACE mode, failed at position "
+ + i
+ + ", expected: "
+ + charToHex(expect)
+ + ", got: "
+ + charToHex(actual), bytes, expectation);
+ return;
+ }
+ }
+
+ // Pass 3: decode with a 1-char output buffer, checking each code unit as it is produced
+
+ byteBuf = ByteBuffer.wrap(bytes);
+ charBuf = CharBuffer.allocate(1);
+
+ decoder.reset(); // Let's test this while at it
+ decoder.onMalformedInput(CodingErrorAction.REPLACE);
+ int codeUnitPos = 0;
+ while (byteBuf.hasRemaining()) {
+ charBuf.position(0);
+ charBuf.put('\u0000');
+ charBuf.position(0);
+ result = decoder.decode(byteBuf, charBuf, false);
+ if (result.isMalformed()) {
+ err("Decoder reported a malformed sequence when asked to replace at index (decoding one output code unit at a time): "
+ + byteBuf.position(), bytes, expectation);
+ return;
+ } else if (result.isUnmappable()) {
+ err("Decoder claimed unmappable sequence (decoding one output code unit at a time), which none of these decoders should do.",
+ bytes, expectation);
+ return;
+ } else if (result.isUnderflow()) {
+ if (byteBuf.hasRemaining()) {
+ err("When decoding one output code unit at a time, decoder claimed underflow when there was input remaining.",
+ bytes, expectation);
+ return;
+ }
+ } else if (!result.isOverflow()) {
+ err("Bogus coder result, expected overflow.", bytes,
+ expectation);
+ }
+ if (charBuf.position() == 1) {
+ charBuf.position(0);
+ char actual = charBuf.get();
+ char expect = expectation.charAt(codeUnitPos);
+ if (actual != expect) {
+ err("When decoding one output code unit at a time in REPLACE mode, failed at position "
+ + byteBuf.position()
+ + ", expected: "
+ + charToHex(expect) + ", got: " + charToHex(actual),
+ bytes, expectation);
+ return;
+ }
+ codeUnitPos++;
+ }
+ }
+
+ charBuf.position(0);
+ charBuf.put('\u0000');
+ charBuf.position(0);
+ result = decoder.decode(byteBuf, charBuf, true);
+
+ if (charBuf.position() == 1) {
+ charBuf.position(0);
+ char actual = charBuf.get();
+ char expect = expectation.charAt(codeUnitPos);
+ if (actual != expect) {
+ err("When decoding one output code unit at a time in REPLACE mode, failed at position "
+ + byteBuf.position()
+ + ", expected: "
+ + charToHex(expect) + ", got: " + charToHex(actual),
+ bytes, expectation);
+ return;
+ }
+ codeUnitPos++;
+ }
+
+ charBuf.position(0);
+ charBuf.put('\u0000');
+ charBuf.position(0);
+ result = decoder.flush(charBuf);
+ if (result.isMalformed()) {
+ err("Decoder reported a malformed sequence when asked to replace when flushing (one output at a time).",
+ bytes, expectation);
+ return;
+ } else if (result.isUnmappable()) {
+ err("Decoder claimed unmappable sequence when flushing, which none of these decoders should do (one output at a time).",
+ bytes, expectation);
+ return;
+ } else if (result.isOverflow()) {
+ err("Decoder claimed overflow when flushing when the output buffer is known to be large enough (one output at a time).",
+ bytes, expectation);
+ } else if (!result.isUnderflow()) {
+ err("Bogus coder result when flushing, expected underflow (one output at a time).",
+ bytes, expectation);
+ }
+
+ if (charBuf.position() == 1) {
+ charBuf.position(0);
+ char actual = charBuf.get();
+ char expect = expectation.charAt(codeUnitPos);
+ if (actual != expect) {
+ err("When decoding one output code unit at a time in REPLACE mode, failed when flushing, expected: "
+ + charToHex(expect) + ", got: " + charToHex(actual),
+ bytes, expectation);
+ return;
+ }
+ }
+
+ // TODO: 2 bytes at a time starting at 0 and 2 bytes at a time starting
+ // at 1
+ }
+
+ protected void encode(String input, String expectation, Encoding encoding) {
+ byte[] expectedBytes = stringToBytes(expectation);
+ CharBuffer charBuf = CharBuffer.wrap(input);
+ // Encodes input in three passes and reports any mismatch with expectedBytes through err().
+ // Pass 1: the whole input at once, using the convenience method from Charset
+
+ ByteBuffer byteBuf = encoding.encode(charBuf);
+
+ if (byteBuf.remaining() != expectedBytes.length) {
+ err("When encoding from a single long buffer, the output length was wrong. Expected: "
+ + expectedBytes.length + ", got: " + byteBuf.remaining(),
+ input, expectedBytes);
+ return;
+ }
+
+ for (int i = 0; i < expectedBytes.length; i++) {
+ byte expect = expectedBytes[i];
+ byte actual = byteBuf.get();
+ if (actual != expect) {
+ err("When encoding from a single long buffer, failed at position "
+ + i
+ + ", expected: "
+ + byteToHex(expect)
+ + ", got: "
+ + byteToHex(actual), input, expectedBytes);
+ return;
+ }
+ }
+
+ // Pass 2: encode with a 1-char input buffer, one input char per encode() call
+
+ charBuf = CharBuffer.allocate(1);
+ byteBuf = ByteBuffer.allocate(expectedBytes.length + 2);
+ CharsetEncoder encoder = encoding.newEncoder();
+ encoder.onMalformedInput(CodingErrorAction.REPLACE);
+ encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+ for (int i = 0; i < input.length(); i++) {
+ charBuf.position(0);
+ charBuf.put(input.charAt(i));
+ charBuf.position(0);
+ CoderResult result = encoder.encode(charBuf, byteBuf,
+ (i + 1) == input.length());
+ if (result.isMalformed()) {
+ err("Encoder reported a malformed sequence when asked to replace at index: "
+ + i, input, expectedBytes);
+ return;
+ } else if (result.isUnmappable()) {
+ err("Encoder reported an unmappable sequence when asked to replace at index: "
+ + i, input, expectedBytes);
+ return;
+ } else if (result.isOverflow()) {
+ err("Encoder claimed overflow when the output buffer is known to be large enough.",
+ input, expectedBytes);
+ } else if (!result.isUnderflow()) {
+ err("Bogus coder result, expected underflow.", input,
+ expectedBytes);
+ }
+ }
+ CoderResult result = encoder.flush(byteBuf);
+ if (result.isMalformed()) {
+ err("Encoder reported a malformed sequence when asked to replace when flushing.",
+ input, expectedBytes);
+ return;
+ } else if (result.isUnmappable()) {
+ err("Encoder reported an unmappable sequence when asked to replace when flushing.",
+ input, expectedBytes);
+ return;
+ } else if (result.isOverflow()) {
+ err("Encoder claimed overflow when flushing when the output buffer is known to be large enough.",
+ input, expectedBytes);
+ } else if (!result.isUnderflow()) {
+ err("Bogus coder result when flushing, expected underflow.", input,
+ expectedBytes);
+
+ }
+
+ byteBuf.limit(byteBuf.position());
+ byteBuf.position(0);
+ // NOTE(review): this pass never compares byteBuf.remaining() with expectedBytes.length; get() throws if the output is short.
+ for (int i = 0; i < expectedBytes.length; i++) {
+ byte expect = expectedBytes[i];
+ byte actual = byteBuf.get();
+ if (actual != expect) {
+ err("When encoding one char at a time in REPLACE mode, failed at position "
+ + i
+ + ", expected: "
+ + byteToHex(expect)
+ + ", got: "
+ + byteToHex(actual), input, expectedBytes);
+ return;
+ }
+ }
+
+ // Pass 3: encode with a 1-byte output buffer, checking each byte as it is produced
+
+ charBuf = CharBuffer.wrap(input);
+ byteBuf = ByteBuffer.allocate(1);
+
+ encoder.reset(); // Let's test this while at it
+ encoder.onMalformedInput(CodingErrorAction.REPLACE);
+ encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+ int bytePos = 0;
+ while (charBuf.hasRemaining()) {
+ byteBuf.position(0);
+ byteBuf.put((byte)0);
+ byteBuf.position(0);
+ result = encoder.encode(charBuf, byteBuf, false);
+ if (result.isMalformed()) {
+ err("Encoder reported a malformed sequence when asked to replace at index (encoding one output byte at a time): "
+ + charBuf.position(), input, expectedBytes);
+ return;
+ } else if (result.isUnmappable()) {
+ err("Encoder reported an unmappable sequence when asked to replace at index (encoding one output byte at a time): "
+ + charBuf.position(), input, expectedBytes);
+ return;
+ } else if (result.isUnderflow()) {
+ if (charBuf.hasRemaining()) {
+ err("When encoding one output byte at a time, encoder claimed underflow when there was input remaining.",
+ input, expectedBytes);
+ return;
+ }
+ } else if (!result.isOverflow()) {
+ err("Bogus coder result, expected overflow.", input, expectedBytes);
+ }
+ if (byteBuf.position() == 1) {
+ byteBuf.position(0);
+ byte actual = byteBuf.get();
+ byte expect = expectedBytes[bytePos];
+ if (actual != expect) {
+ err("When encoding one output byte at a time in REPLACE mode, failed at position "
+ + charBuf.position()
+ + ", expected: "
+ + byteToHex(expect) + ", got: " + byteToHex(actual),
+ input, expectedBytes);
+ return;
+ }
+ bytePos++;
+ }
+ }
+
+ byteBuf.position(0);
+ byteBuf.put((byte)0);
+ byteBuf.position(0);
+ result = encoder.encode(charBuf, byteBuf, true);
+
+ if (byteBuf.position() == 1) {
+ byteBuf.position(0);
+ byte actual = byteBuf.get();
+ byte expect = expectedBytes[bytePos];
+ if (actual != expect) {
+ err("When encoding one output byte at a time in REPLACE mode, failed at position "
+ + charBuf.position()
+ + ", expected: "
+ + byteToHex(expect) + ", got: " + byteToHex(actual),
+ input, expectedBytes);
+ return;
+ }
+ bytePos++;
+ }
+
+ byteBuf.position(0);
+ byteBuf.put((byte)0);
+ byteBuf.position(0);
+ result = encoder.flush(byteBuf);
+ if (result.isMalformed()) {
+ err("Encoder reported a malformed sequence when asked to replace when flushing (one output at a time).",
+ input, expectedBytes);
+ return;
+ } else if (result.isUnmappable()) {
+ err("Encoder reported an unmappable sequence when asked to replace when flushing (one output at a time).",
+ input, expectedBytes);
+ return;
+ } else if (result.isOverflow()) {
+ err("Encoder claimed overflow when flushing when the output buffer is known to be large enough (one output at a time).",
+ input, expectedBytes);
+ } else if (!result.isUnderflow()) {
+ err("Bogus coder result when flushing, expected underflow (one output at a time).",
+ input, expectedBytes);
+ }
+
+ if (byteBuf.position() == 1) {
+ byteBuf.position(0);
+ byte actual = byteBuf.get();
+ byte expect = expectedBytes[bytePos];
+ if (actual != expect) {
+ err("When encoding one output byte at a time in REPLACE mode, failed when flushing, expected: "
+ + byteToHex(expect) + ", got: " + byteToHex(actual),
+ input, expectedBytes);
+ return;
+ }
+ }
+
+ // TODO: 2 bytes at a time starting at 0 and 2 bytes at a time starting
+ // at 1
+ }
+
+ private String charToHex(char c) {
+ // Hexadecimal form of the code unit, left-padded with zeros to four digits.
+ String digits = Integer.toHexString(c);
+ int missing = 4 - digits.length();
+ if (missing <= 0) {
+ return digits;
+ }
+ StringBuilder padded = new StringBuilder(4);
+ for (int k = 0; k < missing; k++) {
+ padded.append('0');
+ }
+ return padded.append(digits).toString();
+ }
+
+ private String byteToHex(byte b) {
+ // Mask first so that a negative byte is rendered as two digits, not eight.
+ int unsigned = b & 0xFF;
+ String digits = Integer.toHexString(unsigned);
+ if (digits.length() == 1) {
+ return "0" + digits;
+ }
+ return digits;
+ }
+
+ private void err(String msg, byte[] bytes, String expectation) {
+ // Decode failure report on System.err: the message, then the input
+ // bytes in hex, then the expected UTF-16 code units in hex.
+ System.err.println(msg);
+ StringBuilder inputLine = new StringBuilder("Input:");
+ for (byte b : bytes) {
+ inputLine.append(' ').append(byteToHex(b));
+ }
+ StringBuilder expectLine = new StringBuilder("Expect:");
+ for (char unit : expectation.toCharArray()) {
+ expectLine.append(' ').append(charToHex(unit));
+ }
+ System.err.println(inputLine);
+ System.err.println(expectLine);
+ }
+
+ private void err(String msg, String chars, byte[] expectation) {
+ // Encode failure report on System.err: the message, then the input
+ // UTF-16 code units in hex, then the expected bytes in hex.
+ System.err.println(msg);
+ StringBuilder inputLine = new StringBuilder("Input:");
+ for (char unit : chars.toCharArray()) {
+ inputLine.append(' ').append(charToHex(unit));
+ }
+ StringBuilder expectLine = new StringBuilder("Expect:");
+ for (byte b : expectation) {
+ expectLine.append(' ').append(byteToHex(b));
+ }
+ System.err.println(inputLine);
+ System.err.println(expectLine);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java
new file mode 100644
index 000000000..3337a6555
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CodingErrorAction;
+
+import nu.validator.htmlparser.common.Heuristics;
+import nu.validator.htmlparser.io.Encoding;
+import nu.validator.htmlparser.io.HtmlInputStreamReader;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.SAXException;
+
+public class DecoderLoopTester {
+ // Added to (codePoint >> 10) to form the lead surrogate of a supplementary code point.
+ private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
+ // How many consecutive code points, starting at U+10000, the test text contains.
+ private static final int NUMBER_OF_ASTRAL_CHARS = 24500;
+ // Builds U+FEFF + padding 'x' chars + the astral run, encodes it as UTF-8 and compares what two readers decode.
+ private void runTest(int padding) throws SAXException, IOException {
+ Encoding utf8 = Encoding.forName("UTF-8");
+ char[] charArr = new char[1 + padding + 2 * NUMBER_OF_ASTRAL_CHARS];
+ byte[] byteArr;
+ int i = 0;
+ charArr[i++] = '\uFEFF';
+ for (int j = 0; j < padding; j++) {
+ charArr[i++] = 'x';
+ }
+ for (int j = 0; j < NUMBER_OF_ASTRAL_CHARS; j++) {
+ int value = 0x10000 + j;
+ charArr[i++] = (char) (LEAD_OFFSET + (value >> 10));
+ charArr[i++] = (char) (0xDC00 + (value & 0x3FF));
+ }
+ CharBuffer charBuffer = CharBuffer.wrap(charArr);
+ CharsetEncoder enc = utf8.newEncoder();
+ enc.onMalformedInput(CodingErrorAction.REPORT);
+ enc.onUnmappableCharacter(CodingErrorAction.REPORT);
+ ByteBuffer byteBuffer = enc.encode(charBuffer);
+ byteArr = new byte[byteBuffer.limit()];
+ byteBuffer.get(byteArr);
+
+ ErrorHandler eh = new SystemErrErrorHandler();
+ compare(new HtmlInputStreamReader(new ByteArrayInputStream(byteArr), eh, null, null, Heuristics.NONE), padding, charArr, byteArr);
+ compare(new HtmlInputStreamReader(new ByteArrayInputStream(byteArr), eh, null, null, utf8), padding, charArr, byteArr);
+ }
+
+ /**
+ * Drains reader and fails unless it yields exactly the chars of charArr (padding and byteArr are not used).
+ * @param reader the reader under test
+ * @param charArr the expected UTF-16 code units
+ * @throws SAXException declared for the read call on the reader
+ * @throws IOException declared for the read call on the reader
+ */
+ private void compare(HtmlInputStreamReader reader, int padding, char[] charArr, byte[] byteArr) throws SAXException, IOException {
+ char[] readBuffer = new char[2048];
+ int offset = 0;
+ int num = 0;
+ int readNum = 0;
+ while ((num = reader.read(readBuffer)) != -1) {
+ for (int j = 0; j < num; j++) {
+ System.out.println(offset + j); // trace: index of the char being compared
+ if (readBuffer[j] != charArr[offset + j]) {
+ throw new RuntimeException("Test failed. Char: " + Integer.toHexString(readBuffer[j]) + " j: " + j + " readNum: " + readNum);
+ }
+ }
+ offset += num;
+ readNum++;
+ }
+ if (offset != charArr.length) { // truncated output must not pass silently
+ throw new RuntimeException("Test failed. Expected " + charArr.length + " chars but the reader produced " + offset);
+ }
+ }
+ // Repeats the check with 0, 1, 2 and 3 padding chars.
+ void runTests() throws SAXException, IOException {
+ for (int i = 0; i < 4; i++) {
+ runTest(i);
+ }
+ }
+
+ /**
+ * @param args ignored; all padding variants are run and a mismatch surfaces as a RuntimeException
+ * @throws IOException
+ * @throws SAXException
+ */
+ public static void main(String[] args) throws IOException, SAXException {
+ new DecoderLoopTester().runTests();
+ }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomIdTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomIdTester.java
new file mode 100644
index 000000000..a3866f5d9
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomIdTester.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import org.w3c.dom.Document;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import nu.validator.htmlparser.dom.HtmlDocumentBuilder;
+
+public class DomIdTester {
+
+ private static final String testSrc = "<div><h1 id='bar' class='foo'>buoeoa</h1><p id='foo'>uoeuo</p></div>";
+
+ /**
+ * Parses {@code testSrc} and prints the local name of the element whose id is "foo".
+ * @param args ignored
+ * @throws IOException declared for the parse call
+ * @throws SAXException declared for the parse call
+ */
+ public static void main(String[] args) throws SAXException, IOException {
+ Document parsed = new HtmlDocumentBuilder().parse(new InputSource(new StringReader(testSrc)));
+ System.out.println(parsed.getElementById("foo").getLocalName());
+ }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomTest.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomTest.java
new file mode 100644
index 000000000..07d054b9e
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomTest.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+
+public class DomTest {
+ public static void main(String[] args) throws Exception {
+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+ // Keep namespace awareness on: not setting this causes pain and suffering with SVG.
+ factory.setNamespaceAware(true);
+ Document document = factory.newDocumentBuilder().newDocument();
+ Element root = document.createElementNS("http://www.w3.org/1999/xhtml", "html");
+ root.setAttribute("xmlns:foo", "bar");
+ }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/EncodingTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/EncodingTester.java
new file mode 100644
index 000000000..95cd3018e
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/EncodingTester.java
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+
+import nu.validator.htmlparser.common.Heuristics;
+import nu.validator.htmlparser.io.Encoding;
+import nu.validator.htmlparser.io.HtmlInputStreamReader;
+
+import org.xml.sax.SAXException;
+
+/**
+ * Command-line harness that runs charset-sniffing test cases read from
+ * aggregate test files and reports each result on <code>System.err</code>.
+ *
+ * <p>As consumed by this class, one test case is: a label line (skipped), the
+ * test document (read through an {@link UntilHashInputStream}), a second label
+ * line (skipped) and one line holding the expected encoding label, terminated
+ * by a line feed.
+ */
+public class EncodingTester {
+
+    /** The stream holding all test cases of one file, consumed sequentially. */
+    private final InputStream aggregateStream;
+
+    /** Scratch buffer for the expected encoding label of the current test. */
+    private final StringBuilder builder = new StringBuilder();
+
+    /**
+     * Creates a tester reading from the given stream. The stream is not closed
+     * by this class; the caller owns it.
+     *
+     * @param aggregateStream
+     *            the stream containing the test cases
+     */
+    public EncodingTester(InputStream aggregateStream) {
+        this.aggregateStream = aggregateStream;
+    }
+
+    /**
+     * Runs test cases until {@link #runTest()} reports that none are left.
+     */
+    private void runTests() throws IOException, SAXException {
+        while (runTest()) {
+            // spin
+        }
+    }
+
+    /**
+     * Runs a single test case and prints "Success." or a failure description
+     * to <code>System.err</code>.
+     *
+     * @return <code>true</code> if a test case was run, <code>false</code> if
+     *         the input ended (cleanly or prematurely)
+     */
+    private boolean runTest() throws IOException, SAXException {
+        if (skipLabel()) {
+            // End of input where a new test case would start: we are done.
+            return false;
+        }
+        UntilHashInputStream stream = new UntilHashInputStream(aggregateStream);
+        HtmlInputStreamReader reader = new HtmlInputStreamReader(stream, null,
+                null, null, Heuristics.NONE);
+        Charset charset = reader.getCharset();
+        // NOTE(review): presumably this finishes the current test document
+        // without closing aggregateStream, since reading continues below --
+        // confirm against UntilHashInputStream.
+        stream.close();
+        if (skipLabel()) {
+            System.err.println("Premature end of test data.");
+            return false;
+        }
+        // Collect the expected encoding label, one byte per char, up to the
+        // line feed that terminates it.
+        builder.setLength(0);
+        loop: for (;;) {
+            int b = aggregateStream.read();
+            switch (b) {
+                case '\n':
+                    break loop;
+                case -1:
+                    System.err.println("Premature end of test data.");
+                    return false;
+                default:
+                    builder.append(((char) b));
+            }
+        }
+        String sniffed = charset.name();
+        String expected = Encoding.forName(builder.toString()).newDecoder().charset().name();
+        if (expected.equalsIgnoreCase(sniffed)) {
+            System.err.println("Success.");
+            // System.err.println(stream);
+        } else {
+            System.err.println("Failure. Expected: " + expected + " got "
+                    + sniffed + ".");
+            System.err.println(stream);
+        }
+        return true;
+    }
+
+    /**
+     * Discards one byte unconditionally and then everything up to and
+     * including the next line feed.
+     *
+     * @return <code>true</code> if the end of the stream was reached before a
+     *         line feed was seen, <code>false</code> otherwise
+     */
+    private boolean skipLabel() throws IOException {
+        int b = aggregateStream.read();
+        if (b == -1) {
+            return true;
+        }
+        for (;;) {
+            b = aggregateStream.read();
+            if (b == -1) {
+                return true;
+            } else if (b == 0x0A) {
+                return false;
+            }
+        }
+    }
+
+    /**
+     * Runs the tests in every file named on the command line. Each file is
+     * closed once its tests have run, including when a test throws.
+     *
+     * @param args
+     *            paths of the aggregate test files
+     * @throws SAXException
+     *             if the sniffing reader reports one
+     * @throws IOException
+     *             if a file cannot be opened, read or closed
+     */
+    public static void main(String[] args) throws IOException, SAXException {
+        for (int i = 0; i < args.length; i++) {
+            InputStream in = new FileInputStream(args[i]);
+            try {
+                EncodingTester tester = new EncodingTester(in);
+                tester.runTests();
+            } finally {
+                in.close();
+            }
+        }
+    }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java
new file mode 100644
index 000000000..2fcfc4960
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import nu.validator.htmlparser.common.TokenHandler;
+import nu.validator.htmlparser.impl.ElementName;
+import nu.validator.htmlparser.impl.HtmlAttributes;
+import nu.validator.htmlparser.impl.Tokenizer;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+import com.sdicons.json.model.JSONArray;
+import com.sdicons.json.model.JSONBoolean;
+import com.sdicons.json.model.JSONNull;
+import com.sdicons.json.model.JSONObject;
+import com.sdicons.json.model.JSONString;
+
+/**
+ * Token handler that records the tokenizer's output as a JSON array so that
+ * it can be compared against expected test output.
+ *
+ * <p>Tokens are recorded as follows:
+ * <ul>
+ * <li><code>["Character", text]</code> for a run of consecutive character
+ * data (runs are coalesced until another token or EOF arrives)</li>
+ * <li><code>["Comment", text]</code></li>
+ * <li><code>["DOCTYPE", name, publicId-or-null, systemId-or-null,
+ * !forceQuirks]</code></li>
+ * <li><code>["StartTag", name, {attributes}]</code>, with a trailing
+ * <code>true</code> when the tag is self-closing</li>
+ * <li><code>["EndTag", name]</code></li>
+ * <li>the bare string <code>"ParseError"</code> for each reported error</li>
+ * </ul>
+ */
+public class JSONArrayTokenHandler implements TokenHandler, ErrorHandler {
+
+    private static final JSONString DOCTYPE = new JSONString("DOCTYPE");
+
+    private static final JSONString START_TAG = new JSONString("StartTag");
+
+    private static final JSONString END_TAG = new JSONString("EndTag");
+
+    private static final JSONString COMMENT = new JSONString("Comment");
+
+    private static final JSONString CHARACTER = new JSONString("Character");
+
+    private static final JSONString PARSE_ERROR = new JSONString("ParseError");
+
+    private static final char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
+
+    /** Pending character data that has not yet been emitted as a token. */
+    private final StringBuilder builder = new StringBuilder();
+
+    /** Tokens of the current run; replaced in {@link #startTokenization}. */
+    private JSONArray array = null;
+
+    private int contentModelFlag;
+
+    private String contentModelElement;
+
+    /**
+     * Stores the tokenizer state and end tag expectation to apply when the
+     * next tokenization starts. They are applied only if
+     * <code>contentModelElement</code> is non-null.
+     *
+     * @param contentModelFlag
+     *            the tokenizer state to start in
+     * @param contentModelElement
+     *            the expected end tag name, or <code>null</code> to leave
+     *            the tokenizer state untouched
+     */
+    public void setContentModelFlag(int contentModelFlag, String contentModelElement) {
+        this.contentModelFlag = contentModelFlag;
+        this.contentModelElement = contentModelElement;
+    }
+
+    /**
+     * Buffers character data; it is emitted when the next non-character token
+     * (or EOF) arrives.
+     */
+    public void characters(char[] buf, int start, int length)
+            throws SAXException {
+        builder.append(buf, start, length);
+    }
+
+    /**
+     * Emits the buffered character data, if any, as one "Character" token and
+     * clears the buffer.
+     */
+    private void flushCharacters() {
+        if (builder.length() > 0) {
+            JSONArray token = newToken(CHARACTER);
+            token.getValue().add(new JSONString(builder.toString()));
+            array.getValue().add(token);
+            builder.setLength(0);
+        }
+    }
+
+    /**
+     * Creates a token array whose first item is the given type marker.
+     */
+    private static JSONArray newToken(JSONString type) {
+        JSONArray token = new JSONArray();
+        token.getValue().add(type);
+        return token;
+    }
+
+    public void comment(char[] buf, int start, int length) throws SAXException {
+        flushCharacters();
+        JSONArray token = newToken(COMMENT);
+        token.getValue().add(new JSONString(new String(buf, start, length)));
+        array.getValue().add(token);
+    }
+
+    public void doctype(String name, String publicIdentifier, String systemIdentifier, boolean forceQuirks) throws SAXException {
+        flushCharacters();
+        JSONArray token = newToken(DOCTYPE);
+        token.getValue().add(new JSONString(name));
+        token.getValue().add(publicIdentifier == null ? JSONNull.NULL : new JSONString(publicIdentifier));
+        token.getValue().add(systemIdentifier == null ? JSONNull.NULL : new JSONString(systemIdentifier));
+        // The recorded flag is the negation of forceQuirks.
+        token.getValue().add(new JSONBoolean(!forceQuirks));
+        array.getValue().add(token);
+    }
+
+    public void endTag(ElementName eltName) throws SAXException {
+        String name = eltName.name;
+        flushCharacters();
+        JSONArray token = newToken(END_TAG);
+        token.getValue().add(new JSONString(name));
+        array.getValue().add(token);
+    }
+
+    public void eof() throws SAXException {
+        flushCharacters();
+    }
+
+    /**
+     * Starts a fresh token array and, if an end tag expectation was
+     * configured, puts the tokenizer into the configured state.
+     */
+    public void startTokenization(Tokenizer self) throws SAXException {
+        array = new JSONArray();
+        if (contentModelElement != null) {
+            self.setStateAndEndTagExpectation(contentModelFlag, contentModelElement);
+        }
+    }
+
+    public void startTag(ElementName eltName, HtmlAttributes attributes,
+            boolean selfClosing) throws SAXException {
+        String name = eltName.name;
+        flushCharacters();
+        JSONArray token = newToken(START_TAG);
+        token.getValue().add(new JSONString(name));
+        JSONObject attrs = new JSONObject();
+        for (int i = 0; i < attributes.getLength(); i++) {
+            attrs.getValue().put(attributes.getQNameNoBoundsCheck(i),
+                    new JSONString(attributes.getValueNoBoundsCheck(i)));
+        }
+        token.getValue().add(attrs);
+        if (selfClosing) {
+            token.getValue().add(JSONBoolean.TRUE);
+        }
+        array.getValue().add(token);
+    }
+
+    public boolean wantsComments() throws SAXException {
+        return true;
+    }
+
+    /**
+     * Records a parse error marker; the error details are not kept.
+     */
+    public void error(SAXParseException exception) throws SAXException {
+        flushCharacters();
+        array.getValue().add(PARSE_ERROR);
+    }
+
+    /**
+     * Fatal errors are not expected here; fails loudly and keeps the
+     * triggering exception as the cause so that it shows up in stack traces.
+     */
+    public void fatalError(SAXParseException exception) throws SAXException {
+        throw new RuntimeException("Should never happen.", exception);
+    }
+
+    /** Warnings are ignored. */
+    public void warning(SAXParseException exception) throws SAXException {
+    }
+
+    /**
+     * Returns the array.
+     *
+     * @return the tokens recorded since the last call to
+     *         {@link #startTokenization}, or <code>null</code> if
+     *         tokenization has never started
+     */
+    public JSONArray getArray() {
+        return array;
+    }
+
+    public void endTokenization() throws SAXException {
+
+    }
+
+    /** Buffers U+FFFD as character data. */
+    @Override public void zeroOriginatingReplacementCharacter()
+            throws SAXException {
+        builder.append(REPLACEMENT_CHARACTER, 0, 1);
+    }
+
+    @Override public boolean cdataSectionAllowed() throws SAXException {
+        return false;
+    }
+
+    @Override public void ensureBufferSpace(int inputLength)
+            throws SAXException {
+    }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/ListErrorHandler.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/ListErrorHandler.java
new file mode 100644
index 000000000..9a207f277
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/ListErrorHandler.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.util.LinkedList;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+public class ListErrorHandler implements ErrorHandler {
+
+ private boolean fatal = false;
+
+ private LinkedList<String> errors = new LinkedList<String>();
+
+ public void error(SAXParseException spe) throws SAXException {
+ errors.add(Integer.toString(spe.getColumnNumber()) + ": " + spe.getMessage());
+ }
+
+ public void fatalError(SAXParseException arg0) throws SAXException {
+ fatal = true;
+ }
+
+ public void warning(SAXParseException arg0) throws SAXException {
+ }
+
+ /**
+ * Returns the errors.
+ *
+ * @return the errors
+ */
+ public LinkedList<String> getErrors() {
+ return errors;
+ }
+
+ /**
+ * Returns the fatal.
+ *
+ * @return the fatal
+ */
+ public boolean isFatal() {
+ return fatal;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/SystemErrErrorHandler.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/SystemErrErrorHandler.java
new file mode 100644
index 000000000..9ee490b9e
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/SystemErrErrorHandler.java
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2005, 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+
+import javax.xml.transform.ErrorListener;
+import javax.xml.transform.SourceLocator;
+import javax.xml.transform.TransformerException;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+/**
+ * @version $Id$
+ * @author hsivonen
+ */
+public class SystemErrErrorHandler implements ErrorHandler, ErrorListener {
+
+ private Writer out;
+
+ private boolean inError = false;
+
+ public SystemErrErrorHandler() {
+ try {
+ out = new OutputStreamWriter(System.err, "UTF-8");
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException)
+ */
+ public void warning(SAXParseException e) throws SAXException {
+ try {
+ out.write("Warning:\n");
+ out.write(e.getMessage());
+ out.write("\nFile: ");
+ String systemId = e.getSystemId();
+ out.write((systemId == null) ? "Unknown" : systemId);
+ out.write("\nLine: ");
+ out.write(Integer.toString(e.getLineNumber()));
+ out.write(" Col: ");
+ out.write(Integer.toString(e.getColumnNumber()));
+ out.write("\n\n");
+ out.flush();
+ } catch (IOException e1) {
+ throw new SAXException(e1);
+ }
+ }
+
+ /**
+ * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException)
+ */
+ public void error(SAXParseException e) throws SAXException {
+ inError = true;
+ try {
+ out.write("Error:\n");
+ out.write(e.getMessage());
+ out.write("\nFile: ");
+ String systemId = e.getSystemId();
+ out.write((systemId == null) ? "Unknown" : systemId);
+ out.write("\nLine: ");
+ out.write(Integer.toString(e.getLineNumber()));
+ out.write(" Col: ");
+ out.write(Integer.toString(e.getColumnNumber()));
+ out.write("\n\n");
+ out.flush();
+ } catch (IOException e1) {
+ throw new SAXException(e1);
+ }
+ }
+
+ /**
+ * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException)
+ */
+ public void fatalError(SAXParseException e) throws SAXException {
+ inError = true;
+ try {
+ out.write("Fatal Error:\n");
+ out.write(e.getMessage());
+ out.write("\nFile: ");
+ String systemId = e.getSystemId();
+ out.write((systemId == null) ? "Unknown" : systemId);
+ out.write("\nLine: ");
+ out.write(Integer.toString(e.getLineNumber()));
+ out.write(" Col: ");
+ out.write(Integer.toString(e.getColumnNumber()));
+ out.write("\n\n");
+ out.flush();
+ } catch (IOException e1) {
+ throw new SAXException(e1);
+ }
+ }
+
+ /**
+ * Returns the inError.
+ *
+ * @return the inError
+ */
+ public boolean isInError() {
+ return inError;
+ }
+
+ public void reset() {
+ inError = false;
+ }
+
+ public void error(TransformerException e) throws TransformerException {
+ inError = true;
+ try {
+ out.write("Error:\n");
+ out.write(e.getMessage());
+ SourceLocator sourceLocator = e.getLocator();
+ if (sourceLocator != null) {
+ out.write("\nFile: ");
+ String systemId = sourceLocator.getSystemId();
+ out.write((systemId == null) ? "Unknown" : systemId);
+ out.write("\nLine: ");
+ out.write(Integer.toString(sourceLocator.getLineNumber()));
+ out.write(" Col: ");
+ out.write(Integer.toString(sourceLocator.getColumnNumber()));
+ }
+ out.write("\n\n");
+ out.flush();
+ } catch (IOException e1) {
+ throw new TransformerException(e1);
+ }
+ }
+
+ public void fatalError(TransformerException e)
+ throws TransformerException {
+ inError = true;
+ try {
+ out.write("Fatal Error:\n");
+ out.write(e.getMessage());
+ SourceLocator sourceLocator = e.getLocator();
+ if (sourceLocator != null) {
+ out.write("\nFile: ");
+ String systemId = sourceLocator.getSystemId();
+ out.write((systemId == null) ? "Unknown" : systemId);
+ out.write("\nLine: ");
+ out.write(Integer.toString(sourceLocator.getLineNumber()));
+ out.write(" Col: ");
+ out.write(Integer.toString(sourceLocator.getColumnNumber()));
+ }
+ out.write("\n\n");
+ out.flush();
+ } catch (IOException e1) {
+ throw new TransformerException(e1);
+ }
+ }
+
+ public void warning(TransformerException e)
+ throws TransformerException {
+ try {
+ out.write("Warning:\n");
+ out.write(e.getMessage());
+ SourceLocator sourceLocator = e.getLocator();
+ if (sourceLocator != null) {
+ out.write("\nFile: ");
+ String systemId = sourceLocator.getSystemId();
+ out.write((systemId == null) ? "Unknown" : systemId);
+ out.write("\nLine: ");
+ out.write(Integer.toString(sourceLocator.getLineNumber()));
+ out.write(" Col: ");
+ out.write(Integer.toString(sourceLocator.getColumnNumber()));
+ }
+ out.write("\n\n");
+ out.flush();
+ } catch (IOException e1) {
+ throw new TransformerException(e1);
+ }
+ }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenPrinter.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenPrinter.java
new file mode 100644
index 000000000..0fa5972c8
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenPrinter.java
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+
+import nu.validator.htmlparser.common.TokenHandler;
+import nu.validator.htmlparser.impl.ElementName;
+import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
+import nu.validator.htmlparser.impl.HtmlAttributes;
+import nu.validator.htmlparser.impl.Tokenizer;
+import nu.validator.htmlparser.io.Driver;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+public class TokenPrinter implements TokenHandler, ErrorHandler {
+
+ private final Writer writer;
+
+ public void characters(char[] buf, int start, int length)
+ throws SAXException {
+ try {
+ boolean lineStarted = true;
+ writer.write('-');
+ for (int i = start; i < start + length; i++) {
+ if (!lineStarted) {
+ writer.write("\n-");
+ lineStarted = true;
+ }
+ char c = buf[i];
+ if (c == '\n') {
+ writer.write("\\n");
+ lineStarted = false;
+ } else {
+ writer.write(c);
+ }
+ }
+ writer.write('\n');
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ }
+
+ public void comment(char[] buf, int start, int length) throws SAXException {
+ try {
+ writer.write('!');
+ writer.write(buf, start, length);
+ writer.write('\n');
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ }
+
+ public void doctype(String name, String publicIdentifier, String systemIdentifier, boolean forceQuirks) throws SAXException {
+ try {
+ writer.write('D');
+ writer.write(name);
+ writer.write(' ');
+ writer.write("" + forceQuirks);
+ writer.write('\n');
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ }
+
+ public void endTag(ElementName eltName) throws SAXException {
+ try {
+ writer.write(')');
+ writer.write(eltName.name);
+ writer.write('\n');
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ }
+
+ public void eof() throws SAXException {
+ try {
+ writer.write("E\n");
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ }
+
+ public void startTokenization(Tokenizer self) throws SAXException {
+
+ }
+
+ public void startTag(ElementName eltName, HtmlAttributes attributes, boolean selfClosing)
+ throws SAXException {
+ try {
+ writer.write('(');
+ writer.write(eltName.name);
+ writer.write('\n');
+ for (int i = 0; i < attributes.getLength(); i++) {
+ writer.write('A');
+ writer.write(attributes.getQNameNoBoundsCheck(i));
+ writer.write(' ');
+ writer.write(attributes.getValueNoBoundsCheck(i));
+ writer.write('\n');
+ }
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ }
+
+ public boolean wantsComments() throws SAXException {
+ return true;
+ }
+
+ public static void main(String[] args) throws SAXException, IOException {
+ TokenPrinter printer = new TokenPrinter(new OutputStreamWriter(System.out, "UTF-8"));
+ Driver tokenizer = new Driver(new ErrorReportingTokenizer(printer));
+ tokenizer.setErrorHandler(printer);
+ File file = new File(args[0]);
+ InputSource is = new InputSource(new FileInputStream(file));
+ is.setSystemId(file.toURI().toASCIIString());
+ tokenizer.tokenize(is);
+ }
+
+ /**
+ * @param writer
+ */
+ public TokenPrinter(final Writer writer) {
+ this.writer = writer;
+ }
+
+ public void error(SAXParseException exception) throws SAXException {
+ try {
+ writer.write("R ");
+ writer.write(exception.getMessage());
+ writer.write("\n");
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ }
+
+ public void fatalError(SAXParseException exception) throws SAXException {
+ try {
+ writer.write("F ");
+ writer.write(exception.getMessage());
+ writer.write("\n");
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ }
+
+ public void warning(SAXParseException exception) throws SAXException {
+ try {
+ writer.write("W ");
+ writer.write(exception.getMessage());
+ writer.write("\n");
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ }
+
+ public void endTokenization() throws SAXException {
+ try {
+ writer.flush();
+ writer.close();
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ }
+
+ @Override public void zeroOriginatingReplacementCharacter()
+ throws SAXException {
+ try {
+ writer.write("0\n");
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ }
+
+ @Override public boolean cdataSectionAllowed() throws SAXException {
+ return false;
+ }
+
+ @Override public void ensureBufferSpace(int inputLength)
+ throws SAXException {
+ }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenizerTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenizerTester.java
new file mode 100644
index 000000000..76ea7543a
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenizerTester.java
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.io.StringReader;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
+import nu.validator.htmlparser.impl.Tokenizer;
+import nu.validator.htmlparser.io.Driver;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import antlr.RecognitionException;
+import antlr.TokenStreamException;
+
+import com.sdicons.json.model.JSONArray;
+import com.sdicons.json.model.JSONObject;
+import com.sdicons.json.model.JSONString;
+import com.sdicons.json.model.JSONValue;
+import com.sdicons.json.parser.JSONParser;
+
+/**
+ * Command-line harness that runs JSON-described tokenizer tests. For every
+ * test it tokenizes the "input" string, compares the recorded tokens with the
+ * expected "output" array and prints "Success" or a failure report to
+ * <code>System.out</code> as UTF-8.
+ *
+ * <p>A test file is a JSON object holding the tests under the key "tests" or,
+ * failing that, "xmlViolationTests"; in the latter case the driver is switched
+ * to infoset-altering policies.
+ */
+public class TokenizerTester {
+
+    private static final JSONString PLAINTEXT = new JSONString("PLAINTEXT state");
+
+    private static final JSONString PCDATA = new JSONString("DATA state");
+
+    private static final JSONString RCDATA = new JSONString("RCDATA state");
+
+    private static final JSONString RAWTEXT = new JSONString("RAWTEXT state");
+
+    /**
+     * Compares two JSON values. Simple values are compared with
+     * <code>equals</code>; arrays and objects are compared through the
+     * <code>equals</code> of their underlying collections, and never equal a
+     * value of a different kind.
+     */
+    private static boolean jsonDeepEquals(JSONValue one, JSONValue other) {
+        if (one.isSimple()) {
+            return one.equals(other);
+        }
+        if (one.isArray()) {
+            return other.isArray()
+                    && ((JSONArray) one).getValue().equals(
+                            ((JSONArray) other).getValue());
+        }
+        if (one.isObject()) {
+            return other.isObject()
+                    && ((JSONObject) one).getValue().equals(
+                            ((JSONObject) other).getValue());
+        }
+        throw new RuntimeException("Should never happen.");
+    }
+
+    /**
+     * Maps an "initialStates" entry of the test data to a tokenizer state.
+     *
+     * @throws RuntimeException
+     *             if the entry is not one of the known state names
+     */
+    private static int tokenizerStateFor(JSONValue stateName) {
+        if (PCDATA.equals(stateName)) {
+            return Tokenizer.DATA;
+        } else if (RAWTEXT.equals(stateName)) {
+            return Tokenizer.RAWTEXT;
+        } else if (RCDATA.equals(stateName)) {
+            return Tokenizer.RCDATA;
+        } else if (PLAINTEXT.equals(stateName)) {
+            return Tokenizer.PLAINTEXT;
+        } else {
+            throw new RuntimeException("Broken test data.");
+        }
+    }
+
+    private final JSONArray tests;
+
+    private final JSONArrayTokenHandler tokenHandler;
+
+    private final Driver driver;
+
+    private final Writer writer;
+
+    /**
+     * Sets up the driver and parses the whole test file from the stream. The
+     * stream is read as UTF-8 and is not closed here; the caller owns it.
+     *
+     * @param stream
+     *            the JSON test file
+     */
+    private TokenizerTester(InputStream stream) throws TokenStreamException,
+            RecognitionException, UnsupportedEncodingException {
+        tokenHandler = new JSONArrayTokenHandler();
+        driver = new Driver(new ErrorReportingTokenizer(tokenHandler));
+        driver.setCommentPolicy(XmlViolationPolicy.ALLOW);
+        driver.setContentNonXmlCharPolicy(XmlViolationPolicy.ALLOW);
+        driver.setContentSpacePolicy(XmlViolationPolicy.ALLOW);
+        driver.setNamePolicy(XmlViolationPolicy.ALLOW);
+        driver.setXmlnsPolicy(XmlViolationPolicy.ALLOW);
+        driver.setErrorHandler(tokenHandler);
+        writer = new OutputStreamWriter(System.out, "UTF-8");
+        JSONParser jsonParser = new JSONParser(new InputStreamReader(stream,
+                "UTF-8"));
+        JSONObject obj = (JSONObject) jsonParser.nextValue();
+        JSONArray loaded = (JSONArray) obj.get("tests");
+        if (loaded == null) {
+            loaded = (JSONArray) obj.get("xmlViolationTests");
+            driver.setCommentPolicy(XmlViolationPolicy.ALTER_INFOSET);
+            driver.setContentNonXmlCharPolicy(XmlViolationPolicy.ALTER_INFOSET);
+            driver.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET);
+            driver.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET);
+        }
+        tests = loaded;
+    }
+
+    /**
+     * Runs every test of the file and then flushes the report.
+     */
+    private void runTests() throws SAXException, IOException {
+        for (JSONValue val : tests.getValue()) {
+            runTest((JSONObject) val);
+        }
+        writer.flush();
+    }
+
+    /**
+     * Runs one test description: once in the data state when it has no
+     * "initialStates", otherwise once per listed initial state.
+     *
+     * @param test
+     *            the test description with "input", "output", "description"
+     *            and optionally "lastStartTag" and "initialStates"
+     */
+    private void runTest(JSONObject test) throws SAXException, IOException {
+        String inputString = ((JSONString) test.get("input")).getValue();
+        JSONArray expectedTokens = (JSONArray) test.get("output");
+        String description = ((JSONString) test.get("description")).getValue();
+        JSONString lastStartTagJSON = ((JSONString) test.get("lastStartTag"));
+        String lastStartTag = lastStartTagJSON == null ? null
+                : lastStartTagJSON.getValue();
+        JSONArray contentModelFlags = (JSONArray) test.get("initialStates");
+        if (contentModelFlags == null) {
+            runTestInner(inputString, expectedTokens, description,
+                    Tokenizer.DATA, null);
+        } else {
+            for (JSONValue value : contentModelFlags.getValue()) {
+                runTestInner(inputString, expectedTokens, description,
+                        tokenizerStateFor(value), lastStartTag);
+            }
+        }
+    }
+
+    /**
+     * Tokenizes the input in the given initial state and reports the outcome.
+     * Anything thrown while tokenizing or comparing is reported as a failure
+     * together with its stack trace instead of aborting the run.
+     *
+     * @param inputString
+     *            the markup to tokenize
+     * @param expectedTokens
+     *            the tokens the test expects
+     * @param description
+     *            the human-readable test description, printed on failure
+     * @param contentModelFlag
+     *            the tokenizer state to start in
+     * @param contentModelElement
+     *            the expected end tag name, or <code>null</code>
+     * @throws SAXException
+     * @throws IOException
+     *             if writing the failure report after a throwable fails
+     */
+    private void runTestInner(String inputString, JSONArray expectedTokens,
+            String description, int contentModelFlag,
+            String contentModelElement) throws SAXException, IOException {
+        tokenHandler.setContentModelFlag(contentModelFlag, contentModelElement);
+        InputSource is = new InputSource(new StringReader(inputString));
+        try {
+            driver.tokenize(is);
+            JSONArray actualTokens = tokenHandler.getArray();
+            if (jsonDeepEquals(actualTokens, expectedTokens)) {
+                writer.write("Success\n");
+            } else {
+                writeFailureHeader(description, inputString);
+                writer.write("\nExpected tokens:\n");
+                writer.write(expectedTokens.render(false));
+                writer.write("\nActual tokens:\n");
+                writer.write(actualTokens.render(false));
+                writer.write("\n");
+            }
+        } catch (Throwable t) {
+            writeFailureHeader(description, inputString);
+            writer.write("\n");
+            t.printStackTrace(new PrintWriter(writer, false));
+        }
+    }
+
+    /**
+     * Writes the part shared by all failure reports: the "Failure" line, the
+     * description and the input, without a trailing line break.
+     */
+    private void writeFailureHeader(String description, String inputString)
+            throws IOException {
+        writer.write("Failure\n");
+        writer.write(description);
+        writer.write("\nInput:\n");
+        writer.write(inputString);
+    }
+
+    /**
+     * Runs the tests of every file named on the command line. Each file is
+     * closed once its tests have run, including when a test throws.
+     *
+     * @param args
+     *            paths of the JSON test files
+     * @throws RecognitionException
+     * @throws TokenStreamException
+     * @throws IOException
+     * @throws SAXException
+     */
+    public static void main(String[] args) throws TokenStreamException,
+            RecognitionException, SAXException, IOException {
+        for (int i = 0; i < args.length; i++) {
+            InputStream in = new FileInputStream(args[i]);
+            try {
+                TokenizerTester tester = new TokenizerTester(in);
+                tester.runTests();
+            } finally {
+                in.close();
+            }
+        }
+    }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeDumpContentHandler.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeDumpContentHandler.java
new file mode 100644
index 000000000..9b95b763e
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeDumpContentHandler.java
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+ * SAX handler that writes the events it receives as a line-oriented tree
+ * dump. Every line starts with "| " followed by indentation for the current
+ * nesting level. Elements, attributes (sorted by name), text, comments and
+ * the doctype each get their own line.
+ */
+public class TreeDumpContentHandler implements ContentHandler, LexicalHandler {
+
+    private static final String XHTML_NS = "http://www.w3.org/1999/xhtml";
+
+    private static final String MATHML_NS = "http://www.w3.org/1998/Math/MathML";
+
+    private static final String SVG_NS = "http://www.w3.org/2000/svg";
+
+    private static final String XLINK_NS = "http://www.w3.org/1999/xlink";
+
+    private static final String XML_NS = "http://www.w3.org/XML/1998/namespace";
+
+    private static final String XMLNS_NS = "http://www.w3.org/2000/xmlns/";
+
+    private final Writer writer;
+
+    /** Current nesting depth; controls the indentation written by printLead. */
+    private int level = 0;
+
+    /** True while a quoted text line has been opened but not yet terminated. */
+    private boolean inCharacters = false;
+
+    /** Whether endDocument flushes and closes the writer. */
+    private final boolean close;
+
+    /**
+     * @param writer destination of the dump
+     * @param close whether the writer is flushed and closed on endDocument
+     */
+    public TreeDumpContentHandler(final Writer writer, boolean close) {
+        this.writer = writer;
+        this.close = close;
+    }
+
+    /**
+     * Creates a handler that closes the writer at the end of the document.
+     *
+     * @param writer destination of the dump
+     */
+    public TreeDumpContentHandler(final Writer writer) {
+        this(writer, true);
+    }
+
+    /** Terminates the pending quoted text line, if there is one. */
+    private void endCharacterRun() throws IOException {
+        if (inCharacters) {
+            writer.write("\"\n");
+            inCharacters = false;
+        }
+    }
+
+    /** Starts a new output line: closes pending text, then writes the lead. */
+    private void printLead() throws IOException {
+        endCharacterRun();
+        writer.write("| ");
+        for (int i = 0; i < level; i++) {
+            writer.write(" ");
+        }
+    }
+
+    /** Tells whether the element is the XHTML template element. */
+    private static boolean isHtmlTemplate(String uri, String localName) {
+        return XHTML_NS.equals(uri) && "template".equals(localName);
+    }
+
+    /**
+     * Builds the name under which an attribute is listed: a namespace label
+     * ("xlink ", "xml ", "xmlns " or "otherns ") followed by the local name,
+     * or just the local name for attributes in no namespace.
+     */
+    private static String attributeLabel(String ns, String localName) {
+        if (XLINK_NS.equals(ns)) {
+            return "xlink " + localName;
+        } else if (XML_NS.equals(ns)) {
+            return "xml " + localName;
+        } else if (XMLNS_NS.equals(ns)) {
+            return "xmlns " + localName;
+        } else if (ns == null || ns.length() == 0) {
+            // The decision is based on the attribute's own namespace, not on
+            // the namespace of the element that carries it.
+            return localName;
+        } else {
+            return "otherns " + localName;
+        }
+    }
+
+    /** Maps null to the empty string. */
+    private static String nullToEmpty(String s) {
+        return s == null ? "" : s;
+    }
+
+    /**
+     * Appends text to the current quoted text line, opening one if needed.
+     * Consecutive calls are merged into a single line.
+     */
+    public void characters(char[] ch, int start, int length)
+            throws SAXException {
+        try {
+            if (!inCharacters) {
+                printLead();
+                writer.write('"');
+                inCharacters = true;
+            }
+            writer.write(ch, start, length);
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /**
+     * Closes pending text and steps out one level; an XHTML template steps
+     * out one more level for its "content" line.
+     */
+    public void endElement(String uri, String localName, String qName)
+            throws SAXException {
+        try {
+            endCharacterRun();
+            level--;
+            if (isHtmlTemplate(uri, localName)) {
+                // decrement level for the "content"
+                level--;
+            }
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /**
+     * Writes the element line (prefixed with "math ", "svg " or "otherns "
+     * for non-XHTML namespaces), then one line per attribute in sorted
+     * order, then a "content" line for an XHTML template.
+     */
+    public void startElement(String uri, String localName, String qName,
+            Attributes atts) throws SAXException {
+        try {
+            printLead();
+            writer.write('<');
+            // Compare by value so that namespace strings that are equal but
+            // not the same object are still recognised.
+            if (MATHML_NS.equals(uri)) {
+                writer.write("math ");
+            } else if (SVG_NS.equals(uri)) {
+                writer.write("svg ");
+            } else if (!XHTML_NS.equals(uri)) {
+                writer.write("otherns ");
+            }
+            writer.write(localName);
+            writer.write(">\n");
+            level++;
+            // TreeMap gives the attribute lines a stable, sorted order.
+            TreeMap<String, String> map = new TreeMap<String, String>();
+            for (int i = 0; i < atts.getLength(); i++) {
+                map.put(attributeLabel(atts.getURI(i), atts.getLocalName(i)),
+                        atts.getValue(i));
+            }
+            for (Map.Entry<String, String> entry : map.entrySet()) {
+                printLead();
+                writer.write(entry.getKey());
+                writer.write("=\"");
+                writer.write(entry.getValue());
+                writer.write("\"\n");
+            }
+            if (isHtmlTemplate(uri, localName)) {
+                printLead();
+                level++;
+                writer.write("content\n");
+            }
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /** Writes a comment line. */
+    public void comment(char[] ch, int offset, int len) throws SAXException {
+        try {
+            printLead();
+            writer.write("<!-- ");
+            writer.write(ch, offset, len);
+            writer.write(" -->\n");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /**
+     * Writes the doctype line. The identifiers are written, each in double
+     * quotes, only when at least one of them is non-empty. Null arguments
+     * are treated as empty strings.
+     */
+    public void startDTD(String name, String publicIdentifier,
+            String systemIdentifier) throws SAXException {
+        try {
+            String publicId = nullToEmpty(publicIdentifier);
+            String systemId = nullToEmpty(systemIdentifier);
+            printLead();
+            writer.write("<!DOCTYPE ");
+            writer.write(nullToEmpty(name));
+            if (publicId.length() > 0 || systemId.length() > 0) {
+                writer.write(' ');
+                writer.write('\"');
+                writer.write(publicId);
+                writer.write('\"');
+                writer.write(' ');
+                writer.write('\"');
+                writer.write(systemId);
+                writer.write('\"');
+            }
+            writer.write(">\n");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /**
+     * Terminates pending text and, if this handler was asked to close the
+     * writer, flushes and closes it.
+     */
+    public void endDocument() throws SAXException {
+        try {
+            endCharacterRun();
+            if (close) {
+                writer.flush();
+                writer.close();
+            }
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    // The remaining callbacks produce no output.
+
+    public void startPrefixMapping(String prefix, String uri)
+            throws SAXException {
+    }
+
+    public void startEntity(String arg0) throws SAXException {
+    }
+
+    public void endCDATA() throws SAXException {
+    }
+
+    public void endDTD() throws SAXException {
+    }
+
+    public void endEntity(String arg0) throws SAXException {
+    }
+
+    public void startCDATA() throws SAXException {
+    }
+
+    public void endPrefixMapping(String prefix) throws SAXException {
+    }
+
+    public void ignorableWhitespace(char[] ch, int start, int length)
+            throws SAXException {
+    }
+
+    public void processingInstruction(String target, String data)
+            throws SAXException {
+    }
+
+    public void setDocumentLocator(Locator locator) {
+    }
+
+    public void skippedEntity(String name) throws SAXException {
+    }
+
+    public void startDocument() throws SAXException {
+    }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreePrinter.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreePrinter.java
new file mode 100644
index 000000000..c09169383
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreePrinter.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.sax.HtmlParser;
+
+public class TreePrinter {
+
+ public static void main(String[] args) throws SAXException, IOException {
+ TreeDumpContentHandler treeDumpContentHandler = new TreeDumpContentHandler(new OutputStreamWriter(System.out, "UTF-8"));
+ HtmlParser htmlParser = new HtmlParser();
+ htmlParser.setContentHandler(treeDumpContentHandler);
+ htmlParser.setLexicalHandler(treeDumpContentHandler);
+ htmlParser.setErrorHandler(new SystemErrErrorHandler());
+ htmlParser.setXmlPolicy(XmlViolationPolicy.ALLOW);
+ File file = new File(args[0]);
+ InputSource is = new InputSource(new FileInputStream(file));
+ is.setSystemId(file.toURI().toASCIIString());
+ htmlParser.parse(is);
+ }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeTester.java
new file mode 100644
index 000000000..62d3ab530
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeTester.java
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.StringWriter;
+import java.util.LinkedList;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.sax.HtmlParser;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXParseException;
+
+/**
+ * Runs tree-construction tests read from an aggregate test file. As read by
+ * this class, each test consists of labelled sections that start with a
+ * '#' line: data, errors, optionally document-fragment (followed by the
+ * context element) and script-on/script-off, and finally document (the
+ * expected tree dump). Results are reported on standard error.
+ */
+public class TreeTester {
+
+    private final BufferedInputStream aggregateStream;
+
+    /** When set, streamability violations are made fatal for the parser. */
+    private boolean streaming = false;
+
+    /**
+     * @param aggregateStream the test data; it is wrapped in a buffered
+     *        stream because tests are read in two passes using mark/reset
+     */
+    public TreeTester(InputStream aggregateStream) {
+        this.aggregateStream = new BufferedInputStream(aggregateStream);
+    }
+
+    /** Checks the leading '#' and then runs tests until the data ends. */
+    private void runTests() throws Throwable {
+        if (aggregateStream.read() != '#') {
+            System.err.println("No hash at start!");
+            return;
+        }
+        while (runTest()) {
+            // spin
+        }
+    }
+
+    /**
+     * Reads bytes up to, and consuming, the next line feed. Stops at end of
+     * stream as well, so truncated test data cannot make the caller loop
+     * forever.
+     *
+     * @return the bytes read, each converted to a char, without the line feed
+     */
+    private String readLine() throws IOException {
+        StringBuilder sb = new StringBuilder();
+        int c;
+        while ((c = aggregateStream.read()) != -1 && c != '\n') {
+            sb.append((char) c);
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Runs a single test.
+     *
+     * @return true if a test was run, false when the test data is exhausted
+     *         or ends prematurely
+     */
+    private boolean runTest() throws Throwable {
+        UntilHashInputStream stream = null;
+        try {
+            String context = null;
+            boolean scriptingEnabled = true;
+            boolean hadScriptingDirective = false;
+            // First pass: skip ahead to find out whether this is a fragment
+            // test and whether scripting is switched on or off, then rewind
+            // to the mark for the real run.
+            aggregateStream.mark(12288);
+            if (skipLabel()) { // #data
+                return false;
+            }
+            stream = new UntilHashInputStream(aggregateStream);
+            while (stream.read() != -1) {
+                // spin
+            }
+            if (skipLabel()) { // #errors
+                System.err.println("Premature end of test data.");
+                return false;
+            }
+            stream = new UntilHashInputStream(aggregateStream);
+            while (stream.read() != -1) {
+                // spin
+            }
+
+            // The '#' of the next label has already been consumed by the
+            // UntilHashInputStream, so the line read here is the bare label.
+            String label = readLine();
+            if ("document-fragment".equals(label)) {
+                context = readLine();
+                // Now potentially gather #script-on/off
+                label = readLine();
+            }
+            if ("script-on".equals(label)) {
+                hadScriptingDirective = true;
+            } else if ("script-off".equals(label)) {
+                hadScriptingDirective = true;
+                scriptingEnabled = false;
+            }
+            aggregateStream.reset();
+
+            // Second pass: parse the data section.
+            if (skipLabel()) { // #data
+                System.err.println("Premature end of test data.");
+                return false;
+            }
+            stream = new UntilHashInputStream(aggregateStream);
+            InputSource is = new InputSource(stream);
+            is.setEncoding("UTF-8");
+            StringWriter sw = new StringWriter();
+            ListErrorHandler leh = new ListErrorHandler();
+            TreeDumpContentHandler treeDumpContentHandler = new TreeDumpContentHandler(
+                    sw);
+            HtmlParser htmlParser = new HtmlParser(XmlViolationPolicy.ALLOW);
+            if (streaming) {
+                htmlParser.setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL);
+            }
+            htmlParser.setContentHandler(treeDumpContentHandler);
+            htmlParser.setLexicalHandler(treeDumpContentHandler);
+            htmlParser.setErrorHandler(leh);
+            htmlParser.setScriptingEnabled(scriptingEnabled);
+            try {
+                if (context == null) {
+                    htmlParser.parse(is);
+                } else {
+                    // The context may carry an "svg " or "math " prefix that
+                    // selects the namespace of the context element.
+                    String ns = "http://www.w3.org/1999/xhtml";
+                    if (context.startsWith("svg ")) {
+                        ns = "http://www.w3.org/2000/svg";
+                        context = context.substring(4);
+                    } else if (context.startsWith("math ")) {
+                        ns = "http://www.w3.org/1998/Math/MathML";
+                        context = context.substring(5);
+                    }
+                    htmlParser.parseFragment(is, context, ns);
+                    treeDumpContentHandler.endDocument();
+                }
+            } catch (SAXParseException e) {
+                // ignore
+            }
+            stream.close();
+
+            if (skipLabel()) { // #errors
+                System.err.println("Premature end of test data.");
+                return false;
+            }
+            LinkedList<String> expectedErrors = new LinkedList<String>();
+            BufferedReader br = new BufferedReader(new InputStreamReader(
+                    new UntilHashInputStream(aggregateStream), "UTF-8"));
+            String line = null;
+            while ((line = br.readLine()) != null) {
+                expectedErrors.add(line);
+            }
+
+            if (context != null) {
+                if (skipLabel()) { // #document-fragment
+                    System.err.println("Premature end of test data.");
+                    return false;
+                }
+                UntilHashInputStream stream2 = new UntilHashInputStream(aggregateStream);
+                while (stream2.read() != -1) {
+                    // spin
+                }
+            }
+            if (hadScriptingDirective && skipLabel()) { // #script-on/off
+                System.err.println("Premature end of test data.");
+                return false;
+            }
+
+            if (skipLabel()) { // #document
+                System.err.println("Premature end of test data.");
+                return false;
+            }
+
+            StringBuilder expectedBuilder = new StringBuilder();
+            br = new BufferedReader(new InputStreamReader(
+                    new UntilHashInputStream(aggregateStream), "UTF-8"));
+            int ch;
+            while ((ch = br.read()) != -1) {
+                expectedBuilder.append((char)ch);
+            }
+            String expected = expectedBuilder.toString();
+            String actual = sw.toString();
+
+            LinkedList<String> actualErrors = leh.getErrors();
+
+            // Only the tree dumps are compared; the expected and actual
+            // error lists are printed on failure but their sizes are not
+            // checked.
+            if (expected.equals(actual) || (streaming && leh.isFatal())) {
+                System.err.println("Success.");
+            } else {
+                System.err.print("Failure.\nData:\n" + stream + "\nExpected:\n"
+                        + expected + "Got: \n" + actual);
+                System.err.println("Expected errors:");
+                for (String err : expectedErrors) {
+                    System.err.println(err);
+                }
+                System.err.println("Actual errors:");
+                for (String err : actualErrors) {
+                    System.err.println(err);
+                }
+            }
+        } catch (Throwable t) {
+            System.err.println("Failure.\nData:\n" + stream);
+            throw t;
+        }
+        return true;
+    }
+
+    /**
+     * Skips the rest of a label line. One byte is consumed unconditionally
+     * first, then everything up to and including the next line feed.
+     *
+     * @return true if the end of the stream was reached, false otherwise
+     */
+    private boolean skipLabel() throws IOException {
+        int b = aggregateStream.read();
+        if (b == -1) {
+            return true;
+        }
+        for (;;) {
+            b = aggregateStream.read();
+            if (b == -1) {
+                return true;
+            } else if (b == 0x0A) {
+                return false;
+            }
+        }
+    }
+
+    /**
+     * Runs the tests of every file named on the command line.
+     *
+     * @param args paths of test data files
+     * @throws Throwable whatever a test run throws
+     */
+    public static void main(String[] args) throws Throwable {
+        for (int i = 0; i < args.length; i++) {
+            TreeTester tester = new TreeTester(new FileInputStream(args[i]));
+            tester.runTests();
+        }
+    }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/UntilHashInputStream.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/UntilHashInputStream.java
new file mode 100644
index 000000000..473a9f7f9
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/UntilHashInputStream.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Input stream that exposes the bytes of a delegate stream up to the next
+ * '#' that starts a line. The line feed in front of that '#' is not
+ * returned, and the '#' itself is consumed from the delegate. The bytes
+ * handed out are also recorded in printable form for toString().
+ */
+public class UntilHashInputStream extends InputStream {
+
+    /** Printable transcript of the bytes returned so far. */
+    private final StringBuilder builder = new StringBuilder();
+
+    private final InputStream delegate;
+
+    /** One byte of lookahead: the next byte to hand out, or -1 at the end. */
+    private int buffer = -1;
+
+    /** True once the section has ended or the stream was closed. */
+    private boolean closed = false;
+
+    /**
+     * Reads one byte of lookahead from the delegate. If that byte is
+     * already '#', the section is empty and the stream is at its end.
+     *
+     * @param delegate the stream to read from
+     * @throws IOException if reading the first byte fails
+     */
+    public UntilHashInputStream(final InputStream delegate) throws IOException {
+        this.delegate = delegate;
+        this.buffer = delegate.read();
+        if (buffer == '#') {
+            closed = true;
+        }
+    }
+
+    /**
+     * @return the next byte of the section, or -1 once a line-initial '#'
+     *         or the end of the delegate has been reached
+     */
+    @Override
+    public int read() throws IOException {
+        if (closed) {
+            return -1;
+        }
+        int rv = buffer;
+        if (rv == -1) {
+            // The delegate is exhausted: end the stream without recording
+            // the end-of-stream marker in the transcript.
+            closed = true;
+            return -1;
+        }
+        buffer = delegate.read();
+        if (buffer == '#' && rv == '\n') {
+            // end of stream
+            closed = true;
+            return -1;
+        }
+        // Printable ASCII is recorded as is, everything else as hex.
+        if (rv >= 0x20 && rv < 0x80) {
+            builder.append(((char)rv));
+        } else {
+            builder.append("0x");
+            builder.append(Integer.toHexString(rv));
+        }
+        return rv;
+    }
+
+    /**
+     * Skips the unread remainder of the section so that the delegate is
+     * positioned just after the '#' that starts the next section. Only a
+     * '#' that follows a line feed ends the section, the same rule read()
+     * applies; a '#' in the middle of a line is skipped like any other
+     * byte. The skipped bytes are not added to the transcript.
+     *
+     * @see java.io.InputStream#close()
+     */
+    @Override
+    public void close() throws IOException {
+        super.close();
+        if (closed) {
+            return;
+        }
+        // The lookahead byte has already been taken from the delegate, so
+        // it is the "previous byte" for the first comparison.
+        int prev = buffer;
+        while (prev != -1) {
+            int b = delegate.read();
+            if (b == 0x23 && prev == '\n') {
+                break;
+            }
+            prev = b;
+        }
+        closed = true;
+    }
+
+    /**
+     * @return the printable transcript of the bytes returned by read()
+     * @see java.lang.Object#toString()
+     */
+    @Override
+    public String toString() {
+        return builder.toString();
+    }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XmlSerializerTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XmlSerializerTester.java
new file mode 100644
index 000000000..0d23fda3c
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XmlSerializerTester.java
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+
+import nu.validator.htmlparser.sax.XmlSerializer;
+
+/**
+ * Manual smoke test that drives an XmlSerializer with a fixed sequence of
+ * SAX events and lets it write to standard output.
+ */
+public class XmlSerializerTester {
+
+    /**
+     * Feeds the serializer nested elements in several namespaces, an
+     * attribute in the RDF namespace, two prefix mappings and a run of
+     * characters that mixes a surrogate pair with unpaired surrogates.
+     *
+     * @param args ignored
+     * @throws SAXException if the serializer reports an error
+     */
+    public static void main(String[] args) throws SAXException {
+        final String rdfNs = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
+        final AttributesImpl attributes = new AttributesImpl();
+        final XmlSerializer out = new XmlSerializer(System.out);
+
+        out.startDocument();
+        out.startElement("1", "a", null, attributes);
+
+        // Two empty children in different namespaces.
+        out.startElement("1", "b", null, attributes);
+        out.endElement("1", "b", null);
+        out.startElement("2", "c", null, attributes);
+        out.endElement("2", "c", null);
+
+        // From here on every element carries the RDF-namespaced attribute,
+        // because the same attribute list is reused.
+        attributes.addAttribute(rdfNs, "about", null, "CDATA", "");
+        out.startElement(rdfNs, "d", null, attributes);
+        out.endElement(rdfNs, "d", null);
+
+        out.startPrefixMapping("rdf", "foo");
+        out.startElement(rdfNs, "e", null, attributes);
+        out.startPrefixMapping("p0", "bar");
+        out.startElement(rdfNs, "f", null, attributes);
+        final char[] text = "a\uD834\uDD21a\uD834a\uDD21a".toCharArray();
+        out.characters(text, 0, 8);
+        out.endElement(rdfNs, "f", null);
+        out.endElement(rdfNs, "e", null);
+
+        out.endPrefixMapping("rdf");
+        out.endElement("1", "a", null);
+        out.endDocument();
+    }
+
+}
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XomTest.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XomTest.java
new file mode 100644
index 000000000..66d706ae9
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XomTest.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import nu.xom.Attribute;
+import nu.xom.Element;
+
+/**
+ * Minimal experiment with the XOM API: creates an XHTML "html" element and
+ * adds an attribute whose name is "xmlns:foo".
+ */
+public class XomTest {
+
+    /**
+     * @param args ignored
+     */
+    public static void main(String[] args) {
+        final Element root = new Element("html", "http://www.w3.org/1999/xhtml");
+        // NOTE(review): presumably this probes how XOM treats an attribute
+        // that looks like a namespace declaration; confirm against XOM.
+        final Attribute namespaceLike = new Attribute("xmlns:foo", "bar");
+        root.addAttribute(namespaceLike);
+    }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/package.html b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/package.html
new file mode 100644
index 000000000..57809b84e
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/package.html
@@ -0,0 +1,29 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>Test drivers.</p>
+</body>
+</html> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2HTML.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2HTML.java
new file mode 100644
index 000000000..5e2cf1f58
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2HTML.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.tools;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.MalformedURLException;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.TransformerException;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.sax.HtmlParser;
+import nu.validator.htmlparser.sax.HtmlSerializer;
+import nu.validator.htmlparser.sax.XmlSerializer;
+import nu.validator.htmlparser.test.SystemErrErrorHandler;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+/**
+ * Command-line tool that parses HTML and serializes the result as HTML
+ * again.
+ */
+public class HTML2HTML {
+
+    /**
+     * With no arguments, reads standard input and writes standard output.
+     * With one argument, reads the named file and writes standard output.
+     * With two arguments, reads the first file and writes the second. More
+     * arguments print a usage message and exit with status 1.
+     *
+     * @param args zero, one or two file paths as described above
+     */
+    public static void main(String[] args) throws SAXException,
+            ParserConfigurationException, MalformedURLException, IOException,
+            TransformerException {
+        if (args.length > 2) {
+            System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. Two arguments to read from first file and write to second.");
+            System.exit(1);
+            return;
+        }
+
+        InputStream in;
+        if (args.length >= 1) {
+            in = new FileInputStream(args[0]);
+        } else {
+            in = System.in;
+        }
+        try {
+            OutputStream out;
+            if (args.length == 2) {
+                out = new FileOutputStream(args[1]);
+            } else {
+                out = System.out;
+            }
+            try {
+                ContentHandler serializer = new HtmlSerializer(out);
+
+                HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALLOW);
+
+                parser.setErrorHandler(new SystemErrErrorHandler());
+                parser.setContentHandler(serializer);
+                // The serializer is also registered as the lexical handler.
+                parser.setProperty("http://xml.org/sax/properties/lexical-handler",
+                        serializer);
+                parser.parse(new InputSource(in));
+                out.flush();
+            } finally {
+                // Close the output even when parsing fails.
+                out.close();
+            }
+        } finally {
+            // Release the input in every case, including a failure to open
+            // the output file.
+            in.close();
+        }
+    }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2XML.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2XML.java
new file mode 100644
index 000000000..57666f93b
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2XML.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.tools;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.MalformedURLException;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.TransformerException;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.sax.HtmlParser;
+import nu.validator.htmlparser.sax.XmlSerializer;
+import nu.validator.htmlparser.test.SystemErrErrorHandler;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+/**
+ * Command-line tool that parses HTML and serializes the result as XML.
+ */
+public class HTML2XML {
+
+    /**
+     * With no arguments, reads standard input and writes standard output.
+     * With one argument, reads the named file and writes standard output.
+     * With two arguments, reads the first file and writes the second. More
+     * arguments print a usage message and exit with status 1.
+     *
+     * @param args zero, one or two file paths as described above
+     */
+    public static void main(String[] args) throws SAXException,
+            ParserConfigurationException, MalformedURLException, IOException,
+            TransformerException {
+        if (args.length > 2) {
+            System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. Two arguments to read from first file and write to second.");
+            System.exit(1);
+            return;
+        }
+
+        InputStream in;
+        if (args.length >= 1) {
+            in = new FileInputStream(args[0]);
+        } else {
+            in = System.in;
+        }
+        try {
+            OutputStream out;
+            if (args.length == 2) {
+                out = new FileOutputStream(args[1]);
+            } else {
+                out = System.out;
+            }
+            try {
+                ContentHandler serializer = new XmlSerializer(out);
+
+                HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET);
+
+                parser.setErrorHandler(new SystemErrErrorHandler());
+                parser.setContentHandler(serializer);
+                // The serializer is also registered as the lexical handler.
+                parser.setProperty("http://xml.org/sax/properties/lexical-handler",
+                        serializer);
+                parser.parse(new InputSource(in));
+                out.flush();
+            } finally {
+                // Close the output even when parsing fails.
+                out.close();
+            }
+        } finally {
+            // Release the input in every case, including a failure to open
+            // the output file.
+            in.close();
+        }
+    }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2HTML.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2HTML.java
new file mode 100644
index 000000000..dad89a5b2
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2HTML.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.tools;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.MalformedURLException;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
+import javax.xml.transform.TransformerException;
+
+import nu.validator.htmlparser.sax.HtmlSerializer;
+import nu.validator.htmlparser.sax.XmlSerializer;
+import nu.validator.htmlparser.test.SystemErrErrorHandler;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+/**
+ * Command-line tool that parses XML with the JAXP SAX parser and writes the
+ * resulting SAX event stream out through {@link HtmlSerializer}.
+ */
+public class XML2HTML {
+
+    /**
+     * Converts an XML document to HTML.
+     *
+     * <p>With no arguments, input is read from {@code System.in} and output is
+     * written to {@code System.out}. With one argument, input is read from the
+     * named file and output goes to {@code System.out}. With two arguments,
+     * input is read from the first file and output is written to the second.
+     * More than two arguments prints an error to {@code System.err} and exits
+     * with status 1.
+     *
+     * @param args zero, one or two file paths, as described above
+     * @throws SAXException propagated from the parser or the serializer
+     * @throws ParserConfigurationException if no SAX parser can be created
+     * @throws IOException if a file cannot be opened, read, written or closed
+     */
+    public static void main(String[] args) throws SAXException,
+            ParserConfigurationException, MalformedURLException, IOException,
+            TransformerException {
+        InputStream in = null;
+        OutputStream out = null;
+
+        try {
+            switch (args.length) {
+                case 0:
+                    in = System.in;
+                    out = System.out;
+                    break;
+                case 1:
+                    in = new FileInputStream(args[0]);
+                    out = System.out;
+                    break;
+                case 2:
+                    in = new FileInputStream(args[0]);
+                    out = new FileOutputStream(args[1]);
+                    break;
+                default:
+                    System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. Two arguments to read from first file and write to second.");
+                    System.exit(1);
+                    return;
+            }
+
+            ContentHandler serializer = new HtmlSerializer(out);
+
+            // Namespace-aware, non-validating XML parser.
+            SAXParserFactory factory = SAXParserFactory.newInstance();
+            factory.setNamespaceAware(true);
+            factory.setValidating(false);
+            XMLReader parser = factory.newSAXParser().getXMLReader();
+            parser.setErrorHandler(new SystemErrErrorHandler());
+            parser.setContentHandler(serializer);
+            // The serializer is also registered as the lexical handler.
+            parser.setProperty("http://xml.org/sax/properties/lexical-handler",
+                    serializer);
+            parser.parse(new InputSource(in));
+            out.flush();
+        } finally {
+            // Release the streams even when opening the second file or
+            // parsing fails; the nested finally ensures the input is closed
+            // even if closing the output throws.
+            try {
+                if (out != null) {
+                    out.close();
+                }
+            } finally {
+                if (in != null && in != System.in) {
+                    in.close();
+                }
+            }
+        }
+    }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2XML.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2XML.java
new file mode 100644
index 000000000..2f6aa24d8
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2XML.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.tools;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.MalformedURLException;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
+import javax.xml.transform.TransformerException;
+
+import nu.validator.htmlparser.sax.NameCheckingXmlSerializer;
+import nu.validator.htmlparser.sax.XmlSerializer;
+import nu.validator.htmlparser.test.SystemErrErrorHandler;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+/**
+ * Command-line tool that parses XML with the JAXP SAX parser and writes the
+ * resulting SAX event stream out through {@link NameCheckingXmlSerializer}.
+ */
+public class XML2XML {
+
+    /**
+     * Re-serializes an XML document.
+     *
+     * <p>With no arguments, input is read from {@code System.in} and output is
+     * written to {@code System.out}. With one argument, input is read from the
+     * named file and output goes to {@code System.out}. With two arguments,
+     * input is read from the first file and output is written to the second.
+     * More than two arguments prints an error to {@code System.err} and exits
+     * with status 1.
+     *
+     * @param args zero, one or two file paths, as described above
+     * @throws SAXException propagated from the parser or the serializer
+     * @throws ParserConfigurationException if no SAX parser can be created
+     * @throws IOException if a file cannot be opened, read, written or closed
+     */
+    public static void main(String[] args) throws SAXException,
+            ParserConfigurationException, MalformedURLException, IOException,
+            TransformerException {
+        InputStream in = null;
+        OutputStream out = null;
+
+        try {
+            switch (args.length) {
+                case 0:
+                    in = System.in;
+                    out = System.out;
+                    break;
+                case 1:
+                    in = new FileInputStream(args[0]);
+                    out = System.out;
+                    break;
+                case 2:
+                    in = new FileInputStream(args[0]);
+                    out = new FileOutputStream(args[1]);
+                    break;
+                default:
+                    System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. Two arguments to read from first file and write to second.");
+                    System.exit(1);
+                    return;
+            }
+
+            ContentHandler serializer = new NameCheckingXmlSerializer(out);
+
+            // Namespace-aware, non-validating XML parser.
+            SAXParserFactory factory = SAXParserFactory.newInstance();
+            factory.setNamespaceAware(true);
+            factory.setValidating(false);
+            XMLReader parser = factory.newSAXParser().getXMLReader();
+            parser.setErrorHandler(new SystemErrErrorHandler());
+            parser.setContentHandler(serializer);
+            // The serializer is also registered as the lexical handler.
+            parser.setProperty("http://xml.org/sax/properties/lexical-handler",
+                    serializer);
+            parser.parse(new InputSource(in));
+            out.flush();
+        } finally {
+            // Release the streams even when opening the second file or
+            // parsing fails; the nested finally ensures the input is closed
+            // even if closing the output throws.
+            try {
+                if (out != null) {
+                    out.close();
+                }
+            } finally {
+                if (in != null && in != System.in) {
+                    in.close();
+                }
+            }
+        }
+    }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java
new file mode 100644
index 000000000..05d8193c1
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.tools;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.net.MalformedURLException;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
+import javax.xml.transform.Templates;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.sax.SAXResult;
+import javax.xml.transform.sax.SAXTransformerFactory;
+import javax.xml.transform.sax.TemplatesHandler;
+import javax.xml.transform.sax.TransformerHandler;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.dom.HtmlDocumentBuilder;
+import nu.validator.htmlparser.sax.HtmlParser;
+import nu.validator.htmlparser.sax.HtmlSerializer;
+import nu.validator.htmlparser.sax.XmlSerializer;
+import nu.validator.htmlparser.test.SystemErrErrorHandler;
+
+import org.w3c.dom.Document;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+ * Command-line tool that applies an XSLT template to an HTML or XML input
+ * file and serializes the transformation result as HTML or XML.
+ */
+public class XSLT4HTML5 {
+
+    /** How the input document is fed to the transformer. */
+    private enum Mode {
+        STREAMING_SAX, BUFFERED_SAX, DOM,
+    }
+
+    private static final String TEMPLATE = "--template=";
+
+    private static final String INPUT_HTML = "--input-html=";
+
+    private static final String INPUT_XML = "--input-xml=";
+
+    private static final String OUTPUT_HTML = "--output-html=";
+
+    private static final String OUTPUT_XML = "--output-xml=";
+
+    private static final String MODE = "--mode=";
+
+    /**
+     * Runs the transformation described by the command-line options.
+     *
+     * <p>Recognized options are {@code --template=}, {@code --input-html=} or
+     * {@code --input-xml=}, {@code --output-html=} or {@code --output-xml=},
+     * and the optional {@code --mode=} ({@code sax-streaming},
+     * {@code sax-buffered} or {@code dom}; buffered SAX is used when no mode
+     * is given). Arguments that match none of these prefixes are ignored.
+     * With no arguments at all, a usage line is printed and the process exits
+     * with status 0. Setting an option twice, giving an unrecognized mode, or
+     * omitting the template, input or output prints a message to
+     * {@code System.err} and exits with a non-zero status.
+     *
+     * @param args the command-line options described above
+     * @throws ParserConfigurationException if the XML SAX parser cannot be created
+     * @throws SAXException propagated from parsing the template or the input
+     * @throws IOException if a file cannot be read, written or closed
+     * @throws MalformedURLException declared for callers; not thrown directly here
+     * @throws TransformerException propagated from the XSLT machinery
+     */
+    public static void main(String[] args) throws SAXException,
+            ParserConfigurationException, MalformedURLException, IOException, TransformerException {
+        if (args.length == 0) {
+            System.out.println("--template=file --input-[html|xml]=file --output-[html|xml]=file --mode=[sax-streaming|sax-buffered|dom]");
+            System.exit(0);
+        }
+        String template = null;
+        String input = null;
+        boolean inputHtml = false;
+        String output = null;
+        boolean outputHtml = false;
+        Mode mode = null;
+        for (int i = 0; i < args.length; i++) {
+            String arg = args[i];
+            if (arg.startsWith(TEMPLATE)) {
+                if (template == null) {
+                    template = arg.substring(TEMPLATE.length());
+                } else {
+                    System.err.println("Tried to set template twice.");
+                    System.exit(1);
+                }
+            } else if (arg.startsWith(INPUT_HTML)) {
+                if (input == null) {
+                    input = arg.substring(INPUT_HTML.length());
+                    inputHtml = true;
+                } else {
+                    System.err.println("Tried to set input twice.");
+                    System.exit(2);
+                }
+            } else if (arg.startsWith(INPUT_XML)) {
+                if (input == null) {
+                    input = arg.substring(INPUT_XML.length());
+                    inputHtml = false;
+                } else {
+                    System.err.println("Tried to set input twice.");
+                    System.exit(2);
+                }
+            } else if (arg.startsWith(OUTPUT_HTML)) {
+                if (output == null) {
+                    output = arg.substring(OUTPUT_HTML.length());
+                    outputHtml = true;
+                } else {
+                    System.err.println("Tried to set output twice.");
+                    System.exit(3);
+                }
+            } else if (arg.startsWith(OUTPUT_XML)) {
+                if (output == null) {
+                    output = arg.substring(OUTPUT_XML.length());
+                    outputHtml = false;
+                } else {
+                    System.err.println("Tried to set output twice.");
+                    System.exit(3);
+                }
+            } else if (arg.startsWith(MODE)) {
+                if (mode == null) {
+                    String modeStr = arg.substring(MODE.length());
+                    if ("dom".equals(modeStr)) {
+                        mode = Mode.DOM;
+                    } else if ("sax-buffered".equals(modeStr)) {
+                        mode = Mode.BUFFERED_SAX;
+                    } else if ("sax-streaming".equals(modeStr)) {
+                        mode = Mode.STREAMING_SAX;
+                    } else {
+                        System.err.println("Unrecognized mode.");
+                        System.exit(5);
+                    }
+                } else {
+                    System.err.println("Tried to set mode twice.");
+                    System.exit(4);
+                }
+            }
+        }
+
+        if (template == null) {
+            System.err.println("No template specified.");
+            System.exit(6);
+        }
+        if (input == null) {
+            System.err.println("No input specified.");
+            System.exit(7);
+        }
+        if (output == null) {
+            System.err.println("No output specified.");
+            System.exit(8);
+        }
+        if (mode == null) {
+            mode = Mode.BUFFERED_SAX;
+        }
+
+        SystemErrErrorHandler errorHandler = new SystemErrErrorHandler();
+
+        // Namespace-aware, non-validating XML reader. It parses the template
+        // and, unless replaced by an HtmlParser below, the input as well.
+        SAXParserFactory factory = SAXParserFactory.newInstance();
+        factory.setNamespaceAware(true);
+        factory.setValidating(false);
+        XMLReader reader = factory.newSAXParser().getXMLReader();
+        reader.setErrorHandler(errorHandler);
+
+        // Compile the stylesheet by streaming it into a TemplatesHandler.
+        SAXTransformerFactory transformerFactory = (SAXTransformerFactory) TransformerFactory.newInstance();
+        transformerFactory.setErrorListener(errorHandler);
+        TemplatesHandler templatesHandler = transformerFactory.newTemplatesHandler();
+        reader.setContentHandler(templatesHandler);
+        reader.parse(new File(template).toURI().toASCIIString());
+
+        Templates templates = templatesHandler.getTemplates();
+
+        FileOutputStream outputStream = new FileOutputStream(output);
+        try {
+            ContentHandler serializer;
+            if (outputHtml) {
+                serializer = new HtmlSerializer(outputStream);
+            } else {
+                serializer = new XmlSerializer(outputStream);
+            }
+            // Content events pass through XmlnsDropper before reaching the
+            // serializer; lexical events go to the serializer directly.
+            SAXResult result = new SAXResult(new XmlnsDropper(serializer));
+            result.setLexicalHandler((LexicalHandler) serializer);
+
+            if (mode == Mode.DOM) {
+                Document inputDoc;
+                DocumentBuilder builder;
+                if (inputHtml) {
+                    builder = new HtmlDocumentBuilder(XmlViolationPolicy.ALTER_INFOSET);
+                } else {
+                    DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
+                    // Must be set on the DOM factory (not the SAX factory
+                    // above): DocumentBuilderFactory is not namespace-aware
+                    // by default.
+                    builderFactory.setNamespaceAware(true);
+                    try {
+                        builder = builderFactory.newDocumentBuilder();
+                    } catch (ParserConfigurationException e) {
+                        throw new RuntimeException(e);
+                    }
+                }
+                inputDoc = builder.parse(new File(input));
+                DOMSource inputSource = new DOMSource(inputDoc,
+                        new File(input).toURI().toASCIIString());
+                Transformer transformer = templates.newTransformer();
+                transformer.setErrorListener(errorHandler);
+                transformer.transform(inputSource, result);
+            } else {
+                if (inputHtml) {
+                    reader = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET);
+                    if (mode == Mode.STREAMING_SAX) {
+                        reader.setProperty("http://validator.nu/properties/streamability-violation-policy", XmlViolationPolicy.FATAL);
+                    }
+                }
+                TransformerHandler transformerHandler = transformerFactory.newTransformerHandler(templates);
+                transformerHandler.setResult(result);
+                reader.setErrorHandler(errorHandler);
+                reader.setContentHandler(transformerHandler);
+                reader.setProperty("http://xml.org/sax/properties/lexical-handler", transformerHandler);
+                reader.parse(new File(input).toURI().toASCIIString());
+            }
+            outputStream.flush();
+        } finally {
+            // Close the output file even when the transformation fails.
+            outputStream.close();
+        }
+    }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java
new file mode 100644
index 000000000..b364cc521
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.tools;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.sax.HtmlSerializer;
+import nu.validator.htmlparser.xom.HtmlBuilder;
+import nu.xom.Builder;
+import nu.xom.Document;
+import nu.xom.Element;
+import nu.xom.Nodes;
+import nu.xom.ParsingException;
+import nu.xom.Serializer;
+import nu.xom.ValidityException;
+import nu.xom.converters.SAXConverter;
+import nu.xom.xslt.XSLException;
+import nu.xom.xslt.XSLTransform;
+
+import org.xml.sax.SAXException;
+
+public class XSLT4HTML5XOM {
+
+ private static final String TEMPLATE = "--template=";
+
+ private static final String INPUT_HTML = "--input-html=";
+
+ private static final String INPUT_XML = "--input-xml=";
+
+ private static final String OUTPUT_HTML = "--output-html=";
+
+ private static final String OUTPUT_XML = "--output-xml=";
+
+ /**
+ * @param args
+ * @throws IOException
+ * @throws ParsingException
+ * @throws ValidityException
+ * @throws XSLException
+ * @throws SAXException
+ */
+ public static void main(String[] args) throws ValidityException,
+ ParsingException, IOException, XSLException, SAXException {
+ if (args.length == 0) {
+ System.out.println("--template=file --input-[html|xml]=file --output-[html|xml]=file --mode=[sax-streaming|sax-buffered|dom]");
+ System.exit(0);
+ }
+ String template = null;
+ String input = null;
+ boolean inputHtml = false;
+ String output = null;
+ boolean outputHtml = false;
+ for (int i = 0; i < args.length; i++) {
+ String arg = args[i];
+ if (arg.startsWith(TEMPLATE)) {
+ if (template == null) {
+ template = arg.substring(TEMPLATE.length());
+ } else {
+ System.err.println("Tried to set template twice.");
+ System.exit(1);
+ }
+ } else if (arg.startsWith(INPUT_HTML)) {
+ if (input == null) {
+ input = arg.substring(INPUT_HTML.length());
+ inputHtml = true;
+ } else {
+ System.err.println("Tried to set input twice.");
+ System.exit(2);
+ }
+ } else if (arg.startsWith(INPUT_XML)) {
+ if (input == null) {
+ input = arg.substring(INPUT_XML.length());
+ inputHtml = false;
+ } else {
+ System.err.println("Tried to set input twice.");
+ System.exit(2);
+ }
+ } else if (arg.startsWith(OUTPUT_HTML)) {
+ if (output == null) {
+ output = arg.substring(OUTPUT_HTML.length());
+ outputHtml = true;
+ } else {
+ System.err.println("Tried to set output twice.");
+ System.exit(3);
+ }
+ } else if (arg.startsWith(OUTPUT_XML)) {
+ if (output == null) {
+ output = arg.substring(OUTPUT_XML.length());
+ outputHtml = false;
+ } else {
+ System.err.println("Tried to set output twice.");
+ System.exit(3);
+ }
+ }
+ }
+
+ if (template == null) {
+ System.err.println("No template specified.");
+ System.exit(6);
+ }
+ if (input == null) {
+ System.err.println("No input specified.");
+ System.exit(7);
+ }
+ if (output == null) {
+ System.err.println("No output specified.");
+ System.exit(8);
+ }
+
+ Builder builder = new Builder();
+
+ Document transformationDoc = builder.build(new File(template));
+
+ XSLTransform transform = new XSLTransform(transformationDoc);
+
+ FileOutputStream outputStream = new FileOutputStream(output);
+
+ Document inputDoc;
+ if (inputHtml) {
+ builder = new HtmlBuilder(XmlViolationPolicy.ALTER_INFOSET);
+ }
+ inputDoc = builder.build(new File(input));
+ Nodes result = transform.transform(inputDoc);
+ Document outputDoc = new Document((Element) result.get(0));
+ if (outputHtml) {
+ HtmlSerializer htmlSerializer = new HtmlSerializer(outputStream);
+ SAXConverter converter = new SAXConverter(htmlSerializer);
+ converter.setLexicalHandler(htmlSerializer);
+ converter.convert(outputDoc);
+ } else {
+ Serializer serializer = new Serializer(outputStream);
+ serializer.write(outputDoc);
+ }
+ outputStream.flush();
+ outputStream.close();
+ }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XmlnsDropper.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XmlnsDropper.java
new file mode 100644
index 000000000..0e6d4b1c2
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XmlnsDropper.java
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.tools;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+
+/**
+ * Quick and dirty hack to work around Xalan xmlns weirdness.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+class XmlnsDropper implements ContentHandler {
+
+    /** Handler that receives every event after attribute filtering. */
+    private final ContentHandler delegate;
+
+    /**
+     * Wraps the given handler.
+     *
+     * @param delegate the handler all events are forwarded to
+     */
+    public XmlnsDropper(final ContentHandler delegate) {
+        this.delegate = delegate;
+    }
+
+    /**
+     * Forwards character data unchanged.
+     *
+     * @see org.xml.sax.ContentHandler#characters(char[], int, int)
+     */
+    public void characters(char[] ch, int start, int length) throws SAXException {
+        this.delegate.characters(ch, start, length);
+    }
+
+    /**
+     * Forwards the end-of-document event unchanged.
+     *
+     * @see org.xml.sax.ContentHandler#endDocument()
+     */
+    public void endDocument() throws SAXException {
+        this.delegate.endDocument();
+    }
+
+    /**
+     * Forwards the end-of-element event unchanged.
+     *
+     * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
+     */
+    public void endElement(String uri, String localName, String qName) throws SAXException {
+        this.delegate.endElement(uri, localName, qName);
+    }
+
+    /**
+     * Forwards the end of a prefix mapping unchanged.
+     *
+     * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
+     */
+    public void endPrefixMapping(String prefix) throws SAXException {
+        this.delegate.endPrefixMapping(prefix);
+    }
+
+    /**
+     * Forwards ignorable whitespace unchanged.
+     *
+     * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
+     */
+    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
+        this.delegate.ignorableWhitespace(ch, start, length);
+    }
+
+    /**
+     * Forwards a processing instruction unchanged.
+     *
+     * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String)
+     */
+    public void processingInstruction(String target, String data) throws SAXException {
+        this.delegate.processingInstruction(target, data);
+    }
+
+    /**
+     * Hands the document locator to the delegate.
+     *
+     * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
+     */
+    public void setDocumentLocator(Locator locator) {
+        this.delegate.setDocumentLocator(locator);
+    }
+
+    /**
+     * Forwards a skipped-entity notification unchanged.
+     *
+     * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
+     */
+    public void skippedEntity(String name) throws SAXException {
+        this.delegate.skippedEntity(name);
+    }
+
+    /**
+     * Forwards the start-of-document event unchanged.
+     *
+     * @see org.xml.sax.ContentHandler#startDocument()
+     */
+    public void startDocument() throws SAXException {
+        this.delegate.startDocument();
+    }
+
+    /**
+     * Forwards the start of an element with a copy of its attributes from
+     * which every attribute whose qualified name is <code>xmlns</code> or
+     * starts with <code>xmlns:</code> has been left out. Attributes whose
+     * qualified name is <code>null</code> are kept.
+     *
+     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
+     */
+    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
+        final AttributesImpl kept = new AttributesImpl();
+        final int total = atts.getLength();
+        for (int idx = 0; idx < total; idx++) {
+            final String attrQName = atts.getQName(idx);
+            final boolean namespaceDeclaration = attrQName != null
+                    && ("xmlns".equals(attrQName) || attrQName.startsWith("xmlns:"));
+            if (!namespaceDeclaration) {
+                kept.addAttribute(atts.getURI(idx), atts.getLocalName(idx),
+                        attrQName, atts.getType(idx), atts.getValue(idx));
+            }
+        }
+        this.delegate.startElement(uri, localName, qName, kept);
+    }
+
+    /**
+     * Forwards the start of a prefix mapping unchanged.
+     *
+     * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String)
+     */
+    public void startPrefixMapping(String prefix, String uri) throws SAXException {
+        this.delegate.startPrefixMapping(prefix, uri);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/package.html b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/package.html
new file mode 100644
index 000000000..a04bf3cd0
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/package.html
@@ -0,0 +1,29 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>Demo apps.</p>
+</body>
+</html> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/PassThruPrinter.java b/parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/PassThruPrinter.java
new file mode 100644
index 000000000..df391d4b4
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/PassThruPrinter.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree.test;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
+
+import nu.validator.htmlparser.sax.XmlSerializer;
+import nu.validator.saxtree.Node;
+import nu.validator.saxtree.TreeBuilder;
+import nu.validator.saxtree.TreeParser;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+ * Test driver that parses an XML file into a saxtree with
+ * {@link TreeBuilder} and replays that tree through {@link TreeParser} into
+ * an {@link XmlSerializer} writing to {@code System.out}.
+ */
+public class PassThruPrinter {
+
+    /**
+     * Parses the file named by the first argument and prints it back as XML.
+     * Arguments after the first are ignored. When no argument is given, a
+     * usage line is printed to {@code System.err} and the process exits with
+     * status 1.
+     *
+     * @param args the path of the XML file to read, as the first element
+     * @throws SAXException propagated from parsing or from replaying the tree
+     * @throws IOException if the file cannot be opened, read or closed
+     * @throws ParserConfigurationException if no SAX parser can be created
+     */
+    public static void main(String[] args) throws SAXException, IOException, ParserConfigurationException {
+        if (args.length < 1) {
+            // Fail with a readable message instead of an index exception.
+            System.err.println("Usage: PassThruPrinter <xml-file>");
+            System.exit(1);
+            return;
+        }
+
+        // Namespace-aware, non-validating XML reader.
+        SAXParserFactory factory = SAXParserFactory.newInstance();
+        factory.setNamespaceAware(true);
+        factory.setValidating(false);
+        XMLReader reader = factory.newSAXParser().getXMLReader();
+
+        // The tree builder receives both content and lexical events.
+        TreeBuilder treeBuilder = new TreeBuilder();
+        reader.setContentHandler(treeBuilder);
+        reader.setProperty("http://xml.org/sax/properties/lexical-handler", treeBuilder);
+
+        File file = new File(args[0]);
+        FileInputStream stream = new FileInputStream(file);
+        try {
+            InputSource is = new InputSource(stream);
+            is.setSystemId(file.toURI().toASCIIString());
+            reader.parse(is);
+        } finally {
+            // Release the file handle even when parsing fails.
+            stream.close();
+        }
+
+        Node doc = treeBuilder.getRoot();
+
+        ContentHandler xmlSerializer = new XmlSerializer(System.out);
+
+        TreeParser treeParser = new TreeParser(xmlSerializer, (LexicalHandler) xmlSerializer);
+        treeParser.parse(doc);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/package.html b/parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/package.html
new file mode 100644
index 000000000..57809b84e
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/package.html
@@ -0,0 +1,29 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>Test drivers.</p>
+</body>
+</html> \ No newline at end of file