diff options
Diffstat (limited to 'parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java')
-rw-r--r-- | parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java | 115 |
1 files changed, 115 insertions, 0 deletions
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java new file mode 100644 index 000000000..3337a6555 --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.test; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CodingErrorAction; + +import nu.validator.htmlparser.common.Heuristics; +import nu.validator.htmlparser.io.Encoding; +import nu.validator.htmlparser.io.HtmlInputStreamReader; + +import org.xml.sax.ErrorHandler; +import org.xml.sax.SAXException; + +public class DecoderLoopTester { + + private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10); + + private static final int NUMBER_OR_ASTRAL_CHARS = 24500; + + private void runTest(int padding) throws SAXException, IOException { + Encoding utf8 = Encoding.forName("UTF-8"); + char[] charArr = new char[1 + padding + 2 * NUMBER_OR_ASTRAL_CHARS]; + byte[] byteArr; + int i = 0; + charArr[i++] = '\uFEFF'; + for (int j = 0; j < padding; j++) { + charArr[i++] = 'x'; + } + for (int j = 0; j < NUMBER_OR_ASTRAL_CHARS; j++) { + int value = 0x10000 + j; + charArr[i++] = (char) (LEAD_OFFSET + (value >> 10)); + charArr[i++] = (char) (0xDC00 + (value & 0x3FF)); +// charArr[i++] = 'y'; +// charArr[i++] = 'z'; + + } + CharBuffer charBuffer = CharBuffer.wrap(charArr); + CharsetEncoder enc = utf8.newEncoder(); + enc.onMalformedInput(CodingErrorAction.REPORT); + enc.onUnmappableCharacter(CodingErrorAction.REPORT); + ByteBuffer byteBuffer = enc.encode(charBuffer); + byteArr = new byte[byteBuffer.limit()]; + byteBuffer.get(byteArr); + + ErrorHandler eh = new SystemErrErrorHandler(); + compare(new HtmlInputStreamReader(new ByteArrayInputStream(byteArr), eh, null, null, Heuristics.NONE), padding, charArr, byteArr); + compare(new HtmlInputStreamReader(new ByteArrayInputStream(byteArr), eh, null, null, utf8), padding, charArr, byteArr); + } + + /** + * @param padding + * @param charArr + * @param byteArr + * @throws SAXException + * @throws IOException + */ + private void compare(HtmlInputStreamReader reader, int padding, char[] charArr, byte[] byteArr) throws SAXException, IOException { + char[] readBuffer = new char[2048]; + int offset = 0; + int num = 0; + int readNum = 0; + while ((num = reader.read(readBuffer)) != -1) { + for (int j = 0; j < num; j++) { + System.out.println(offset + j); + if (readBuffer[j] != charArr[offset + j]) { + throw new RuntimeException("Test failed. Char: " + Integer.toHexString(readBuffer[j]) + " j: " + j + " readNum: " + readNum); + } + } + offset += num; + readNum++; + } + } + + void runTests() throws SAXException, IOException { + for (int i = 0; i < 4; i++) { + runTest(i); + } + } + + /** + * @param args + * @throws IOException + * @throws SAXException + */ + public static void main(String[] args) throws IOException, SAXException { + new DecoderLoopTester().runTests(); + } + +} |