summaryrefslogtreecommitdiffstats
path: root/parser/html/java/htmlparser/src/nu/validator
diff options
context:
space:
mode:
author Matt A. Tobin <email@mattatobin.com> 2020-01-16 07:32:48 -0500
committer Matt A. Tobin <email@mattatobin.com> 2020-01-16 07:32:48 -0500
commit ed60101550022a2650edc41cd3a63b35fea836c5 (patch)
tree e6967e47f27945599ec09c4401f7932751315beb /parser/html/java/htmlparser/src/nu/validator
parent fa816e1ec69d865114b7d061905574038fbd425b (diff)
parent 927c386dd8c9526d8695d0202a08735984dc7b31 (diff)
downloadUXP-ed60101550022a2650edc41cd3a63b35fea836c5.tar
UXP-ed60101550022a2650edc41cd3a63b35fea836c5.tar.gz
UXP-ed60101550022a2650edc41cd3a63b35fea836c5.tar.lz
UXP-ed60101550022a2650edc41cd3a63b35fea836c5.tar.xz
UXP-ed60101550022a2650edc41cd3a63b35fea836c5.zip
Merge branch 'html5-parser-work'
Diffstat (limited to 'parser/html/java/htmlparser/src/nu/validator')
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Big5.java59
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Big5Data.java185
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Big5Decoder.java184
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Big5Encoder.java185
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Decoder.java80
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Encoder.java95
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Encoding.java886
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/EucJp.java57
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/EucKr.java64
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/FallibleSingleByteDecoder.java61
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Gb18030.java55
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Gbk.java63
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Ibm866.java184
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/InfallibleSingleByteDecoder.java57
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso10.java187
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso13.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso14.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso15.java186
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso16.java181
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso2.java189
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso2022Jp.java56
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso3.java189
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso4.java189
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso5.java188
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso6.java194
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso7.java192
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso8.java191
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso8I.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Koi8R.java185
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Koi8U.java182
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/MacCyrillic.java182
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Macintosh.java184
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Replacement.java59
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/ReplacementDecoder.java75
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/ShiftJis.java62
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/UserDefined.java55
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/UserDefinedDecoder.java56
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Be.java55
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Le.java56
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Utf8.java57
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows1250.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows1251.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows1252.java197
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows1253.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows1254.java192
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows1255.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows1256.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows1257.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows1258.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows874.java186
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Auto.java27
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/CharacterName.java27
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Const.java34
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/IdType.java34
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Inline.java33
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Literal.java34
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Local.java34
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NoLength.java34
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NsUri.java33
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Prefix.java33
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/QName.java33
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Virtual.java33
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/package.html30
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/ByteReadable.java44
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/CharacterHandler.java59
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DoctypeExpectation.java65
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentMode.java47
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentModeHandler.java46
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java58
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Heuristics.java52
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Interner.java35
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TokenHandler.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TransitionHandler.java53
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/XmlViolationPolicy.java48
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/package.html29
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/DOMTreeBuilder.java357
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/Dom2Sax.java259
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java736
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/package.html29
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/ChardetSniffer.java84
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java77
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/NormalizationChecker.java268
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java2475
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java90
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java1614
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java772
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt55
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HtmlAttributes.java620
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/LocatorImpl.java60
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/MetaScanner.java856
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NCName.java495
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharacters.java944
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharactersAccel.java311
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Portability.java152
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/PushedLocation.java136
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java297
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StateSnapshot.java206
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java43
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java7080
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java6553
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilderState.java129
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/UTF16Buffer.java153
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/package.html30
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/io/BomSniffer.java79
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Confidence.java27
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Driver.java597
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Encoding.java395
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java512
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/io/MetaSniffer.java199
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/Rewindable.java42
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/RewindableInputStream.java235
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlParser.java1097
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlSerializer.java269
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java47
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java51
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXStreamer.java196
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXTreeBuilder.java210
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/XmlSerializer.java737
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/package.html29
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPointer.java49
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPtrElement.java87
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/HtmlBuilder.java773
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/ModalDocument.java75
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/Mode.java48
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/SimpleNodeFactory.java102
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/XOMTreeBuilder.java351
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/package.html29
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/CDATA.java70
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/CharBufferNode.java62
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/Characters.java65
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/Comment.java66
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/DTD.java118
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/Document.java70
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/DocumentFragment.java58
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/Element.java172
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/Entity.java86
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/IgnorableWhitespace.java65
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/LocatorImpl.java104
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/Node.java307
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/NodeType.java76
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/NullLexicalHandler.java85
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/ParentNode.java208
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/PrefixMapping.java65
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/ProcessingInstruction.java94
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/SkippedEntity.java77
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/TreeBuilder.java250
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/TreeParser.java301
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/package.html46
148 files changed, 42470 insertions, 0 deletions
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Big5.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5.java
new file mode 100644
index 000000000..00e5f7ca7
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+class Big5 extends Encoding { // Package-private Encoding subclass constructed with the name "big5".
+
+ private static final String[] LABELS = { // Label strings handed to the Encoding constructor together with NAME.
+ "big5",
+ "big5-hkscs",
+ "cn-big5",
+ "csbig5",
+ "x-x-big5"
+ };
+
+ private static final String NAME = "big5"; // Name handed to the Encoding constructor.
+
+ static final Big5 INSTANCE = new Big5(); // Declared after LABELS so the array is already initialized when the constructor reads it.
+
+ private Big5() { // Private: the only construction in this class is the INSTANCE initializer.
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() { // Creates a new Big5Decoder on each call, passing it this encoding.
+ return new Big5Decoder(this);
+ }
+
+ @Override public CharsetEncoder newEncoder() { // Creates a new Big5Encoder on each call, passing it this encoding.
+ return new Big5Encoder(this);
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Data.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Data.java
new file mode 100644
index 000000000..9f35be341
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Data.java
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+final class Big5Data {
+
+ private static final String ASTRALNESS = "\uF829\u7A22\u1290\uC5C4\u0007\u0200\u7549\"\u0000\uA000\u3859\u0300\u002C\u573E\uF72B\u6EFC\u90F2\u3B7B\u83E9\uF049\u9DA6\uBBFC\uBEF7\uFDFE\u0C83\uABD1\u7BFF\u7FBF\u1804\u002C\u4840\u2046\u0408\u2A22\u4858\u091A\u5100\u3122\uC000\u5000\uC00D\u6110\uD44C\u9A24\u0180\u0004\u92B2\u0209\u8631\u1242\u8140\u0351\uAB48\u7460\uD5A2\u3E5C\uE361\u1083\u720B\u08A0\u51D6\uE00A\u8100\u1686\uC443\u1135\u6037\u7AE6\u056D\u7D0C\u0E66\u81E0\u7F88\u2420\u2406\u1D03\u340C\u4268\u454A\uF13F\u080D\u8084\uBB00\u0C4D\u6ED6\u97D7\u41DF\u5D3E\uDA68\u305C\uB800\u26E9\u80BC\u0151\uE078\u89A1\u59C0\u9679\u3BCC\u5EDE\uBC2C\uDF9B\u6C5D\u046D\u6043\u4A36\uD860\u073E\uC8C4\u6C69\uD8B1\u8302\u0F88\u0973\u806E\u3B6B\u5A17\uA503\u2D52\u3F40\u1120\u4101\u5024\uB903\u90EE\u1079\u5CAD\u1820\uDA0A\u8060\u9E26\u6E73\u1021\u080E\u4368\u6FB2\u161F\u8AFE\u76B6\u763A\u8262\u1894\u1801\uFE7D\u578D\u1327\u5BD2\u1937\uDB8C\u4862\u0024\u0000\u0010\u8000\u0000\u0000\u0038\u3800\uB9E2\uFD7D\u75F8\uDCF7\u6FF3\uBBF2\uFF4A\uAE3F\u9FC5\uEAFF\uBABA\uBC5D\u9F73\uD8FA\uDED6\u4B25\u975E\u2ADA\u6DB9\u06E6\u9D36\u53F9\u6FC5\uF98A\u49BF\uDB5D\uFFF8\u14A6\uE605\u96F7\u0A99\u00E5\u0800\u3D81\u5002\u0102\uBF49\u475E\u036F\u6280\uEECA\u4819\u6081\u205A\u24F7\u0000\u0004\u0000\u2804\u22C8\u0200\u0000\u2010\u5082\u3040\u0001\u0010\u1284\u0041\u0504\u2000\uC100\u3F7F\uB059\u8AC1\uAFAF\uAC05\u033F\u0204\u7280\u420A\u0426\u02D0\u0EC3\u0958\u0A80\u20B5\u9206\u8B77\u0560\u21C9\u4606\u6038\uC048\u24B4\u84DE\uC0E0\u3364\u3154\u300D\u688A\u5F2B\u0626\u8496\uB108\uE890\uA394\u734F\u50B8\u0D11\uDFA4\u4003\u5D20\u8480\u6160\u51CE\u800A\u58B7\u0050\uE862\u6750\u7220\u1228";
+
+ private static final String TABLE0 = "\u43F0\u4C32\u4603\u45A6\u4578\u7267\u4D77\u45B3\u7CB1\u4CE2\u7CC5\u3B95\u4736\u4744\u4C47\u4C40\u42BF\u3617\u7352\u6E8B\u70D2\u4C57\uA351\u474F\u45DA\u4C85\u7C6C\u4D07\u4AA4\u46A1\u6B23\u7225\u5A54\u1A63\u3E06\u3F61\u664D\u56FB\u0000\u7D95\u591D\u8BB9\u3DF4\u9734\u7BEF\u5BDB\u1D5E\u5AA4\u3625\u9EB0\u5AD1\u5BB7\u5CFC\u676E\u8593\u9945\u7461\u749D\u3875\u1D53\u369E\u6021\u3EEC\u58DE\u3AF5\u7AFC\u9F97\u4161\u890D\u31EA\u0A8A\u325E\u430A\u8484\u9F96\u942F\u4930\u8613\u5896\u974A\u9218\u79D0\u7A32\u6660\u6A29\u889D\u744C\u7BC5\u6782\u7A2C\u524F\u9046\u34E6\u73C4\u5DB9\u74C6\u9FC7\u57B3\u492F\u544C\u4131\u368E\u5818\u7A72\u7B65\u8B8F\u46AE\u6E88\u4181\u5D99\u7BAE\u24BC\u9FC8\u24C1\u24C9\u24CC\u9FC9\u8504\u35BB\u40B4\u9FCA\u44E1\uADFF\u62C1\u706E\u9FCB";
+
+ private static final String TABLE1 = "\u31C0\u31C1\u31C2\u31C3\u31C4\u010C\u31C5\u00D1\u00CD\u31C6\u31C7\u00CB\u1FE8\u31C8\u00CA\u31C9\u31CA\u31CB\u31CC\u010E\u31CD\u31CE\u0100\u00C1\u01CD\u00C0\u0112\u00C9\u011A\u00C8\u014C\u00D3\u01D1\u00D2\u0000\u1EBE\u0000\u1EC0\u00CA\u0101\u00E1\u01CE\u00E0\u0251\u0113\u00E9\u011B\u00E8\u012B\u00ED\u01D0\u00EC\u014D\u00F3\u01D2\u00F2\u016B\u00FA\u01D4\u00F9\u01D6\u01D8\u01DA\u01DC\u00FC\u0000\u1EBF\u0000\u1EC1\u00EA\u0261\u23DA\u23DB";
+
+ private static final String TABLE2 = "\uA3A9\u1145\u0000\u650A\u0000\u0000\u4E3D\u6EDD\u9D4E\u91DF\u0000\u0000\u7735\u6491\u4F1A\u4F28\u4FA8\u5156\u5174\u519C\u51E4\u52A1\u52A8\u533B\u534E\u53D1\u53D8\u56E2\u58F0\u5904\u5907\u5932\u5934\u5B66\u5B9E\u5B9F\u5C9A\u5E86\u603B\u6589\u67FE\u6804\u6865\u6D4E\u70BC\u7535\u7EA4\u7EAC\u7EBA\u7EC7\u7ECF\u7EDF\u7F06\u7F37\u827A\u82CF\u836F\u89C6\u8BBE\u8BE2\u8F66\u8F67\u8F6E\u7411\u7CFC\u7DCD\u6946\u7AC9\u5227\u0000\u0000\u0000\u0000\u918C\u78B8\u915E\u80BC\u0000\u8D0B\u80F6\u09E7\u0000\u0000\u809F\u9EC7\u4CCD\u9DC9\u9E0C\u4C3E\u9DF6\u700E\u9E0A\uA133\u35C1\u0000\u6E9A\u823E\u7519\u0000\u4911\u9A6C\u9A8F\u9F99\u7987\u846C\u1DCA\u05D0\u2AE6\u4E24\u4E81\u4E80\u4E87\u4EBF\u4EEB\u4F37\u344C\u4FBD\u3E48\u5003\u5088\u347D\u3493\u34A5\u5186\u5905\u51DB\u51FC\u5205\u4E89\u5279\u5290\u5327\u35C7\u53A9\u3551\u53B0\u3553\u53C2\u5423\u356D\u3572\u3681\u5493\u54A3\u54B4\u54B9\u54D0\u54EF\u5518\u5523\u5528\u3598\u553F\u35A5\u35BF\u55D7\u35C5\u7D84\u5525\u0000\u0C42\u0D15\u512B\u5590\u2CC6\u39EC\u0341\u8E46\u4DB8\u94E5\u4053\u80BE\u777A\u2C38\u3A34\u47D5\u815D\u69F2\u4DEA\u64DD\u0D7C\u0FB4\u0CD5\u10F4\u648D\u8E7E\u0E96\u0C0B\u0F64\u2CA9\u8256\u44D3\u0000\u0D46\u9A4D\u80E9\u47F4\u4EA7\u2CC2\u9AB2\u3A67\u95F4\u3FED\u3506\u52C7\u97D4\u78C8\u2D44\u9D6E\u9815\u0000\u43D9\u60A5\u64B4\u54E3\u2D4C\u2BCA\u1077\u39FB\u106F\u66DA\u6716\u79A0\u64EA\u5052\u0C43\u8E68\u21A1\u8B4C\u0731\u0000\u480B\u01A9\u3FFA\u5873\u2D8D\u0000\u45C8\u04FC\u6097\u0F4C\u0D96\u5579\u40BB\u43BA\u0000\u4AB4\u2A66\u109D\u81AA\u98F5\u0D9C\u6379\u39FE\u2775\u8DC0\u56A1\u647C\u3E43\u0000\uA601\u0E09\u2ACF\u2CC9\u0000\u10C8\u39C2\u3992\u3A06\u829B\u3578\u5E49\u20C7\u5652\u0F31\u2CB2\u9720\u34BC\u6C3D\u4E3B\u0000\u0000\u7574\u2E8B\u2208\uA65B\u8CCD\u0E7A\u0C34\u681C\u7F93\u10CF\u2803\u2939\u35FB\u51E3\u0E8C\u0F8D\u0EAA\u3F93\u0F30\u0D47\u114F\u0E4C\u0000\u0EAB\u0BA9\u0D48\u10C0\u113D\u3FF9\u2696\u6432\u0FAD\u33F4\u7639\u2BCE\u0D7E\u0D7F\u2C51\u2C55\u3A18\u0E98\u10C7\u0F2E\uA632\u6B
50\u8CD2\u8D99\u8CCA\u95AA\u54CC\u82C4\u55B9\u0000\u9EC3\u9C26\u9AB6\u775E\u2DEE\u7140\u816D\u80EC\u5C1C\u6572\u8134\u3797\u535F\u80BD\u91B6\u0EFA\u0E0F\u0E77\u0EFB\u35DD\u4DEB\u3609\u0CD6\u56AF\u27B5\u10C9\u0E10\u0E78\u1078\u1148\u8207\u1455\u0E79\u4E50\u2DA4\u5A54\u101D\u101E\u10F5\u10F6\u579C\u0E11\u7694\u82CD\u0FB5\u0E7B\u517E\u3703\u0FB6\u1180\u52D8\uA2BD\u49DA\u183A\u4177\u827C\u5899\u5268\u361A\u573D\u7BB2\u5B68\u4800\u4B2C\u9F27\u49E7\u9C1F\u9B8D\u5B74\u313D\u55FB\u35F2\u5689\u4E28\u5902\u1BC1\uF878\u9751\u0086\u4E5B\u4EBB\u353E\u5C23\u5F51\u5FC4\u38FA\u624C\u6535\u6B7A\u6C35\u6C3A\u706C\u722B\u4E2C\u72AD\u48E9\u7F52\u793B\u7CF9\u7F53\u626A\u34C1\u0000\u634B\u8002\u8080\u6612\u6951\u535D\u8864\u89C1\u78B2\u8BA0\u8D1D\u9485\u9578\u957F\u95E8\u8E0F\u97E6\u9875\u98CE\u98DE\u9963\u9810\u9C7C\u9E1F\u9EC4\u6B6F\uF907\u4E37\u0087\u961D\u6237\u94A2\u0000\u503B\u6DFE\u9C73\u9FA6\u3DC9\u888F\u414E\u7077\u5CF5\u4B20\u51CD\u3559\u5D30\u6122\u8A32\u8FA7\u91F6\u7191\u6719\u73BA\u3281\uA107\u3C8B\u1980\u4B10\u78E4\u7402\u51AE\u870F\u4009\u6A63\uA2BA\u4223\u860F\u0A6F\u7A2A\u9947\u8AEA\u9755\u704D\u5324\u207E\u93F4\u76D9\u89E3\u9FA7\u77DD\u4EA3\u4FF0\u50BC\u4E2F\u4F17\u9FA8\u5434\u7D8B\u5892\u58D0\u1DB6\u5E92\u5E99\u5FC2\u2712\u658B\u33F9\u6919\u6A43\u3C63\u6CFF\u0000\u7200\u4505\u738C\u3EDB\u4A13\u5B15\u74B9\u8B83\u5CA4\u5695\u7A93\u7BEC\u7CC3\u7E6C\u82F8\u8597\u9FA9\u8890\u9FAA\u8EB9\u9FAB\u8FCF\u855F\u99E0\u9221\u9FAC\u8DB9\u143F\u4071\u42A2\u5A1A\u0000\u0000\u0000\u9868\u676B\u4276\u573D\u0000\u85D6\u497B\u82BF\u710D\u4C81\u6D74\u5D7B\u6B15\u6FBE\u9FAD\u9FAE\u5B96\u9FAF\u66E7\u7E5B\u6E57\u79CA\u3D88\u44C3\u3256\u2796\u439A\u4536\u0000\u5CD5\u3B1A\u8AF9\u5C78\u3D12\u3551\u5D78\u9FB2\u7157\u4558\u40EC\u1E23\u4C77\u3978\u344A\u01A4\u6C41\u8ACC\u4FB4\u0239\u59BF\u816C\u9856\u98FA\u5F3B\u0B9F\u0000\u21C1\u896D\u4102\u46BB\u9079\u3F07\u9FB3\uA1B5\u40F8\u37D6\u46F7\u6C46\u417C\u86B2\u73FF\u456D\u38D4\u549A\u4561\u451B\u4D89\u4C7B\u4D76\u45EA\u3FC8\u4B0F\u3661\u44DE\u44BD\u41ED
\u5D3E\u5D48\u5D56\u3DFC\u380F\u5DA4\u5DB9\u3820\u3838\u5E42\u5EBD\u5F25\u5F83\u3908\u3914\u393F\u394D\u60D7\u613D\u5CE5\u3989\u61B7\u61B9\u61CF\u39B8\u622C\u6290\u62E5\u6318\u39F8\u56B1\u3A03\u63E2\u63FB\u6407\u645A\u3A4B\u64C0\u5D15\u5621\u9F9F\u3A97\u6586\u3ABD\u65FF\u6653\u3AF2\u6692\u3B22\u6716\u3B42\u67A4\u6800\u3B58\u684A\u6884\u3B72\u3B71\u3B7B\u6909\u6943\u725C\u6964\u699F\u6985\u3BBC\u69D6\u3BDD\u6A65\u6A74\u6A71\u6A82\u3BEC\u6A99\u3BF2\u6AAB\u6AB5\u6AD4\u6AF6\u6B81\u6BC1\u6BEA\u6C75\u6CAA\u3CCB\u6D02\u6D06\u6D26\u6D81\u3CEF\u6DA4\u6DB1\u6E15\u6E18\u6E29\u6E86\u89C0\u6EBB\u6EE2\u6EDA\u9F7F\u6EE8\u6EE9\u6F24\u6F34\u3D46\u3F41\u6F81\u6FBE\u3D6A\u3D75\u71B7\u5C99\u3D8A\u702C\u3D91\u7050\u7054\u706F\u707F\u7089\u0325\u43C1\u35F1\u0ED8\u3ED7\u57BE\u6ED3\u713E\u57E0\u364E\u69A2\u8BE9\u5B74\u7A49\u58E1\u94D9\u7A65\u7A7D\u59AC\u7ABB\u7AB0\u7AC2\u7AC3\u71D1\u648D\u41CA\u7ADA\u7ADD\u7AEA\u41EF\u54B2\u5C01\u7B0B\u7B55\u7B29\u530E\u5CFE\u7BA2\u7B6F\u839C\u5BB4\u6C7F\u7BD0\u8421\u7B92\u7BB8\u5D20\u3DAD\u5C65\u8492\u7BFA\u7C06\u7C35\u5CC1\u7C44\u7C83\u4882\u7CA6\u667D\u4578\u7CC9\u7CC7\u7CE6\u7C74\u7CF3\u7CF5\u7CCE\u7E67\u451D\u6E44\u7D5D\u6ED6\u748D\u7D89\u7DAB\u7135\u7DB3\u7DD2\u4057\u6029\u7DE4\u3D13\u7DF5\u17F9\u7DE5\u836D\u7E1D\u6121\u615A\u7E6E\u7E92\u432B\u946C\u7E27\u7F40\u7F41\u7F47\u7936\u62D0\u99E1\u7F97\u6351\u7FA3\u1661\u0068\u455C\u3766\u4503\u833A\u7FFA\u6489\u8005\u8008\u801D\u8028\u802F\uA087\u6CC3\u803B\u803C\u8061\u2714\u4989\u6626\u3DE3\u66E8\u6725\u80A7\u8A48\u8107\u811A\u58B0\u26F6\u6C7F\u6498\u4FB8\u64E7\u148A\u8218\u185E\u6A53\u4A65\u4A95\u447A\u8229\u0B0D\u6A52\u3D7E\u4FF9\u14FD\u84E2\u8362\u6B0A\u49A7\u3530\u1773\u3DF8\u82AA\u691B\uF994\u41DB\u854B\u82D0\u831A\u0E16\u17B4\u36C1\u317D\u355A\u827B\u82E2\u8318\u3E8B\u6DA3\u6B05\u6B97\u35CE\u3DBF\u831D\u55EC\u8385\u450B\u6DA5\u83AC\u83C1\u83D3\u347E\u6ED4\u6A57\u855A\u3496\u6E42\u2EEF\u8458\u5BE4\u8471\u3DD3\u44E4\u6AA7\u844A\u3CB5\u7958\u84A8\u6B96\u6E77\u6E43\u84DE\u840F\u8391\u44A0\u8493\u84E4\u
5C91\u4240\u5CC0\u4543\u8534\u5AF2\u6E99\u4527\u8573\u4516\u67BF\u8616\u8625\u863B\u85C1\u7088\u8602\u1582\u70CD\uF9B2\u456A\u8628\u3648\u18A2\u53F7\u739A\u867E\u8771\uA0F8\u87EE\u2C27\u87B1\u87DA\u880F\u5661\u866C\u6856\u460F\u8845\u8846\u75E0\u3DB9\u75E4\u885E\u889C\u465B\u88B4\u88B5\u63C1\u88C5\u7777\u770F\u8987\u898A\u89A6\u89A9\u89A7\u89BC\u8A25\u89E7\u7924\u7ABD\u8A9C\u7793\u91FE\u8A90\u7A59\u7AE9\u7B3A\u3F8F\u4713\u7B38\u717C\u8B0C\u8B1F\u5430\u5565\u8B3F\u8B4C\u8B4D\u8AA9\u4A7A\u8B90\u8B9B\u8AAF\u16DF\u4615\u884F\u8C9B\u7D54\u7D8F\uF9D4\u3725\u7D53\u8CD6\u7D98\u7DBD\u8D12\u8D03\u1910\u8CDB\u705C\u8D11\u4CC9\u3ED0\u8D77\u8DA9\u8002\u1014\u498A\u3B7C\u81BC\u710C\u7AE7\u8EAD\u8EB6\u8EC3\u92D4\u8F19\u8F2D\u8365\u8412\u8FA5\u9303\uA29F\u0A50\u8FB3\u492A\u89DE\u853D\u3DBB\u5EF8\u3262\u8FF9\uA014\u86BC\u8501\u2325\u3980\u6ED7\u9037\u853C\u7ABE\u9061\u856C\u860B\u90A8\u8713\u90C4\u86E6\u90AE\u90FD\u9167\u3AF0\u91A9\u91C4\u7CAC\u8933\u1E89\u920E\u6C9F\u9241\u9262\u55B9\u92B9\u8AC6\u3C9B\u8B0C\u55DB\u0D31\u932C\u936B\u8AE1\u8BEB\u708F\u5AC3\u8AE2\u8AE5\u4965\u9244\u8BEC\u8C39\u8BFF\u9373\u945B\u8EBC\u9585\u95A6\u9426\u95A0\u6FF6\u42B9\u267A\u86D8\u127C\u3E2E\u49DF\u6C1C\u967B\u9696\u416C\u96A3\u6ED5\u61DA\u96B6\u78F5\u8AE0\u96BD\u53CC\u49A1\u6CB8\u0274\u6410\u90AF\u90E5\u4AD1\u1915\u330A\u9731\u8642\u9736\u4A0F\u453D\u4585\u4AE9\u7075\u5B41\u971B\u975C\u91D5\u9757\u5B4A\u91EB\u975F\u9425\u50D0\u30B7\u30BC\u9789\u979F\u97B1\u97BE\u97C0\u97D2\u97E0\u546C\u97EE\u741C\u9433\u97FF\u97F5\u941D\u797A\u4AD1\u9834\u9833\u984B\u9866\u3B0E\u7175\u3D51\u0630\u415C\u5706\u98CA\u98B7\u98C8\u98C7\u4AFF\u6D27\u16D3\u55B0\u98E1\u98E6\u98EC\u9378\u9939\u4A29\u4B72\u9857\u9905\u99F5\u9A0C\u9A3B\u9A10\u9A58\u5725\u36C4\u90B1\u9BD5\u9AE0\u9AE2\u9B05\u9AF4\u4C0E\u9B14\u9B2D\u8600\u5034\u9B34\u69A8\u38C3\u307D\u9B50\u9B40\u9D3E\u5A45\u1863\u9B8E\u424B\u9C02\u9BFF\u9C0C\u9E68\u9DD4\u9FB7\uA192\uA1AB\uA0E1\uA123\uA1DF\u9D7E\u9D83\uA134\u9E0E\u6888\u9DC4\u215B\uA193\uA220\u193B\uA233\u9D39\uA0
B9\uA2B4\u9E90\u9E95\u9E9E\u9EA2\u4D34\u9EAA\u9EAF\u4364\u9EC1\u3B60\u39E5\u3D1D\u4F32\u37BE\u8C2B\u9F02\u9F08\u4B96\u9424\u6DA2\u9F17\u9F16\u9F39\u569F\u568A\u9F45\u99B8\u908B\u97F2\u847F\u9F62\u9F69\u7ADC\u9F8E\u7216\u4BBE\u4975\u49BB\u7177\u49F8\u4348\u4A51\u739E\u8BDA\u18FA\u799F\u897E\u8E36\u9369\u93F3\u8A44\u92EC\u9381\u93CB\u896C\u44B9\u7217\u3EEB\u7772\u7A43\u70D0\u4473\u43F8\u717E\u17EF\u70A3\u18BE\u3599\u3EC7\u1885\u542F\u17F8\u3722\u16FB\u1839\u36E1\u1774\u18D1\u5F4B\u3723\u16C0\u575B\u4A25\u13FE\u12A8\u13C6\u14B6\u8503\u36A6\u8503\u8455\u4994\u7165\u3E31\u555C\u3EFB\u7052\u44F4\u36EE\u999D\u6F26\u67F9\u3733\u3C15\u3DE7\u586C\u1922\u6810\u4057\u373F\u40E1\u408B\u410F\u6C21\u54CB\u569E\u66B1\u5692\u0FDF\u0BA8\u0E0D\u93C6\u8B13\u939C\u4EF8\u512B\u3819\u4436\u4EBC\u0465\u037F\u4F4B\u4F8A\u5651\u5A68\u01AB\u03CB\u3999\u030A\u0414\u3435\u4F29\u02C0\u8EB3\u0275\u8ADA\u020C\u4E98\u50CD\u510D\u4FA2\u4F03\u4A0E\u3E8A\u4F42\u502E\u506C\u5081\u4FCC\u4FE5\u5058\u50FC\u5159\u515B\u515D\u515E\u6E76\u3595\u3E39\u3EBF\u6D72\u1884\u3E89\u51A8\u51C3\u05E0\u44DD\u04A3\u0492\u0491\u8D7A\u8A9C\u070E\u5259\u52A4\u0873\u52E1\u936E\u467A\u718C\u438C\u0C20\u49AC\u10E4\u69D1\u0E1D\u7479\u3EDE\u7499\u7414\u7456\u7398\u4B8E\u4ABC\u408D\u53D0\u3584\u720F\u40C9\u55B4\u0345\u54CD\u0BC6\u571D\u925D\u96F4\u9366\u57DD\u578D\u577F\u363E\u58CB\u5A99\u8A46\u16FA\u176F\u1710\u5A2C\u59B8\u928F\u5A7E\u5ACF\u5A12\u5946\u19F3\u1861\u4295\u36F5\u6D05\u7443\u5A21\u5E83\u5A81\u8BD7\u0413\u93E0\u748C\u1303\u7105\u4972\u9408\u89FB\u93BD\u37A0\u5C1E\u5C9E\u5E5E\u5E48\u1996\u197C\u3AEE\u5ECD\u5B4F\u1903\u1904\u3701\u18A0\u36DD\u16FE\u36D3\u812A\u8A47\u1DBA\u3472\u89A8\u5F0C\u5F0E\u1927\u17AB\u5A6B\u173B\u5B44\u8614\u75FD\u8860\u607E\u2860\u262B\u5FDB\u3EB8\u25AF\u25BE\u9088\u6F73\u61C0\u003E\u0046\u261B\u6199\u6198\u6075\u2C9B\u2D07\u46D4\u914D\u6471\u4665\u2B6A\u3A29\u2B22\u3450\u98EA\u2E78\u6337\uA45B\u64B6\u6331\u63D1\u49E3\u2D67\u62A4\u2CA1\u643B\u656B\u6972\u3BF4\u308E\u32AD\u4989\u32AB\u550D\u32E0
\u18D9\u943F\u66CE\u3289\u31B3\u3AE0\u4190\u5584\u8B22\u558F\u16FC\u555B\u5425\u78EE\u3103\u182A\u3234\u3464\u320F\u3182\u42C9\u668E\u6D24\u666B\u4B93\u6630\u7870\u1DEB\u6663\u32D2\u32E1\u661E\u5872\u38D1\u383A\u37BC\u3B99\u37A2\u33FE\u74D0\u3B96\u678F\u462A\u68B6\u681E\u3BC4\u6ABE\u3863\u37D5\u4487\u6A33\u6A52\u6AC9\u6B05\u1912\u6511\u6898\u6A4C\u3BD7\u6A7A\u6B57\u3FC0\u3C9A\u93A0\u92F2\u8BEA\u8ACB\u9289\u801E\u89DC\u9467\u6DA5\u6F0B\u49EC\u6D67\u3F7F\u3D8F\u6E04\u403C\u5A3D\u6E0A\u5847\u6D24\u7842\u713B\u431A\u4276\u70F1\u7250\u7287\u7294\u478F\u4725\u5179\u4AA4\u05EB\u747A\u3EF8\u365F\u4A4A\u4917\u5FE1\u3F06\u3EB1\u4ADF\u8C23\u3F35\u60A7\u3EF3\u74CC\u743C\u9387\u7437\u449F\u6DEA\u4551\u7583\u3F63\u4CD9\u4D06\u3F58\u7555\u7673\uA5C6\u3B19\u7468\u8ACC\u49AB\u498E\u3AFB\u3DCD\u4A4E\u3EFF\u49C5\u48F3\u91FA\u5732\u9342\u8AE3\u1864\u50DF\u5221\u51E7\u7778\u3232\u770E\u770F\u777B\u4697\u3781\u3A5E\u48F0\u7438\u749B\u3EBF\u4ABA\u4AC7\u40C8\u4A96\u61AE\u9307\u5581\u781E\u788D\u7888\u78D2\u73D0\u7959\u7741\u56E3\u410E\u799B\u8496\u79A5\u6A2D\u3EFA\u7A3A\u79F4\u416E\u16E6\u4132\u9235\u79F1\u0D4C\u498C\u0299\u3DBA\u176E\u3597\u556B\u3570\u36AA\u01D4\u0C0D\u7AE2\u5A59\u26F5\u5AAF\u5A9C\u5A0D\u025B\u78F0\u5A2A\u5BC6\u7AFE\u41F9\u7C5D\u7C6D\u4211\u5BB3\u5EBC\u5EA6\u7CCD\u49F9\u17B0\u7C8E\u7C7C\u7CAE\u6AB2\u7DDC\u7E07\u7DD3\u7F4E\u6261\u615C\u7B48\u7D97\u5E82\u426A\u6B75\u0916\u67D6\u004E\u35CF\u57C4\u6412\u63F8\u4962\u7FDD\u7B27\u082C\u5AE9\u5D43\u7B0C\u5E0E\u99E6\u8645\u9A63\u6A1C\u343F\u39E2\u49F7\u65AD\u9A1F\u65A0\u8480\u7127\u6CD1\u44EA\u8137\u4402\u80C6\u8109\u8142\u67B4\u98C3\u6A42\u8262\u8265\u6A51\u8453\u6DA7\u8610\u721B\u5A86\u417F\u1840\u5B2B\u18A1\u5AE4\u18D8\u86A0\uF9BC\u3D8F\u882D\u7422\u5A02\u886E\u4F45\u8887\u88BF\u88E6\u8965\u894D\u5683\u8954\u7785\u7784\u8BF5\u8BD9\u8B9C\u89F9\u3EAD\u84A3\u46F5\u46CF\u37F2\u8A3D\u8A1C\u9448\u5F4D\u922B\u4284\u65D4\u7129\u70C4\u1845\u9D6D\u8C9F\u8CE9\u7DDC\u599A\u77C3\u59F0\u436E\u36D4\u8E2A\u8EA7\u4C09\u8F30\u8F4A\u42F4\u6C58\u
6FBB\u2321\u489B\u6F79\u6E8B\u17DA\u9BE9\u36B5\u492F\u90BB\u9097\u5571\u4906\u91BB\u9404\u8A4B\u4062\u8AFC\u9427\u8C1D\u8C3B\u84E5\u8A2B\u9599\u95A7\u9597\u9596\u8D34\u7445\u3EC2\u48FF\u4A42\u43EA\u3EE7\u3225\u968F\u8EE7\u8E66\u8E65\u3ECC\u49ED\u4A78\u3FEE\u7412\u746B\u3EFC\u9741\u90B0\u6847\u4A1D\u9093\u57DF\u975D\u9368\u8989\u8C26\u8B2F\u63BE\u92BA\u5B11\u8B69\u493C\u73F9\u421B\u979B\u9771\u9938\u0F26\u5DC1\u8BC5\u4AB2\u981F\u94DA\u92F6\u95D7\u91E5\u44C0\u8B50\u4A67\u8B64\u98DC\u8A45\u3F00\u922A\u4925\u8414\u993B\u994D\u7B06\u3DFD\u999B\u4B6F\u99AA\u9A5C\u8B65\u58C8\u6A8F\u9A21\u5AFE\u9A2F\u98F1\u4B90\u9948\u99BC\u4BBD\u4B97\u937D\u5872\u1302\u5822\u49B8\u14E8\u7844\u271F\u3DB8\u68C5\u3D7D\u9458\u3927\u6150\u2781\u296B\u6107\u9C4F\u9C53\u9C7B\u9C35\u9C10\u9B7F\u9BCF\u9E2D\u9B9F\uA1F5\uA0FE\u9D21\u4CAE\u4104\u9E18\u4CB0\u9D0C\uA1B4\uA0ED\uA0F3\u992F\u9DA5\u84BD\u6E12\u6FDF\u6B82\u85FC\u4533\u6DA4\u6E84\u6DF0\u8420\u85EE\u6E00\u37D7\u6064\u79E2\u359C\u3640\u492D\u49DE\u3D62\u93DB\u92BE\u9348\u02BF\u78B9\u9277\u944D\u4FE4\u3440\u9064\u555D\u783D\u7854\u78B6\u784B\u1757\u31C9\u4941\u369A\u4F72\u6FDA\u6FD9\u701E\u701E\u5414\u41B5\u57BB\u58F3\u578A\u9D16\u57D7\u7134\u34AF\u41AC\u71EB\u6C40\u4F97\u5B28\u17B5\u8A49\u610C\u5ACE\u5A0B\u42BC\u4488\u372C\u4B7B\u89FC\u93BB\u93B8\u18D6\u0F1D\u8472\u6CC0\u1413\u42FA\u2C26\u43C1\u5994\u3DB7\u6741\u7DA8\u615B\u60A4\u49B9\u498B\u89FA\u92E5\u73E2\u3EE9\u74B4\u8B63\u189F\u3EE1\u4AB3\u6AD8\u73F3\u73FB\u3ED6\u4A3E\u4A94\u17D9\u4A66\u03A7\u1424\u49E5\u7448\u4916\u70A5\u4976\u9284\u73E6\u935F\u04FE\u9331\u8ACE\u8A16\u9386\u8BE7\u55D5\u4935\u8A82\u716B\u4943\u0CFF\u56A4\u061A\u0BEB\u0CB8\u5502\u79C4\u17FA\u7DFE\u16C2\u4A50\u1852\u452E\u9401\u370A\u8AC0\u49AD\u59B0\u18BF\u1883\u7484\u5AA1\u36E2\u3D5B\u36B0\u925F\u5A79\u8A81\u1862\u9374\u3CCD\u0AB4\u4A96\u398A\u50F4\u3D69\u3D4C\u139C\u7175\u42FB\u8218\u6E0F\u90E4\u44EB\u6D57\u7E4F\u7067\u6CAF\u3CD6\u3FED\u3E2D\u6E02\u6F0C\u3D6F\u03F5\u7551\u36BC\u34C8\u4680\u3EDA\u4871\u59C4\u926E\u493E\u8F
41\u8C1C\u6BC0\u5812\u57C8\u36D6\u1452\u70FE\u4362\u4A71\u2FE3\u12B0\u23BD\u68B9\u6967\u1398\u34E5\u7BF4\u36DF\u8A83\u37D6\u33FA\u4C9F\u6A1A\u36AD\u6CB7\u843E\u44DF\u44CE\u6D26\u6D51\u6C82\u6FDE\u6F17\u7109\u833D\u173A\u83ED\u6C80\u7053\u17DB\u5989\u5A82\u17B3\u5A61\u5A71\u1905\u41FC\u372D\u59EF\u173C\u36C7\u718E\u9390\u669A\u42A5\u5A6E\u5A2B\u4293\u6A2B\u3EF9\u7736\u445B\u42CA\u711D\u4259\u89E1\u4FB0\u6D28\u5CC2\u44CE\u7E4D\u43BD\u6A0C\u4256\u1304\u70A6\u7133\u43E9\u3DA5\u6CDF\uF825\u4A4F\u7E65\u59EB\u5D2F\u3DF3\u5F5C\u4A5D\u17DF\u7DA4\u8426\u5485\u3AFA\u3300\u0214\u577E\u08D5\u0619\u3FE5\u1F9E\uA2B6\u7003\u915B\u5D70\u738F\u7CD3\u8A59\u9420\u4FC8\u7FE7\u72CD\u7310\u7AF4\u7338\u7339\u56F6\u7341\u7348\u3EA9\u7B18\u906C\u71F5\u48F2\u73E1\u81F6\u3ECA\u770C\u3ED1\u6CA2\u56FD\u7419\u741E\u741F\u3EE2\u3EF0\u3EF4\u3EFA\u74D3\u3F0E\u3F53\u7542\u756D\u7572\u758D\u3F7C\u75C8\u75DC\u3FC0\u764D\u3FD7\u7674\u3FDC\u767A\u4F5C\u7188\u5623\u8980\u5869\u401D\u7743\u4039\u6761\u4045\u35DB\u7798\u406A\u406F\u5C5E\u77BE\u77CB\u58F2\u7818\u70B9\u781C\u40A8\u7839\u7847\u7851\u7866\u8448\u5535\u7933\u6803\u7932\u4103\u4109\u7991\u7999\u8FBB\u7A06\u8FBC\u4167\u7A91\u41B2\u7ABC\u8279\u41C4\u7ACF\u7ADB\u41CF\u4E21\u7B62\u7B6C\u7B7B\u7C12\u7C1B\u4260\u427A\u7C7B\u7C9C\u428C\u7CB8\u4294\u7CED\u8F93\u70C0\u0CCF\u7DCF\u7DD4\u7DD0\u7DFD\u7FAE\u7FB4\u729F\u4397\u8020\u8025\u7B39\u802E\u8031\u8054\u3DCC\u57B4\u70A0\u80B7\u80E9\u43ED\u810C\u732A\u810E\u8112\u7560\u8114\u4401\u3B39\u8156\u8159\u815A\u4413\u583A\u817C\u8184\u4425\u8193\u442D\u81A5\u57EF\u81C1\u81E4\u8254\u448F\u82A6\u8276\u82CA\u82D8\u82FF\u44B0\u8357\u9669\u698A\u8405\u70F5\u8464\u60E3\u8488\u4504\u84BE\u84E1\u84F8\u8510\u8538\u8552\u453B\u856F\u8570\u85E0\u4577\u8672\u8692\u86B2\u86EF\u9645\u878B\u4606\u4617\u88AE\u88FF\u8924\u8947\u8991\u7967\u8A29\u8A38\u8A94\u8AB4\u8C51\u8CD4\u8CF2\u8D1C\u4798\u585F\u8DC3\u47ED\u4EEE\u8E3A\u55D8\u5754\u8E71\u55F5\u8EB0\u4837\u8ECE\u8EE2\u8EE4\u8EED\u8EF2\u8FB7\u8FC1\u8FCA\u8FCC\u9033\u99C4\u48AD
\u98E0\u9213\u491E\u9228\u9258\u926B\u92B1\u92AE\u92BF\u92E3\u92EB\u92F3\u92F4\u92FD\u9343\u9384\u93AD\u4945\u4951\u9EBF\u9417\u5301\u941D\u942D\u943E\u496A\u9454\u9479\u952D\u95A2\u49A7\u95F4\u9633\u49E5\u67A0\u4A24\u9740\u4A35\u97B2\u97C2\u5654\u4AE4\u60E8\u98B9\u4B19\u98F1\u5844\u990E\u9919\u51B4\u991C\u9937\u9942\u995D\u9962\u4B70\u99C5\u4B9D\u9A3C\u9B0F\u7A83\u9B69\u9B81\u9BDD\u9BF1\u9BF4\u4C6D\u9C20\u376F\u1BC2\u9D49\u9C3A\u9EFE\u5650\u9D93\u9DBD\u9DC0\u9DFC\u94F6\u8FB6\u9E7B\u9EAC\u9EB1\u9EBD\u9EC6\u94DC\u9EE2\u9EF1\u9EF8\u7AC8\u9F44\u0094\u02B7\u03A0\u691A\u94C3\u59AC\u04D7\u5840\u94C1\u37B9\u05D5\u0615\u0676\u16BA\u5757\u7173\u0AC2\u0ACD\u0BBF\u546A\uF83B\u0BCB\u549E\u0BFB\u0C3B\u0C53\u0C65\u0C7C\u60E7\u0C8D\u567A\u0CB5\u0CDD\u0CED\u0D6F\u0DB2\u0DC8\u6955\u9C2F\u87A5\u0E04\u0E0E\u0ED7\u0F90\u0F2D\u0E73\u5C20\u0FBC\u5E0B\u105C\u104F\u1076\u671E\u107B\u1088\u1096\u3647\u10BF\u10D3\u112F\u113B\u5364\u84AD\u12E3\u1375\u1336\u8B81\u1577\u1619\u17C3\u17C7\u4E78\u70BB\u182D\u196A\u1A2D\u1A45\u1C2A\u1C70\u1CAC\u1EC8\u62C3\u1ED5\u1F15\u7198\u6855\u2045\u69E9\u36C8\u227C\u23D7\u23FA\u272A\u2871\u294F\u82FD\u2967\u2993\u2AD5\u89A5\u2AE8\u8FA0\u2B0E\u97B8\u2B3F\u9847\u9ABD\u2C4C\u0000\u2C88\u2CB7\u5BE8\u2D08\u2D12\u2DB7\u2D95\u2E42\u2F74\u2FCC\u3033\u3066\u331F\u33DE\u5FB1\u6648\u66BF\u7A79\u3567\u35F3\u7201\u49BA\u77D7\u361A\u3716\u7E87\u0346\u58B5\u670E\u6918\u3AA7\u7657\u5FE2\u3E11\u3EB9\u75FE\u209A\u48D0\u4AB8\u4119\u8A9A\u42EE\u430D\u403B\u4334\u4396\u4A45\u05CA\u51D2\u0611\u599F\u1EA8\u3BBE\u3CFF\u4404\u44D6\u5788\u4674\u399B\u472F\u85E8\u99C9\u3762\u21C3\u8B5E\u8B4E\u99D6\u4812\u48FB\u4A15\u7209\u4AC0\u0C78\u5965\u4EA5\u4F86\u0779\u8EDA\u502C\u528F\u573F\u7171\u5299\u5419\u3F4A\u4AA7\u55BC\u5446\u546E\u6B52\u91D4\u3473\u553F\u7632\u555E\u4718\u5562\u5566\u57C7\u493F\u585D\u5066\u34FB\u33CC\u60DE\u5903\u477C\u8948\u5AAE\u5B89\u5C06\u1D90\u57A1\u7151\u6FB6\u6102\u7C12\u9056\u61B2\u4F9A\u8B62\u6402\u644A\u5D5B\u6BF7\u8F36\u6484\u191C\u8AEA\u49F6\u6488\u3FEF\u6512\u
4BC0\u65BF\u66B5\u271B\u9465\u57E1\u6195\u5A27\uF8CD\u4FBB\u56B9\u4521\u66FC\u4E6A\u4934\u9656\u6D8F\u6CBD\u3618\u8977\u6799\u686E\u6411\u685E\u71DF\u68C7\u7B42\u90C0\u0A11\u6926\u9104\u6939\u7A45\u9DF0\u69FA\u9A26\u6A2D\u365F\u6469\u0021\u7983\u6A34\u6B5B\u5D2C\u3519\u83CF\u6B9D\u46D0\u6CA4\u753B\u8865\u6DAE\u58B6\u371C\u258D\u704B\u71CD\u3C54\u7280\u7285\u9281\u217A\u728B\u9330\u72E6\u49D0\u6C39\u949F\u7450\u0EF8\u8827\u88F5\u2926\u8473\u17B1\u6EB8\u4A2A\u1820\u39A4\u36B9\u5C10\u79E3\u453F\u66B6\u9CAD\u98A4\u8943\u77CC\u7858\u56D6\u40DF\u160A\u39A1\u372F\u80E8\u13C5\u71AD\u8366\u79DD\u91A8\u5A67\u4CB7\u70AF\u89AB\u79FD\u7A0A\u7B0B\u7D66\u417A\u7B43\u797E\u8009\u6FB5\uA2DF\u6A03\u8318\u53A2\u6E07\u93BF\u6836\u975D\u816F\u8023\u69B5\u13ED\u322F\u8048\u5D85\u8C30\u8083\u5715\u9823\u8949\u5DAB\u4988\u65BE\u69D5\u53D2\u4AA5\u3F81\u3C11\u6736\u8090\u80F4\u812E\u1FA1\u814F\u8189\u81AF\u821A\u8306\u832F\u838A\u35CA\u8468\u86AA\u48FA\u63E6\u8956\u7808\u9255\u89B8\u43F2\u89E7\u43DF\u89E8\u8B46\u8BD4\u59F8\u8C09\u8F0B\u8FC5\u90EC\u7B51\u9110\u913C\u3DF7\u915E\u4ACA\u8FD0\u728F\u568B\u94E7\u95E9\u95B0\u95B8\u9732\u98D1\u9949\u996A\u99C3\u9A28\u9B0E\u9D5A\u9D9B\u7E9F\u9EF8\u9F23\u4CA4\u9547\uA293\u71A2\uA2FF\u4D91\u9012\uA5CB\u4D9C\u0C9C\u8FBE\u55C1\u8FBA\u24B0\u8FB9\u4A93\u4509\u7E7F\u6F56\u6AB1\u4EEA\u34E4\u8B2C\u789D\u373A\u8E80\u17F5\u8024\u8B6C\u8B99\u7A3E\u66AF\u3DEB\u7655\u3CB7\u5635\u5956\u4E9A\u5E81\u6258\u56BF\u0E6D\u8E0E\u5B6D\u3E88\u4C9E\u63DE\u62D0\u17F6\u187B\u6530\u562D\u5C4A\u541A\u5311\u3DC6\u9D98\u4C7D\u5622\u561E\u7F49\u5ED8\u5975\u3D40\u8770\u4E1C\u0FEA\u0D49\u36BA\u8117\u9D5E\u8D18\u763B\u9C45\u764E\u77B9\u9345\u5432\u8148\u82F7\u5625\u8132\u8418\u80BD\u55EA\u7962\u5643\u5416\u0E9D\u35CE\u5605\u55F1\u66F1\u82E2\u362D\u7534\u55F0\u55BA\u5497\u5572\u0C41\u0C96\u5ED0\u5148\u0E76\u2C62\u0EA2\u9EAB\u7D5A\u55DE\u1075\u629D\u976D\u5494\u8CCD\u71F6\u9176\u63FC\u63B9\u63FE\u5569\u2B43\u9C72\u2EB3\u519A\u34DF\u0DA7\u51A7\u544D\u551E\u5513\u7666\u8E2D\u688A\u75B1\u80
B6\u8804\u8786\u88C7\u81B6\u841C\u10C1\u44EC\u7304\u4706\u5B90\u830B\u6893\u567B\u26F4\u7D2F\u41A3\u7D73\u6ED0\u72B6\u9170\u11D9\u9208\u3CFC\uA6A9\u0EAC\u0EF9\u7266\u1CA2\u474E\u4FC2\u7FF9\u0FEB\u40FA\u9C5D\u651F\u2DA0\u48F3\u47E0\u9D7C\u0FEC\u0E0A\u6062\u75A3\u0FED\u0000\u6048\u1187\u71A3\u7E8E\u9D50\u4E1A\u4E04\u3577\u5B0D\u6CB2\u5367\u36AC\u39DC\u537D\u36A5\u4618\u589A\u4B6E\u822D\u544B\u57AA\u5A95\u0979\u0000\u3A52\u2465\u7374\u9EAC\u4D09\u9BED\u3CFE\u9F30\u4C5B\u4FA9\u959E\u9FDE\u845C\u3DB6\u72B2\u67B3\u3720\u632E\u7D25\u3EF7\u3E2C\u3A2A\u9008\u52CC\u3E74\u367A\u45E9\u048E\u7640\u5AF0\u0EB6\u787A\u7F2E\u58A7\u40BF\u567C\u9B8B\u5D74\u7654\uA434\u9E85\u4CE1\u75F9\u37FB\u6119\u30DA\u43F2\u0000\u565D\u12A9\u57A7\u4963\u9E06\u5234\u70AE\u35AD\u6C4A\u9D7C\u7C56\u9B39\u57DE\u176C\u5C53\u64D3\u94D0\u6335\u7164\u86AD\u0D28\u6D22\u4AE2\u0D71\u0000\u51FE\u1F0F\u5D8E\u9703\u1DD1\u9E81\u904C\u7B1F\u9B02\u5CD1\u7BA3\u6268\u6335\u9AFF\u7BCF\u9B2A\u7C7E\u9B2E\u7C42\u7C86\u9C15\u7BFC\u9B09\u9F17\u9C1B\u493E\u9F5A\u5573\u5BC3\u4FFD\u9E98\u4FF2\u5260\u3E06\u52D1\u5767\u5056\u59B7\u5E12\u97C8\u9DAB\u8F5C\u5469\u97B4\u9940\u97BA\u532C\u6130\u692C\u53DA\u9C0A\u9D02\u4C3B\u9641\u6980\u50A6\u7546\u176D\u99DA\u5273\u0000\u9159\u9681\u915C\u0000\u9151\u8E97\u637F\u6D23\u6ACA\u5611\u918E\u757A\u6285\u03FC\u734F\u7C70\u5C21\u3CFD\u0000\u4919\u76D6\u9B9D\u4E2A\u0CD4\u83BE\u8842\u0000\u5C4A\u69C0\u50ED\u577A\u521F\u5DF5\u4ECE\u6C31\u01F2\u4F39\u549C\u54DA\u529A\u8D82\u35FE\u5F0C\u35F3\u0000\u6B52\u917C\u9FA5\u9B97\u982E\u98B4\u9ABA\u9EA8\u9E84\u717A\u7B14\u0000\u6BFA\u8818\u7F78\u0000\u5620\uA64A\u8E77\u9F53\u0000\u8DD4\u8E4F\u9E1C\u8E01\u6282\u837D\u8E28\u8E75\u7AD3\u4A77\u7A3E\u78D8\u6CEA\u8A67\u7607\u8A5A\u9F26\u6CCE\u87D6\u75C3\uA2B2\u7853\uF840\u8D0C\u72E2\u7371\u8B2D\u7302\u74F1\u8CEB\u4ABB\u862F\u5FBA\u88A0\u44B7\u0000\u183B\u6E05\u0000\u8A7E\u251B\u0000\u60FD\u7667\u9AD7\u9D44\u936E\u9B8F\u87F5\u0000\u880F\u8CF7\u732C\u9721\u9BB0\u35D6\u72B2\u4C07\u7C51\u994A\u6159\u6159\u4C04\u9E96
\u617D\u0000\u575F\u616F\u62A6\u6239\u62CE\u3A5C\u61E2\u53AA\u33F5\u6364\u6802\u35D2\u5D57\u8BC2\u8FDA\u8E39\u0000\u50D9\u1D46\u7906\u5332\u9638\u0F3B\u4065\u0000\u77FE\u0000\u7CC2\u5F1A\u7CDA\u7A2D\u8066\u8063\u7D4D\u7505\u74F2\u8994\u821A\u670C\u8062\u7486\u805B\u74F0\u8103\u7724\u8989\u67CC\u7553\u6ED1\u87A9\u87CE\u81C8\u878C\u8A49\u8CAD\u8B43\u772B\u74F8\u84DA\u3635\u69B2\u8DA6\u0000\u89A9\u7468\u6DB9\u87C1\u4011\u74E7\u3DDB\u7176\u60A4\u619C\u3CD1\u7162\u6077\u0000\u7F71\u8B2D\u7250\u60E9\u4B7E\u5220\u3C18\u3CC7\u5ED7\u7656\u5531\u1944\u12FE\u9903\u6DDC\u70AD\u5CC1\u61AD\u8A0F\u3677\u00EE\u6846\u4F0E\u4562\u5B1F\u634C\u9F50\u9EA6\u626B\u3000\uFF0C\u3001\u3002\uFF0E\u2027\uFF1B\uFF1A\uFF1F\uFF01\uFE30\u2026\u2025\uFE50\uFE51\uFE52\u00B7\uFE54\uFE55\uFE56\uFE57\uFF5C\u2013\uFE31\u2014\uFE33\u2574\uFE34\uFE4F\uFF08\uFF09\uFE35\uFE36\uFF5B\uFF5D\uFE37\uFE38\u3014\u3015\uFE39\uFE3A\u3010\u3011\uFE3B\uFE3C\u300A\u300B\uFE3D\uFE3E\u3008\u3009\uFE3F\uFE40\u300C\u300D\uFE41\uFE42\u300E\u300F\uFE43\uFE44\uFE59\uFE5A\uFE5B\uFE5C\uFE5D\uFE5E\u2018\u2019\u201C\u201D\u301D\u301E\u2035\u2032\uFF03\uFF06\uFF0A\u203B\u00A7\u3003\u25CB\u25CF\u25B3\u25B2\u25CE\u2606\u2605\u25C7\u25C6\u25A1\u25A0\u25BD\u25BC\u32A3\u2105\u00AF\uFFE3\uFF3F\u02CD\uFE49\uFE4A\uFE4D\uFE4E\uFE4B\uFE4C\uFE5F\uFE60\uFE61\uFF0B\uFF0D\u00D7\u00F7\u00B1\u221A\uFF1C\uFF1E\uFF1D\u2266\u2267\u2260\u221E\u2252\u2261\uFE62\uFE63\uFE64\uFE65\uFE66\uFF5E\u2229\u222A\u22A5\u2220\u221F\u22BF\u33D2\u33D1\u222B\u222E\u2235\u2234\u2640\u2642\u2295\u2299\u2191\u2193\u2190\u2192\u2196\u2197\u2199\u2198\u2225\u2223\uFF0F\uFF3C\u2215\uFE68\uFF04\uFFE5\u3012\uFFE0\uFFE1\uFF05\uFF20\u2103\u2109\uFE69\uFE6A\uFE6B\u33D5\u339C\u339D\u339E\u33CE\u33A1\u338E\u338F\u33C4\u00B0\u5159\u515B\u515E\u515D\u5161\u5163\u55E7\u74E9\u7CCE\u2581\u2582\u2583\u2584\u2585\u2586\u2587\u2588\u258F\u258E\u258D\u258C\u258B\u258A\u2589\u253C\u2534\u252C\u2524\u251C\u2594\u2500\u2502\u2595\u250C\u2510\u2514\u2518\u256D\u256E\u2570\u256F\u2550\u255E\u
256A\u2561\u25E2\u25E3\u25E5\u25E4\u2571\u2572\u2573\uFF10\uFF11\uFF12\uFF13\uFF14\uFF15\uFF16\uFF17\uFF18\uFF19\u2160\u2161\u2162\u2163\u2164\u2165\u2166\u2167\u2168\u2169\u3021\u3022\u3023\u3024\u3025\u3026\u3027\u3028\u3029\u5341\u5344\u5345\uFF21\uFF22\uFF23\uFF24\uFF25\uFF26\uFF27\uFF28\uFF29\uFF2A\uFF2B\uFF2C\uFF2D\uFF2E\uFF2F\uFF30\uFF31\uFF32\uFF33\uFF34\uFF35\uFF36\uFF37\uFF38\uFF39\uFF3A\uFF41\uFF42\uFF43\uFF44\uFF45\uFF46\uFF47\uFF48\uFF49\uFF4A\uFF4B\uFF4C\uFF4D\uFF4E\uFF4F\uFF50\uFF51\uFF52\uFF53\uFF54\uFF55\uFF56\uFF57\uFF58\uFF59\uFF5A\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039A\u039B\u039C\u039D\u039E\u039F\u03A0\u03A1\u03A3\u03A4\u03A5\u03A6\u03A7\u03A8\u03A9\u03B1\u03B2\u03B3\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9\u03BA\u03BB\u03BC\u03BD\u03BE\u03BF\u03C0\u03C1\u03C3\u03C4\u03C5\u03C6\u03C7\u03C8\u03C9\u3105\u3106\u3107\u3108\u3109\u310A\u310B\u310C\u310D\u310E\u310F\u3110\u3111\u3112\u3113\u3114\u3115\u3116\u3117\u3118\u3119\u311A\u311B\u311C\u311D\u311E\u311F\u3120\u3121\u3122\u3123\u3124\u3125\u3126\u3127\u3128\u3129\u02D9\u02C9\u02CA\u02C7\u02CB\u2400\u2401\u2402\u2403\u2404\u2405\u2406\u2407\u2408\u2409\u240A\u240B\u240C\u240D\u240E\u240F\u2410\u2411\u2412\u2413\u2414\u2415\u2416\u2417\u2418\u2419\u241A\u241B\u241C\u241D\u241E\u241F\u2421\u20AC";
+
+ private static final String TABLE3 = "\u4E00\u4E59\u4E01\u4E03\u4E43\u4E5D\u4E86\u4E8C\u4EBA\u513F\u5165\u516B\u51E0\u5200\u5201\u529B\u5315\u5341\u535C\u53C8\u4E09\u4E0B\u4E08\u4E0A\u4E2B\u4E38\u51E1\u4E45\u4E48\u4E5F\u4E5E\u4E8E\u4EA1\u5140\u5203\u52FA\u5343\u53C9\u53E3\u571F\u58EB\u5915\u5927\u5973\u5B50\u5B51\u5B53\u5BF8\u5C0F\u5C22\u5C38\u5C71\u5DDD\u5DE5\u5DF1\u5DF2\u5DF3\u5DFE\u5E72\u5EFE\u5F0B\u5F13\u624D\u4E11\u4E10\u4E0D\u4E2D\u4E30\u4E39\u4E4B\u5C39\u4E88\u4E91\u4E95\u4E92\u4E94\u4EA2\u4EC1\u4EC0\u4EC3\u4EC6\u4EC7\u4ECD\u4ECA\u4ECB\u4EC4\u5143\u5141\u5167\u516D\u516E\u516C\u5197\u51F6\u5206\u5207\u5208\u52FB\u52FE\u52FF\u5316\u5339\u5348\u5347\u5345\u535E\u5384\u53CB\u53CA\u53CD\u58EC\u5929\u592B\u592A\u592D\u5B54\u5C11\u5C24\u5C3A\u5C6F\u5DF4\u5E7B\u5EFF\u5F14\u5F15\u5FC3\u6208\u6236\u624B\u624E\u652F\u6587\u6597\u65A4\u65B9\u65E5\u66F0\u6708\u6728\u6B20\u6B62\u6B79\u6BCB\u6BD4\u6BDB\u6C0F\u6C34\u706B\u722A\u7236\u723B\u7247\u7259\u725B\u72AC\u738B\u4E19\u4E16\u4E15\u4E14\u4E18\u4E3B\u4E4D\u4E4F\u4E4E\u4EE5\u4ED8\u4ED4\u4ED5\u4ED6\u4ED7\u4EE3\u4EE4\u4ED9\u4EDE\u5145\u5144\u5189\u518A\u51AC\u51F9\u51FA\u51F8\u520A\u52A0\u529F\u5305\u5306\u5317\u531D\u4EDF\u534A\u5349\u5361\u5360\u536F\u536E\u53BB\u53EF\u53E4\u53F3\u53EC\u53EE\u53E9\u53E8\u53FC\u53F8\u53F5\u53EB\u53E6\u53EA\u53F2\u53F1\u53F0\u53E5\u53ED\u53FB\u56DB\u56DA\u5916\u592E\u5931\u5974\u5976\u5B55\u5B83\u5C3C\u5DE8\u5DE7\u5DE6\u5E02\u5E03\u5E73\u5E7C\u5F01\u5F18\u5F17\u5FC5\u620A\u6253\u6254\u6252\u6251\u65A5\u65E6\u672E\u672C\u672A\u672B\u672D\u6B63\u6BCD\u6C11\u6C10\u6C38\u6C41\u6C40\u6C3E\u72AF\u7384\u7389\u74DC\u74E6\u7518\u751F\u7528\u7529\u7530\u7531\u7532\u7533\u758B\u767D\u76AE\u76BF\u76EE\u77DB\u77E2\u77F3\u793A\u79BE\u7A74\u7ACB\u4E1E\u4E1F\u4E52\u4E53\u4E69\u4E99\u4EA4\u4EA6\u4EA5\u4EFF\u4F09\u4F19\u4F0A\u4F15\u4F0D\u4F10\u4F11\u4F0F\u4EF2\u4EF6\u4EFB\u4EF0\u4EF3\u4EFD\u4F01\u4F0B\u5149\u5147\u5146\u5148\u5168\u5171\u518D\u51B0\u5217\u5211\u5212\u520E\u5216\u52A3\u5308\u5321\u5320\u53
70\u5371\u5409\u540F\u540C\u540A\u5410\u5401\u540B\u5404\u5411\u540D\u5408\u5403\u540E\u5406\u5412\u56E0\u56DE\u56DD\u5733\u5730\u5728\u572D\u572C\u572F\u5729\u5919\u591A\u5937\u5938\u5984\u5978\u5983\u597D\u5979\u5982\u5981\u5B57\u5B58\u5B87\u5B88\u5B85\u5B89\u5BFA\u5C16\u5C79\u5DDE\u5E06\u5E76\u5E74\u5F0F\u5F1B\u5FD9\u5FD6\u620E\u620C\u620D\u6210\u6263\u625B\u6258\u6536\u65E9\u65E8\u65EC\u65ED\u66F2\u66F3\u6709\u673D\u6734\u6731\u6735\u6B21\u6B64\u6B7B\u6C16\u6C5D\u6C57\u6C59\u6C5F\u6C60\u6C50\u6C55\u6C61\u6C5B\u6C4D\u6C4E\u7070\u725F\u725D\u767E\u7AF9\u7C73\u7CF8\u7F36\u7F8A\u7FBD\u8001\u8003\u800C\u8012\u8033\u807F\u8089\u808B\u808C\u81E3\u81EA\u81F3\u81FC\u820C\u821B\u821F\u826E\u8272\u827E\u866B\u8840\u884C\u8863\u897F\u9621\u4E32\u4EA8\u4F4D\u4F4F\u4F47\u4F57\u4F5E\u4F34\u4F5B\u4F55\u4F30\u4F50\u4F51\u4F3D\u4F3A\u4F38\u4F43\u4F54\u4F3C\u4F46\u4F63\u4F5C\u4F60\u4F2F\u4F4E\u4F36\u4F59\u4F5D\u4F48\u4F5A\u514C\u514B\u514D\u5175\u51B6\u51B7\u5225\u5224\u5229\u522A\u5228\u52AB\u52A9\u52AA\u52AC\u5323\u5373\u5375\u541D\u542D\u541E\u543E\u5426\u544E\u5427\u5446\u5443\u5433\u5448\u5442\u541B\u5429\u544A\u5439\u543B\u5438\u542E\u5435\u5436\u5420\u543C\u5440\u5431\u542B\u541F\u542C\u56EA\u56F0\u56E4\u56EB\u574A\u5751\u5740\u574D\u5747\u574E\u573E\u5750\u574F\u573B\u58EF\u593E\u599D\u5992\u59A8\u599E\u59A3\u5999\u5996\u598D\u59A4\u5993\u598A\u59A5\u5B5D\u5B5C\u5B5A\u5B5B\u5B8C\u5B8B\u5B8F\u5C2C\u5C40\u5C41\u5C3F\u5C3E\u5C90\u5C91\u5C94\u5C8C\u5DEB\u5E0C\u5E8F\u5E87\u5E8A\u5EF7\u5F04\u5F1F\u5F64\u5F62\u5F77\u5F79\u5FD8\u5FCC\u5FD7\u5FCD\u5FF1\u5FEB\u5FF8\u5FEA\u6212\u6211\u6284\u6297\u6296\u6280\u6276\u6289\u626D\u628A\u627C\u627E\u6279\u6273\u6292\u626F\u6298\u626E\u6295\u6293\u6291\u6286\u6539\u653B\u6538\u65F1\u66F4\u675F\u674E\u674F\u6750\u6751\u675C\u6756\u675E\u6749\u6746\u6760\u6753\u6757\u6B65\u6BCF\u6C42\u6C5E\u6C99\u6C81\u6C88\u6C89\u6C85\u6C9B\u6C6A\u6C7A\u6C90\u6C70\u6C8C\u6C68\u6C96\u6C92\u6C7D\u6C83\u6C72\u6C7E\u6C74\u6C86\u6C76\u6C8D\u6C94\u6C98\u6C82\u7076
\u707C\u707D\u7078\u7262\u7261\u7260\u72C4\u72C2\u7396\u752C\u752B\u7537\u7538\u7682\u76EF\u77E3\u79C1\u79C0\u79BF\u7A76\u7CFB\u7F55\u8096\u8093\u809D\u8098\u809B\u809A\u80B2\u826F\u8292\u828B\u828D\u898B\u89D2\u8A00\u8C37\u8C46\u8C55\u8C9D\u8D64\u8D70\u8DB3\u8EAB\u8ECA\u8F9B\u8FB0\u8FC2\u8FC6\u8FC5\u8FC4\u5DE1\u9091\u90A2\u90AA\u90A6\u90A3\u9149\u91C6\u91CC\u9632\u962E\u9631\u962A\u962C\u4E26\u4E56\u4E73\u4E8B\u4E9B\u4E9E\u4EAB\u4EAC\u4F6F\u4F9D\u4F8D\u4F73\u4F7F\u4F6C\u4F9B\u4F8B\u4F86\u4F83\u4F70\u4F75\u4F88\u4F69\u4F7B\u4F96\u4F7E\u4F8F\u4F91\u4F7A\u5154\u5152\u5155\u5169\u5177\u5176\u5178\u51BD\u51FD\u523B\u5238\u5237\u523A\u5230\u522E\u5236\u5241\u52BE\u52BB\u5352\u5354\u5353\u5351\u5366\u5377\u5378\u5379\u53D6\u53D4\u53D7\u5473\u5475\u5496\u5478\u5495\u5480\u547B\u5477\u5484\u5492\u5486\u547C\u5490\u5471\u5476\u548C\u549A\u5462\u5468\u548B\u547D\u548E\u56FA\u5783\u5777\u576A\u5769\u5761\u5766\u5764\u577C\u591C\u5949\u5947\u5948\u5944\u5954\u59BE\u59BB\u59D4\u59B9\u59AE\u59D1\u59C6\u59D0\u59CD\u59CB\u59D3\u59CA\u59AF\u59B3\u59D2\u59C5\u5B5F\u5B64\u5B63\u5B97\u5B9A\u5B98\u5B9C\u5B99\u5B9B\u5C1A\u5C48\u5C45\u5C46\u5CB7\u5CA1\u5CB8\u5CA9\u5CAB\u5CB1\u5CB3\u5E18\u5E1A\u5E16\u5E15\u5E1B\u5E11\u5E78\u5E9A\u5E97\u5E9C\u5E95\u5E96\u5EF6\u5F26\u5F27\u5F29\u5F80\u5F81\u5F7F\u5F7C\u5FDD\u5FE0\u5FFD\u5FF5\u5FFF\u600F\u6014\u602F\u6035\u6016\u602A\u6015\u6021\u6027\u6029\u602B\u601B\u6216\u6215\u623F\u623E\u6240\u627F\u62C9\u62CC\u62C4\u62BF\u62C2\u62B9\u62D2\u62DB\u62AB\u62D3\u62D4\u62CB\u62C8\u62A8\u62BD\u62BC\u62D0\u62D9\u62C7\u62CD\u62B5\u62DA\u62B1\u62D8\u62D6\u62D7\u62C6\u62AC\u62CE\u653E\u65A7\u65BC\u65FA\u6614\u6613\u660C\u6606\u6602\u660E\u6600\u660F\u6615\u660A\u6607\u670D\u670B\u676D\u678B\u6795\u6771\u679C\u6773\u6777\u6787\u679D\u6797\u676F\u6770\u677F\u6789\u677E\u6790\u6775\u679A\u6793\u677C\u676A\u6772\u6B23\u6B66\u6B67\u6B7F\u6C13\u6C1B\u6CE3\u6CE8\u6CF3\u6CB1\u6CCC\u6CE5\u6CB3\u6CBD\u6CBE\u6CBC\u6CE2\u6CAB\u6CD5\u6CD3\u6CB8\u6CC4\u6CB9\u6CC1\u6CAE\u6CD7\u
6CC5\u6CF1\u6CBF\u6CBB\u6CE1\u6CDB\u6CCA\u6CAC\u6CEF\u6CDC\u6CD6\u6CE0\u7095\u708E\u7092\u708A\u7099\u722C\u722D\u7238\u7248\u7267\u7269\u72C0\u72CE\u72D9\u72D7\u72D0\u73A9\u73A8\u739F\u73AB\u73A5\u753D\u759D\u7599\u759A\u7684\u76C2\u76F2\u76F4\u77E5\u77FD\u793E\u7940\u7941\u79C9\u79C8\u7A7A\u7A79\u7AFA\u7CFE\u7F54\u7F8C\u7F8B\u8005\u80BA\u80A5\u80A2\u80B1\u80A1\u80AB\u80A9\u80B4\u80AA\u80AF\u81E5\u81FE\u820D\u82B3\u829D\u8299\u82AD\u82BD\u829F\u82B9\u82B1\u82AC\u82A5\u82AF\u82B8\u82A3\u82B0\u82BE\u82B7\u864E\u8671\u521D\u8868\u8ECB\u8FCE\u8FD4\u8FD1\u90B5\u90B8\u90B1\u90B6\u91C7\u91D1\u9577\u9580\u961C\u9640\u963F\u963B\u9644\u9642\u96B9\u96E8\u9752\u975E\u4E9F\u4EAD\u4EAE\u4FE1\u4FB5\u4FAF\u4FBF\u4FE0\u4FD1\u4FCF\u4FDD\u4FC3\u4FB6\u4FD8\u4FDF\u4FCA\u4FD7\u4FAE\u4FD0\u4FC4\u4FC2\u4FDA\u4FCE\u4FDE\u4FB7\u5157\u5192\u5191\u51A0\u524E\u5243\u524A\u524D\u524C\u524B\u5247\u52C7\u52C9\u52C3\u52C1\u530D\u5357\u537B\u539A\u53DB\u54AC\u54C0\u54A8\u54CE\u54C9\u54B8\u54A6\u54B3\u54C7\u54C2\u54BD\u54AA\u54C1\u54C4\u54C8\u54AF\u54AB\u54B1\u54BB\u54A9\u54A7\u54BF\u56FF\u5782\u578B\u57A0\u57A3\u57A2\u57CE\u57AE\u5793\u5955\u5951\u594F\u594E\u5950\u59DC\u59D8\u59FF\u59E3\u59E8\u5A03\u59E5\u59EA\u59DA\u59E6\u5A01\u59FB\u5B69\u5BA3\u5BA6\u5BA4\u5BA2\u5BA5\u5C01\u5C4E\u5C4F\u5C4D\u5C4B\u5CD9\u5CD2\u5DF7\u5E1D\u5E25\u5E1F\u5E7D\u5EA0\u5EA6\u5EFA\u5F08\u5F2D\u5F65\u5F88\u5F85\u5F8A\u5F8B\u5F87\u5F8C\u5F89\u6012\u601D\u6020\u6025\u600E\u6028\u604D\u6070\u6068\u6062\u6046\u6043\u606C\u606B\u606A\u6064\u6241\u62DC\u6316\u6309\u62FC\u62ED\u6301\u62EE\u62FD\u6307\u62F1\u62F7\u62EF\u62EC\u62FE\u62F4\u6311\u6302\u653F\u6545\u65AB\u65BD\u65E2\u6625\u662D\u6620\u6627\u662F\u661F\u6628\u6631\u6624\u66F7\u67FF\u67D3\u67F1\u67D4\u67D0\u67EC\u67B6\u67AF\u67F5\u67E9\u67EF\u67C4\u67D1\u67B4\u67DA\u67E5\u67B8\u67CF\u67DE\u67F3\u67B0\u67D9\u67E2\u67DD\u67D2\u6B6A\u6B83\u6B86\u6BB5\u6BD2\u6BD7\u6C1F\u6CC9\u6D0B\u6D32\u6D2A\u6D41\u6D25\u6D0C\u6D31\u6D1E\u6D17\u6D3B\u6D3D\u6D3E\u6D36\u6D1B\u6CF5\u6D39\u6D
27\u6D38\u6D29\u6D2E\u6D35\u6D0E\u6D2B\u70AB\u70BA\u70B3\u70AC\u70AF\u70AD\u70B8\u70AE\u70A4\u7230\u7272\u726F\u7274\u72E9\u72E0\u72E1\u73B7\u73CA\u73BB\u73B2\u73CD\u73C0\u73B3\u751A\u752D\u754F\u754C\u754E\u754B\u75AB\u75A4\u75A5\u75A2\u75A3\u7678\u7686\u7687\u7688\u76C8\u76C6\u76C3\u76C5\u7701\u76F9\u76F8\u7709\u770B\u76FE\u76FC\u7707\u77DC\u7802\u7814\u780C\u780D\u7946\u7949\u7948\u7947\u79B9\u79BA\u79D1\u79D2\u79CB\u7A7F\u7A81\u7AFF\u7AFD\u7C7D\u7D02\u7D05\u7D00\u7D09\u7D07\u7D04\u7D06\u7F38\u7F8E\u7FBF\u8004\u8010\u800D\u8011\u8036\u80D6\u80E5\u80DA\u80C3\u80C4\u80CC\u80E1\u80DB\u80CE\u80DE\u80E4\u80DD\u81F4\u8222\u82E7\u8303\u8305\u82E3\u82DB\u82E6\u8304\u82E5\u8302\u8309\u82D2\u82D7\u82F1\u8301\u82DC\u82D4\u82D1\u82DE\u82D3\u82DF\u82EF\u8306\u8650\u8679\u867B\u867A\u884D\u886B\u8981\u89D4\u8A08\u8A02\u8A03\u8C9E\u8CA0\u8D74\u8D73\u8DB4\u8ECD\u8ECC\u8FF0\u8FE6\u8FE2\u8FEA\u8FE5\u8FED\u8FEB\u8FE4\u8FE8\u90CA\u90CE\u90C1\u90C3\u914B\u914A\u91CD\u9582\u9650\u964B\u964C\u964D\u9762\u9769\u97CB\u97ED\u97F3\u9801\u98A8\u98DB\u98DF\u9996\u9999\u4E58\u4EB3\u500C\u500D\u5023\u4FEF\u5026\u5025\u4FF8\u5029\u5016\u5006\u503C\u501F\u501A\u5012\u5011\u4FFA\u5000\u5014\u5028\u4FF1\u5021\u500B\u5019\u5018\u4FF3\u4FEE\u502D\u502A\u4FFE\u502B\u5009\u517C\u51A4\u51A5\u51A2\u51CD\u51CC\u51C6\u51CB\u5256\u525C\u5254\u525B\u525D\u532A\u537F\u539F\u539D\u53DF\u54E8\u5510\u5501\u5537\u54FC\u54E5\u54F2\u5506\u54FA\u5514\u54E9\u54ED\u54E1\u5509\u54EE\u54EA\u54E6\u5527\u5507\u54FD\u550F\u5703\u5704\u57C2\u57D4\u57CB\u57C3\u5809\u590F\u5957\u5958\u595A\u5A11\u5A18\u5A1C\u5A1F\u5A1B\u5A13\u59EC\u5A20\u5A23\u5A29\u5A25\u5A0C\u5A09\u5B6B\u5C58\u5BB0\u5BB3\u5BB6\u5BB4\u5BAE\u5BB5\u5BB9\u5BB8\u5C04\u5C51\u5C55\u5C50\u5CED\u5CFD\u5CFB\u5CEA\u5CE8\u5CF0\u5CF6\u5D01\u5CF4\u5DEE\u5E2D\u5E2B\u5EAB\u5EAD\u5EA7\u5F31\u5F92\u5F91\u5F90\u6059\u6063\u6065\u6050\u6055\u606D\u6069\u606F\u6084\u609F\u609A\u608D\u6094\u608C\u6085\u6096\u6247\u62F3\u6308\u62FF\u634E\u633E\u632F\u6355\u6342\u6346\u634F\u6349
\u633A\u6350\u633D\u632A\u632B\u6328\u634D\u634C\u6548\u6549\u6599\u65C1\u65C5\u6642\u6649\u664F\u6643\u6652\u664C\u6645\u6641\u66F8\u6714\u6715\u6717\u6821\u6838\u6848\u6846\u6853\u6839\u6842\u6854\u6829\u68B3\u6817\u684C\u6851\u683D\u67F4\u6850\u6840\u683C\u6843\u682A\u6845\u6813\u6818\u6841\u6B8A\u6B89\u6BB7\u6C23\u6C27\u6C28\u6C26\u6C24\u6CF0\u6D6A\u6D95\u6D88\u6D87\u6D66\u6D78\u6D77\u6D59\u6D93\u6D6C\u6D89\u6D6E\u6D5A\u6D74\u6D69\u6D8C\u6D8A\u6D79\u6D85\u6D65\u6D94\u70CA\u70D8\u70E4\u70D9\u70C8\u70CF\u7239\u7279\u72FC\u72F9\u72FD\u72F8\u72F7\u7386\u73ED\u7409\u73EE\u73E0\u73EA\u73DE\u7554\u755D\u755C\u755A\u7559\u75BE\u75C5\u75C7\u75B2\u75B3\u75BD\u75BC\u75B9\u75C2\u75B8\u768B\u76B0\u76CA\u76CD\u76CE\u7729\u771F\u7720\u7728\u77E9\u7830\u7827\u7838\u781D\u7834\u7837\u7825\u782D\u7820\u781F\u7832\u7955\u7950\u7960\u795F\u7956\u795E\u795D\u7957\u795A\u79E4\u79E3\u79E7\u79DF\u79E6\u79E9\u79D8\u7A84\u7A88\u7AD9\u7B06\u7B11\u7C89\u7D21\u7D17\u7D0B\u7D0A\u7D20\u7D22\u7D14\u7D10\u7D15\u7D1A\u7D1C\u7D0D\u7D19\u7D1B\u7F3A\u7F5F\u7F94\u7FC5\u7FC1\u8006\u8018\u8015\u8019\u8017\u803D\u803F\u80F1\u8102\u80F0\u8105\u80ED\u80F4\u8106\u80F8\u80F3\u8108\u80FD\u810A\u80FC\u80EF\u81ED\u81EC\u8200\u8210\u822A\u822B\u8228\u822C\u82BB\u832B\u8352\u8354\u834A\u8338\u8350\u8349\u8335\u8334\u834F\u8332\u8339\u8336\u8317\u8340\u8331\u8328\u8343\u8654\u868A\u86AA\u8693\u86A4\u86A9\u868C\u86A3\u869C\u8870\u8877\u8881\u8882\u887D\u8879\u8A18\u8A10\u8A0E\u8A0C\u8A15\u8A0A\u8A17\u8A13\u8A16\u8A0F\u8A11\u8C48\u8C7A\u8C79\u8CA1\u8CA2\u8D77\u8EAC\u8ED2\u8ED4\u8ECF\u8FB1\u9001\u9006\u8FF7\u9000\u8FFA\u8FF4\u9003\u8FFD\u9005\u8FF8\u9095\u90E1\u90DD\u90E2\u9152\u914D\u914C\u91D8\u91DD\u91D7\u91DC\u91D9\u9583\u9662\u9663\u9661\u965B\u965D\u9664\u9658\u965E\u96BB\u98E2\u99AC\u9AA8\u9AD8\u9B25\u9B32\u9B3C\u4E7E\u507A\u507D\u505C\u5047\u5043\u504C\u505A\u5049\u5065\u5076\u504E\u5055\u5075\u5074\u5077\u504F\u500F\u506F\u506D\u515C\u5195\u51F0\u526A\u526F\u52D2\u52D9\u52D8\u52D5\u5310\u530F\u5319\u533F\u
5340\u533E\u53C3\u66FC\u5546\u556A\u5566\u5544\u555E\u5561\u5543\u554A\u5531\u5556\u554F\u5555\u552F\u5564\u5538\u552E\u555C\u552C\u5563\u5533\u5541\u5557\u5708\u570B\u5709\u57DF\u5805\u580A\u5806\u57E0\u57E4\u57FA\u5802\u5835\u57F7\u57F9\u5920\u5962\u5A36\u5A41\u5A49\u5A66\u5A6A\u5A40\u5A3C\u5A62\u5A5A\u5A46\u5A4A\u5B70\u5BC7\u5BC5\u5BC4\u5BC2\u5BBF\u5BC6\u5C09\u5C08\u5C07\u5C60\u5C5C\u5C5D\u5D07\u5D06\u5D0E\u5D1B\u5D16\u5D22\u5D11\u5D29\u5D14\u5D19\u5D24\u5D27\u5D17\u5DE2\u5E38\u5E36\u5E33\u5E37\u5EB7\u5EB8\u5EB6\u5EB5\u5EBE\u5F35\u5F37\u5F57\u5F6C\u5F69\u5F6B\u5F97\u5F99\u5F9E\u5F98\u5FA1\u5FA0\u5F9C\u607F\u60A3\u6089\u60A0\u60A8\u60CB\u60B4\u60E6\u60BD\u60C5\u60BB\u60B5\u60DC\u60BC\u60D8\u60D5\u60C6\u60DF\u60B8\u60DA\u60C7\u621A\u621B\u6248\u63A0\u63A7\u6372\u6396\u63A2\u63A5\u6377\u6367\u6398\u63AA\u6371\u63A9\u6389\u6383\u639B\u636B\u63A8\u6384\u6388\u6399\u63A1\u63AC\u6392\u638F\u6380\u637B\u6369\u6368\u637A\u655D\u6556\u6551\u6559\u6557\u555F\u654F\u6558\u6555\u6554\u659C\u659B\u65AC\u65CF\u65CB\u65CC\u65CE\u665D\u665A\u6664\u6668\u6666\u665E\u66F9\u52D7\u671B\u6881\u68AF\u68A2\u6893\u68B5\u687F\u6876\u68B1\u68A7\u6897\u68B0\u6883\u68C4\u68AD\u6886\u6885\u6894\u689D\u68A8\u689F\u68A1\u6882\u6B32\u6BBA\u6BEB\u6BEC\u6C2B\u6D8E\u6DBC\u6DF3\u6DD9\u6DB2\u6DE1\u6DCC\u6DE4\u6DFB\u6DFA\u6E05\u6DC7\u6DCB\u6DAF\u6DD1\u6DAE\u6DDE\u6DF9\u6DB8\u6DF7\u6DF5\u6DC5\u6DD2\u6E1A\u6DB5\u6DDA\u6DEB\u6DD8\u6DEA\u6DF1\u6DEE\u6DE8\u6DC6\u6DC4\u6DAA\u6DEC\u6DBF\u6DE6\u70F9\u7109\u710A\u70FD\u70EF\u723D\u727D\u7281\u731C\u731B\u7316\u7313\u7319\u7387\u7405\u740A\u7403\u7406\u73FE\u740D\u74E0\u74F6\u74F7\u751C\u7522\u7565\u7566\u7562\u7570\u758F\u75D4\u75D5\u75B5\u75CA\u75CD\u768E\u76D4\u76D2\u76DB\u7737\u773E\u773C\u7736\u7738\u773A\u786B\u7843\u784E\u7965\u7968\u796D\u79FB\u7A92\u7A95\u7B20\u7B28\u7B1B\u7B2C\u7B26\u7B19\u7B1E\u7B2E\u7C92\u7C97\u7C95\u7D46\u7D43\u7D71\u7D2E\u7D39\u7D3C\u7D40\u7D30\u7D33\u7D44\u7D2F\u7D42\u7D32\u7D31\u7F3D\u7F9E\u7F9A\u7FCC\u7FCE\u7FD2\u801C\u804A\u80
46\u812F\u8116\u8123\u812B\u8129\u8130\u8124\u8202\u8235\u8237\u8236\u8239\u838E\u839E\u8398\u8378\u83A2\u8396\u83BD\u83AB\u8392\u838A\u8393\u8389\u83A0\u8377\u837B\u837C\u8386\u83A7\u8655\u5F6A\u86C7\u86C0\u86B6\u86C4\u86B5\u86C6\u86CB\u86B1\u86AF\u86C9\u8853\u889E\u8888\u88AB\u8892\u8896\u888D\u888B\u8993\u898F\u8A2A\u8A1D\u8A23\u8A25\u8A31\u8A2D\u8A1F\u8A1B\u8A22\u8C49\u8C5A\u8CA9\u8CAC\u8CAB\u8CA8\u8CAA\u8CA7\u8D67\u8D66\u8DBE\u8DBA\u8EDB\u8EDF\u9019\u900D\u901A\u9017\u9023\u901F\u901D\u9010\u9015\u901E\u9020\u900F\u9022\u9016\u901B\u9014\u90E8\u90ED\u90FD\u9157\u91CE\u91F5\u91E6\u91E3\u91E7\u91ED\u91E9\u9589\u966A\u9675\u9673\u9678\u9670\u9674\u9676\u9677\u966C\u96C0\u96EA\u96E9\u7AE0\u7ADF\u9802\u9803\u9B5A\u9CE5\u9E75\u9E7F\u9EA5\u9EBB\u50A2\u508D\u5085\u5099\u5091\u5080\u5096\u5098\u509A\u6700\u51F1\u5272\u5274\u5275\u5269\u52DE\u52DD\u52DB\u535A\u53A5\u557B\u5580\u55A7\u557C\u558A\u559D\u5598\u5582\u559C\u55AA\u5594\u5587\u558B\u5583\u55B3\u55AE\u559F\u553E\u55B2\u559A\u55BB\u55AC\u55B1\u557E\u5589\u55AB\u5599\u570D\u582F\u582A\u5834\u5824\u5830\u5831\u5821\u581D\u5820\u58F9\u58FA\u5960\u5A77\u5A9A\u5A7F\u5A92\u5A9B\u5AA7\u5B73\u5B71\u5BD2\u5BCC\u5BD3\u5BD0\u5C0A\u5C0B\u5C31\u5D4C\u5D50\u5D34\u5D47\u5DFD\u5E45\u5E3D\u5E40\u5E43\u5E7E\u5ECA\u5EC1\u5EC2\u5EC4\u5F3C\u5F6D\u5FA9\u5FAA\u5FA8\u60D1\u60E1\u60B2\u60B6\u60E0\u611C\u6123\u60FA\u6115\u60F0\u60FB\u60F4\u6168\u60F1\u610E\u60F6\u6109\u6100\u6112\u621F\u6249\u63A3\u638C\u63CF\u63C0\u63E9\u63C9\u63C6\u63CD\u63D2\u63E3\u63D0\u63E1\u63D6\u63ED\u63EE\u6376\u63F4\u63EA\u63DB\u6452\u63DA\u63F9\u655E\u6566\u6562\u6563\u6591\u6590\u65AF\u666E\u6670\u6674\u6676\u666F\u6691\u667A\u667E\u6677\u66FE\u66FF\u671F\u671D\u68FA\u68D5\u68E0\u68D8\u68D7\u6905\u68DF\u68F5\u68EE\u68E7\u68F9\u68D2\u68F2\u68E3\u68CB\u68CD\u690D\u6912\u690E\u68C9\u68DA\u696E\u68FB\u6B3E\u6B3A\u6B3D\u6B98\u6B96\u6BBC\u6BEF\u6C2E\u6C2F\u6C2C\u6E2F\u6E38\u6E54\u6E21\u6E32\u6E67\u6E4A\u6E20\u6E25\u6E23\u6E1B\u6E5B\u6E58\u6E24\u6E56\u6E6E\u6E2D\u6E26
\u6E6F\u6E34\u6E4D\u6E3A\u6E2C\u6E43\u6E1D\u6E3E\u6ECB\u6E89\u6E19\u6E4E\u6E63\u6E44\u6E72\u6E69\u6E5F\u7119\u711A\u7126\u7130\u7121\u7136\u716E\u711C\u724C\u7284\u7280\u7336\u7325\u7334\u7329\u743A\u742A\u7433\u7422\u7425\u7435\u7436\u7434\u742F\u741B\u7426\u7428\u7525\u7526\u756B\u756A\u75E2\u75DB\u75E3\u75D9\u75D8\u75DE\u75E0\u767B\u767C\u7696\u7693\u76B4\u76DC\u774F\u77ED\u785D\u786C\u786F\u7A0D\u7A08\u7A0B\u7A05\u7A00\u7A98\u7A97\u7A96\u7AE5\u7AE3\u7B49\u7B56\u7B46\u7B50\u7B52\u7B54\u7B4D\u7B4B\u7B4F\u7B51\u7C9F\u7CA5\u7D5E\u7D50\u7D68\u7D55\u7D2B\u7D6E\u7D72\u7D61\u7D66\u7D62\u7D70\u7D73\u5584\u7FD4\u7FD5\u800B\u8052\u8085\u8155\u8154\u814B\u8151\u814E\u8139\u8146\u813E\u814C\u8153\u8174\u8212\u821C\u83E9\u8403\u83F8\u840D\u83E0\u83C5\u840B\u83C1\u83EF\u83F1\u83F4\u8457\u840A\u83F0\u840C\u83CC\u83FD\u83F2\u83CA\u8438\u840E\u8404\u83DC\u8407\u83D4\u83DF\u865B\u86DF\u86D9\u86ED\u86D4\u86DB\u86E4\u86D0\u86DE\u8857\u88C1\u88C2\u88B1\u8983\u8996\u8A3B\u8A60\u8A55\u8A5E\u8A3C\u8A41\u8A54\u8A5B\u8A50\u8A46\u8A34\u8A3A\u8A36\u8A56\u8C61\u8C82\u8CAF\u8CBC\u8CB3\u8CBD\u8CC1\u8CBB\u8CC0\u8CB4\u8CB7\u8CB6\u8CBF\u8CB8\u8D8A\u8D85\u8D81\u8DCE\u8DDD\u8DCB\u8DDA\u8DD1\u8DCC\u8DDB\u8DC6\u8EFB\u8EF8\u8EFC\u8F9C\u902E\u9035\u9031\u9038\u9032\u9036\u9102\u90F5\u9109\u90FE\u9163\u9165\u91CF\u9214\u9215\u9223\u9209\u921E\u920D\u9210\u9207\u9211\u9594\u958F\u958B\u9591\u9593\u9592\u958E\u968A\u968E\u968B\u967D\u9685\u9686\u968D\u9672\u9684\u96C1\u96C5\u96C4\u96C6\u96C7\u96EF\u96F2\u97CC\u9805\u9806\u9808\u98E7\u98EA\u98EF\u98E9\u98F2\u98ED\u99AE\u99AD\u9EC3\u9ECD\u9ED1\u4E82\u50AD\u50B5\u50B2\u50B3\u50C5\u50BE\u50AC\u50B7\u50BB\u50AF\u50C7\u527F\u5277\u527D\u52DF\u52E6\u52E4\u52E2\u52E3\u532F\u55DF\u55E8\u55D3\u55E6\u55CE\u55DC\u55C7\u55D1\u55E3\u55E4\u55EF\u55DA\u55E1\u55C5\u55C6\u55E5\u55C9\u5712\u5713\u585E\u5851\u5858\u5857\u585A\u5854\u586B\u584C\u586D\u584A\u5862\u5852\u584B\u5967\u5AC1\u5AC9\u5ACC\u5ABE\u5ABD\u5ABC\u5AB3\u5AC2\u5AB2\u5D69\u5D6F\u5E4C\u5E79\u5EC9\u5EC8\u5F12\u
5F59\u5FAC\u5FAE\u611A\u610F\u6148\u611F\u60F3\u611B\u60F9\u6101\u6108\u614E\u614C\u6144\u614D\u613E\u6134\u6127\u610D\u6106\u6137\u6221\u6222\u6413\u643E\u641E\u642A\u642D\u643D\u642C\u640F\u641C\u6414\u640D\u6436\u6416\u6417\u6406\u656C\u659F\u65B0\u6697\u6689\u6687\u6688\u6696\u6684\u6698\u668D\u6703\u6994\u696D\u695A\u6977\u6960\u6954\u6975\u6930\u6982\u694A\u6968\u696B\u695E\u6953\u6979\u6986\u695D\u6963\u695B\u6B47\u6B72\u6BC0\u6BBF\u6BD3\u6BFD\u6EA2\u6EAF\u6ED3\u6EB6\u6EC2\u6E90\u6E9D\u6EC7\u6EC5\u6EA5\u6E98\u6EBC\u6EBA\u6EAB\u6ED1\u6E96\u6E9C\u6EC4\u6ED4\u6EAA\u6EA7\u6EB4\u714E\u7159\u7169\u7164\u7149\u7167\u715C\u716C\u7166\u714C\u7165\u715E\u7146\u7168\u7156\u723A\u7252\u7337\u7345\u733F\u733E\u746F\u745A\u7455\u745F\u745E\u7441\u743F\u7459\u745B\u745C\u7576\u7578\u7600\u75F0\u7601\u75F2\u75F1\u75FA\u75FF\u75F4\u75F3\u76DE\u76DF\u775B\u776B\u7766\u775E\u7763\u7779\u776A\u776C\u775C\u7765\u7768\u7762\u77EE\u788E\u78B0\u7897\u7898\u788C\u7889\u787C\u7891\u7893\u787F\u797A\u797F\u7981\u842C\u79BD\u7A1C\u7A1A\u7A20\u7A14\u7A1F\u7A1E\u7A9F\u7AA0\u7B77\u7BC0\u7B60\u7B6E\u7B67\u7CB1\u7CB3\u7CB5\u7D93\u7D79\u7D91\u7D81\u7D8F\u7D5B\u7F6E\u7F69\u7F6A\u7F72\u7FA9\u7FA8\u7FA4\u8056\u8058\u8086\u8084\u8171\u8170\u8178\u8165\u816E\u8173\u816B\u8179\u817A\u8166\u8205\u8247\u8482\u8477\u843D\u8431\u8475\u8466\u846B\u8449\u846C\u845B\u843C\u8435\u8461\u8463\u8469\u846D\u8446\u865E\u865C\u865F\u86F9\u8713\u8708\u8707\u8700\u86FE\u86FB\u8702\u8703\u8706\u870A\u8859\u88DF\u88D4\u88D9\u88DC\u88D8\u88DD\u88E1\u88CA\u88D5\u88D2\u899C\u89E3\u8A6B\u8A72\u8A73\u8A66\u8A69\u8A70\u8A87\u8A7C\u8A63\u8AA0\u8A71\u8A85\u8A6D\u8A62\u8A6E\u8A6C\u8A79\u8A7B\u8A3E\u8A68\u8C62\u8C8A\u8C89\u8CCA\u8CC7\u8CC8\u8CC4\u8CB2\u8CC3\u8CC2\u8CC5\u8DE1\u8DDF\u8DE8\u8DEF\u8DF3\u8DFA\u8DEA\u8DE4\u8DE6\u8EB2\u8F03\u8F09\u8EFE\u8F0A\u8F9F\u8FB2\u904B\u904A\u9053\u9042\u9054\u903C\u9055\u9050\u9047\u904F\u904E\u904D\u9051\u903E\u9041\u9112\u9117\u916C\u916A\u9169\u91C9\u9237\u9257\u9238\u923D\u9240\u923E\u92
5B\u924B\u9264\u9251\u9234\u9249\u924D\u9245\u9239\u923F\u925A\u9598\u9698\u9694\u9695\u96CD\u96CB\u96C9\u96CA\u96F7\u96FB\u96F9\u96F6\u9756\u9774\u9776\u9810\u9811\u9813\u980A\u9812\u980C\u98FC\u98F4\u98FD\u98FE\u99B3\u99B1\u99B4\u9AE1\u9CE9\u9E82\u9F0E\u9F13\u9F20\u50E7\u50EE\u50E5\u50D6\u50ED\u50DA\u50D5\u50CF\u50D1\u50F1\u50CE\u50E9\u5162\u51F3\u5283\u5282\u5331\u53AD\u55FE\u5600\u561B\u5617\u55FD\u5614\u5606\u5609\u560D\u560E\u55F7\u5616\u561F\u5608\u5610\u55F6\u5718\u5716\u5875\u587E\u5883\u5893\u588A\u5879\u5885\u587D\u58FD\u5925\u5922\u5924\u596A\u5969\u5AE1\u5AE6\u5AE9\u5AD7\u5AD6\u5AD8\u5AE3\u5B75\u5BDE\u5BE7\u5BE1\u5BE5\u5BE6\u5BE8\u5BE2\u5BE4\u5BDF\u5C0D\u5C62\u5D84\u5D87\u5E5B\u5E63\u5E55\u5E57\u5E54\u5ED3\u5ED6\u5F0A\u5F46\u5F70\u5FB9\u6147\u613F\u614B\u6177\u6162\u6163\u615F\u615A\u6158\u6175\u622A\u6487\u6458\u6454\u64A4\u6478\u645F\u647A\u6451\u6467\u6434\u646D\u647B\u6572\u65A1\u65D7\u65D6\u66A2\u66A8\u669D\u699C\u69A8\u6995\u69C1\u69AE\u69D3\u69CB\u699B\u69B7\u69BB\u69AB\u69B4\u69D0\u69CD\u69AD\u69CC\u69A6\u69C3\u69A3\u6B49\u6B4C\u6C33\u6F33\u6F14\u6EFE\u6F13\u6EF4\u6F29\u6F3E\u6F20\u6F2C\u6F0F\u6F02\u6F22\u6EFF\u6EEF\u6F06\u6F31\u6F38\u6F32\u6F23\u6F15\u6F2B\u6F2F\u6F88\u6F2A\u6EEC\u6F01\u6EF2\u6ECC\u6EF7\u7194\u7199\u717D\u718A\u7184\u7192\u723E\u7292\u7296\u7344\u7350\u7464\u7463\u746A\u7470\u746D\u7504\u7591\u7627\u760D\u760B\u7609\u7613\u76E1\u76E3\u7784\u777D\u777F\u7761\u78C1\u789F\u78A7\u78B3\u78A9\u78A3\u798E\u798F\u798D\u7A2E\u7A31\u7AAA\u7AA9\u7AED\u7AEF\u7BA1\u7B95\u7B8B\u7B75\u7B97\u7B9D\u7B94\u7B8F\u7BB8\u7B87\u7B84\u7CB9\u7CBD\u7CBE\u7DBB\u7DB0\u7D9C\u7DBD\u7DBE\u7DA0\u7DCA\u7DB4\u7DB2\u7DB1\u7DBA\u7DA2\u7DBF\u7DB5\u7DB8\u7DAD\u7DD2\u7DC7\u7DAC\u7F70\u7FE0\u7FE1\u7FDF\u805E\u805A\u8087\u8150\u8180\u818F\u8188\u818A\u817F\u8182\u81E7\u81FA\u8207\u8214\u821E\u824B\u84C9\u84BF\u84C6\u84C4\u8499\u849E\u84B2\u849C\u84CB\u84B8\u84C0\u84D3\u8490\u84BC\u84D1\u84CA\u873F\u871C\u873B\u8722\u8725\u8734\u8718\u8755\u8737\u8729\u88F3\u8902\u88F4
\u88F9\u88F8\u88FD\u88E8\u891A\u88EF\u8AA6\u8A8C\u8A9E\u8AA3\u8A8D\u8AA1\u8A93\u8AA4\u8AAA\u8AA5\u8AA8\u8A98\u8A91\u8A9A\u8AA7\u8C6A\u8C8D\u8C8C\u8CD3\u8CD1\u8CD2\u8D6B\u8D99\u8D95\u8DFC\u8F14\u8F12\u8F15\u8F13\u8FA3\u9060\u9058\u905C\u9063\u9059\u905E\u9062\u905D\u905B\u9119\u9118\u911E\u9175\u9178\u9177\u9174\u9278\u9280\u9285\u9298\u9296\u927B\u9293\u929C\u92A8\u927C\u9291\u95A1\u95A8\u95A9\u95A3\u95A5\u95A4\u9699\u969C\u969B\u96CC\u96D2\u9700\u977C\u9785\u97F6\u9817\u9818\u98AF\u98B1\u9903\u9905\u990C\u9909\u99C1\u9AAF\u9AB0\u9AE6\u9B41\u9B42\u9CF4\u9CF6\u9CF3\u9EBC\u9F3B\u9F4A\u5104\u5100\u50FB\u50F5\u50F9\u5102\u5108\u5109\u5105\u51DC\u5287\u5288\u5289\u528D\u528A\u52F0\u53B2\u562E\u563B\u5639\u5632\u563F\u5634\u5629\u5653\u564E\u5657\u5674\u5636\u562F\u5630\u5880\u589F\u589E\u58B3\u589C\u58AE\u58A9\u58A6\u596D\u5B09\u5AFB\u5B0B\u5AF5\u5B0C\u5B08\u5BEE\u5BEC\u5BE9\u5BEB\u5C64\u5C65\u5D9D\u5D94\u5E62\u5E5F\u5E61\u5EE2\u5EDA\u5EDF\u5EDD\u5EE3\u5EE0\u5F48\u5F71\u5FB7\u5FB5\u6176\u6167\u616E\u615D\u6155\u6182\u617C\u6170\u616B\u617E\u61A7\u6190\u61AB\u618E\u61AC\u619A\u61A4\u6194\u61AE\u622E\u6469\u646F\u6479\u649E\u64B2\u6488\u6490\u64B0\u64A5\u6493\u6495\u64A9\u6492\u64AE\u64AD\u64AB\u649A\u64AC\u6499\u64A2\u64B3\u6575\u6577\u6578\u66AE\u66AB\u66B4\u66B1\u6A23\u6A1F\u69E8\u6A01\u6A1E\u6A19\u69FD\u6A21\u6A13\u6A0A\u69F3\u6A02\u6A05\u69ED\u6A11\u6B50\u6B4E\u6BA4\u6BC5\u6BC6\u6F3F\u6F7C\u6F84\u6F51\u6F66\u6F54\u6F86\u6F6D\u6F5B\u6F78\u6F6E\u6F8E\u6F7A\u6F70\u6F64\u6F97\u6F58\u6ED5\u6F6F\u6F60\u6F5F\u719F\u71AC\u71B1\u71A8\u7256\u729B\u734E\u7357\u7469\u748B\u7483\u747E\u7480\u757F\u7620\u7629\u761F\u7624\u7626\u7621\u7622\u769A\u76BA\u76E4\u778E\u7787\u778C\u7791\u778B\u78CB\u78C5\u78BA\u78CA\u78BE\u78D5\u78BC\u78D0\u7A3F\u7A3C\u7A40\u7A3D\u7A37\u7A3B\u7AAF\u7AAE\u7BAD\u7BB1\u7BC4\u7BB4\u7BC6\u7BC7\u7BC1\u7BA0\u7BCC\u7CCA\u7DE0\u7DF4\u7DEF\u7DFB\u7DD8\u7DEC\u7DDD\u7DE8\u7DE3\u7DDA\u7DDE\u7DE9\u7D9E\u7DD9\u7DF2\u7DF9\u7F75\u7F77\u7FAF\u7FE9\u8026\u819B\u819C\u819D\u
81A0\u819A\u8198\u8517\u853D\u851A\u84EE\u852C\u852D\u8513\u8511\u8523\u8521\u8514\u84EC\u8525\u84FF\u8506\u8782\u8774\u8776\u8760\u8766\u8778\u8768\u8759\u8757\u874C\u8753\u885B\u885D\u8910\u8907\u8912\u8913\u8915\u890A\u8ABC\u8AD2\u8AC7\u8AC4\u8A95\u8ACB\u8AF8\u8AB2\u8AC9\u8AC2\u8ABF\u8AB0\u8AD6\u8ACD\u8AB6\u8AB9\u8ADB\u8C4C\u8C4E\u8C6C\u8CE0\u8CDE\u8CE6\u8CE4\u8CEC\u8CED\u8CE2\u8CE3\u8CDC\u8CEA\u8CE1\u8D6D\u8D9F\u8DA3\u8E2B\u8E10\u8E1D\u8E22\u8E0F\u8E29\u8E1F\u8E21\u8E1E\u8EBA\u8F1D\u8F1B\u8F1F\u8F29\u8F26\u8F2A\u8F1C\u8F1E\u8F25\u9069\u906E\u9068\u906D\u9077\u9130\u912D\u9127\u9131\u9187\u9189\u918B\u9183\u92C5\u92BB\u92B7\u92EA\u92AC\u92E4\u92C1\u92B3\u92BC\u92D2\u92C7\u92F0\u92B2\u95AD\u95B1\u9704\u9706\u9707\u9709\u9760\u978D\u978B\u978F\u9821\u982B\u981C\u98B3\u990A\u9913\u9912\u9918\u99DD\u99D0\u99DF\u99DB\u99D1\u99D5\u99D2\u99D9\u9AB7\u9AEE\u9AEF\u9B27\u9B45\u9B44\u9B77\u9B6F\u9D06\u9D09\u9D03\u9EA9\u9EBE\u9ECE\u58A8\u9F52\u5112\u5118\u5114\u5110\u5115\u5180\u51AA\u51DD\u5291\u5293\u52F3\u5659\u566B\u5679\u5669\u5664\u5678\u566A\u5668\u5665\u5671\u566F\u566C\u5662\u5676\u58C1\u58BE\u58C7\u58C5\u596E\u5B1D\u5B34\u5B78\u5BF0\u5C0E\u5F4A\u61B2\u6191\u61A9\u618A\u61CD\u61B6\u61BE\u61CA\u61C8\u6230\u64C5\u64C1\u64CB\u64BB\u64BC\u64DA\u64C4\u64C7\u64C2\u64CD\u64BF\u64D2\u64D4\u64BE\u6574\u66C6\u66C9\u66B9\u66C4\u66C7\u66B8\u6A3D\u6A38\u6A3A\u6A59\u6A6B\u6A58\u6A39\u6A44\u6A62\u6A61\u6A4B\u6A47\u6A35\u6A5F\u6A48\u6B59\u6B77\u6C05\u6FC2\u6FB1\u6FA1\u6FC3\u6FA4\u6FC1\u6FA7\u6FB3\u6FC0\u6FB9\u6FB6\u6FA6\u6FA0\u6FB4\u71BE\u71C9\u71D0\u71D2\u71C8\u71D5\u71B9\u71CE\u71D9\u71DC\u71C3\u71C4\u7368\u749C\u74A3\u7498\u749F\u749E\u74E2\u750C\u750D\u7634\u7638\u763A\u76E7\u76E5\u77A0\u779E\u779F\u77A5\u78E8\u78DA\u78EC\u78E7\u79A6\u7A4D\u7A4E\u7A46\u7A4C\u7A4B\u7ABA\u7BD9\u7C11\u7BC9\u7BE4\u7BDB\u7BE1\u7BE9\u7BE6\u7CD5\u7CD6\u7E0A\u7E11\u7E08\u7E1B\u7E23\u7E1E\u7E1D\u7E09\u7E10\u7F79\u7FB2\u7FF0\u7FF1\u7FEE\u8028\u81B3\u81A9\u81A8\u81FB\u8208\u8258\u8259\u854A\u8559\u8548\u85
68\u8569\u8543\u8549\u856D\u856A\u855E\u8783\u879F\u879E\u87A2\u878D\u8861\u892A\u8932\u8925\u892B\u8921\u89AA\u89A6\u8AE6\u8AFA\u8AEB\u8AF1\u8B00\u8ADC\u8AE7\u8AEE\u8AFE\u8B01\u8B02\u8AF7\u8AED\u8AF3\u8AF6\u8AFC\u8C6B\u8C6D\u8C93\u8CF4\u8E44\u8E31\u8E34\u8E42\u8E39\u8E35\u8F3B\u8F2F\u8F38\u8F33\u8FA8\u8FA6\u9075\u9074\u9078\u9072\u907C\u907A\u9134\u9192\u9320\u9336\u92F8\u9333\u932F\u9322\u92FC\u932B\u9304\u931A\u9310\u9326\u9321\u9315\u932E\u9319\u95BB\u96A7\u96A8\u96AA\u96D5\u970E\u9711\u9716\u970D\u9713\u970F\u975B\u975C\u9766\u9798\u9830\u9838\u983B\u9837\u982D\u9839\u9824\u9910\u9928\u991E\u991B\u9921\u991A\u99ED\u99E2\u99F1\u9AB8\u9ABC\u9AFB\u9AED\u9B28\u9B91\u9D15\u9D23\u9D26\u9D28\u9D12\u9D1B\u9ED8\u9ED4\u9F8D\u9F9C\u512A\u511F\u5121\u5132\u52F5\u568E\u5680\u5690\u5685\u5687\u568F\u58D5\u58D3\u58D1\u58CE\u5B30\u5B2A\u5B24\u5B7A\u5C37\u5C68\u5DBC\u5DBA\u5DBD\u5DB8\u5E6B\u5F4C\u5FBD\u61C9\u61C2\u61C7\u61E6\u61CB\u6232\u6234\u64CE\u64CA\u64D8\u64E0\u64F0\u64E6\u64EC\u64F1\u64E2\u64ED\u6582\u6583\u66D9\u66D6\u6A80\u6A94\u6A84\u6AA2\u6A9C\u6ADB\u6AA3\u6A7E\u6A97\u6A90\u6AA0\u6B5C\u6BAE\u6BDA\u6C08\u6FD8\u6FF1\u6FDF\u6FE0\u6FDB\u6FE4\u6FEB\u6FEF\u6F80\u6FEC\u6FE1\u6FE9\u6FD5\u6FEE\u6FF0\u71E7\u71DF\u71EE\u71E6\u71E5\u71ED\u71EC\u71F4\u71E0\u7235\u7246\u7370\u7372\u74A9\u74B0\u74A6\u74A8\u7646\u7642\u764C\u76EA\u77B3\u77AA\u77B0\u77AC\u77A7\u77AD\u77EF\u78F7\u78FA\u78F4\u78EF\u7901\u79A7\u79AA\u7A57\u7ABF\u7C07\u7C0D\u7BFE\u7BF7\u7C0C\u7BE0\u7CE0\u7CDC\u7CDE\u7CE2\u7CDF\u7CD9\u7CDD\u7E2E\u7E3E\u7E46\u7E37\u7E32\u7E43\u7E2B\u7E3D\u7E31\u7E45\u7E41\u7E34\u7E39\u7E48\u7E35\u7E3F\u7E2F\u7F44\u7FF3\u7FFC\u8071\u8072\u8070\u806F\u8073\u81C6\u81C3\u81BA\u81C2\u81C0\u81BF\u81BD\u81C9\u81BE\u81E8\u8209\u8271\u85AA\u8584\u857E\u859C\u8591\u8594\u85AF\u859B\u8587\u85A8\u858A\u8667\u87C0\u87D1\u87B3\u87D2\u87C6\u87AB\u87BB\u87BA\u87C8\u87CB\u893B\u8936\u8944\u8938\u893D\u89AC\u8B0E\u8B17\u8B19\u8B1B\u8B0A\u8B20\u8B1D\u8B04\u8B10\u8C41\u8C3F\u8C73\u8CFA\u8CFD\u8CFC\u8CF8\u8CFB
\u8DA8\u8E49\u8E4B\u8E48\u8E4A\u8F44\u8F3E\u8F42\u8F45\u8F3F\u907F\u907D\u9084\u9081\u9082\u9080\u9139\u91A3\u919E\u919C\u934D\u9382\u9328\u9375\u934A\u9365\u934B\u9318\u937E\u936C\u935B\u9370\u935A\u9354\u95CA\u95CB\u95CC\u95C8\u95C6\u96B1\u96B8\u96D6\u971C\u971E\u97A0\u97D3\u9846\u98B6\u9935\u9A01\u99FF\u9BAE\u9BAB\u9BAA\u9BAD\u9D3B\u9D3F\u9E8B\u9ECF\u9EDE\u9EDC\u9EDD\u9EDB\u9F3E\u9F4B\u53E2\u5695\u56AE\u58D9\u58D8\u5B38\u5F5D\u61E3\u6233\u64F4\u64F2\u64FE\u6506\u64FA\u64FB\u64F7\u65B7\u66DC\u6726\u6AB3\u6AAC\u6AC3\u6ABB\u6AB8\u6AC2\u6AAE\u6AAF\u6B5F\u6B78\u6BAF\u7009\u700B\u6FFE\u7006\u6FFA\u7011\u700F\u71FB\u71FC\u71FE\u71F8\u7377\u7375\u74A7\u74BF\u7515\u7656\u7658\u7652\u77BD\u77BF\u77BB\u77BC\u790E\u79AE\u7A61\u7A62\u7A60\u7AC4\u7AC5\u7C2B\u7C27\u7C2A\u7C1E\u7C23\u7C21\u7CE7\u7E54\u7E55\u7E5E\u7E5A\u7E61\u7E52\u7E59\u7F48\u7FF9\u7FFB\u8077\u8076\u81CD\u81CF\u820A\u85CF\u85A9\u85CD\u85D0\u85C9\u85B0\u85BA\u85B9\u85A6\u87EF\u87EC\u87F2\u87E0\u8986\u89B2\u89F4\u8B28\u8B39\u8B2C\u8B2B\u8C50\u8D05\u8E59\u8E63\u8E66\u8E64\u8E5F\u8E55\u8EC0\u8F49\u8F4D\u9087\u9083\u9088\u91AB\u91AC\u91D0\u9394\u938A\u9396\u93A2\u93B3\u93AE\u93AC\u93B0\u9398\u939A\u9397\u95D4\u95D6\u95D0\u95D5\u96E2\u96DC\u96D9\u96DB\u96DE\u9724\u97A3\u97A6\u97AD\u97F9\u984D\u984F\u984C\u984E\u9853\u98BA\u993E\u993F\u993D\u992E\u99A5\u9A0E\u9AC1\u9B03\u9B06\u9B4F\u9B4E\u9B4D\u9BCA\u9BC9\u9BFD\u9BC8\u9BC0\u9D51\u9D5D\u9D60\u9EE0\u9F15\u9F2C\u5133\u56A5\u58DE\u58DF\u58E2\u5BF5\u9F90\u5EEC\u61F2\u61F7\u61F6\u61F5\u6500\u650F\u66E0\u66DD\u6AE5\u6ADD\u6ADA\u6AD3\u701B\u701F\u7028\u701A\u701D\u7015\u7018\u7206\u720D\u7258\u72A2\u7378\u737A\u74BD\u74CA\u74E3\u7587\u7586\u765F\u7661\u77C7\u7919\u79B1\u7A6B\u7A69\u7C3E\u7C3F\u7C38\u7C3D\u7C37\u7C40\u7E6B\u7E6D\u7E79\u7E69\u7E6A\u7F85\u7E73\u7FB6\u7FB9\u7FB8\u81D8\u85E9\u85DD\u85EA\u85D5\u85E4\u85E5\u85F7\u87FB\u8805\u880D\u87F9\u87FE\u8960\u895F\u8956\u895E\u8B41\u8B5C\u8B58\u8B49\u8B5A\u8B4E\u8B4F\u8B46\u8B59\u8D08\u8D0A\u8E7C\u8E72\u8E87\u8E76\u8E6C\u8E7A\u
8E74\u8F54\u8F4E\u8FAD\u908A\u908B\u91B1\u91AE\u93E1\u93D1\u93DF\u93C3\u93C8\u93DC\u93DD\u93D6\u93E2\u93CD\u93D8\u93E4\u93D7\u93E8\u95DC\u96B4\u96E3\u972A\u9727\u9761\u97DC\u97FB\u985E\u9858\u985B\u98BC\u9945\u9949\u9A16\u9A19\u9B0D\u9BE8\u9BE7\u9BD6\u9BDB\u9D89\u9D61\u9D72\u9D6A\u9D6C\u9E92\u9E97\u9E93\u9EB4\u52F8\u56A8\u56B7\u56B6\u56B4\u56BC\u58E4\u5B40\u5B43\u5B7D\u5BF6\u5DC9\u61F8\u61FA\u6518\u6514\u6519\u66E6\u6727\u6AEC\u703E\u7030\u7032\u7210\u737B\u74CF\u7662\u7665\u7926\u792A\u792C\u792B\u7AC7\u7AF6\u7C4C\u7C43\u7C4D\u7CEF\u7CF0\u8FAE\u7E7D\u7E7C\u7E82\u7F4C\u8000\u81DA\u8266\u85FB\u85F9\u8611\u85FA\u8606\u860B\u8607\u860A\u8814\u8815\u8964\u89BA\u89F8\u8B70\u8B6C\u8B66\u8B6F\u8B5F\u8B6B\u8D0F\u8D0D\u8E89\u8E81\u8E85\u8E82\u91B4\u91CB\u9418\u9403\u93FD\u95E1\u9730\u98C4\u9952\u9951\u99A8\u9A2B\u9A30\u9A37\u9A35\u9C13\u9C0D\u9E79\u9EB5\u9EE8\u9F2F\u9F5F\u9F63\u9F61\u5137\u5138\u56C1\u56C0\u56C2\u5914\u5C6C\u5DCD\u61FC\u61FE\u651D\u651C\u6595\u66E9\u6AFB\u6B04\u6AFA\u6BB2\u704C\u721B\u72A7\u74D6\u74D4\u7669\u77D3\u7C50\u7E8F\u7E8C\u7FBC\u8617\u862D\u861A\u8823\u8822\u8821\u881F\u896A\u896C\u89BD\u8B74\u8B77\u8B7D\u8D13\u8E8A\u8E8D\u8E8B\u8F5F\u8FAF\u91BA\u942E\u9433\u9435\u943A\u9438\u9432\u942B\u95E2\u9738\u9739\u9732\u97FF\u9867\u9865\u9957\u9A45\u9A43\u9A40\u9A3E\u9ACF\u9B54\u9B51\u9C2D\u9C25\u9DAF\u9DB4\u9DC2\u9DB8\u9E9D\u9EEF\u9F19\u9F5C\u9F66\u9F67\u513C\u513B\u56C8\u56CA\u56C9\u5B7F\u5DD4\u5DD2\u5F4E\u61FF\u6524\u6B0A\u6B61\u7051\u7058\u7380\u74E4\u758A\u766E\u766C\u79B3\u7C60\u7C5F\u807E\u807D\u81DF\u8972\u896F\u89FC\u8B80\u8D16\u8D17\u8E91\u8E93\u8F61\u9148\u9444\u9451\u9452\u973D\u973E\u97C3\u97C1\u986B\u9955\u9A55\u9A4D\u9AD2\u9B1A\u9C49\u9C31\u9C3E\u9C3B\u9DD3\u9DD7\u9F34\u9F6C\u9F6A\u9F94\u56CC\u5DD6\u6200\u6523\u652B\u652A\u66EC\u6B10\u74DA\u7ACA\u7C64\u7C63\u7C65\u7E93\u7E96\u7E94\u81E2\u8638\u863F\u8831\u8B8A\u9090\u908F\u9463\u9460\u9464\u9768\u986F\u995C\u9A5A\u9A5B\u9A57\u9AD3\u9AD4\u9AD1\u9C54\u9C57\u9C56\u9DE5\u9E9F\u9EF4\u56D1\u58E9\u65
2C\u705E\u7671\u7672\u77D7\u7F50\u7F88\u8836\u8839\u8862\u8B93\u8B92\u8B96\u8277\u8D1B\u91C0\u946A\u9742\u9748\u9744\u97C6\u9870\u9A5F\u9B22\u9B58\u9C5F\u9DF9\u9DFA\u9E7C\u9E7D\u9F07\u9F77\u9F72\u5EF3\u6B16\u7063\u7C6C\u7C6E\u883B\u89C0\u8EA1\u91C1\u9472\u9470\u9871\u995E\u9AD6\u9B23\u9ECC\u7064\u77DA\u8B9A\u9477\u97C9\u9A62\u9A65\u7E9C\u8B9C\u8EAA\u91C5\u947D\u947E\u947C\u9C77\u9C78\u9EF7\u8C54\u947F\u9E1A\u7228\u9A6A\u9B31\u9E1B\u9E1E\u7C72\u2460\u2461\u2462\u2463\u2464\u2465\u2466\u2467\u2468\u2469\u2474\u2475\u2476\u2477\u2478\u2479\u247A\u247B\u247C\u247D\u2170\u2171\u2172\u2173\u2174\u2175\u2176\u2177\u2178\u2179\u4E36\u4E3F\u4E85\u4EA0\u5182\u5196\u51AB\u52F9\u5338\u5369\u53B6\u590A\u5B80\u5DDB\u2F33\u5E7F\u5EF4\u5F50\u5F61\u6534\u65E0\u7592\u7676\u8FB5\u96B6\u00A8\u02C6\u30FD\u30FE\u309D\u309E\u3003\u4EDD\u3005\u3006\u3007\u30FC\uFF3B\uFF3D\u273D\u3041\u3042\u3043\u3044\u3045\u3046\u3047\u3048\u3049\u304A\u304B\u304C\u304D\u304E\u304F\u3050\u3051\u3052\u3053\u3054\u3055\u3056\u3057\u3058\u3059\u305A\u305B\u305C\u305D\u305E\u305F\u3060\u3061\u3062\u3063\u3064\u3065\u3066\u3067\u3068\u3069\u306A\u306B\u306C\u306D\u306E\u306F\u3070\u3071\u3072\u3073\u3074\u3075\u3076\u3077\u3078\u3079\u307A\u307B\u307C\u307D\u307E\u307F\u3080\u3081\u3082\u3083\u3084\u3085\u3086\u3087\u3088\u3089\u308A\u308B\u308C\u308D\u308E\u308F\u3090\u3091\u3092\u3093\u30A1\u30A2\u30A3\u30A4\u30A5\u30A6\u30A7\u30A8\u30A9\u30AA\u30AB\u30AC\u30AD\u30AE\u30AF\u30B0\u30B1\u30B2\u30B3\u30B4\u30B5\u30B6\u30B7\u30B8\u30B9\u30BA\u30BB\u30BC\u30BD\u30BE\u30BF\u30C0\u30C1\u30C2\u30C3\u30C4\u30C5\u30C6\u30C7\u30C8\u30C9\u30CA\u30CB\u30CC\u30CD\u30CE\u30CF\u30D0\u30D1\u30D2\u30D3\u30D4\u30D5\u30D6\u30D7\u30D8\u30D9\u30DA\u30DB\u30DC\u30DD\u30DE\u30DF\u30E0\u30E1\u30E2\u30E3\u30E4\u30E5\u30E6\u30E7\u30E8\u30E9\u30EA\u30EB\u30EC\u30ED\u30EE\u30EF\u30F0\u30F1\u30F2\u30F3\u30F4\u30F5\u30F6\u0410\u0411\u0412\u0413\u0414\u0415\u0401\u0416\u0417\u0418\u0419\u041A\u041B\u041C\u041D\u041E\u041F\u0420\u0421\u0422
\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042A\u042B\u042C\u042D\u042E\u042F\u0430\u0431\u0432\u0433\u0434\u0435\u0451\u0436\u0437\u0438\u0439\u043A\u043B\u043C\u043D\u043E\u043F\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044A\u044B\u044C\u044D\u044E\u044F\u21E7\u21B8\u21B9\u31CF\u00CC\u4E5A\u008A\u5202\u4491\u9FB0\u5188\u9FB1\u7607";
+
+ private static final String TABLE4 = "\uFFE2\uFFE4\uFF07\uFF02\u3231\u2116\u2121\u309B\u309C\u2E80\u2E84\u2E86\u2E87\u2E88\u2E8A\u2E8C\u2E8D\u2E95\u2E9C\u2E9D\u2EA5\u2EA7\u2EAA\u2EAC\u2EAE\u2EB6\u2EBC\u2EBE\u2EC6\u2ECA\u2ECC\u2ECD\u2ECF\u2ED6\u2ED7\u2EDE\u2EE3\u0000\u0000\u0000\u0283\u0250\u025B\u0254\u0275\u0153\u00F8\u014B\u028A\u026A\u4E42\u4E5C\u51F5\u531A\u5382\u4E07\u4E0C\u4E47\u4E8D\u56D7\uFA0C\u5C6E\u5F73\u4E0F\u5187\u4E0E\u4E2E\u4E93\u4EC2\u4EC9\u4EC8\u5198\u52FC\u536C\u53B9\u5720\u5903\u592C\u5C10\u5DFF\u65E1\u6BB3\u6BCC\u6C14\u723F\u4E31\u4E3C\u4EE8\u4EDC\u4EE9\u4EE1\u4EDD\u4EDA\u520C\u531C\u534C\u5722\u5723\u5917\u592F\u5B81\u5B84\u5C12\u5C3B\u5C74\u5C73\u5E04\u5E80\u5E82\u5FC9\u6209\u6250\u6C15\u6C36\u6C43\u6C3F\u6C3B\u72AE\u72B0\u738A\u79B8\u808A\u961E\u4F0E\u4F18\u4F2C\u4EF5\u4F14\u4EF1\u4F00\u4EF7\u4F08\u4F1D\u4F02\u4F05\u4F22\u4F13\u4F04\u4EF4\u4F12\u51B1\u5213\u5209\u5210\u52A6\u5322\u531F\u534D\u538A\u5407\u56E1\u56DF\u572E\u572A\u5734\u593C\u5980\u597C\u5985\u597B\u597E\u5977\u597F\u5B56\u5C15\u5C25\u5C7C\u5C7A\u5C7B\u5C7E\u5DDF\u5E75\u5E84\u5F02\u5F1A\u5F74\u5FD5\u5FD4\u5FCF\u625C\u625E\u6264\u6261\u6266\u6262\u6259\u6260\u625A\u6265\u65EF\u65EE\u673E\u6739\u6738\u673B\u673A\u673F\u673C\u6733\u6C18\u6C46\u6C52\u6C5C\u6C4F\u6C4A\u6C54\u6C4B\u6C4C\u7071\u725E\u72B4\u72B5\u738E\u752A\u767F\u7A75\u7F51\u8278\u827C\u8280\u827D\u827F\u864D\u897E\u9099\u9097\u9098\u909B\u9094\u9622\u9624\u9620\u9623\u4F56\u4F3B\u4F62\u4F49\u4F53\u4F64\u4F3E\u4F67\u4F52\u4F5F\u4F41\u4F58\u4F2D\u4F33\u4F3F\u4F61\u518F\u51B9\u521C\u521E\u5221\u52AD\u52AE\u5309\u5363\u5372\u538E\u538F\u5430\u5437\u542A\u5454\u5445\u5419\u541C\u5425\u5418\u543D\u544F\u5441\u5428\u5424\u5447\u56EE\u56E7\u56E5\u5741\u5745\u574C\u5749\u574B\u5752\u5906\u5940\u59A6\u5998\u59A0\u5997\u598E\u59A2\u5990\u598F\u59A7\u59A1\u5B8E\u5B92\u5C28\u5C2A\u5C8D\u5C8F\u5C88\u5C8B\u5C89\u5C92\u5C8A\u5C86\u5C93\u5C95\u5DE0\u5E0A\u5E0E\u5E8B\u5E89\u5E8C\u5E88\u5E8D\u5F05\u5F1D\u5F78\u5F76\u5FD2\u5FD1\u5FD0\u5F
ED\u5FE8\u5FEE\u5FF3\u5FE1\u5FE4\u5FE3\u5FFA\u5FEF\u5FF7\u5FFB\u6000\u5FF4\u623A\u6283\u628C\u628E\u628F\u6294\u6287\u6271\u627B\u627A\u6270\u6281\u6288\u6277\u627D\u6272\u6274\u6537\u65F0\u65F4\u65F3\u65F2\u65F5\u6745\u6747\u6759\u6755\u674C\u6748\u675D\u674D\u675A\u674B\u6BD0\u6C19\u6C1A\u6C78\u6C67\u6C6B\u6C84\u6C8B\u6C8F\u6C71\u6C6F\u6C69\u6C9A\u6C6D\u6C87\u6C95\u6C9C\u6C66\u6C73\u6C65\u6C7B\u6C8E\u7074\u707A\u7263\u72BF\u72BD\u72C3\u72C6\u72C1\u72BA\u72C5\u7395\u7397\u7393\u7394\u7392\u753A\u7539\u7594\u7595\u7681\u793D\u8034\u8095\u8099\u8090\u8092\u809C\u8290\u828F\u8285\u828E\u8291\u8293\u828A\u8283\u8284\u8C78\u8FC9\u8FBF\u909F\u90A1\u90A5\u909E\u90A7\u90A0\u9630\u9628\u962F\u962D\u4E33\u4F98\u4F7C\u4F85\u4F7D\u4F80\u4F87\u4F76\u4F74\u4F89\u4F84\u4F77\u4F4C\u4F97\u4F6A\u4F9A\u4F79\u4F81\u4F78\u4F90\u4F9C\u4F94\u4F9E\u4F92\u4F82\u4F95\u4F6B\u4F6E\u519E\u51BC\u51BE\u5235\u5232\u5233\u5246\u5231\u52BC\u530A\u530B\u533C\u5392\u5394\u5487\u547F\u5481\u5491\u5482\u5488\u546B\u547A\u547E\u5465\u546C\u5474\u5466\u548D\u546F\u5461\u5460\u5498\u5463\u5467\u5464\u56F7\u56F9\u576F\u5772\u576D\u576B\u5771\u5770\u5776\u5780\u5775\u577B\u5773\u5774\u5762\u5768\u577D\u590C\u5945\u59B5\u59BA\u59CF\u59CE\u59B2\u59CC\u59C1\u59B6\u59BC\u59C3\u59D6\u59B1\u59BD\u59C0\u59C8\u59B4\u59C7\u5B62\u5B65\u5B93\u5B95\u5C44\u5C47\u5CAE\u5CA4\u5CA0\u5CB5\u5CAF\u5CA8\u5CAC\u5C9F\u5CA3\u5CAD\u5CA2\u5CAA\u5CA7\u5C9D\u5CA5\u5CB6\u5CB0\u5CA6\u5E17\u5E14\u5E19\u5F28\u5F22\u5F23\u5F24\u5F54\u5F82\u5F7E\u5F7D\u5FDE\u5FE5\u602D\u6026\u6019\u6032\u600B\u6034\u600A\u6017\u6033\u601A\u601E\u602C\u6022\u600D\u6010\u602E\u6013\u6011\u600C\u6009\u601C\u6214\u623D\u62AD\u62B4\u62D1\u62BE\u62AA\u62B6\u62CA\u62AE\u62B3\u62AF\u62BB\u62A9\u62B0\u62B8\u653D\u65A8\u65BB\u6609\u65FC\u6604\u6612\u6608\u65FB\u6603\u660B\u660D\u6605\u65FD\u6611\u6610\u66F6\u670A\u6785\u676C\u678E\u6792\u6776\u677B\u6798\u6786\u6784\u6774\u678D\u678C\u677A\u679F\u6791\u6799\u6783\u677D\u6781\u6778\u6779\u6794\u6B25\u6B80\u6B7E\u6BDE
\u6C1D\u6C93\u6CEC\u6CEB\u6CEE\u6CD9\u6CB6\u6CD4\u6CAD\u6CE7\u6CB7\u6CD0\u6CC2\u6CBA\u6CC3\u6CC6\u6CED\u6CF2\u6CD2\u6CDD\u6CB4\u6C8A\u6C9D\u6C80\u6CDE\u6CC0\u6D30\u6CCD\u6CC7\u6CB0\u6CF9\u6CCF\u6CE9\u6CD1\u7094\u7098\u7085\u7093\u7086\u7084\u7091\u7096\u7082\u709A\u7083\u726A\u72D6\u72CB\u72D8\u72C9\u72DC\u72D2\u72D4\u72DA\u72CC\u72D1\u73A4\u73A1\u73AD\u73A6\u73A2\u73A0\u73AC\u739D\u74DD\u74E8\u753F\u7540\u753E\u758C\u7598\u76AF\u76F3\u76F1\u76F0\u76F5\u77F8\u77FC\u77F9\u77FB\u77FA\u77F7\u7942\u793F\u79C5\u7A78\u7A7B\u7AFB\u7C75\u7CFD\u8035\u808F\u80AE\u80A3\u80B8\u80B5\u80AD\u8220\u82A0\u82C0\u82AB\u829A\u8298\u829B\u82B5\u82A7\u82AE\u82BC\u829E\u82BA\u82B4\u82A8\u82A1\u82A9\u82C2\u82A4\u82C3\u82B6\u82A2\u8670\u866F\u866D\u866E\u8C56\u8FD2\u8FCB\u8FD3\u8FCD\u8FD6\u8FD5\u8FD7\u90B2\u90B4\u90AF\u90B3\u90B0\u9639\u963D\u963C\u963A\u9643\u4FCD\u4FC5\u4FD3\u4FB2\u4FC9\u4FCB\u4FC1\u4FD4\u4FDC\u4FD9\u4FBB\u4FB3\u4FDB\u4FC7\u4FD6\u4FBA\u4FC0\u4FB9\u4FEC\u5244\u5249\u52C0\u52C2\u533D\u537C\u5397\u5396\u5399\u5398\u54BA\u54A1\u54AD\u54A5\u54CF\u54C3\u830D\u54B7\u54AE\u54D6\u54B6\u54C5\u54C6\u54A0\u5470\u54BC\u54A2\u54BE\u5472\u54DE\u54B0\u57B5\u579E\u579F\u57A4\u578C\u5797\u579D\u579B\u5794\u5798\u578F\u5799\u57A5\u579A\u5795\u58F4\u590D\u5953\u59E1\u59DE\u59EE\u5A00\u59F1\u59DD\u59FA\u59FD\u59FC\u59F6\u59E4\u59F2\u59F7\u59DB\u59E9\u59F3\u59F5\u59E0\u59FE\u59F4\u59ED\u5BA8\u5C4C\u5CD0\u5CD8\u5CCC\u5CD7\u5CCB\u5CDB\u5CDE\u5CDA\u5CC9\u5CC7\u5CCA\u5CD6\u5CD3\u5CD4\u5CCF\u5CC8\u5CC6\u5CCE\u5CDF\u5CF8\u5DF9\u5E21\u5E22\u5E23\u5E20\u5E24\u5EB0\u5EA4\u5EA2\u5E9B\u5EA3\u5EA5\u5F07\u5F2E\u5F56\u5F86\u6037\u6039\u6054\u6072\u605E\u6045\u6053\u6047\u6049\u605B\u604C\u6040\u6042\u605F\u6024\u6044\u6058\u6066\u606E\u6242\u6243\u62CF\u630D\u630B\u62F5\u630E\u6303\u62EB\u62F9\u630F\u630C\u62F8\u62F6\u6300\u6313\u6314\u62FA\u6315\u62FB\u62F0\u6541\u6543\u65AA\u65BF\u6636\u6621\u6632\u6635\u661C\u6626\u6622\u6633\u662B\u663A\u661D\u6634\u6639\u662E\u670F\u6710\u67C1\u67F2\u67C8\u67BA\u67DC\u
67BB\u67F8\u67D8\u67C0\u67B7\u67C5\u67EB\u67E4\u67DF\u67B5\u67CD\u67B3\u67F7\u67F6\u67EE\u67E3\u67C2\u67B9\u67CE\u67E7\u67F0\u67B2\u67FC\u67C6\u67ED\u67CC\u67AE\u67E6\u67DB\u67FA\u67C9\u67CA\u67C3\u67EA\u67CB\u6B28\u6B82\u6B84\u6BB6\u6BD6\u6BD8\u6BE0\u6C20\u6C21\u6D28\u6D34\u6D2D\u6D1F\u6D3C\u6D3F\u6D12\u6D0A\u6CDA\u6D33\u6D04\u6D19\u6D3A\u6D1A\u6D11\u6D00\u6D1D\u6D42\u6D01\u6D18\u6D37\u6D03\u6D0F\u6D40\u6D07\u6D20\u6D2C\u6D08\u6D22\u6D09\u6D10\u70B7\u709F\u70BE\u70B1\u70B0\u70A1\u70B4\u70B5\u70A9\u7241\u7249\u724A\u726C\u7270\u7273\u726E\u72CA\u72E4\u72E8\u72EB\u72DF\u72EA\u72E6\u72E3\u7385\u73CC\u73C2\u73C8\u73C5\u73B9\u73B6\u73B5\u73B4\u73EB\u73BF\u73C7\u73BE\u73C3\u73C6\u73B8\u73CB\u74EC\u74EE\u752E\u7547\u7548\u75A7\u75AA\u7679\u76C4\u7708\u7703\u7704\u7705\u770A\u76F7\u76FB\u76FA\u77E7\u77E8\u7806\u7811\u7812\u7805\u7810\u780F\u780E\u7809\u7803\u7813\u794A\u794C\u794B\u7945\u7944\u79D5\u79CD\u79CF\u79D6\u79CE\u7A80\u7A7E\u7AD1\u7B00\u7B01\u7C7A\u7C78\u7C79\u7C7F\u7C80\u7C81\u7D03\u7D08\u7D01\u7F58\u7F91\u7F8D\u7FBE\u8007\u800E\u800F\u8014\u8037\u80D8\u80C7\u80E0\u80D1\u80C8\u80C2\u80D0\u80C5\u80E3\u80D9\u80DC\u80CA\u80D5\u80C9\u80CF\u80D7\u80E6\u80CD\u81FF\u8221\u8294\u82D9\u82FE\u82F9\u8307\u82E8\u8300\u82D5\u833A\u82EB\u82D6\u82F4\u82EC\u82E1\u82F2\u82F5\u830C\u82FB\u82F6\u82F0\u82EA\u82E4\u82E0\u82FA\u82F3\u82ED\u8677\u8674\u867C\u8673\u8841\u884E\u8867\u886A\u8869\u89D3\u8A04\u8A07\u8D72\u8FE3\u8FE1\u8FEE\u8FE0\u90F1\u90BD\u90BF\u90D5\u90C5\u90BE\u90C7\u90CB\u90C8\u91D4\u91D3\u9654\u964F\u9651\u9653\u964A\u964E\u501E\u5005\u5007\u5013\u5022\u5030\u501B\u4FF5\u4FF4\u5033\u5037\u502C\u4FF6\u4FF7\u5017\u501C\u5020\u5027\u5035\u502F\u5031\u500E\u515A\u5194\u5193\u51CA\u51C4\u51C5\u51C8\u51CE\u5261\u525A\u5252\u525E\u525F\u5255\u5262\u52CD\u530E\u539E\u5526\u54E2\u5517\u5512\u54E7\u54F3\u54E4\u551A\u54FF\u5504\u5508\u54EB\u5511\u5505\u54F1\u550A\u54FB\u54F7\u54F8\u54E0\u550E\u5503\u550B\u5701\u5702\u57CC\u5832\u57D5\u57D2\u57BA\u57C6\u57BD\u57BC\u57B8\u57B6\u57
BF\u57C7\u57D0\u57B9\u57C1\u590E\u594A\u5A19\u5A16\u5A2D\u5A2E\u5A15\u5A0F\u5A17\u5A0A\u5A1E\u5A33\u5B6C\u5BA7\u5BAD\u5BAC\u5C03\u5C56\u5C54\u5CEC\u5CFF\u5CEE\u5CF1\u5CF7\u5D00\u5CF9\u5E29\u5E28\u5EA8\u5EAE\u5EAA\u5EAC\u5F33\u5F30\u5F67\u605D\u605A\u6067\u6041\u60A2\u6088\u6080\u6092\u6081\u609D\u6083\u6095\u609B\u6097\u6087\u609C\u608E\u6219\u6246\u62F2\u6310\u6356\u632C\u6344\u6345\u6336\u6343\u63E4\u6339\u634B\u634A\u633C\u6329\u6341\u6334\u6358\u6354\u6359\u632D\u6347\u6333\u635A\u6351\u6338\u6357\u6340\u6348\u654A\u6546\u65C6\u65C3\u65C4\u65C2\u664A\u665F\u6647\u6651\u6712\u6713\u681F\u681A\u6849\u6832\u6833\u683B\u684B\u684F\u6816\u6831\u681C\u6835\u682B\u682D\u682F\u684E\u6844\u6834\u681D\u6812\u6814\u6826\u6828\u682E\u684D\u683A\u6825\u6820\u6B2C\u6B2F\u6B2D\u6B31\u6B34\u6B6D\u8082\u6B88\u6BE6\u6BE4\u6BE8\u6BE3\u6BE2\u6BE7\u6C25\u6D7A\u6D63\u6D64\u6D76\u6D0D\u6D61\u6D92\u6D58\u6D62\u6D6D\u6D6F\u6D91\u6D8D\u6DEF\u6D7F\u6D86\u6D5E\u6D67\u6D60\u6D97\u6D70\u6D7C\u6D5F\u6D82\u6D98\u6D2F\u6D68\u6D8B\u6D7E\u6D80\u6D84\u6D16\u6D83\u6D7B\u6D7D\u6D75\u6D90\u70DC\u70D3\u70D1\u70DD\u70CB\u7F39\u70E2\u70D7\u70D2\u70DE\u70E0\u70D4\u70CD\u70C5\u70C6\u70C7\u70DA\u70CE\u70E1\u7242\u7278\u7277\u7276\u7300\u72FA\u72F4\u72FE\u72F6\u72F3\u72FB\u7301\u73D3\u73D9\u73E5\u73D6\u73BC\u73E7\u73E3\u73E9\u73DC\u73D2\u73DB\u73D4\u73DD\u73DA\u73D7\u73D8\u73E8\u74DE\u74DF\u74F4\u74F5\u7521\u755B\u755F\u75B0\u75C1\u75BB\u75C4\u75C0\u75BF\u75B6\u75BA\u768A\u76C9\u771D\u771B\u7710\u7713\u7712\u7723\u7711\u7715\u7719\u771A\u7722\u7727\u7823\u782C\u7822\u7835\u782F\u7828\u782E\u782B\u7821\u7829\u7833\u782A\u7831\u7954\u795B\u794F\u795C\u7953\u7952\u7951\u79EB\u79EC\u79E0\u79EE\u79ED\u79EA\u79DC\u79DE\u79DD\u7A86\u7A89\u7A85\u7A8B\u7A8C\u7A8A\u7A87\u7AD8\u7B10\u7B04\u7B13\u7B05\u7B0F\u7B08\u7B0A\u7B0E\u7B09\u7B12\u7C84\u7C91\u7C8A\u7C8C\u7C88\u7C8D\u7C85\u7D1E\u7D1D\u7D11\u7D0E\u7D18\u7D16\u7D13\u7D1F\u7D12\u7D0F\u7D0C\u7F5C\u7F61\u7F5E\u7F60\u7F5D\u7F5B\u7F96\u7F92\u7FC3\u7FC2\u7FC0\u8016\u803E
\u8039\u80FA\u80F2\u80F9\u80F5\u8101\u80FB\u8100\u8201\u822F\u8225\u8333\u832D\u8344\u8319\u8351\u8325\u8356\u833F\u8341\u8326\u831C\u8322\u8342\u834E\u831B\u832A\u8308\u833C\u834D\u8316\u8324\u8320\u8337\u832F\u8329\u8347\u8345\u834C\u8353\u831E\u832C\u834B\u8327\u8348\u8653\u8652\u86A2\u86A8\u8696\u868D\u8691\u869E\u8687\u8697\u8686\u868B\u869A\u8685\u86A5\u8699\u86A1\u86A7\u8695\u8698\u868E\u869D\u8690\u8694\u8843\u8844\u886D\u8875\u8876\u8872\u8880\u8871\u887F\u886F\u8883\u887E\u8874\u887C\u8A12\u8C47\u8C57\u8C7B\u8CA4\u8CA3\u8D76\u8D78\u8DB5\u8DB7\u8DB6\u8ED1\u8ED3\u8FFE\u8FF5\u9002\u8FFF\u8FFB\u9004\u8FFC\u8FF6\u90D6\u90E0\u90D9\u90DA\u90E3\u90DF\u90E5\u90D8\u90DB\u90D7\u90DC\u90E4\u9150\u914E\u914F\u91D5\u91E2\u91DA\u965C\u965F\u96BC\u98E3\u9ADF\u9B2F\u4E7F\u5070\u506A\u5061\u505E\u5060\u5053\u504B\u505D\u5072\u5048\u504D\u5041\u505B\u504A\u5062\u5015\u5045\u505F\u5069\u506B\u5063\u5064\u5046\u5040\u506E\u5073\u5057\u5051\u51D0\u526B\u526D\u526C\u526E\u52D6\u52D3\u532D\u539C\u5575\u5576\u553C\u554D\u5550\u5534\u552A\u5551\u5562\u5536\u5535\u5530\u5552\u5545\u550C\u5532\u5565\u554E\u5539\u5548\u552D\u553B\u5540\u554B\u570A\u5707\u57FB\u5814\u57E2\u57F6\u57DC\u57F4\u5800\u57ED\u57FD\u5808\u57F8\u580B\u57F3\u57CF\u5807\u57EE\u57E3\u57F2\u57E5\u57EC\u57E1\u580E\u57FC\u5810\u57E7\u5801\u580C\u57F1\u57E9\u57F0\u580D\u5804\u595C\u5A60\u5A58\u5A55\u5A67\u5A5E\u5A38\u5A35\u5A6D\u5A50\u5A5F\u5A65\u5A6C\u5A53\u5A64\u5A57\u5A43\u5A5D\u5A52\u5A44\u5A5B\u5A48\u5A8E\u5A3E\u5A4D\u5A39\u5A4C\u5A70\u5A69\u5A47\u5A51\u5A56\u5A42\u5A5C\u5B72\u5B6E\u5BC1\u5BC0\u5C59\u5D1E\u5D0B\u5D1D\u5D1A\u5D20\u5D0C\u5D28\u5D0D\u5D26\u5D25\u5D0F\u5D30\u5D12\u5D23\u5D1F\u5D2E\u5E3E\u5E34\u5EB1\u5EB4\u5EB9\u5EB2\u5EB3\u5F36\u5F38\u5F9B\u5F96\u5F9F\u608A\u6090\u6086\u60BE\u60B0\u60BA\u60D3\u60D4\u60CF\u60E4\u60D9\u60DD\u60C8\u60B1\u60DB\u60B7\u60CA\u60BF\u60C3\u60CD\u60C0\u6332\u6365\u638A\u6382\u637D\u63BD\u639E\u63AD\u639D\u6397\u63AB\u638E\u636F\u6387\u6390\u636E\u63AF\u6375\u639C\u636D\u63AE\u
637C\u63A4\u633B\u639F\u6378\u6385\u6381\u6391\u638D\u6370\u6553\u65CD\u6665\u6661\u665B\u6659\u665C\u6662\u6718\u6879\u6887\u6890\u689C\u686D\u686E\u68AE\u68AB\u6956\u686F\u68A3\u68AC\u68A9\u6875\u6874\u68B2\u688F\u6877\u6892\u687C\u686B\u6872\u68AA\u6880\u6871\u687E\u689B\u6896\u688B\u68A0\u6889\u68A4\u6878\u687B\u6891\u688C\u688A\u687D\u6B36\u6B33\u6B37\u6B38\u6B91\u6B8F\u6B8D\u6B8E\u6B8C\u6C2A\u6DC0\u6DAB\u6DB4\u6DB3\u6E74\u6DAC\u6DE9\u6DE2\u6DB7\u6DF6\u6DD4\u6E00\u6DC8\u6DE0\u6DDF\u6DD6\u6DBE\u6DE5\u6DDC\u6DDD\u6DDB\u6DF4\u6DCA\u6DBD\u6DED\u6DF0\u6DBA\u6DD5\u6DC2\u6DCF\u6DC9\u6DD0\u6DF2\u6DD3\u6DFD\u6DD7\u6DCD\u6DE3\u6DBB\u70FA\u710D\u70F7\u7117\u70F4\u710C\u70F0\u7104\u70F3\u7110\u70FC\u70FF\u7106\u7113\u7100\u70F8\u70F6\u710B\u7102\u710E\u727E\u727B\u727C\u727F\u731D\u7317\u7307\u7311\u7318\u730A\u7308\u72FF\u730F\u731E\u7388\u73F6\u73F8\u73F5\u7404\u7401\u73FD\u7407\u7400\u73FA\u73FC\u73FF\u740C\u740B\u73F4\u7408\u7564\u7563\u75CE\u75D2\u75CF\u75CB\u75CC\u75D1\u75D0\u768F\u7689\u76D3\u7739\u772F\u772D\u7731\u7732\u7734\u7733\u773D\u7725\u773B\u7735\u7848\u7852\u7849\u784D\u784A\u784C\u7826\u7845\u7850\u7964\u7967\u7969\u796A\u7963\u796B\u7961\u79BB\u79FA\u79F8\u79F6\u79F7\u7A8F\u7A94\u7A90\u7B35\u7B47\u7B34\u7B25\u7B30\u7B22\u7B24\u7B33\u7B18\u7B2A\u7B1D\u7B31\u7B2B\u7B2D\u7B2F\u7B32\u7B38\u7B1A\u7B23\u7C94\u7C98\u7C96\u7CA3\u7D35\u7D3D\u7D38\u7D36\u7D3A\u7D45\u7D2C\u7D29\u7D41\u7D47\u7D3E\u7D3F\u7D4A\u7D3B\u7D28\u7F63\u7F95\u7F9C\u7F9D\u7F9B\u7FCA\u7FCB\u7FCD\u7FD0\u7FD1\u7FC7\u7FCF\u7FC9\u801F\u801E\u801B\u8047\u8043\u8048\u8118\u8125\u8119\u811B\u812D\u811F\u812C\u811E\u8121\u8115\u8127\u811D\u8122\u8211\u8238\u8233\u823A\u8234\u8232\u8274\u8390\u83A3\u83A8\u838D\u837A\u8373\u83A4\u8374\u838F\u8381\u8395\u8399\u8375\u8394\u83A9\u837D\u8383\u838C\u839D\u839B\u83AA\u838B\u837E\u83A5\u83AF\u8388\u8397\u83B0\u837F\u83A6\u8387\u83AE\u8376\u839A\u8659\u8656\u86BF\u86B7\u86C2\u86C1\u86C5\u86BA\u86B0\u86C8\u86B9\u86B3\u86B8\u86CC\u86B4\u86BB\u86BC\u86C3\u86BD\u86
BE\u8852\u8889\u8895\u88A8\u88A2\u88AA\u889A\u8891\u88A1\u889F\u8898\u88A7\u8899\u889B\u8897\u88A4\u88AC\u888C\u8893\u888E\u8982\u89D6\u89D9\u89D5\u8A30\u8A27\u8A2C\u8A1E\u8C39\u8C3B\u8C5C\u8C5D\u8C7D\u8CA5\u8D7D\u8D7B\u8D79\u8DBC\u8DC2\u8DB9\u8DBF\u8DC1\u8ED8\u8EDE\u8EDD\u8EDC\u8ED7\u8EE0\u8EE1\u9024\u900B\u9011\u901C\u900C\u9021\u90EF\u90EA\u90F0\u90F4\u90F2\u90F3\u90D4\u90EB\u90EC\u90E9\u9156\u9158\u915A\u9153\u9155\u91EC\u91F4\u91F1\u91F3\u91F8\u91E4\u91F9\u91EA\u91EB\u91F7\u91E8\u91EE\u957A\u9586\u9588\u967C\u966D\u966B\u9671\u966F\u96BF\u976A\u9804\u98E5\u9997\u509B\u5095\u5094\u509E\u508B\u50A3\u5083\u508C\u508E\u509D\u5068\u509C\u5092\u5082\u5087\u515F\u51D4\u5312\u5311\u53A4\u53A7\u5591\u55A8\u55A5\u55AD\u5577\u5645\u55A2\u5593\u5588\u558F\u55B5\u5581\u55A3\u5592\u55A4\u557D\u558C\u55A6\u557F\u5595\u55A1\u558E\u570C\u5829\u5837\u5819\u581E\u5827\u5823\u5828\u57F5\u5848\u5825\u581C\u581B\u5833\u583F\u5836\u582E\u5839\u5838\u582D\u582C\u583B\u5961\u5AAF\u5A94\u5A9F\u5A7A\u5AA2\u5A9E\u5A78\u5AA6\u5A7C\u5AA5\u5AAC\u5A95\u5AAE\u5A37\u5A84\u5A8A\u5A97\u5A83\u5A8B\u5AA9\u5A7B\u5A7D\u5A8C\u5A9C\u5A8F\u5A93\u5A9D\u5BEA\u5BCD\u5BCB\u5BD4\u5BD1\u5BCA\u5BCE\u5C0C\u5C30\u5D37\u5D43\u5D6B\u5D41\u5D4B\u5D3F\u5D35\u5D51\u5D4E\u5D55\u5D33\u5D3A\u5D52\u5D3D\u5D31\u5D59\u5D42\u5D39\u5D49\u5D38\u5D3C\u5D32\u5D36\u5D40\u5D45\u5E44\u5E41\u5F58\u5FA6\u5FA5\u5FAB\u60C9\u60B9\u60CC\u60E2\u60CE\u60C4\u6114\u60F2\u610A\u6116\u6105\u60F5\u6113\u60F8\u60FC\u60FE\u60C1\u6103\u6118\u611D\u6110\u60FF\u6104\u610B\u624A\u6394\u63B1\u63B0\u63CE\u63E5\u63E8\u63EF\u63C3\u649D\u63F3\u63CA\u63E0\u63F6\u63D5\u63F2\u63F5\u6461\u63DF\u63BE\u63DD\u63DC\u63C4\u63D8\u63D3\u63C2\u63C7\u63CC\u63CB\u63C8\u63F0\u63D7\u63D9\u6532\u6567\u656A\u6564\u655C\u6568\u6565\u658C\u659D\u659E\u65AE\u65D0\u65D2\u667C\u666C\u667B\u6680\u6671\u6679\u666A\u6672\u6701\u690C\u68D3\u6904\u68DC\u692A\u68EC\u68EA\u68F1\u690F\u68D6\u68F7\u68EB\u68E4\u68F6\u6913\u6910\u68F3\u68E1\u6907\u68CC\u6908\u6970\u68B4\u6911\u68EF\u68C6
\u6914\u68F8\u68D0\u68FD\u68FC\u68E8\u690B\u690A\u6917\u68CE\u68C8\u68DD\u68DE\u68E6\u68F4\u68D1\u6906\u68D4\u68E9\u6915\u6925\u68C7\u6B39\u6B3B\u6B3F\u6B3C\u6B94\u6B97\u6B99\u6B95\u6BBD\u6BF0\u6BF2\u6BF3\u6C30\u6DFC\u6E46\u6E47\u6E1F\u6E49\u6E88\u6E3C\u6E3D\u6E45\u6E62\u6E2B\u6E3F\u6E41\u6E5D\u6E73\u6E1C\u6E33\u6E4B\u6E40\u6E51\u6E3B\u6E03\u6E2E\u6E5E\u6E68\u6E5C\u6E61\u6E31\u6E28\u6E60\u6E71\u6E6B\u6E39\u6E22\u6E30\u6E53\u6E65\u6E27\u6E78\u6E64\u6E77\u6E55\u6E79\u6E52\u6E66\u6E35\u6E36\u6E5A\u7120\u711E\u712F\u70FB\u712E\u7131\u7123\u7125\u7122\u7132\u711F\u7128\u713A\u711B\u724B\u725A\u7288\u7289\u7286\u7285\u728B\u7312\u730B\u7330\u7322\u7331\u7333\u7327\u7332\u732D\u7326\u7323\u7335\u730C\u742E\u742C\u7430\u742B\u7416\u741A\u7421\u742D\u7431\u7424\u7423\u741D\u7429\u7420\u7432\u74FB\u752F\u756F\u756C\u75E7\u75DA\u75E1\u75E6\u75DD\u75DF\u75E4\u75D7\u7695\u7692\u76DA\u7746\u7747\u7744\u774D\u7745\u774A\u774E\u774B\u774C\u77DE\u77EC\u7860\u7864\u7865\u785C\u786D\u7871\u786A\u786E\u7870\u7869\u7868\u785E\u7862\u7974\u7973\u7972\u7970\u7A02\u7A0A\u7A03\u7A0C\u7A04\u7A99\u7AE6\u7AE4\u7B4A\u7B3B\u7B44\u7B48\u7B4C\u7B4E\u7B40\u7B58\u7B45\u7CA2\u7C9E\u7CA8\u7CA1\u7D58\u7D6F\u7D63\u7D53\u7D56\u7D67\u7D6A\u7D4F\u7D6D\u7D5C\u7D6B\u7D52\u7D54\u7D69\u7D51\u7D5F\u7D4E\u7F3E\u7F3F\u7F65\u7F66\u7FA2\u7FA0\u7FA1\u7FD7\u8051\u804F\u8050\u80FE\u80D4\u8143\u814A\u8152\u814F\u8147\u813D\u814D\u813A\u81E6\u81EE\u81F7\u81F8\u81F9\u8204\u823C\u823D\u823F\u8275\u833B\u83CF\u83F9\u8423\u83C0\u83E8\u8412\u83E7\u83E4\u83FC\u83F6\u8410\u83C6\u83C8\u83EB\u83E3\u83BF\u8401\u83DD\u83E5\u83D8\u83FF\u83E1\u83CB\u83CE\u83D6\u83F5\u83C9\u8409\u840F\u83DE\u8411\u8406\u83C2\u83F3\u83D5\u83FA\u83C7\u83D1\u83EA\u8413\u83C3\u83EC\u83EE\u83C4\u83FB\u83D7\u83E2\u841B\u83DB\u83FE\u86D8\u86E2\u86E6\u86D3\u86E3\u86DA\u86EA\u86DD\u86EB\u86DC\u86EC\u86E9\u86D7\u86E8\u86D1\u8848\u8856\u8855\u88BA\u88D7\u88B9\u88B8\u88C0\u88BE\u88B6\u88BC\u88B7\u88BD\u88B2\u8901\u88C9\u8995\u8998\u8997\u89DD\u89DA\u89DB\u8A4E\u
8A4D\u8A39\u8A59\u8A40\u8A57\u8A58\u8A44\u8A45\u8A52\u8A48\u8A51\u8A4A\u8A4C\u8A4F\u8C5F\u8C81\u8C80\u8CBA\u8CBE\u8CB0\u8CB9\u8CB5\u8D84\u8D80\u8D89\u8DD8\u8DD3\u8DCD\u8DC7\u8DD6\u8DDC\u8DCF\u8DD5\u8DD9\u8DC8\u8DD7\u8DC5\u8EEF\u8EF7\u8EFA\u8EF9\u8EE6\u8EEE\u8EE5\u8EF5\u8EE7\u8EE8\u8EF6\u8EEB\u8EF1\u8EEC\u8EF4\u8EE9\u902D\u9034\u902F\u9106\u912C\u9104\u90FF\u90FC\u9108\u90F9\u90FB\u9101\u9100\u9107\u9105\u9103\u9161\u9164\u915F\u9162\u9160\u9201\u920A\u9225\u9203\u921A\u9226\u920F\u920C\u9200\u9212\u91FF\u91FD\u9206\u9204\u9227\u9202\u921C\u9224\u9219\u9217\u9205\u9216\u957B\u958D\u958C\u9590\u9687\u967E\u9688\u9689\u9683\u9680\u96C2\u96C8\u96C3\u96F1\u96F0\u976C\u9770\u976E\u9807\u98A9\u98EB\u9CE6\u9EF9\u4E83\u4E84\u4EB6\u50BD\u50BF\u50C6\u50AE\u50C4\u50CA\u50B4\u50C8\u50C2\u50B0\u50C1\u50BA\u50B1\u50CB\u50C9\u50B6\u50B8\u51D7\u527A\u5278\u527B\u527C\u55C3\u55DB\u55CC\u55D0\u55CB\u55CA\u55DD\u55C0\u55D4\u55C4\u55E9\u55BF\u55D2\u558D\u55CF\u55D5\u55E2\u55D6\u55C8\u55F2\u55CD\u55D9\u55C2\u5714\u5853\u5868\u5864\u584F\u584D\u5849\u586F\u5855\u584E\u585D\u5859\u5865\u585B\u583D\u5863\u5871\u58FC\u5AC7\u5AC4\u5ACB\u5ABA\u5AB8\u5AB1\u5AB5\u5AB0\u5ABF\u5AC8\u5ABB\u5AC6\u5AB7\u5AC0\u5ACA\u5AB4\u5AB6\u5ACD\u5AB9\u5A90\u5BD6\u5BD8\u5BD9\u5C1F\u5C33\u5D71\u5D63\u5D4A\u5D65\u5D72\u5D6C\u5D5E\u5D68\u5D67\u5D62\u5DF0\u5E4F\u5E4E\u5E4A\u5E4D\u5E4B\u5EC5\u5ECC\u5EC6\u5ECB\u5EC7\u5F40\u5FAF\u5FAD\u60F7\u6149\u614A\u612B\u6145\u6136\u6132\u612E\u6146\u612F\u614F\u6129\u6140\u6220\u9168\u6223\u6225\u6224\u63C5\u63F1\u63EB\u6410\u6412\u6409\u6420\u6424\u6433\u6443\u641F\u6415\u6418\u6439\u6437\u6422\u6423\u640C\u6426\u6430\u6428\u6441\u6435\u642F\u640A\u641A\u6440\u6425\u6427\u640B\u63E7\u641B\u642E\u6421\u640E\u656F\u6592\u65D3\u6686\u668C\u6695\u6690\u668B\u668A\u6699\u6694\u6678\u6720\u6966\u695F\u6938\u694E\u6962\u6971\u693F\u6945\u696A\u6939\u6942\u6957\u6959\u697A\u6948\u6949\u6935\u696C\u6933\u693D\u6965\u68F0\u6978\u6934\u6969\u6940\u696F\u6944\u6976\u6958\u6941\u6974\u694C\u69
3B\u694B\u6937\u695C\u694F\u6951\u6932\u6952\u692F\u697B\u693C\u6B46\u6B45\u6B43\u6B42\u6B48\u6B41\u6B9B\uFA0D\u6BFB\u6BFC\u6BF9\u6BF7\u6BF8\u6E9B\u6ED6\u6EC8\u6E8F\u6EC0\u6E9F\u6E93\u6E94\u6EA0\u6EB1\u6EB9\u6EC6\u6ED2\u6EBD\u6EC1\u6E9E\u6EC9\u6EB7\u6EB0\u6ECD\u6EA6\u6ECF\u6EB2\u6EBE\u6EC3\u6EDC\u6ED8\u6E99\u6E92\u6E8E\u6E8D\u6EA4\u6EA1\u6EBF\u6EB3\u6ED0\u6ECA\u6E97\u6EAE\u6EA3\u7147\u7154\u7152\u7163\u7160\u7141\u715D\u7162\u7172\u7178\u716A\u7161\u7142\u7158\u7143\u714B\u7170\u715F\u7150\u7153\u7144\u714D\u715A\u724F\u728D\u728C\u7291\u7290\u728E\u733C\u7342\u733B\u733A\u7340\u734A\u7349\u7444\u744A\u744B\u7452\u7451\u7457\u7440\u744F\u7450\u744E\u7442\u7446\u744D\u7454\u74E1\u74FF\u74FE\u74FD\u751D\u7579\u7577\u6983\u75EF\u760F\u7603\u75F7\u75FE\u75FC\u75F9\u75F8\u7610\u75FB\u75F6\u75ED\u75F5\u75FD\u7699\u76B5\u76DD\u7755\u775F\u7760\u7752\u7756\u775A\u7769\u7767\u7754\u7759\u776D\u77E0\u7887\u789A\u7894\u788F\u7884\u7895\u7885\u7886\u78A1\u7883\u7879\u7899\u7880\u7896\u787B\u797C\u7982\u797D\u7979\u7A11\u7A18\u7A19\u7A12\u7A17\u7A15\u7A22\u7A13\u7A1B\u7A10\u7AA3\u7AA2\u7A9E\u7AEB\u7B66\u7B64\u7B6D\u7B74\u7B69\u7B72\u7B65\u7B73\u7B71\u7B70\u7B61\u7B78\u7B76\u7B63\u7CB2\u7CB4\u7CAF\u7D88\u7D86\u7D80\u7D8D\u7D7F\u7D85\u7D7A\u7D8E\u7D7B\u7D83\u7D7C\u7D8C\u7D94\u7D84\u7D7D\u7D92\u7F6D\u7F6B\u7F67\u7F68\u7F6C\u7FA6\u7FA5\u7FA7\u7FDB\u7FDC\u8021\u8164\u8160\u8177\u815C\u8169\u815B\u8162\u8172\u6721\u815E\u8176\u8167\u816F\u8144\u8161\u821D\u8249\u8244\u8240\u8242\u8245\u84F1\u843F\u8456\u8476\u8479\u848F\u848D\u8465\u8451\u8440\u8486\u8467\u8430\u844D\u847D\u845A\u8459\u8474\u8473\u845D\u8507\u845E\u8437\u843A\u8434\u847A\u8443\u8478\u8432\u8445\u8429\u83D9\u844B\u842F\u8442\u842D\u845F\u8470\u8439\u844E\u844C\u8452\u846F\u84C5\u848E\u843B\u8447\u8436\u8433\u8468\u847E\u8444\u842B\u8460\u8454\u846E\u8450\u870B\u8704\u86F7\u870C\u86FA\u86D6\u86F5\u874D\u86F8\u870E\u8709\u8701\u86F6\u870D\u8705\u88D6\u88CB\u88CD\u88CE\u88DE\u88DB\u88DA\u88CC\u88D0\u8985\u899B\u89DF\u89E5
\u89E4\u89E1\u89E0\u89E2\u89DC\u89E6\u8A76\u8A86\u8A7F\u8A61\u8A3F\u8A77\u8A82\u8A84\u8A75\u8A83\u8A81\u8A74\u8A7A\u8C3C\u8C4B\u8C4A\u8C65\u8C64\u8C66\u8C86\u8C84\u8C85\u8CCC\u8D68\u8D69\u8D91\u8D8C\u8D8E\u8D8F\u8D8D\u8D93\u8D94\u8D90\u8D92\u8DF0\u8DE0\u8DEC\u8DF1\u8DEE\u8DD0\u8DE9\u8DE3\u8DE2\u8DE7\u8DF2\u8DEB\u8DF4\u8F06\u8EFF\u8F01\u8F00\u8F05\u8F07\u8F08\u8F02\u8F0B\u9052\u903F\u9044\u9049\u903D\u9110\u910D\u910F\u9111\u9116\u9114\u910B\u910E\u916E\u916F\u9248\u9252\u9230\u923A\u9266\u9233\u9265\u925E\u9283\u922E\u924A\u9246\u926D\u926C\u924F\u9260\u9267\u926F\u9236\u9261\u9270\u9231\u9254\u9263\u9250\u9272\u924E\u9253\u924C\u9256\u9232\u959F\u959C\u959E\u959B\u9692\u9693\u9691\u9697\u96CE\u96FA\u96FD\u96F8\u96F5\u9773\u9777\u9778\u9772\u980F\u980D\u980E\u98AC\u98F6\u98F9\u99AF\u99B2\u99B0\u99B5\u9AAD\u9AAB\u9B5B\u9CEA\u9CED\u9CE7\u9E80\u9EFD\u50E6\u50D4\u50D7\u50E8\u50F3\u50DB\u50EA\u50DD\u50E4\u50D3\u50EC\u50F0\u50EF\u50E3\u50E0\u51D8\u5280\u5281\u52E9\u52EB\u5330\u53AC\u5627\u5615\u560C\u5612\u55FC\u560F\u561C\u5601\u5613\u5602\u55FA\u561D\u5604\u55FF\u55F9\u5889\u587C\u5890\u5898\u5886\u5881\u587F\u5874\u588B\u587A\u5887\u5891\u588E\u5876\u5882\u5888\u587B\u5894\u588F\u58FE\u596B\u5ADC\u5AEE\u5AE5\u5AD5\u5AEA\u5ADA\u5AED\u5AEB\u5AF3\u5AE2\u5AE0\u5ADB\u5AEC\u5ADE\u5ADD\u5AD9\u5AE8\u5ADF\u5B77\u5BE0\u5BE3\u5C63\u5D82\u5D80\u5D7D\u5D86\u5D7A\u5D81\u5D77\u5D8A\u5D89\u5D88\u5D7E\u5D7C\u5D8D\u5D79\u5D7F\u5E58\u5E59\u5E53\u5ED8\u5ED1\u5ED7\u5ECE\u5EDC\u5ED5\u5ED9\u5ED2\u5ED4\u5F44\u5F43\u5F6F\u5FB6\u612C\u6128\u6141\u615E\u6171\u6173\u6152\u6153\u6172\u616C\u6180\u6174\u6154\u617A\u615B\u6165\u613B\u616A\u6161\u6156\u6229\u6227\u622B\u642B\u644D\u645B\u645D\u6474\u6476\u6472\u6473\u647D\u6475\u6466\u64A6\u644E\u6482\u645E\u645C\u644B\u6453\u6460\u6450\u647F\u643F\u646C\u646B\u6459\u6465\u6477\u6573\u65A0\u66A1\u66A0\u669F\u6705\u6704\u6722\u69B1\u69B6\u69C9\u69A0\u69CE\u6996\u69B0\u69AC\u69BC\u6991\u6999\u698E\u69A7\u698D\u69A9\u69BE\u69AF\u69BF\u69C4\u69BD\u69A4\u
69D4\u69B9\u69CA\u699A\u69CF\u69B3\u6993\u69AA\u69A1\u699E\u69D9\u6997\u6990\u69C2\u69B5\u69A5\u69C6\u6B4A\u6B4D\u6B4B\u6B9E\u6B9F\u6BA0\u6BC3\u6BC4\u6BFE\u6ECE\u6EF5\u6EF1\u6F03\u6F25\u6EF8\u6F37\u6EFB\u6F2E\u6F09\u6F4E\u6F19\u6F1A\u6F27\u6F18\u6F3B\u6F12\u6EED\u6F0A\u6F36\u6F73\u6EF9\u6EEE\u6F2D\u6F40\u6F30\u6F3C\u6F35\u6EEB\u6F07\u6F0E\u6F43\u6F05\u6EFD\u6EF6\u6F39\u6F1C\u6EFC\u6F3A\u6F1F\u6F0D\u6F1E\u6F08\u6F21\u7187\u7190\u7189\u7180\u7185\u7182\u718F\u717B\u7186\u7181\u7197\u7244\u7253\u7297\u7295\u7293\u7343\u734D\u7351\u734C\u7462\u7473\u7471\u7475\u7472\u7467\u746E\u7500\u7502\u7503\u757D\u7590\u7616\u7608\u760C\u7615\u7611\u760A\u7614\u76B8\u7781\u777C\u7785\u7782\u776E\u7780\u776F\u777E\u7783\u78B2\u78AA\u78B4\u78AD\u78A8\u787E\u78AB\u789E\u78A5\u78A0\u78AC\u78A2\u78A4\u7998\u798A\u798B\u7996\u7995\u7994\u7993\u7997\u7988\u7992\u7990\u7A2B\u7A4A\u7A30\u7A2F\u7A28\u7A26\u7AA8\u7AAB\u7AAC\u7AEE\u7B88\u7B9C\u7B8A\u7B91\u7B90\u7B96\u7B8D\u7B8C\u7B9B\u7B8E\u7B85\u7B98\u5284\u7B99\u7BA4\u7B82\u7CBB\u7CBF\u7CBC\u7CBA\u7DA7\u7DB7\u7DC2\u7DA3\u7DAA\u7DC1\u7DC0\u7DC5\u7D9D\u7DCE\u7DC4\u7DC6\u7DCB\u7DCC\u7DAF\u7DB9\u7D96\u7DBC\u7D9F\u7DA6\u7DAE\u7DA9\u7DA1\u7DC9\u7F73\u7FE2\u7FE3\u7FE5\u7FDE\u8024\u805D\u805C\u8189\u8186\u8183\u8187\u818D\u818C\u818B\u8215\u8497\u84A4\u84A1\u849F\u84BA\u84CE\u84C2\u84AC\u84AE\u84AB\u84B9\u84B4\u84C1\u84CD\u84AA\u849A\u84B1\u84D0\u849D\u84A7\u84BB\u84A2\u8494\u84C7\u84CC\u849B\u84A9\u84AF\u84A8\u84D6\u8498\u84B6\u84CF\u84A0\u84D7\u84D4\u84D2\u84DB\u84B0\u8491\u8661\u8733\u8723\u8728\u876B\u8740\u872E\u871E\u8721\u8719\u871B\u8743\u872C\u8741\u873E\u8746\u8720\u8732\u872A\u872D\u873C\u8712\u873A\u8731\u8735\u8742\u8726\u8727\u8738\u8724\u871A\u8730\u8711\u88F7\u88E7\u88F1\u88F2\u88FA\u88FE\u88EE\u88FC\u88F6\u88FB\u88F0\u88EC\u88EB\u899D\u89A1\u899F\u899E\u89E9\u89EB\u89E8\u8AAB\u8A99\u8A8B\u8A92\u8A8F\u8A96\u8C3D\u8C68\u8C69\u8CD5\u8CCF\u8CD7\u8D96\u8E09\u8E02\u8DFF\u8E0D\u8DFD\u8E0A\u8E03\u8E07\u8E06\u8E05\u8DFE\u8E00\u8E04\u8F10\u8F
11\u8F0E\u8F0D\u9123\u911C\u9120\u9122\u911F\u911D\u911A\u9124\u9121\u911B\u917A\u9172\u9179\u9173\u92A5\u92A4\u9276\u929B\u927A\u92A0\u9294\u92AA\u928D\u92A6\u929A\u92AB\u9279\u9297\u927F\u92A3\u92EE\u928E\u9282\u9295\u92A2\u927D\u9288\u92A1\u928A\u9286\u928C\u9299\u92A7\u927E\u9287\u92A9\u929D\u928B\u922D\u969E\u96A1\u96FF\u9758\u977D\u977A\u977E\u9783\u9780\u9782\u977B\u9784\u9781\u977F\u97CE\u97CD\u9816\u98AD\u98AE\u9902\u9900\u9907\u999D\u999C\u99C3\u99B9\u99BB\u99BA\u99C2\u99BD\u99C7\u9AB1\u9AE3\u9AE7\u9B3E\u9B3F\u9B60\u9B61\u9B5F\u9CF1\u9CF2\u9CF5\u9EA7\u50FF\u5103\u5130\u50F8\u5106\u5107\u50F6\u50FE\u510B\u510C\u50FD\u510A\u528B\u528C\u52F1\u52EF\u5648\u5642\u564C\u5635\u5641\u564A\u5649\u5646\u5658\u565A\u5640\u5633\u563D\u562C\u563E\u5638\u562A\u563A\u571A\u58AB\u589D\u58B1\u58A0\u58A3\u58AF\u58AC\u58A5\u58A1\u58FF\u5AFF\u5AF4\u5AFD\u5AF7\u5AF6\u5B03\u5AF8\u5B02\u5AF9\u5B01\u5B07\u5B05\u5B0F\u5C67\u5D99\u5D97\u5D9F\u5D92\u5DA2\u5D93\u5D95\u5DA0\u5D9C\u5DA1\u5D9A\u5D9E\u5E69\u5E5D\u5E60\u5E5C\u7DF3\u5EDB\u5EDE\u5EE1\u5F49\u5FB2\u618B\u6183\u6179\u61B1\u61B0\u61A2\u6189\u619B\u6193\u61AF\u61AD\u619F\u6192\u61AA\u61A1\u618D\u6166\u61B3\u622D\u646E\u6470\u6496\u64A0\u6485\u6497\u649C\u648F\u648B\u648A\u648C\u64A3\u649F\u6468\u64B1\u6498\u6576\u657A\u6579\u657B\u65B2\u65B3\u66B5\u66B0\u66A9\u66B2\u66B7\u66AA\u66AF\u6A00\u6A06\u6A17\u69E5\u69F8\u6A15\u69F1\u69E4\u6A20\u69FF\u69EC\u69E2\u6A1B\u6A1D\u69FE\u6A27\u69F2\u69EE\u6A14\u69F7\u69E7\u6A40\u6A08\u69E6\u69FB\u6A0D\u69FC\u69EB\u6A09\u6A04\u6A18\u6A25\u6A0F\u69F6\u6A26\u6A07\u69F4\u6A16\u6B51\u6BA5\u6BA3\u6BA2\u6BA6\u6C01\u6C00\u6BFF\u6C02\u6F41\u6F26\u6F7E\u6F87\u6FC6\u6F92\u6F8D\u6F89\u6F8C\u6F62\u6F4F\u6F85\u6F5A\u6F96\u6F76\u6F6C\u6F82\u6F55\u6F72\u6F52\u6F50\u6F57\u6F94\u6F93\u6F5D\u6F00\u6F61\u6F6B\u6F7D\u6F67\u6F90\u6F53\u6F8B\u6F69\u6F7F\u6F95\u6F63\u6F77\u6F6A\u6F7B\u71B2\u71AF\u719B\u71B0\u71A0\u719A\u71A9\u71B5\u719D\u71A5\u719E\u71A4\u71A1\u71AA\u719C\u71A7\u71B3\u7298\u729A\u7358\u7352\u735E\u735F
\u7360\u735D\u735B\u7361\u735A\u7359\u7362\u7487\u7489\u748A\u7486\u7481\u747D\u7485\u7488\u747C\u7479\u7508\u7507\u757E\u7625\u761E\u7619\u761D\u761C\u7623\u761A\u7628\u761B\u769C\u769D\u769E\u769B\u778D\u778F\u7789\u7788\u78CD\u78BB\u78CF\u78CC\u78D1\u78CE\u78D4\u78C8\u78C3\u78C4\u78C9\u799A\u79A1\u79A0\u799C\u79A2\u799B\u6B76\u7A39\u7AB2\u7AB4\u7AB3\u7BB7\u7BCB\u7BBE\u7BAC\u7BCE\u7BAF\u7BB9\u7BCA\u7BB5\u7CC5\u7CC8\u7CCC\u7CCB\u7DF7\u7DDB\u7DEA\u7DE7\u7DD7\u7DE1\u7E03\u7DFA\u7DE6\u7DF6\u7DF1\u7DF0\u7DEE\u7DDF\u7F76\u7FAC\u7FB0\u7FAD\u7FED\u7FEB\u7FEA\u7FEC\u7FE6\u7FE8\u8064\u8067\u81A3\u819F\u819E\u8195\u81A2\u8199\u8197\u8216\u824F\u8253\u8252\u8250\u824E\u8251\u8524\u853B\u850F\u8500\u8529\u850E\u8509\u850D\u851F\u850A\u8527\u851C\u84FB\u852B\u84FA\u8508\u850C\u84F4\u852A\u84F2\u8515\u84F7\u84EB\u84F3\u84FC\u8512\u84EA\u84E9\u8516\u84FE\u8528\u851D\u852E\u8502\u84FD\u851E\u84F6\u8531\u8526\u84E7\u84E8\u84F0\u84EF\u84F9\u8518\u8520\u8530\u850B\u8519\u852F\u8662\u8756\u8763\u8764\u8777\u87E1\u8773\u8758\u8754\u875B\u8752\u8761\u875A\u8751\u875E\u876D\u876A\u8750\u874E\u875F\u875D\u876F\u876C\u877A\u876E\u875C\u8765\u874F\u877B\u8775\u8762\u8767\u8769\u885A\u8905\u890C\u8914\u890B\u8917\u8918\u8919\u8906\u8916\u8911\u890E\u8909\u89A2\u89A4\u89A3\u89ED\u89F0\u89EC\u8ACF\u8AC6\u8AB8\u8AD3\u8AD1\u8AD4\u8AD5\u8ABB\u8AD7\u8ABE\u8AC0\u8AC5\u8AD8\u8AC3\u8ABA\u8ABD\u8AD9\u8C3E\u8C4D\u8C8F\u8CE5\u8CDF\u8CD9\u8CE8\u8CDA\u8CDD\u8CE7\u8DA0\u8D9C\u8DA1\u8D9B\u8E20\u8E23\u8E25\u8E24\u8E2E\u8E15\u8E1B\u8E16\u8E11\u8E19\u8E26\u8E27\u8E14\u8E12\u8E18\u8E13\u8E1C\u8E17\u8E1A\u8F2C\u8F24\u8F18\u8F1A\u8F20\u8F23\u8F16\u8F17\u9073\u9070\u906F\u9067\u906B\u912F\u912B\u9129\u912A\u9132\u9126\u912E\u9185\u9186\u918A\u9181\u9182\u9184\u9180\u92D0\u92C3\u92C4\u92C0\u92D9\u92B6\u92CF\u92F1\u92DF\u92D8\u92E9\u92D7\u92DD\u92CC\u92EF\u92C2\u92E8\u92CA\u92C8\u92CE\u92E6\u92CD\u92D5\u92C9\u92E0\u92DE\u92E7\u92D1\u92D3\u92B5\u92E1\u92C6\u92B4\u957C\u95AC\u95AB\u95AE\u95B0\u96A4\u96A2\u96D3\u9705\u
9708\u9702\u975A\u978A\u978E\u9788\u97D0\u97CF\u981E\u981D\u9826\u9829\u9828\u9820\u981B\u9827\u98B2\u9908\u98FA\u9911\u9914\u9916\u9917\u9915\u99DC\u99CD\u99CF\u99D3\u99D4\u99CE\u99C9\u99D6\u99D8\u99CB\u99D7\u99CC\u9AB3\u9AEC\u9AEB\u9AF3\u9AF2\u9AF1\u9B46\u9B43\u9B67\u9B74\u9B71\u9B66\u9B76\u9B75\u9B70\u9B68\u9B64\u9B6C\u9CFC\u9CFA\u9CFD\u9CFF\u9CF7\u9D07\u9D00\u9CF9\u9CFB\u9D08\u9D05\u9D04\u9E83\u9ED3\u9F0F\u9F10\u511C\u5113\u5117\u511A\u5111\u51DE\u5334\u53E1\u5670\u5660\u566E\u5673\u5666\u5663\u566D\u5672\u565E\u5677\u571C\u571B\u58C8\u58BD\u58C9\u58BF\u58BA\u58C2\u58BC\u58C6\u5B17\u5B19\u5B1B\u5B21\u5B14\u5B13\u5B10\u5B16\u5B28\u5B1A\u5B20\u5B1E\u5BEF\u5DAC\u5DB1\u5DA9\u5DA7\u5DB5\u5DB0\u5DAE\u5DAA\u5DA8\u5DB2\u5DAD\u5DAF\u5DB4\u5E67\u5E68\u5E66\u5E6F\u5EE9\u5EE7\u5EE6\u5EE8\u5EE5\u5F4B\u5FBC\u619D\u61A8\u6196\u61C5\u61B4\u61C6\u61C1\u61CC\u61BA\u61BF\u61B8\u618C\u64D7\u64D6\u64D0\u64CF\u64C9\u64BD\u6489\u64C3\u64DB\u64F3\u64D9\u6533\u657F\u657C\u65A2\u66C8\u66BE\u66C0\u66CA\u66CB\u66CF\u66BD\u66BB\u66BA\u66CC\u6723\u6A34\u6A66\u6A49\u6A67\u6A32\u6A68\u6A3E\u6A5D\u6A6D\u6A76\u6A5B\u6A51\u6A28\u6A5A\u6A3B\u6A3F\u6A41\u6A6A\u6A64\u6A50\u6A4F\u6A54\u6A6F\u6A69\u6A60\u6A3C\u6A5E\u6A56\u6A55\u6A4D\u6A4E\u6A46\u6B55\u6B54\u6B56\u6BA7\u6BAA\u6BAB\u6BC8\u6BC7\u6C04\u6C03\u6C06\u6FAD\u6FCB\u6FA3\u6FC7\u6FBC\u6FCE\u6FC8\u6F5E\u6FC4\u6FBD\u6F9E\u6FCA\u6FA8\u7004\u6FA5\u6FAE\u6FBA\u6FAC\u6FAA\u6FCF\u6FBF\u6FB8\u6FA2\u6FC9\u6FAB\u6FCD\u6FAF\u6FB2\u6FB0\u71C5\u71C2\u71BF\u71B8\u71D6\u71C0\u71C1\u71CB\u71D4\u71CA\u71C7\u71CF\u71BD\u71D8\u71BC\u71C6\u71DA\u71DB\u729D\u729E\u7369\u7366\u7367\u736C\u7365\u736B\u736A\u747F\u749A\u74A0\u7494\u7492\u7495\u74A1\u750B\u7580\u762F\u762D\u7631\u763D\u7633\u763C\u7635\u7632\u7630\u76BB\u76E6\u779A\u779D\u77A1\u779C\u779B\u77A2\u77A3\u7795\u7799\u7797\u78DD\u78E9\u78E5\u78EA\u78DE\u78E3\u78DB\u78E1\u78E2\u78ED\u78DF\u78E0\u79A4\u7A44\u7A48\u7A47\u7AB6\u7AB8\u7AB5\u7AB1\u7AB7\u7BDE\u7BE3\u7BE7\u7BDD\u7BD5\u7BE5\u7BDA\u7BE8\u7BF9\u7BD4\u7B
EA\u7BE2\u7BDC\u7BEB\u7BD8\u7BDF\u7CD2\u7CD4\u7CD7\u7CD0\u7CD1\u7E12\u7E21\u7E17\u7E0C\u7E1F\u7E20\u7E13\u7E0E\u7E1C\u7E15\u7E1A\u7E22\u7E0B\u7E0F\u7E16\u7E0D\u7E14\u7E25\u7E24\u7F43\u7F7B\u7F7C\u7F7A\u7FB1\u7FEF\u802A\u8029\u806C\u81B1\u81A6\u81AE\u81B9\u81B5\u81AB\u81B0\u81AC\u81B4\u81B2\u81B7\u81A7\u81F2\u8255\u8256\u8257\u8556\u8545\u856B\u854D\u8553\u8561\u8558\u8540\u8546\u8564\u8541\u8562\u8544\u8551\u8547\u8563\u853E\u855B\u8571\u854E\u856E\u8575\u8555\u8567\u8560\u858C\u8566\u855D\u8554\u8565\u856C\u8663\u8665\u8664\u879B\u878F\u8797\u8793\u8792\u8788\u8781\u8796\u8798\u8779\u8787\u87A3\u8785\u8790\u8791\u879D\u8784\u8794\u879C\u879A\u8789\u891E\u8926\u8930\u892D\u892E\u8927\u8931\u8922\u8929\u8923\u892F\u892C\u891F\u89F1\u8AE0\u8AE2\u8AF2\u8AF4\u8AF5\u8ADD\u8B14\u8AE4\u8ADF\u8AF0\u8AC8\u8ADE\u8AE1\u8AE8\u8AFF\u8AEF\u8AFB\u8C91\u8C92\u8C90\u8CF5\u8CEE\u8CF1\u8CF0\u8CF3\u8D6C\u8D6E\u8DA5\u8DA7\u8E33\u8E3E\u8E38\u8E40\u8E45\u8E36\u8E3C\u8E3D\u8E41\u8E30\u8E3F\u8EBD\u8F36\u8F2E\u8F35\u8F32\u8F39\u8F37\u8F34\u9076\u9079\u907B\u9086\u90FA\u9133\u9135\u9136\u9193\u9190\u9191\u918D\u918F\u9327\u931E\u9308\u931F\u9306\u930F\u937A\u9338\u933C\u931B\u9323\u9312\u9301\u9346\u932D\u930E\u930D\u92CB\u931D\u92FA\u9325\u9313\u92F9\u92F7\u9334\u9302\u9324\u92FF\u9329\u9339\u9335\u932A\u9314\u930C\u930B\u92FE\u9309\u9300\u92FB\u9316\u95BC\u95CD\u95BE\u95B9\u95BA\u95B6\u95BF\u95B5\u95BD\u96A9\u96D4\u970B\u9712\u9710\u9799\u9797\u9794\u97F0\u97F8\u9835\u982F\u9832\u9924\u991F\u9927\u9929\u999E\u99EE\u99EC\u99E5\u99E4\u99F0\u99E3\u99EA\u99E9\u99E7\u9AB9\u9ABF\u9AB4\u9ABB\u9AF6\u9AFA\u9AF9\u9AF7\u9B33\u9B80\u9B85\u9B87\u9B7C\u9B7E\u9B7B\u9B82\u9B93\u9B92\u9B90\u9B7A\u9B95\u9B7D\u9B88\u9D25\u9D17\u9D20\u9D1E\u9D14\u9D29\u9D1D\u9D18\u9D22\u9D10\u9D19\u9D1F\u9E88\u9E86\u9E87\u9EAE\u9EAD\u9ED5\u9ED6\u9EFA\u9F12\u9F3D\u5126\u5125\u5122\u5124\u5120\u5129\u52F4\u5693\u568C\u568D\u5686\u5684\u5683\u567E\u5682\u567F\u5681\u58D6\u58D4\u58CF\u58D2\u5B2D\u5B25\u5B32\u5B23\u5B2C\u5B27\u5B26
\u5B2F\u5B2E\u5B7B\u5BF1\u5BF2\u5DB7\u5E6C\u5E6A\u5FBE\u5FBB\u61C3\u61B5\u61BC\u61E7\u61E0\u61E5\u61E4\u61E8\u61DE\u64EF\u64E9\u64E3\u64EB\u64E4\u64E8\u6581\u6580\u65B6\u65DA\u66D2\u6A8D\u6A96\u6A81\u6AA5\u6A89\u6A9F\u6A9B\u6AA1\u6A9E\u6A87\u6A93\u6A8E\u6A95\u6A83\u6AA8\u6AA4\u6A91\u6A7F\u6AA6\u6A9A\u6A85\u6A8C\u6A92\u6B5B\u6BAD\u6C09\u6FCC\u6FA9\u6FF4\u6FD4\u6FE3\u6FDC\u6FED\u6FE7\u6FE6\u6FDE\u6FF2\u6FDD\u6FE2\u6FE8\u71E1\u71F1\u71E8\u71F2\u71E4\u71F0\u71E2\u7373\u736E\u736F\u7497\u74B2\u74AB\u7490\u74AA\u74AD\u74B1\u74A5\u74AF\u7510\u7511\u7512\u750F\u7584\u7643\u7648\u7649\u7647\u76A4\u76E9\u77B5\u77AB\u77B2\u77B7\u77B6\u77B4\u77B1\u77A8\u77F0\u78F3\u78FD\u7902\u78FB\u78FC\u78F2\u7905\u78F9\u78FE\u7904\u79AB\u79A8\u7A5C\u7A5B\u7A56\u7A58\u7A54\u7A5A\u7ABE\u7AC0\u7AC1\u7C05\u7C0F\u7BF2\u7C00\u7BFF\u7BFB\u7C0E\u7BF4\u7C0B\u7BF3\u7C02\u7C09\u7C03\u7C01\u7BF8\u7BFD\u7C06\u7BF0\u7BF1\u7C10\u7C0A\u7CE8\u7E2D\u7E3C\u7E42\u7E33\u9848\u7E38\u7E2A\u7E49\u7E40\u7E47\u7E29\u7E4C\u7E30\u7E3B\u7E36\u7E44\u7E3A\u7F45\u7F7F\u7F7E\u7F7D\u7FF4\u7FF2\u802C\u81BB\u81C4\u81CC\u81CA\u81C5\u81C7\u81BC\u81E9\u825B\u825A\u825C\u8583\u8580\u858F\u85A7\u8595\u85A0\u858B\u85A3\u857B\u85A4\u859A\u859E\u8577\u857C\u8589\u85A1\u857A\u8578\u8557\u858E\u8596\u8586\u858D\u8599\u859D\u8581\u85A2\u8582\u8588\u8585\u8579\u8576\u8598\u8590\u859F\u8668\u87BE\u87AA\u87AD\u87C5\u87B0\u87AC\u87B9\u87B5\u87BC\u87AE\u87C9\u87C3\u87C2\u87CC\u87B7\u87AF\u87C4\u87CA\u87B4\u87B6\u87BF\u87B8\u87BD\u87DE\u87B2\u8935\u8933\u893C\u893E\u8941\u8952\u8937\u8942\u89AD\u89AF\u89AE\u89F2\u89F3\u8B1E\u8B18\u8B16\u8B11\u8B05\u8B0B\u8B22\u8B0F\u8B12\u8B15\u8B07\u8B0D\u8B08\u8B06\u8B1C\u8B13\u8B1A\u8C4F\u8C70\u8C72\u8C71\u8C6F\u8C95\u8C94\u8CF9\u8D6F\u8E4E\u8E4D\u8E53\u8E50\u8E4C\u8E47\u8F43\u8F40\u9085\u907E\u9138\u919A\u91A2\u919B\u9199\u919F\u91A1\u919D\u91A0\u93A1\u9383\u93AF\u9364\u9356\u9347\u937C\u9358\u935C\u9376\u9349\u9350\u9351\u9360\u936D\u938F\u934C\u936A\u9379\u9357\u9355\u9352\u934F\u9371\u9377\u937B\u9361\u
935E\u9363\u9367\u9380\u934E\u9359\u95C7\u95C0\u95C9\u95C3\u95C5\u95B7\u96AE\u96B0\u96AC\u9720\u971F\u9718\u971D\u9719\u979A\u97A1\u979C\u979E\u979D\u97D5\u97D4\u97F1\u9841\u9844\u984A\u9849\u9845\u9843\u9925\u992B\u992C\u992A\u9933\u9932\u992F\u992D\u9931\u9930\u9998\u99A3\u99A1\u9A02\u99FA\u99F4\u99F7\u99F9\u99F8\u99F6\u99FB\u99FD\u99FE\u99FC\u9A03\u9ABE\u9AFE\u9AFD\u9B01\u9AFC\u9B48\u9B9A\u9BA8\u9B9E\u9B9B\u9BA6\u9BA1\u9BA5\u9BA4\u9B86\u9BA2\u9BA0\u9BAF\u9D33\u9D41\u9D67\u9D36\u9D2E\u9D2F\u9D31\u9D38\u9D30\u9D45\u9D42\u9D43\u9D3E\u9D37\u9D40\u9D3D\u7FF5\u9D2D\u9E8A\u9E89\u9E8D\u9EB0\u9EC8\u9EDA\u9EFB\u9EFF\u9F24\u9F23\u9F22\u9F54\u9FA0\u5131\u512D\u512E\u5698\u569C\u5697\u569A\u569D\u5699\u5970\u5B3C\u5C69\u5C6A\u5DC0\u5E6D\u5E6E\u61D8\u61DF\u61ED\u61EE\u61F1\u61EA\u61F0\u61EB\u61D6\u61E9\u64FF\u6504\u64FD\u64F8\u6501\u6503\u64FC\u6594\u65DB\u66DA\u66DB\u66D8\u6AC5\u6AB9\u6ABD\u6AE1\u6AC6\u6ABA\u6AB6\u6AB7\u6AC7\u6AB4\u6AAD\u6B5E\u6BC9\u6C0B\u7007\u700C\u700D\u7001\u7005\u7014\u700E\u6FFF\u7000\u6FFB\u7026\u6FFC\u6FF7\u700A\u7201\u71FF\u71F9\u7203\u71FD\u7376\u74B8\u74C0\u74B5\u74C1\u74BE\u74B6\u74BB\u74C2\u7514\u7513\u765C\u7664\u7659\u7650\u7653\u7657\u765A\u76A6\u76BD\u76EC\u77C2\u77BA\u78FF\u790C\u7913\u7914\u7909\u7910\u7912\u7911\u79AD\u79AC\u7A5F\u7C1C\u7C29\u7C19\u7C20\u7C1F\u7C2D\u7C1D\u7C26\u7C28\u7C22\u7C25\u7C30\u7E5C\u7E50\u7E56\u7E63\u7E58\u7E62\u7E5F\u7E51\u7E60\u7E57\u7E53\u7FB5\u7FB3\u7FF7\u7FF8\u8075\u81D1\u81D2\u81D0\u825F\u825E\u85B4\u85C6\u85C0\u85C3\u85C2\u85B3\u85B5\u85BD\u85C7\u85C4\u85BF\u85CB\u85CE\u85C8\u85C5\u85B1\u85B6\u85D2\u8624\u85B8\u85B7\u85BE\u8669\u87E7\u87E6\u87E2\u87DB\u87EB\u87EA\u87E5\u87DF\u87F3\u87E4\u87D4\u87DC\u87D3\u87ED\u87D8\u87E3\u87A4\u87D7\u87D9\u8801\u87F4\u87E8\u87DD\u8953\u894B\u894F\u894C\u8946\u8950\u8951\u8949\u8B2A\u8B27\u8B23\u8B33\u8B30\u8B35\u8B47\u8B2F\u8B3C\u8B3E\u8B31\u8B25\u8B37\u8B26\u8B36\u8B2E\u8B24\u8B3B\u8B3D\u8B3A\u8C42\u8C75\u8C99\u8C98\u8C97\u8CFE\u8D04\u8D02\u8D00\u8E5C\u8E62\u8E60\u8E57\u8E
56\u8E5E\u8E65\u8E67\u8E5B\u8E5A\u8E61\u8E5D\u8E69\u8E54\u8F46\u8F47\u8F48\u8F4B\u9128\u913A\u913B\u913E\u91A8\u91A5\u91A7\u91AF\u91AA\u93B5\u938C\u9392\u93B7\u939B\u939D\u9389\u93A7\u938E\u93AA\u939E\u93A6\u9395\u9388\u9399\u939F\u938D\u93B1\u9391\u93B2\u93A4\u93A8\u93B4\u93A3\u93A5\u95D2\u95D3\u95D1\u96B3\u96D7\u96DA\u5DC2\u96DF\u96D8\u96DD\u9723\u9722\u9725\u97AC\u97AE\u97A8\u97AB\u97A4\u97AA\u97A2\u97A5\u97D7\u97D9\u97D6\u97D8\u97FA\u9850\u9851\u9852\u98B8\u9941\u993C\u993A\u9A0F\u9A0B\u9A09\u9A0D\u9A04\u9A11\u9A0A\u9A05\u9A07\u9A06\u9AC0\u9ADC\u9B08\u9B04\u9B05\u9B29\u9B35\u9B4A\u9B4C\u9B4B\u9BC7\u9BC6\u9BC3\u9BBF\u9BC1\u9BB5\u9BB8\u9BD3\u9BB6\u9BC4\u9BB9\u9BBD\u9D5C\u9D53\u9D4F\u9D4A\u9D5B\u9D4B\u9D59\u9D56\u9D4C\u9D57\u9D52\u9D54\u9D5F\u9D58\u9D5A\u9E8E\u9E8C\u9EDF\u9F01\u9F00\u9F16\u9F25\u9F2B\u9F2A\u9F29\u9F28\u9F4C\u9F55\u5134\u5135\u5296\u52F7\u53B4\u56AB\u56AD\u56A6\u56A7\u56AA\u56AC\u58DA\u58DD\u58DB\u5912\u5B3D\u5B3E\u5B3F\u5DC3\u5E70\u5FBF\u61FB\u6507\u6510\u650D\u6509\u650C\u650E\u6584\u65DE\u65DD\u66DE\u6AE7\u6AE0\u6ACC\u6AD1\u6AD9\u6ACB\u6ADF\u6ADC\u6AD0\u6AEB\u6ACF\u6ACD\u6ADE\u6B60\u6BB0\u6C0C\u7019\u7027\u7020\u7016\u702B\u7021\u7022\u7023\u7029\u7017\u7024\u701C\u702A\u720C\u720A\u7207\u7202\u7205\u72A5\u72A6\u72A4\u72A3\u72A1\u74CB\u74C5\u74B7\u74C3\u7516\u7660\u77C9\u77CA\u77C4\u77F1\u791D\u791B\u7921\u791C\u7917\u791E\u79B0\u7A67\u7A68\u7C33\u7C3C\u7C39\u7C2C\u7C3B\u7CEC\u7CEA\u7E76\u7E75\u7E78\u7E70\u7E77\u7E6F\u7E7A\u7E72\u7E74\u7E68\u7F4B\u7F4A\u7F83\u7F86\u7FB7\u7FFD\u7FFE\u8078\u81D7\u81D5\u8264\u8261\u8263\u85EB\u85F1\u85ED\u85D9\u85E1\u85E8\u85DA\u85D7\u85EC\u85F2\u85F8\u85D8\u85DF\u85E3\u85DC\u85D1\u85F0\u85E6\u85EF\u85DE\u85E2\u8800\u87FA\u8803\u87F6\u87F7\u8809\u880C\u880B\u8806\u87FC\u8808\u87FF\u880A\u8802\u8962\u895A\u895B\u8957\u8961\u895C\u8958\u895D\u8959\u8988\u89B7\u89B6\u89F6\u8B50\u8B48\u8B4A\u8B40\u8B53\u8B56\u8B54\u8B4B\u8B55\u8B51\u8B42\u8B52\u8B57\u8C43\u8C77\u8C76\u8C9A\u8D06\u8D07\u8D09\u8DAC\u8DAA\u8DAD\u8DAB\u8E6D
\u8E78\u8E73\u8E6A\u8E6F\u8E7B\u8EC2\u8F52\u8F51\u8F4F\u8F50\u8F53\u8FB4\u9140\u913F\u91B0\u91AD\u93DE\u93C7\u93CF\u93C2\u93DA\u93D0\u93F9\u93EC\u93CC\u93D9\u93A9\u93E6\u93CA\u93D4\u93EE\u93E3\u93D5\u93C4\u93CE\u93C0\u93D2\u93E7\u957D\u95DA\u95DB\u96E1\u9729\u972B\u972C\u9728\u9726\u97B3\u97B7\u97B6\u97DD\u97DE\u97DF\u985C\u9859\u985D\u9857\u98BF\u98BD\u98BB\u98BE\u9948\u9947\u9943\u99A6\u99A7\u9A1A\u9A15\u9A25\u9A1D\u9A24\u9A1B\u9A22\u9A20\u9A27\u9A23\u9A1E\u9A1C\u9A14\u9AC2\u9B0B\u9B0A\u9B0E\u9B0C\u9B37\u9BEA\u9BEB\u9BE0\u9BDE\u9BE4\u9BE6\u9BE2\u9BF0\u9BD4\u9BD7\u9BEC\u9BDC\u9BD9\u9BE5\u9BD5\u9BE1\u9BDA\u9D77\u9D81\u9D8A\u9D84\u9D88\u9D71\u9D80\u9D78\u9D86\u9D8B\u9D8C\u9D7D\u9D6B\u9D74\u9D75\u9D70\u9D69\u9D85\u9D73\u9D7B\u9D82\u9D6F\u9D79\u9D7F\u9D87\u9D68\u9E94\u9E91\u9EC0\u9EFC\u9F2D\u9F40\u9F41\u9F4D\u9F56\u9F57\u9F58\u5337\u56B2\u56B5\u56B3\u58E3\u5B45\u5DC6\u5DC7\u5EEE\u5EEF\u5FC0\u5FC1\u61F9\u6517\u6516\u6515\u6513\u65DF\u66E8\u66E3\u66E4\u6AF3\u6AF0\u6AEA\u6AE8\u6AF9\u6AF1\u6AEE\u6AEF\u703C\u7035\u702F\u7037\u7034\u7031\u7042\u7038\u703F\u703A\u7039\u7040\u703B\u7033\u7041\u7213\u7214\u72A8\u737D\u737C\u74BA\u76AB\u76AA\u76BE\u76ED\u77CC\u77CE\u77CF\u77CD\u77F2\u7925\u7923\u7927\u7928\u7924\u7929\u79B2\u7A6E\u7A6C\u7A6D\u7AF7\u7C49\u7C48\u7C4A\u7C47\u7C45\u7CEE\u7E7B\u7E7E\u7E81\u7E80\u7FBA\u7FFF\u8079\u81DB\u81D9\u820B\u8268\u8269\u8622\u85FF\u8601\u85FE\u861B\u8600\u85F6\u8604\u8609\u8605\u860C\u85FD\u8819\u8810\u8811\u8817\u8813\u8816\u8963\u8966\u89B9\u89F7\u8B60\u8B6A\u8B5D\u8B68\u8B63\u8B65\u8B67\u8B6D\u8DAE\u8E86\u8E88\u8E84\u8F59\u8F56\u8F57\u8F55\u8F58\u8F5A\u908D\u9143\u9141\u91B7\u91B5\u91B2\u91B3\u940B\u9413\u93FB\u9420\u940F\u9414\u93FE\u9415\u9410\u9428\u9419\u940D\u93F5\u9400\u93F7\u9407\u940E\u9416\u9412\u93FA\u9409\u93F8\u940A\u93FF\u93FC\u940C\u93F6\u9411\u9406\u95DE\u95E0\u95DF\u972E\u972F\u97B9\u97BB\u97FD\u97FE\u9860\u9862\u9863\u985F\u98C1\u98C2\u9950\u994E\u9959\u994C\u994B\u9953\u9A32\u9A34\u9A31\u9A2C\u9A2A\u9A36\u9A29\u9A2E\u9A38\u
9A2D\u9AC7\u9ACA\u9AC6\u9B10\u9B12\u9B11\u9C0B\u9C08\u9BF7\u9C05\u9C12\u9BF8\u9C40\u9C07\u9C0E\u9C06\u9C17\u9C14\u9C09\u9D9F\u9D99\u9DA4\u9D9D\u9D92\u9D98\u9D90\u9D9B\u9DA0\u9D94\u9D9C\u9DAA\u9D97\u9DA1\u9D9A\u9DA2\u9DA8\u9D9E\u9DA3\u9DBF\u9DA9\u9D96\u9DA6\u9DA7\u9E99\u9E9B\u9E9A\u9EE5\u9EE4\u9EE7\u9EE6\u9F30\u9F2E\u9F5B\u9F60\u9F5E\u9F5D\u9F59\u9F91\u513A\u5139\u5298\u5297\u56C3\u56BD\u56BE\u5B48\u5B47\u5DCB\u5DCF\u5EF1\u61FD\u651B\u6B02\u6AFC\u6B03\u6AF8\u6B00\u7043\u7044\u704A\u7048\u7049\u7045\u7046\u721D\u721A\u7219\u737E\u7517\u766A\u77D0\u792D\u7931\u792F\u7C54\u7C53\u7CF2\u7E8A\u7E87\u7E88\u7E8B\u7E86\u7E8D\u7F4D\u7FBB\u8030\u81DD\u8618\u862A\u8626\u861F\u8623\u861C\u8619\u8627\u862E\u8621\u8620\u8629\u861E\u8625\u8829\u881D\u881B\u8820\u8824\u881C\u882B\u884A\u896D\u8969\u896E\u896B\u89FA\u8B79\u8B78\u8B45\u8B7A\u8B7B\u8D10\u8D14\u8DAF\u8E8E\u8E8C\u8F5E\u8F5B\u8F5D\u9146\u9144\u9145\u91B9\u943F\u943B\u9436\u9429\u943D\u943C\u9430\u9439\u942A\u9437\u942C\u9440\u9431\u95E5\u95E4\u95E3\u9735\u973A\u97BF\u97E1\u9864\u98C9\u98C6\u98C0\u9958\u9956\u9A39\u9A3D\u9A46\u9A44\u9A42\u9A41\u9A3A\u9A3F\u9ACD\u9B15\u9B17\u9B18\u9B16\u9B3A\u9B52\u9C2B\u9C1D\u9C1C\u9C2C\u9C23\u9C28\u9C29\u9C24\u9C21\u9DB7\u9DB6\u9DBC\u9DC1\u9DC7\u9DCA\u9DCF\u9DBE\u9DC5\u9DC3\u9DBB\u9DB5\u9DCE\u9DB9\u9DBA\u9DAC\u9DC8\u9DB1\u9DAD\u9DCC\u9DB3\u9DCD\u9DB2\u9E7A\u9E9C\u9EEB\u9EEE\u9EED\u9F1B\u9F18\u9F1A\u9F31\u9F4E\u9F65\u9F64\u9F92\u4EB9\u56C6\u56C5\u56CB\u5971\u5B4B\u5B4C\u5DD5\u5DD1\u5EF2\u6521\u6520\u6526\u6522\u6B0B\u6B08\u6B09\u6C0D\u7055\u7056\u7057\u7052\u721E\u721F\u72A9\u737F\u74D8\u74D5\u74D9\u74D7\u766D\u76AD\u7935\u79B4\u7A70\u7A71\u7C57\u7C5C\u7C59\u7C5B\u7C5A\u7CF4\u7CF1\u7E91\u7F4F\u7F87\u81DE\u826B\u8634\u8635\u8633\u862C\u8632\u8636\u882C\u8828\u8826\u882A\u8825\u8971\u89BF\u89BE\u89FB\u8B7E\u8B84\u8B82\u8B86\u8B85\u8B7F\u8D15\u8E95\u8E94\u8E9A\u8E92\u8E90\u8E96\u8E97\u8F60\u8F62\u9147\u944C\u9450\u944A\u944B\u944F\u9447\u9445\u9448\u9449\u9446\u973F\u97E3\u986A\u9869\u98CB\u99
54\u995B\u9A4E\u9A53\u9A54\u9A4C\u9A4F\u9A48\u9A4A\u9A49\u9A52\u9A50\u9AD0\u9B19\u9B2B\u9B3B\u9B56\u9B55\u9C46\u9C48\u9C3F\u9C44\u9C39\u9C33\u9C41\u9C3C\u9C37\u9C34\u9C32\u9C3D\u9C36\u9DDB\u9DD2\u9DDE\u9DDA\u9DCB\u9DD0\u9DDC\u9DD1\u9DDF\u9DE9\u9DD9\u9DD8\u9DD6\u9DF5\u9DD5\u9DDD\u9EB6\u9EF0\u9F35\u9F33\u9F32\u9F42\u9F6B\u9F95\u9FA2\u513D\u5299\u58E8\u58E7\u5972\u5B4D\u5DD8\u882F\u5F4F\u6201\u6203\u6204\u6529\u6525\u6596\u66EB\u6B11\u6B12\u6B0F\u6BCA\u705B\u705A\u7222\u7382\u7381\u7383\u7670\u77D4\u7C67\u7C66\u7E95\u826C\u863A\u8640\u8639\u863C\u8631\u863B\u863E\u8830\u8832\u882E\u8833\u8976\u8974\u8973\u89FE\u8B8C\u8B8E\u8B8B\u8B88\u8C45\u8D19\u8E98\u8F64\u8F63\u91BC\u9462\u9455\u945D\u9457\u945E\u97C4\u97C5\u9800\u9A56\u9A59\u9B1E\u9B1F\u9B20\u9C52\u9C58\u9C50\u9C4A\u9C4D\u9C4B\u9C55\u9C59\u9C4C\u9C4E\u9DFB\u9DF7\u9DEF\u9DE3\u9DEB\u9DF8\u9DE4\u9DF6\u9DE1\u9DEE\u9DE6\u9DF2\u9DF0\u9DE2\u9DEC\u9DF4\u9DF3\u9DE8\u9DED\u9EC2\u9ED0\u9EF2\u9EF3\u9F06\u9F1C\u9F38\u9F37\u9F36\u9F43\u9F4F\u9F71\u9F70\u9F6E\u9F6F\u56D3\u56CD\u5B4E\u5C6D\u652D\u66ED\u66EE\u6B13\u705F\u7061\u705D\u7060\u7223\u74DB\u74E5\u77D5\u7938\u79B7\u79B6\u7C6A\u7E97\u7F89\u826D\u8643\u8838\u8837\u8835\u884B\u8B94\u8B95\u8E9E\u8E9F\u8EA0\u8E9D\u91BE\u91BD\u91C2\u946B\u9468\u9469\u96E5\u9746\u9743\u9747\u97C7\u97E5\u9A5E\u9AD5\u9B59\u9C63\u9C67\u9C66\u9C62\u9C5E\u9C60\u9E02\u9DFE\u9E07\u9E03\u9E06\u9E05\u9E00\u9E01\u9E09\u9DFF\u9DFD\u9E04\u9EA0\u9F1E\u9F46\u9F74\u9F75\u9F76\u56D4\u652E\u65B8\u6B18\u6B19\u6B17\u6B1A\u7062\u7226\u72AA\u77D8\u77D9\u7939\u7C69\u7C6B\u7CF6\u7E9A\u7E98\u7E9B\u7E99\u81E0\u81E1\u8646\u8647\u8648\u8979\u897A\u897C\u897B\u89FF\u8B98\u8B99\u8EA5\u8EA4\u8EA3\u946E\u946D\u946F\u9471\u9473\u9749\u9872\u995F\u9C68\u9C6E\u9C6D\u9E0B\u9E0D\u9E10\u9E0F\u9E12\u9E11\u9EA1\u9EF5\u9F09\u9F47\u9F78\u9F7B\u9F7A\u9F79\u571E\u7066\u7C6F\u883C\u8DB2\u8EA6\u91C3\u9474\u9478\u9476\u9475\u9A60\u9C74\u9C73\u9C71\u9C75\u9E14\u9E13\u9EF6\u9F0A\u9FA4\u7068\u7065\u7CF7\u866A\u883E\u883D\u883F\u8B9E\u8C9C\u8EA9
\u8EC9\u974B\u9873\u9874\u98CC\u9961\u99AB\u9A64\u9A66\u9A67\u9B24\u9E15\u9E17\u9F48\u6207\u6B1E\u7227\u864C\u8EA8\u9482\u9480\u9481\u9A69\u9A68\u9B2E\u9E19\u7229\u864B\u8B9F\u9483\u9C79\u9EB7\u7675\u9A6B\u9C7A\u9E1D\u7069\u706A\u9EA4\u9F7E\u9F49\u9F98\u7881\u92B9\u88CF\u58BB\u6052\u7CA7\u5AFA\u2554\u2566\u2557\u2560\u256C\u2563\u255A\u2569\u255D\u2552\u2564\u2555\u255E\u256A\u2561\u2558\u2567\u255B\u2553\u2565\u2556\u255F\u256B\u2562\u2559\u2568\u255C\u2551\u2550\u256D\u256E\u2570\u256F\uFFED\u0547\u92DB\u05DF\u3FC5\u854C\u42B5\u73EF\u51B5\u3649\u4942\u89E4\u9344\u19DB\u82EE\u3CC8\u783C\u6744\u62DF\u4933\u89AA\u02A0\u6BB3\u1305\u4FAB\u24ED\u5008\u6D29\u7A84\u3600\u4AB1\u2513\u5029\u037E\u5FA4\u0380\u0347\u6EDB\u041F\u507D\u5101\u347A\u510E\u986C\u3743\u8416\u49A4\u0487\u5160\u33B4\u516A\u0BFF\u20FC\u02E5\u2530\u058E\u3233\u1983\u5B82\u877D\u05B3\u3C99\u51B2\u51B8\u9D34\u51C9\u51CF\u51D1\u3CDC\u51D3\u4AA6\u51B3\u51E2\u5342\u51ED\u83CD\u693E\u372D\u5F7B\u520B\u5226\u523C\u52B5\u5257\u5294\u52B9\u52C5\u7C15\u8542\u52E0\u860D\u6B13\u5305\u8ADE\u5549\u6ED9\u3F80\u0954\u3FEC\u5333\u5344\u0BE2\u6CCB\u1726\u681B\u73D5\u604A\u3EAA\u38CC\u16E8\u71DD\u44A2\u536D\u5374\u86AB\u537E\u537F\u1596\u1613\u77E6\u5393\u8A9B\u53A0\u53AB\u53AE\u73A7\u5772\u3F59\u739C\u53C1\u53C5\u6C49\u4E49\u57FE\u53D9\u3AAB\u0B8F\u53E0\u3FEB\u2DA3\u53F6\u0C77\u5413\u7079\u552B\u6657\u6D5B\u546D\u6B53\u0D74\u555D\u548F\u54A4\u47A6\u170D\u0EDD\u3DB4\u0D4D\u89BC\u2698\u5547\u4CED\u542F\u7417\u5586\u55A9\u5605\u18D7\u403A\u4552\u4435\u66B3\u10B4\u5637\u66CD\u328A\u66A4\u66AD\u564D\u564F\u78F1\u56F1\u9787\u53FE\u5700\u56EF\u56ED\u8B66\u3623\u124F\u5746\u41A5\u6C6E\u708B\u5742\u36B1\u6C7E\u57E6\u1416\u5803\u1454\u4363\u5826\u4BF5\u585C\u58AA\u3561\u58E0\u58DC\u123C\u58FB\u5BFF\u5743\uA150\u4278\u93D3\u35A1\u591F\u68A6\u36C3\u6E59\u163E\u5A24\u5553\u1692\u8505\u59C9\u0D4E\u6C81\u6D2A\u17DC\u59D9\u17FB\u17B2\u6DA6\u6D71\u1828\u16D5\u59F9\u6E45\u5AAB\u5A63\u36E6\u49A9\u5A77\u3708\u5A96\u7465\u5AD3\u6FA1\u2554\u
3D85\u1911\u3732\u16B8\u5E83\u52D0\u5B76\u6588\u5B7C\u7A0E\u4004\u485D\u0204\u5BD5\u6160\u1A34\u59CC\u05A5\u5BF3\u5B9D\u4D10\u5C05\u1B44\u5C13\u73CE\u5C14\u1CA5\u6B28\u5C49\u48DD\u5C85\u5CE9\u5CEF\u5D8B\u1DF9\u1E37\u5D10\u5D18\u5D46\u1EA4\u5CBA\u5DD7\u82FC\u382D\u4901\u2049\u2173\u8287\u3836\u3BC2\u5E2E\u6A8A\u5E75\u5E7A\u44BC\u0CD3\u53A6\u4EB7\u5ED0\u53A8\u1771\u5E09\u5EF4\u8482\u5EF9\u5EFB\u38A0\u5EFC\u683E\u941B\u5F0D\u01C1\uF894\u3ADE\u48AE\u133A\u5F3A\u6888\u23D0\u5F58\u2471\u5F63\u97BD\u6E6E\u5F72\u9340\u8A36\u5FA7\u5DB6\u3D5F\u5250\u1F6A\u70F8\u2668\u91D6\u029E\u8A29\u6031\u6685\u1877\u3963\u3DC7\u3639\u5790\u27B4\u7971\u3E40\u609E\u60A4\u60B3\u4982\u498F\u7A53\u74A4\u50E1\u5AA0\u6164\u8424\u6142\uF8A6\u6ED2\u6181\u51F4\u0656\u6187\u5BAA\u3FB7\u285F\u61D3\u8B9D\u995D\u61D0\u3932\u2980\u28C1\u6023\u615C\u651E\u638B\u0118\u62C5\u1770\u62D5\u2E0D\u636C\u49DF\u3A17\u6438\u63F8\u138E\u17FC\u6490\u6F8A\u2E36\u9814\u408C\u571D\u64E1\u64E5\u947B\u3A66\u643A\u3A57\u654D\u6F16\u4A28\u4A23\u6585\u656D\u655F\u307E\u65B5\u4940\u4B37\u65D1\u40D8\u1829\u65E0\u65E3\u5FDF\u3400\u6618\u31F7\u31F8\u6644\u31A4\u31A5\u664B\u0E75\u6667\u51E6\u6673\u6674\u1E3D\u3231\u85F4\u31C8\u5313\u77C5\u28F7\u99A4\u6702\u439C\u4A21\u3B2B\u69FA\u37C2\u675E\u6767\u6762\u41CD\u90ED\u67D7\u44E9\u6822\u6E50\u923C\u6801\u33E6\u6DA0\u685D\u346F\u69E1\u6A0B\u8ADF\u6973\u68C3\u35CD\u6901\u6900\u3D32\u3A01\u363C\u3B80\u67AC\u6961\u8A4A\u42FC\u6936\u6998\u3BA1\u03C9\u8363\u5090\u69F9\u3659\u212A\u6A45\u3703\u6A9D\u3BF3\u67B1\u6AC8\u919C\u3C0D\u6B1D\u0923\u60DE\u6B35\u6B74\u27CD\u6EB5\u3ADB\u03B5\u1958\u3740\u5421\u3B5A\u6BE1\u3EFC\u6BDC\u6C37\u248B\u48F1\u6B51\u6C5A\u8226\u6C79\u3DBC\u44C5\u3DBD\u41A4\u490C\u4900\u3CC9\u36E5\u3CEB\u0D32\u9B83\u31F9\u2491\u7F8F\u6837\u6D25\u6DA1\u6DEB\u6D96\u6D5C\u6E7C\u6F04\u497F\u4085\u6E72\u8533\u6F74\u51C7\u6C9C\u6E1D\u842E\u8B21\u6E2F\u3E2F\u7453\u3F82\u79CC\u6E4F\u5A91\u304B\u6FF8\u370D\u6F9D\u3E30\u6EFA\u1497\u403D\u4555\u93F0\u6F44\u6F5C\u3D4E\u6F74\u9170\u3D3B\u6F
9F\u4144\u6FD3\u4091\u4155\u4039\u3FF0\u3FB4\u413F\u51DF\u4156\u4157\u4140\u61DD\u704B\u707E\u70A7\u7081\u70CC\u70D5\u70D6\u70DF\u4104\u3DE8\u71B4\u7196\u4277\u712B\u7145\u5A88\u714A\u716E\u5C9C\u4365\u714F\u9362\u42C1\u712C\u445A\u4A27\u4A22\u71BA\u8BE8\u70BD\u720E\u9442\u7215\u5911\u9443\u7224\u9341\u5605\u722E\u7240\u4974\u68BD\u7255\u7257\u3E55\u3044\u680D\u6F3D\u7282\u732A\u732B\u4823\u882B\u48ED\u8804\u7328\u732E\u73CF\u73AA\u0C3A\u6A2E\u73C9\u7449\u41E2\u16E7\u4A24\u6623\u36C5\u49B7\u498D\u49FB\u73F7\u7415\u6903\u4A26\u7439\u05C3\u3ED7\u745C\u28AD\u7460\u8EB2\u7447\u73E4\u7476\u83B9\u746C\u3730\u7474\u93F1\u6A2C\u7482\u4953\u4A8C\u415F\u4A79\u8B8F\u5B46\u8C03\u189E\u74C8\u1988\u750E\u74E9\u751E\u8ED9\u1A4B\u5BD7\u8EAC\u9385\u754D\u754A\u7567\u756E\u4F82\u3F04\u4D13\u758E\u745D\u759E\u75B4\u7602\u762C\u7651\u764F\u766F\u7676\u63F5\u7690\u81EF\u37F8\u6911\u690E\u76A1\u76A5\u76B7\u76CC\u6F9F\u8462\u509D\u517D\u1E1C\u771E\u7726\u7740\u64AF\u5220\u7758\u32AC\u77AF\u8964\u8968\u16C1\u77F4\u7809\u1376\u4A12\u68CA\u78AF\u78C7\u78D3\u96A5\u792E\u55E0\u78D7\u7934\u78B1\u760C\u8FB8\u8884\u8B2B\u6083\u261C\u7986\u8900\u6902\u7980\u5857\u799D\u7B39\u793C\u79A9\u6E2A\u7126\u3EA8\u79C6\u910D\u79D4";
+
+ /**
+  * Reads one flag out of the packed {@code ASTRALNESS} bitmap.
+  *
+  * Each char of {@code ASTRALNESS} carries 16 flags; flag {@code i} lives
+  * in char {@code i / 16} at bit position {@code i % 16} (least
+  * significant bit first).
+  *
+  * @param i zero-based index of the flag
+  * @return {@code true} if the flag is set
+  */
+ private static boolean readBit(int i) {
+     int word = ASTRALNESS.charAt(i >> 4);
+     int mask = 1 << (i & 0xF);
+     return (word & mask) != 0;
+ }
+
+ /**
+  * Looks up the table entry for a Big5 pointer.
+  *
+  * The index is stored as five contiguous runs (TABLE0..TABLE4); pointers
+  * in the gaps between the runs, below the first run, or past the last
+  * run have no entry.
+  *
+  * @param pointer the Big5 pointer to look up
+  * @return the char stored for the pointer, or U+0000 if the pointer is
+  *         not covered by any table
+  */
+ static char lowBits(int pointer) {
+     if (pointer >= 942 && pointer < 1068) {
+         return TABLE0.charAt(pointer - 942);
+     }
+     if (pointer >= 1099 && pointer < 1172) {
+         return TABLE1.charAt(pointer - 1099);
+     }
+     if (pointer >= 1256 && pointer < 5466) {
+         return TABLE2.charAt(pointer - 1256);
+     }
+     if (pointer >= 5495 && pointer < 11214) {
+         return TABLE3.charAt(pointer - 5495);
+     }
+     if (pointer >= 11254 && pointer < 19782) {
+         return TABLE4.charAt(pointer - 11254);
+     }
+     // Not inside any run: no mapping.
+     return '\u0000';
+ }
+
+ /**
+  * Tells whether the given Big5 pointer is flagged as astral.
+  *
+  * The flags are stored in a packed bitmap that only covers selected
+  * pointer ranges; every pointer outside those ranges is not astral,
+  * except pointer 3883, which is always astral.
+  *
+  * @param pointer the Big5 pointer to test
+  * @return {@code true} if the pointer is flagged as astral
+  */
+ static boolean isAstral(int pointer) {
+     // Each covered range [start, end) begins at a fixed bit offset
+     // in the bitmap: readBit(bitOffset + (pointer - start)).
+     if (pointer >= 947 && pointer < 1119) {
+         return readBit(pointer - 947);
+     }
+     if (pointer >= 1256 && pointer < 1269) {
+         return readBit(172 + (pointer - 1256));
+     }
+     if (pointer >= 1336 && pointer < 1364) {
+         return readBit(185 + (pointer - 1336));
+     }
+     if (pointer >= 1413 && pointer < 1912) {
+         return readBit(213 + (pointer - 1413));
+     }
+     if (pointer >= 2012 && pointer < 3800) {
+         return readBit(712 + (pointer - 2012));
+     }
+     if (pointer == 3883) {
+         // Lone astral pointer that is not part of the bitmap.
+         return true;
+     }
+     if (pointer >= 3985 && pointer < 5024) {
+         return readBit(2501 + (pointer - 3985));
+     }
+     if (pointer >= 11205 && pointer < 11214) {
+         return readBit(3540 + (pointer - 11205));
+     }
+     if (pointer >= 18997 && pointer < 19782) {
+         return readBit(3549 + (pointer - 18997));
+     }
+     return false;
+ }
+
+ /**
+  * Finds the Big5 pointer for a character, for use when encoding.
+  *
+  * A few BMP values are answered from a fixed list before any table is
+  * searched (presumably the values for which the first match in the
+  * tables would be the wrong pointer -- confirm against the Encoding
+  * Standard). Otherwise TABLE2 (from index 3768 onwards only), TABLE3
+  * and TABLE4 are searched in that order and the first entry whose char
+  * and astral flag both match wins.
+  *
+  * @param lowBits  the low 16 bits of the code point
+  * @param isAstral {@code true} to look for an astral mapping,
+  *                 {@code false} for a BMP mapping
+  * @return the matching pointer, or 0 if there is none
+  */
+ public static int findPointer(char lowBits, boolean isAstral) {
+     if (!isAstral) {
+         switch (lowBits) {
+             case 0x2550:
+                 return 18991;
+             case 0x255E:
+                 return 18975;
+             case 0x2561:
+                 return 18977;
+             case 0x256A:
+                 return 18976;
+             case 0x5341:
+                 return 5512;
+             case 0x5345:
+                 return 5599;
+             default:
+                 break;
+         }
+     }
+     int found = scanTable(TABLE2, 3768, 1256, lowBits, isAstral);
+     if (found == 0) {
+         found = scanTable(TABLE3, 0, 5495, lowBits, isAstral);
+     }
+     if (found == 0) {
+         found = scanTable(TABLE4, 0, 11254, lowBits, isAstral);
+     }
+     return found;
+ }
+
+ /**
+  * Scans one table, starting at index {@code from}, for the first entry
+  * equal to {@code lowBits} whose pointer ({@code base} + index) has the
+  * requested astral flag. Returns that pointer, or 0 if there is none.
+  */
+ private static int scanTable(String table, int from, int base, char lowBits, boolean isAstral) {
+     for (int i = table.indexOf(lowBits, from); i >= 0; i = table.indexOf(lowBits, i + 1)) {
+         int candidate = base + i;
+         if (isAstral(candidate) == isAstral) {
+             return candidate;
+         }
+     }
+     return 0;
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Decoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Decoder.java
new file mode 100644
index 000000000..cc56b892f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Decoder.java
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CoderResult;
+
+public class Big5Decoder extends Decoder {
+
+ private int big5Lead = 0;
+
+ private char pendingTrail = '\u0000';
+
+ protected Big5Decoder(Charset cs) {
+ super(cs, 0.5f, 1.0f);
+ }
+
+ @Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+ assert !(this.report && (big5Lead != 0)):
+ "When reporting, this method should never return with big5Lead set.";
+ if (pendingTrail != '\u0000') {
+ if (!out.hasRemaining()) {
+ return CoderResult.OVERFLOW;
+ }
+ out.put(pendingTrail);
+ pendingTrail = '\u0000';
+ }
+ for (;;) {
+ if (!in.hasRemaining()) {
+ return CoderResult.UNDERFLOW;
+ }
+ if (!out.hasRemaining()) {
+ return CoderResult.OVERFLOW;
+ }
+ int b = ((int) in.get() & 0xFF);
+ if (big5Lead == 0) {
+ if (b <= 0x7F) {
+ out.put((char) b);
+ continue;
+ }
+ if (b >= 0x81 && b <= 0xFE) {
+ if (this.report && !in.hasRemaining()) {
+ // The Java API is badly documented. Need to do this
+ // crazy thing and hope the caller knows about the
+ // undocumented aspects of the API!
+ in.position(in.position() - 1);
+ return CoderResult.UNDERFLOW;
+ }
+ big5Lead = b;
+ continue;
+ }
+ if (this.report) {
+ in.position(in.position() - 1);
+ return CoderResult.malformedForLength(1);
+ }
+ out.put('\uFFFD');
+ continue;
+ }
+ int lead = big5Lead;
+ big5Lead = 0;
+ int offset = (b < 0x7F) ? 0x40 : 0x62;
+ if ((b >= 0x40 && b <= 0x7E) || (b >= 0xA1 && b <= 0xFE)) {
+ int pointer = (lead - 0x81) * 157 + (b - offset);
+ char outTrail;
+ switch (pointer) {
+ case 1133:
+ out.put('\u00CA');
+ outTrail = '\u0304';
+ break;
+ case 1135:
+ out.put('\u00CA');
+ outTrail = '\u030C';
+ break;
+ case 1164:
+ out.put('\u00EA');
+ outTrail = '\u0304';
+ break;
+ case 1166:
+ out.put('\u00EA');
+ outTrail = '\u030C';
+ break;
+ default:
+ char lowBits = Big5Data.lowBits(pointer);
+ if (lowBits == '\u0000') {
+ // The following |if| block fixes
+ // https://github.com/whatwg/encoding/issues/5
+ if (b <= 0x7F) {
+ // prepend byte to stream
+ // Always legal, since we've always just read a byte
+ // if we come here.
+ in.position(in.position() - 1);
+ }
+ if (this.report) {
+ // This can go past the start of the buffer
+ // if the caller does not conform to the
+ // undocumented aspects of the API.
+ in.position(in.position() - 1);
+ return CoderResult.malformedForLength(b <= 0x7F ? 1 : 2);
+ }
+ out.put('\uFFFD');
+ continue;
+ }
+ if (Big5Data.isAstral(pointer)) {
+ int codePoint = lowBits | 0x20000;
+ out.put((char) (0xD7C0 + (codePoint >> 10)));
+ outTrail = (char) (0xDC00 + (codePoint & 0x3FF));
+ break;
+ }
+ out.put(lowBits);
+ continue;
+ }
+ if (!out.hasRemaining()) {
+ pendingTrail = outTrail;
+ return CoderResult.OVERFLOW;
+ }
+ out.put(outTrail);
+ continue;
+ }
+ // pointer is null
+ if (b <= 0x7F) {
+ // prepend byte to stream
+ // Always legal, since we've always just read a byte
+ // if we come here.
+ in.position(in.position() - 1);
+ }
+ if (this.report) {
+ // if position() == 0, the caller is not using the
+ // undocumented part of the API right and the line
+ // below will throw!
+ in.position(in.position() - 1);
+ return CoderResult.malformedForLength(b <= 0x7F ? 1 : 2);
+ }
+ out.put('\uFFFD');
+ continue;
+ }
+ }
+
+ @Override protected CoderResult implFlush(CharBuffer out) {
+ if (pendingTrail != '\u0000') {
+ if (!out.hasRemaining()) {
+ return CoderResult.OVERFLOW;
+ }
+ out.put(pendingTrail);
+ pendingTrail = '\u0000';
+ }
+ if (big5Lead != 0) {
+ assert !this.report: "How come big5Lead got to be non-zero when decodeLoop() returned in the reporting mode?";
+ if (!out.hasRemaining()) {
+ return CoderResult.OVERFLOW;
+ }
+ out.put('\uFFFD');
+ big5Lead = 0;
+ }
+ return CoderResult.UNDERFLOW;
+ }
+
+ @Override protected void implReset() {
+ big5Lead = 0;
+ pendingTrail = '\u0000';
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Encoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Encoder.java
new file mode 100644
index 000000000..de5132151
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Encoder.java
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CoderResult;
+
+public class Big5Encoder extends Encoder {
+
+ private char utf16Lead = '\u0000';
+
+ private byte pendingTrail = 0;
+
+ protected Big5Encoder(Charset cs) {
+ super(cs, 1.5f, 2.0f);
+ }
+
+ @Override protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
+ assert !((this.reportMalformed || this.reportUnmappable) && (utf16Lead != '\u0000')):
+ "When reporting, this method should never return with utf16Lead set.";
+ if (pendingTrail != 0) {
+ if (!out.hasRemaining()) {
+ return CoderResult.OVERFLOW;
+ }
+ out.put(pendingTrail);
+ pendingTrail = 0;
+ }
+ for (;;) {
+ if (!in.hasRemaining()) {
+ return CoderResult.UNDERFLOW;
+ }
+ if (!out.hasRemaining()) {
+ return CoderResult.OVERFLOW;
+ }
+ boolean isAstral; // true means Plane 2, false means BMP
+ char lowBits; // The low 16 bits of the code point
+ char codeUnit = in.get();
+ int highBits = (codeUnit & 0xFC00);
+ if (highBits == 0xD800) {
+ // high surrogate
+ if (utf16Lead != '\u0000') {
+ // High surrogate follows another high surrogate. The
+ // *previous* code unit is in error.
+ if (this.reportMalformed) {
+ // The caller had better adhere to the API contract.
+ // Otherwise, this may throw.
+ in.position(in.position() - 2);
+ utf16Lead = '\u0000';
+ return CoderResult.malformedForLength(1);
+ }
+ out.put((byte) '?');
+ }
+ utf16Lead = codeUnit;
+ continue;
+ }
+ if (highBits == 0xDC00) {
+ // low surrogate
+ if (utf16Lead == '\u0000') {
+ // Got low surrogate without a previous high surrogate
+ if (this.reportMalformed) {
+ in.position(in.position() - 1);
+ return CoderResult.malformedForLength(1);
+ }
+ out.put((byte) '?');
+ continue;
+ }
+ int codePoint = (utf16Lead << 10) + codeUnit - 56613888;
+ utf16Lead = '\u0000';
+ // Plane 2 is the only astral plane that has potentially
+ // Big5-encodable characters.
+ if ((0xFF0000 & codePoint) != 0x20000) {
+ if (this.reportUnmappable) {
+ in.position(in.position() - 2);
+ return CoderResult.unmappableForLength(2);
+ }
+ out.put((byte) '?');
+ continue;
+ }
+ isAstral = true;
+ lowBits = (char)(codePoint & 0xFFFF);
+ } else {
+ // not a surrogate
+ if (utf16Lead != '\u0000') {
+ // Non-surrogate follows a high surrogate. The *previous*
+ // code unit is in error.
+ utf16Lead = '\u0000';
+ if (this.reportMalformed) {
+ // The caller had better adhere to the API contract.
+ // Otherwise, this may throw.
+ in.position(in.position() - 2);
+ return CoderResult.malformedForLength(1);
+ }
+ out.put((byte) '?');
+ // Let's unconsume this code unit and reloop in order to
+ // re-check if the output buffer still has space.
+ in.position(in.position() - 1);
+ continue;
+ }
+ isAstral = false;
+ lowBits = codeUnit;
+ }
+ // isAstral now tells us if we have a Plane 2 or a BMP character.
+ // lowBits tells us the low 16 bits.
+ // After all the above setup to deal with UTF-16, we are now
+ // finally ready to follow the spec.
+ if (!isAstral && lowBits <= 0x7F) {
+ out.put((byte)lowBits);
+ continue;
+ }
+ int pointer = Big5Data.findPointer(lowBits, isAstral);
+ if (pointer == 0) {
+ if (this.reportUnmappable) {
+ if (isAstral) {
+ in.position(in.position() - 2);
+ return CoderResult.unmappableForLength(2);
+ }
+ in.position(in.position() - 1);
+ return CoderResult.unmappableForLength(1);
+ }
+ out.put((byte)'?');
+ continue;
+ }
+ int lead = pointer / 157 + 0x81;
+ int trail = pointer % 157;
+ if (trail < 0x3F) {
+ trail += 0x40;
+ } else {
+ trail += 0x62;
+ }
+ out.put((byte)lead);
+ if (!out.hasRemaining()) {
+ pendingTrail = (byte)trail;
+ return CoderResult.OVERFLOW;
+ }
+ out.put((byte)trail);
+ continue;
+ }
+ }
+
+ @Override protected CoderResult implFlush(ByteBuffer out) {
+ if (pendingTrail != 0) {
+ if (!out.hasRemaining()) {
+ return CoderResult.OVERFLOW;
+ }
+ out.put(pendingTrail);
+ pendingTrail = 0;
+ }
+ if (utf16Lead != '\u0000') {
+ assert !this.reportMalformed: "How come utf16Lead got to be non-zero when decodeLoop() returned in the reporting mode?";
+ if (!out.hasRemaining()) {
+ return CoderResult.OVERFLOW;
+ }
+ out.put((byte)'?');
+ utf16Lead = '\u0000';
+ }
+ return CoderResult.UNDERFLOW;
+ }
+
+ @Override protected void implReset() {
+ utf16Lead = '\u0000';
+ pendingTrail = 0;
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Decoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Decoder.java
new file mode 100644
index 000000000..41e06c63a
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Decoder.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
+
+/**
+ * Base class for decoders in this package. It narrows the error handling
+ * that {@link CharsetDecoder} lets callers configure: errors may be
+ * reported or replaced but never ignored, and U+FFFD is the only
+ * replacement accepted.
+ */
+public abstract class Decoder extends CharsetDecoder {
+
+    /**
+     * <code>true</code> (the initial value) when the malformed-input action
+     * is REPORT, <code>false</code> when it is REPLACE. Updated by
+     * {@link #implOnMalformedInput(CodingErrorAction)}.
+     */
+    protected boolean report = true;
+
+    /**
+     * Passes its arguments straight to the {@link CharsetDecoder}
+     * constructor.
+     */
+    protected Decoder(Charset cs, float averageCharsPerByte, float maxCharsPerByte) {
+        super(cs, averageCharsPerByte, maxCharsPerByte);
+    }
+
+    /**
+     * Records whether malformed input is to be reported or replaced.
+     *
+     * @throws IllegalArgumentException if the action is null, IGNORE or
+     *         not one of the known actions
+     */
+    @Override protected final void implOnMalformedInput(CodingErrorAction newAction) {
+        rejectNullAndIgnore(newAction);
+        if (newAction == CodingErrorAction.REPLACE) {
+            this.report = false;
+        } else if (newAction == CodingErrorAction.REPORT) {
+            this.report = true;
+        } else {
+            throw unknownAction();
+        }
+    }
+
+    /**
+     * Validates the action but stores nothing.
+     *
+     * @throws IllegalArgumentException if the action is null, IGNORE or
+     *         not one of the known actions
+     */
+    @Override protected final void implOnUnmappableCharacter(
+            CodingErrorAction newAction) {
+        rejectNullAndIgnore(newAction);
+        if (newAction == CodingErrorAction.REPLACE
+                || newAction == CodingErrorAction.REPORT) {
+            // We don't actually care, since there are no unmappables.
+            return;
+        }
+        throw unknownAction();
+    }
+
+    /**
+     * Accepts only U+FFFD as the replacement string.
+     *
+     * @throws IllegalArgumentException for any other replacement
+     */
+    @Override protected final void implReplaceWith(String newReplacement) {
+        if ("\uFFFD".equals(newReplacement)) {
+            return;
+        }
+        throw new IllegalArgumentException("Only U+FFFD is allowed as the replacement.");
+    }
+
+    /** Rejects the two argument values that no decoder here accepts. */
+    private static void rejectNullAndIgnore(CodingErrorAction action) {
+        if (action == null) {
+            throw new IllegalArgumentException("The argument must not be null.");
+        }
+        if (action == CodingErrorAction.IGNORE) {
+            throw new IllegalArgumentException("The Encoding Standard does not allow errors to be ignored.");
+        }
+    }
+
+    /**
+     * Handles an action that is none of IGNORE, REPLACE or REPORT: trips an
+     * assertion when assertions are enabled, otherwise supplies the
+     * exception for the caller to throw.
+     */
+    private static IllegalArgumentException unknownAction() {
+        assert false: "Unreachable.";
+        return new IllegalArgumentException("Unknown CodingErrorAction.");
+    }
+
+    // TODO: Check if the JDK decoders reset the reporting state on reset()
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Encoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Encoder.java
new file mode 100644
index 000000000..6fc011ed2
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Encoder.java
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CodingErrorAction;
+
+/**
+ * Base class for encoders in this package. It narrows the error handling
+ * that {@link CharsetEncoder} lets callers configure: errors may be
+ * reported or replaced but never ignored, and the single byte
+ * <code>'?'</code> is the only replacement considered legal.
+ */
+public abstract class Encoder extends CharsetEncoder {
+
+    /**
+     * <code>true</code> (the initial value) when the malformed-input action
+     * is REPORT, <code>false</code> when it is REPLACE.
+     */
+    boolean reportMalformed = true;
+
+    /**
+     * <code>true</code> (the initial value) when the unmappable-character
+     * action is REPORT, <code>false</code> when it is REPLACE.
+     */
+    boolean reportUnmappable = true;
+
+    /**
+     * Passes its arguments straight to the {@link CharsetEncoder}
+     * constructor.
+     */
+    protected Encoder(Charset cs, float averageBytesPerChar,
+            float maxBytesPerChar) {
+        super(cs, averageBytesPerChar, maxBytesPerChar);
+    }
+
+    /**
+     * Records whether malformed input is to be reported or replaced.
+     *
+     * @throws IllegalArgumentException if the action is null, IGNORE or
+     *         not one of the known actions
+     */
+    @Override protected final void implOnMalformedInput(CodingErrorAction newAction) {
+        rejectNullAndIgnore(newAction);
+        if (newAction == CodingErrorAction.REPLACE) {
+            this.reportMalformed = false;
+            return;
+        }
+        if (newAction == CodingErrorAction.REPORT) {
+            // This must set the malformed-input flag. Setting
+            // reportUnmappable here would leave replacement mode stuck on
+            // for malformed input after a REPLACE followed by a REPORT.
+            this.reportMalformed = true;
+            return;
+        }
+        throw unknownAction();
+    }
+
+    /**
+     * Records whether unmappable characters are to be reported or replaced.
+     *
+     * @throws IllegalArgumentException if the action is null, IGNORE or
+     *         not one of the known actions
+     */
+    @Override protected final void implOnUnmappableCharacter(
+            CodingErrorAction newAction) {
+        rejectNullAndIgnore(newAction);
+        if (newAction == CodingErrorAction.REPLACE) {
+            this.reportUnmappable = false;
+            return;
+        }
+        if (newAction == CodingErrorAction.REPORT) {
+            // Mirror of the case above: only the unmappable flag changes.
+            this.reportUnmappable = true;
+            return;
+        }
+        throw unknownAction();
+    }
+
+    /**
+     * Tells whether the given bytes may serve as the replacement.
+     *
+     * @param repl the candidate replacement, may be null
+     * @return <code>true</code> only for a one-byte array holding
+     *         <code>'?'</code>
+     */
+    @Override public boolean isLegalReplacement(byte[] repl) {
+        return repl != null && repl.length == 1 && repl[0] == '?';
+    }
+
+    /** Does nothing; the replacement is not stored by this class. */
+    @Override protected final void implReplaceWith(byte[] newReplacement) {
+    }
+
+    /** Rejects the two argument values that no encoder here accepts. */
+    private static void rejectNullAndIgnore(CodingErrorAction action) {
+        if (action == null) {
+            throw new IllegalArgumentException("The argument must not be null.");
+        }
+        if (action == CodingErrorAction.IGNORE) {
+            throw new IllegalArgumentException("The Encoding Standard does not allow errors to be ignored.");
+        }
+    }
+
+    /**
+     * Handles an action that is none of IGNORE, REPLACE or REPORT: trips an
+     * assertion when assertions are enabled, otherwise supplies the
+     * exception for the caller to throw.
+     */
+    private static IllegalArgumentException unknownAction() {
+        assert false: "Unreachable.";
+        return new IllegalArgumentException("Unknown CodingErrorAction.");
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Encoding.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Encoding.java
new file mode 100644
index 000000000..6e59ef7c7
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Encoding.java
@@ -0,0 +1,886 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
+import java.nio.charset.spi.CharsetProvider;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/**
+ * Represents an <a href="https://encoding.spec.whatwg.org/#encoding">encoding</a>
+ * as defined in the <a href="https://encoding.spec.whatwg.org/">Encoding
+ * Standard</a>, provides access to each encoding defined in the Encoding
+ * Standard via a static constant and provides the
+ * "<a href="https://encoding.spec.whatwg.org/#concept-encoding-get">get an
+ * encoding</a>" algorithm defined in the Encoding Standard.
+ *
+ * <p>This class inherits from {@link Charset} to allow the Encoding
+ * Standard-compliant encodings to be used in contexts that support
+ * <code>Charset</code> instances. However, by design, the Encoding
+ * Standard-compliant encodings are not supplied via a {@link CharsetProvider}
+ * and, therefore, are not available via and do not interfere with the static
+ * methods provided by <code>Charset</code>. (This class provides methods of
+ * the same name to hide each static method of <code>Charset</code> to help
+ * avoid accidental calls to the static methods of the superclass when working
+ * with Encoding Standard-compliant encodings.)
+ *
+ * <p>When an application needs to use a particular encoding, such as utf-8
+ * or windows-1252, the corresponding constant, i.e.
+ * {@link #UTF_8 Encoding.UTF_8} and {@link #WINDOWS_1252 Encoding.WINDOWS_1252}
+ * respectively, should be used. However, when the application receives an
+ * encoding label from external input, the method {@link #forName(String)
+ * forName()} should be used to obtain the object representing the encoding
+ * identified by the label. In contexts where labels that map to the
+ * <a href="https://encoding.spec.whatwg.org/#replacement">replacement
+ * encoding</a> should be treated as unknown, the method {@link
+ * #forNameNoReplacement(String) forNameNoReplacement()} should be used instead.
+ *
+ *
+ * @author hsivonen
+ */
+public abstract class Encoding extends Charset {
+
+ private static final String[] LABELS = {
+ "866",
+ "ansi_x3.4-1968",
+ "arabic",
+ "ascii",
+ "asmo-708",
+ "big5",
+ "big5-hkscs",
+ "chinese",
+ "cn-big5",
+ "cp1250",
+ "cp1251",
+ "cp1252",
+ "cp1253",
+ "cp1254",
+ "cp1255",
+ "cp1256",
+ "cp1257",
+ "cp1258",
+ "cp819",
+ "cp866",
+ "csbig5",
+ "cseuckr",
+ "cseucpkdfmtjapanese",
+ "csgb2312",
+ "csibm866",
+ "csiso2022jp",
+ "csiso2022kr",
+ "csiso58gb231280",
+ "csiso88596e",
+ "csiso88596i",
+ "csiso88598e",
+ "csiso88598i",
+ "csisolatin1",
+ "csisolatin2",
+ "csisolatin3",
+ "csisolatin4",
+ "csisolatin5",
+ "csisolatin6",
+ "csisolatin9",
+ "csisolatinarabic",
+ "csisolatincyrillic",
+ "csisolatingreek",
+ "csisolatinhebrew",
+ "cskoi8r",
+ "csksc56011987",
+ "csmacintosh",
+ "csshiftjis",
+ "cyrillic",
+ "dos-874",
+ "ecma-114",
+ "ecma-118",
+ "elot_928",
+ "euc-jp",
+ "euc-kr",
+ "gb18030",
+ "gb2312",
+ "gb_2312",
+ "gb_2312-80",
+ "gbk",
+ "greek",
+ "greek8",
+ "hebrew",
+ "hz-gb-2312",
+ "ibm819",
+ "ibm866",
+ "iso-2022-cn",
+ "iso-2022-cn-ext",
+ "iso-2022-jp",
+ "iso-2022-kr",
+ "iso-8859-1",
+ "iso-8859-10",
+ "iso-8859-11",
+ "iso-8859-13",
+ "iso-8859-14",
+ "iso-8859-15",
+ "iso-8859-16",
+ "iso-8859-2",
+ "iso-8859-3",
+ "iso-8859-4",
+ "iso-8859-5",
+ "iso-8859-6",
+ "iso-8859-6-e",
+ "iso-8859-6-i",
+ "iso-8859-7",
+ "iso-8859-8",
+ "iso-8859-8-e",
+ "iso-8859-8-i",
+ "iso-8859-9",
+ "iso-ir-100",
+ "iso-ir-101",
+ "iso-ir-109",
+ "iso-ir-110",
+ "iso-ir-126",
+ "iso-ir-127",
+ "iso-ir-138",
+ "iso-ir-144",
+ "iso-ir-148",
+ "iso-ir-149",
+ "iso-ir-157",
+ "iso-ir-58",
+ "iso8859-1",
+ "iso8859-10",
+ "iso8859-11",
+ "iso8859-13",
+ "iso8859-14",
+ "iso8859-15",
+ "iso8859-2",
+ "iso8859-3",
+ "iso8859-4",
+ "iso8859-5",
+ "iso8859-6",
+ "iso8859-7",
+ "iso8859-8",
+ "iso8859-9",
+ "iso88591",
+ "iso885910",
+ "iso885911",
+ "iso885913",
+ "iso885914",
+ "iso885915",
+ "iso88592",
+ "iso88593",
+ "iso88594",
+ "iso88595",
+ "iso88596",
+ "iso88597",
+ "iso88598",
+ "iso88599",
+ "iso_8859-1",
+ "iso_8859-15",
+ "iso_8859-1:1987",
+ "iso_8859-2",
+ "iso_8859-2:1987",
+ "iso_8859-3",
+ "iso_8859-3:1988",
+ "iso_8859-4",
+ "iso_8859-4:1988",
+ "iso_8859-5",
+ "iso_8859-5:1988",
+ "iso_8859-6",
+ "iso_8859-6:1987",
+ "iso_8859-7",
+ "iso_8859-7:1987",
+ "iso_8859-8",
+ "iso_8859-8:1988",
+ "iso_8859-9",
+ "iso_8859-9:1989",
+ "koi",
+ "koi8",
+ "koi8-r",
+ "koi8-ru",
+ "koi8-u",
+ "koi8_r",
+ "korean",
+ "ks_c_5601-1987",
+ "ks_c_5601-1989",
+ "ksc5601",
+ "ksc_5601",
+ "l1",
+ "l2",
+ "l3",
+ "l4",
+ "l5",
+ "l6",
+ "l9",
+ "latin1",
+ "latin2",
+ "latin3",
+ "latin4",
+ "latin5",
+ "latin6",
+ "logical",
+ "mac",
+ "macintosh",
+ "ms932",
+ "ms_kanji",
+ "shift-jis",
+ "shift_jis",
+ "sjis",
+ "sun_eu_greek",
+ "tis-620",
+ "unicode-1-1-utf-8",
+ "us-ascii",
+ "utf-16",
+ "utf-16be",
+ "utf-16le",
+ "utf-8",
+ "utf8",
+ "visual",
+ "windows-1250",
+ "windows-1251",
+ "windows-1252",
+ "windows-1253",
+ "windows-1254",
+ "windows-1255",
+ "windows-1256",
+ "windows-1257",
+ "windows-1258",
+ "windows-31j",
+ "windows-874",
+ "windows-949",
+ "x-cp1250",
+ "x-cp1251",
+ "x-cp1252",
+ "x-cp1253",
+ "x-cp1254",
+ "x-cp1255",
+ "x-cp1256",
+ "x-cp1257",
+ "x-cp1258",
+ "x-euc-jp",
+ "x-gbk",
+ "x-mac-cyrillic",
+ "x-mac-roman",
+ "x-mac-ukrainian",
+ "x-sjis",
+ "x-user-defined",
+ "x-x-big5",
+ };
+
+ private static final Encoding[] ENCODINGS_FOR_LABELS = {
+ Ibm866.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso6.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso6.INSTANCE,
+ Big5.INSTANCE,
+ Big5.INSTANCE,
+ Gbk.INSTANCE,
+ Big5.INSTANCE,
+ Windows1250.INSTANCE,
+ Windows1251.INSTANCE,
+ Windows1252.INSTANCE,
+ Windows1253.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1255.INSTANCE,
+ Windows1256.INSTANCE,
+ Windows1257.INSTANCE,
+ Windows1258.INSTANCE,
+ Windows1252.INSTANCE,
+ Ibm866.INSTANCE,
+ Big5.INSTANCE,
+ EucKr.INSTANCE,
+ EucJp.INSTANCE,
+ Gbk.INSTANCE,
+ Ibm866.INSTANCE,
+ Iso2022Jp.INSTANCE,
+ Replacement.INSTANCE,
+ Gbk.INSTANCE,
+ Iso6.INSTANCE,
+ Iso6.INSTANCE,
+ Iso8.INSTANCE,
+ Iso8I.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Windows1254.INSTANCE,
+ Iso10.INSTANCE,
+ Iso15.INSTANCE,
+ Iso6.INSTANCE,
+ Iso5.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Koi8R.INSTANCE,
+ EucKr.INSTANCE,
+ Macintosh.INSTANCE,
+ ShiftJis.INSTANCE,
+ Iso5.INSTANCE,
+ Windows874.INSTANCE,
+ Iso6.INSTANCE,
+ Iso7.INSTANCE,
+ Iso7.INSTANCE,
+ EucJp.INSTANCE,
+ EucKr.INSTANCE,
+ Gb18030.INSTANCE,
+ Gbk.INSTANCE,
+ Gbk.INSTANCE,
+ Gbk.INSTANCE,
+ Gbk.INSTANCE,
+ Iso7.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Replacement.INSTANCE,
+ Windows1252.INSTANCE,
+ Ibm866.INSTANCE,
+ Replacement.INSTANCE,
+ Replacement.INSTANCE,
+ Iso2022Jp.INSTANCE,
+ Replacement.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso10.INSTANCE,
+ Windows874.INSTANCE,
+ Iso13.INSTANCE,
+ Iso14.INSTANCE,
+ Iso15.INSTANCE,
+ Iso16.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Iso5.INSTANCE,
+ Iso6.INSTANCE,
+ Iso6.INSTANCE,
+ Iso6.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Iso8.INSTANCE,
+ Iso8I.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Iso7.INSTANCE,
+ Iso6.INSTANCE,
+ Iso8.INSTANCE,
+ Iso5.INSTANCE,
+ Windows1254.INSTANCE,
+ EucKr.INSTANCE,
+ Iso10.INSTANCE,
+ Gbk.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso10.INSTANCE,
+ Windows874.INSTANCE,
+ Iso13.INSTANCE,
+ Iso14.INSTANCE,
+ Iso15.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Iso5.INSTANCE,
+ Iso6.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso10.INSTANCE,
+ Windows874.INSTANCE,
+ Iso13.INSTANCE,
+ Iso14.INSTANCE,
+ Iso15.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Iso5.INSTANCE,
+ Iso6.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso15.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso2.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Iso4.INSTANCE,
+ Iso5.INSTANCE,
+ Iso5.INSTANCE,
+ Iso6.INSTANCE,
+ Iso6.INSTANCE,
+ Iso7.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Iso8.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1254.INSTANCE,
+ Koi8R.INSTANCE,
+ Koi8R.INSTANCE,
+ Koi8R.INSTANCE,
+ Koi8U.INSTANCE,
+ Koi8U.INSTANCE,
+ Koi8R.INSTANCE,
+ EucKr.INSTANCE,
+ EucKr.INSTANCE,
+ EucKr.INSTANCE,
+ EucKr.INSTANCE,
+ EucKr.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Windows1254.INSTANCE,
+ Iso10.INSTANCE,
+ Iso15.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Windows1254.INSTANCE,
+ Iso10.INSTANCE,
+ Iso8I.INSTANCE,
+ Macintosh.INSTANCE,
+ Macintosh.INSTANCE,
+ ShiftJis.INSTANCE,
+ ShiftJis.INSTANCE,
+ ShiftJis.INSTANCE,
+ ShiftJis.INSTANCE,
+ ShiftJis.INSTANCE,
+ Iso7.INSTANCE,
+ Windows874.INSTANCE,
+ Utf8.INSTANCE,
+ Windows1252.INSTANCE,
+ Utf16Le.INSTANCE,
+ Utf16Be.INSTANCE,
+ Utf16Le.INSTANCE,
+ Utf8.INSTANCE,
+ Utf8.INSTANCE,
+ Iso8.INSTANCE,
+ Windows1250.INSTANCE,
+ Windows1251.INSTANCE,
+ Windows1252.INSTANCE,
+ Windows1253.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1255.INSTANCE,
+ Windows1256.INSTANCE,
+ Windows1257.INSTANCE,
+ Windows1258.INSTANCE,
+ ShiftJis.INSTANCE,
+ Windows874.INSTANCE,
+ EucKr.INSTANCE,
+ Windows1250.INSTANCE,
+ Windows1251.INSTANCE,
+ Windows1252.INSTANCE,
+ Windows1253.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1255.INSTANCE,
+ Windows1256.INSTANCE,
+ Windows1257.INSTANCE,
+ Windows1258.INSTANCE,
+ EucJp.INSTANCE,
+ Gbk.INSTANCE,
+ MacCyrillic.INSTANCE,
+ Macintosh.INSTANCE,
+ MacCyrillic.INSTANCE,
+ ShiftJis.INSTANCE,
+ UserDefined.INSTANCE,
+ Big5.INSTANCE,
+ };
+
+ private static final Encoding[] ENCODINGS = {
+ Big5.INSTANCE,
+ EucJp.INSTANCE,
+ EucKr.INSTANCE,
+ Gb18030.INSTANCE,
+ Gbk.INSTANCE,
+ Ibm866.INSTANCE,
+ Iso2022Jp.INSTANCE,
+ Iso10.INSTANCE,
+ Iso13.INSTANCE,
+ Iso14.INSTANCE,
+ Iso15.INSTANCE,
+ Iso16.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Iso5.INSTANCE,
+ Iso6.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Iso8I.INSTANCE,
+ Koi8R.INSTANCE,
+ Koi8U.INSTANCE,
+ Macintosh.INSTANCE,
+ Replacement.INSTANCE,
+ ShiftJis.INSTANCE,
+ Utf16Be.INSTANCE,
+ Utf16Le.INSTANCE,
+ Utf8.INSTANCE,
+ Windows1250.INSTANCE,
+ Windows1251.INSTANCE,
+ Windows1252.INSTANCE,
+ Windows1253.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1255.INSTANCE,
+ Windows1256.INSTANCE,
+ Windows1257.INSTANCE,
+ Windows1258.INSTANCE,
+ Windows874.INSTANCE,
+ MacCyrillic.INSTANCE,
+ UserDefined.INSTANCE,
+ };
+
+ /**
+ * The big5 encoding.
+ */
+ public static final Encoding BIG5 = Big5.INSTANCE;
+
+ /**
+ * The euc-jp encoding.
+ */
+ public static final Encoding EUC_JP = EucJp.INSTANCE;
+
+ /**
+ * The euc-kr encoding.
+ */
+ public static final Encoding EUC_KR = EucKr.INSTANCE;
+
+ /**
+ * The gb18030 encoding.
+ */
+ public static final Encoding GB18030 = Gb18030.INSTANCE;
+
+ /**
+ * The gbk encoding.
+ */
+ public static final Encoding GBK = Gbk.INSTANCE;
+
+ /**
+ * The ibm866 encoding.
+ */
+ public static final Encoding IBM866 = Ibm866.INSTANCE;
+
+ /**
+ * The iso-2022-jp encoding.
+ */
+ public static final Encoding ISO_2022_JP = Iso2022Jp.INSTANCE;
+
+ /**
+ * The iso-8859-10 encoding.
+ */
+ public static final Encoding ISO_8859_10 = Iso10.INSTANCE;
+
+ /**
+ * The iso-8859-13 encoding.
+ */
+ public static final Encoding ISO_8859_13 = Iso13.INSTANCE;
+
+ /**
+ * The iso-8859-14 encoding.
+ */
+ public static final Encoding ISO_8859_14 = Iso14.INSTANCE;
+
+ /**
+ * The iso-8859-15 encoding.
+ */
+ public static final Encoding ISO_8859_15 = Iso15.INSTANCE;
+
+ /**
+ * The iso-8859-16 encoding.
+ */
+ public static final Encoding ISO_8859_16 = Iso16.INSTANCE;
+
+ /**
+ * The iso-8859-2 encoding.
+ */
+ public static final Encoding ISO_8859_2 = Iso2.INSTANCE;
+
+ /**
+ * The iso-8859-3 encoding.
+ */
+ public static final Encoding ISO_8859_3 = Iso3.INSTANCE;
+
+ /**
+ * The iso-8859-4 encoding.
+ */
+ public static final Encoding ISO_8859_4 = Iso4.INSTANCE;
+
+ /**
+ * The iso-8859-5 encoding.
+ */
+ public static final Encoding ISO_8859_5 = Iso5.INSTANCE;
+
+ /**
+ * The iso-8859-6 encoding.
+ */
+ public static final Encoding ISO_8859_6 = Iso6.INSTANCE;
+
+ /**
+ * The iso-8859-7 encoding.
+ */
+ public static final Encoding ISO_8859_7 = Iso7.INSTANCE;
+
+ /**
+ * The iso-8859-8 encoding.
+ */
+ public static final Encoding ISO_8859_8 = Iso8.INSTANCE;
+
+ /**
+ * The iso-8859-8-i encoding.
+ */
+ public static final Encoding ISO_8859_8_I = Iso8I.INSTANCE;
+
+ /**
+ * The koi8-r encoding.
+ */
+ public static final Encoding KOI8_R = Koi8R.INSTANCE;
+
+ /**
+ * The koi8-u encoding.
+ */
+ public static final Encoding KOI8_U = Koi8U.INSTANCE;
+
+ /**
+ * The macintosh encoding.
+ */
+ public static final Encoding MACINTOSH = Macintosh.INSTANCE;
+
+ /**
+ * The replacement encoding.
+ */
+ public static final Encoding REPLACEMENT = Replacement.INSTANCE;
+
+ /**
+ * The shift_jis encoding.
+ */
+ public static final Encoding SHIFT_JIS = ShiftJis.INSTANCE;
+
+ /**
+ * The utf-16be encoding.
+ */
+ public static final Encoding UTF_16BE = Utf16Be.INSTANCE;
+
+ /**
+ * The utf-16le encoding.
+ */
+ public static final Encoding UTF_16LE = Utf16Le.INSTANCE;
+
+ /**
+ * The utf-8 encoding.
+ */
+ public static final Encoding UTF_8 = Utf8.INSTANCE;
+
+ /**
+ * The windows-1250 encoding.
+ */
+ public static final Encoding WINDOWS_1250 = Windows1250.INSTANCE;
+
+ /**
+ * The windows-1251 encoding.
+ */
+ public static final Encoding WINDOWS_1251 = Windows1251.INSTANCE;
+
+ /**
+ * The windows-1252 encoding.
+ */
+ public static final Encoding WINDOWS_1252 = Windows1252.INSTANCE;
+
+ /**
+ * The windows-1253 encoding.
+ */
+ public static final Encoding WINDOWS_1253 = Windows1253.INSTANCE;
+
+ /**
+ * The windows-1254 encoding.
+ */
+ public static final Encoding WINDOWS_1254 = Windows1254.INSTANCE;
+
+ /**
+ * The windows-1255 encoding.
+ */
+ public static final Encoding WINDOWS_1255 = Windows1255.INSTANCE;
+
+ /**
+ * The windows-1256 encoding.
+ */
+ public static final Encoding WINDOWS_1256 = Windows1256.INSTANCE;
+
+ /**
+ * The windows-1257 encoding.
+ */
+ public static final Encoding WINDOWS_1257 = Windows1257.INSTANCE;
+
+ /**
+ * The windows-1258 encoding.
+ */
+ public static final Encoding WINDOWS_1258 = Windows1258.INSTANCE;
+
+ /**
+ * The windows-874 encoding.
+ */
+ public static final Encoding WINDOWS_874 = Windows874.INSTANCE;
+
+ /**
+ * The x-mac-cyrillic encoding.
+ */
+ public static final Encoding X_MAC_CYRILLIC = MacCyrillic.INSTANCE;
+
+ /**
+ * The x-user-defined encoding.
+ */
+ public static final Encoding X_USER_DEFINED = UserDefined.INSTANCE;
+
+
+private static SortedMap<String, Charset> encodings = null;
+
+    /**
+     * Passes the name and aliases straight to the {@link Charset}
+     * constructor.
+     *
+     * @param canonicalName the canonical name handed to <code>Charset</code>
+     * @param aliases the aliases handed to <code>Charset</code>
+     */
+    protected Encoding(String canonicalName, String[] aliases) {
+        super(canonicalName, aliases);
+    }
+
+    /**
+     * Scanner position used by {@link #forName(String)} while it walks a
+     * label character by character.
+     */
+    private enum State {
+        /** No label character has been seen yet. */
+        HEAD,
+        /** Inside the label proper. */
+        LABEL,
+        /** Whitespace has been seen after the label. */
+        TAIL
+    }
+
+    /**
+     * Looks up the encoding for a label. The label is first tried verbatim.
+     * If that fails, surrounding ASCII whitespace (space, LF, CR, tab, form
+     * feed) is dropped, ASCII upper-case letters are lower-cased and the
+     * result is looked up again.
+     *
+     * <p>Both lookups binary-search <code>LABELS</code> and use the hit
+     * index into <code>ENCODINGS_FOR_LABELS</code>, so the two arrays must
+     * stay sorted and index-aligned.
+     *
+     * @param label the label to resolve
+     * @return the encoding the label maps to
+     * @throws IllegalArgumentException if <code>label</code> is null
+     * @throws IllegalCharsetNameException if the label is empty, starts
+     *         with one of <code>- + . : _</code>, continues after inner
+     *         whitespace, or holds any other character outside ASCII
+     *         letters, digits and that punctuation
+     * @throws UnsupportedCharsetException if the normalised label is not
+     *         in the table
+     */
+    public static Encoding forName(String label) {
+        if (label == null) {
+            throw new IllegalArgumentException("Label must not be null.");
+        }
+        if (label.isEmpty()) {
+            throw new IllegalCharsetNameException(label);
+        }
+        // Fast path: the label is already in normalised form.
+        int exact = Arrays.binarySearch(LABELS, label);
+        if (exact >= 0) {
+            return ENCODINGS_FOR_LABELS[exact];
+        }
+        // Slow path: trim, lower-case and validate in one pass.
+        StringBuilder normalized = new StringBuilder();
+        State state = State.HEAD;
+        final int length = label.length();
+        for (int i = 0; i < length; i++) {
+            final char c = label.charAt(i);
+            boolean whitespace = c == ' ' || c == '\n' || c == '\r'
+                    || c == '\t' || c == '\u000C';
+            if (whitespace) {
+                // Whitespace after label characters ends the label.
+                if (state == State.LABEL) {
+                    state = State.TAIL;
+                }
+                continue;
+            }
+            boolean upper = c >= 'A' && c <= 'Z';
+            boolean lowerOrDigit = (c >= 'a' && c <= 'z')
+                    || (c >= '0' && c <= '9');
+            if (upper || lowerOrDigit) {
+                if (state == State.TAIL) {
+                    // Label characters may not resume after whitespace.
+                    throw new IllegalCharsetNameException(label);
+                }
+                state = State.LABEL;
+                normalized.append(upper ? (char) (c + 0x20) : c);
+                continue;
+            }
+            boolean punctuation = c == '-' || c == '+' || c == '.'
+                    || c == ':' || c == '_';
+            if (punctuation && state == State.LABEL) {
+                normalized.append(c);
+                continue;
+            }
+            // Leading or trailing punctuation, or a character that is not
+            // allowed at all.
+            throw new IllegalCharsetNameException(label);
+        }
+        int found = Arrays.binarySearch(LABELS, normalized.toString());
+        if (found < 0) {
+            throw new UnsupportedCharsetException(label);
+        }
+        return ENCODINGS_FOR_LABELS[found];
+    }
+
+    /**
+     * Same as {@link #forName(String)}, except that a label resolving to
+     * the replacement encoding is treated as unsupported.
+     *
+     * @param label the label to resolve
+     * @return the encoding the label maps to, never {@link #REPLACEMENT}
+     * @throws UnsupportedCharsetException if the label is unknown or maps
+     *         to the replacement encoding
+     */
+    public static Encoding forNameNoReplacement(String label) {
+        Encoding resolved = Encoding.forName(label);
+        if (resolved != Encoding.REPLACEMENT) {
+            return resolved;
+        }
+        throw new UnsupportedCharsetException(label);
+    }
+
+    /**
+     * Tells whether {@link #forName(String)} resolves the label. Only
+     * <code>UnsupportedCharsetException</code> is turned into
+     * <code>false</code>; the other exceptions thrown by
+     * <code>forName()</code> propagate to the caller.
+     *
+     * @param label the label to test
+     * @return <code>true</code> if the label maps to an encoding
+     */
+    public static boolean isSupported(String label) {
+        try {
+            Encoding.forName(label);
+            return true;
+        } catch (UnsupportedCharsetException e) {
+            return false;
+        }
+    }
+
+    /**
+     * Tells whether {@link #forNameNoReplacement(String)} resolves the
+     * label. Only <code>UnsupportedCharsetException</code> is turned into
+     * <code>false</code>; other exceptions propagate to the caller.
+     *
+     * @param label the label to test
+     * @return <code>true</code> if the label maps to an encoding other
+     *         than the replacement encoding
+     */
+    public static boolean isSupportedNoReplacement(String label) {
+        try {
+            Encoding.forNameNoReplacement(label);
+            return true;
+        } catch (UnsupportedCharsetException e) {
+            return false;
+        }
+    }
+
+    /**
+     * Returns an unmodifiable map from encoding name to encoding covering
+     * every entry of <code>ENCODINGS</code>. The map is built on the first
+     * call and stored in <code>encodings</code> for later calls; no
+     * synchronisation is used.
+     *
+     * @return the name-to-encoding map
+     */
+    public static SortedMap<String, Charset> availableCharsets() {
+        SortedMap<String, Charset> cached = encodings;
+        if (cached != null) {
+            return cached;
+        }
+        TreeMap<String, Charset> byName = new TreeMap<String, Charset>();
+        for (int i = 0; i < ENCODINGS.length; i++) {
+            Encoding encoding = ENCODINGS[i];
+            byName.put(encoding.name(), encoding);
+        }
+        cached = Collections.unmodifiableSortedMap(byName);
+        encodings = cached;
+        return cached;
+    }
+
+    /**
+     * Returns the encoding this class treats as the default.
+     *
+     * @return always {@link #WINDOWS_1252}
+     */
+    public static Encoding defaultCharset() {
+        return WINDOWS_1252;
+    }
+
+    /**
+     * Reports that encoding is not supported at this level of the class
+     * hierarchy.
+     *
+     * @return always <code>false</code>
+     */
+    @Override public boolean canEncode() {
+        return false;
+    }
+
+    /**
+     * Never claims to contain another charset.
+     *
+     * @param cs the charset to test; not inspected
+     * @return always <code>false</code>
+     */
+    @Override public boolean contains(Charset cs) {
+        return false;
+    }
+
+    /**
+     * Default implementation for encodings that provide no encoder.
+     *
+     * @return never returns normally
+     * @throws UnsupportedOperationException always
+     */
+    @Override public CharsetEncoder newEncoder() {
+        throw new UnsupportedOperationException("Encoder not implemented.");
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/EucJp.java b/parser/html/java/htmlparser/src/nu/validator/encoding/EucJp.java
new file mode 100644
index 000000000..05fbef810
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/EucJp.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+/**
+ * The euc-jp encoding. Decoders and encoders come from the charset that
+ * {@link Charset#forName(String)} returns for the same name.
+ */
+class EucJp extends Encoding {
+
+    private static final String NAME = "euc-jp";
+
+    private static final String[] LABELS = {
+        "cseucpkdfmtjapanese",
+        "euc-jp",
+        "x-euc-jp"
+    };
+
+    // Declared after LABELS because the constructor reads that array.
+    static final EucJp INSTANCE = new EucJp();
+
+    private EucJp() {
+        super(NAME, LABELS);
+    }
+
+    /** Looks up the charset that Charset.forName() returns for NAME. */
+    private static Charset delegate() {
+        // Qualified on purpose: an unqualified forName() would resolve to
+        // Encoding.forName().
+        return Charset.forName(NAME);
+    }
+
+    @Override public CharsetDecoder newDecoder() {
+        return delegate().newDecoder();
+    }
+
+    @Override public CharsetEncoder newEncoder() {
+        return delegate().newEncoder();
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/EucKr.java b/parser/html/java/htmlparser/src/nu/validator/encoding/EucKr.java
new file mode 100644
index 000000000..a3923e224
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/EucKr.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+/**
+ * The euc-kr encoding. Decoders and encoders come from the charset that
+ * {@link Charset#forName(String)} returns for the same name.
+ */
+class EucKr extends Encoding {
+
+    private static final String NAME = "euc-kr";
+
+    private static final String[] LABELS = {
+        "cseuckr",
+        "csksc56011987",
+        "euc-kr",
+        "iso-ir-149",
+        "korean",
+        "ks_c_5601-1987",
+        "ks_c_5601-1989",
+        "ksc5601",
+        "ksc_5601",
+        "windows-949"
+    };
+
+    // Declared after LABELS because the constructor reads that array.
+    static final EucKr INSTANCE = new EucKr();
+
+    private EucKr() {
+        super(NAME, LABELS);
+    }
+
+    /** Looks up the charset that Charset.forName() returns for NAME. */
+    private static Charset delegate() {
+        // Qualified on purpose: an unqualified forName() would resolve to
+        // Encoding.forName().
+        return Charset.forName(NAME);
+    }
+
+    @Override public CharsetDecoder newDecoder() {
+        return delegate().newDecoder();
+    }
+
+    @Override public CharsetEncoder newEncoder() {
+        return delegate().newEncoder();
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/FallibleSingleByteDecoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/FallibleSingleByteDecoder.java
new file mode 100644
index 000000000..34a1f36b5
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/FallibleSingleByteDecoder.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CoderResult;
+
+public final class FallibleSingleByteDecoder extends InfallibleSingleByteDecoder {
+
+    /** Forwards the encoding and its upper-half table to the superclass. */
+    public FallibleSingleByteDecoder(Encoding cs, char[] upperHalf) {
+        super(cs, upperHalf);
+    }
+
+    /**
+     * Decodes one byte per char. While {@code report} is set, a high byte whose
+     * table entry is U+FFFD ends the loop with a malformed-input result of
+     * length 1 and is left unconsumed; otherwise the superclass loop runs.
+     */
+    @Override
+    protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+        if (!this.report) {
+            return super.decodeLoop(in, out);
+        }
+        while (in.hasRemaining()) {
+            if (!out.hasRemaining()) {
+                return CoderResult.OVERFLOW;
+            }
+            final byte raw = in.get();
+            // Bytes 0x80..0xFF read as -128..-1, so raw + 128 indexes the table.
+            final char decoded = raw >= 0 ? (char) raw : this.upperHalf[raw + 128];
+            if (raw < 0 && decoded == '\uFFFD') {
+                in.position(in.position() - 1); // un-read the offending byte
+                return CoderResult.malformedForLength(1);
+            }
+            out.put(decoded);
+        }
+        return CoderResult.UNDERFLOW;
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Gb18030.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Gb18030.java
new file mode 100644
index 000000000..fcb090dde
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Gb18030.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+class Gb18030 extends Encoding {
+    private static final String NAME = "gb18030";
+    private static final String[] LABELS = { "gb18030" };
+    // Created after LABELS so the constructor sees the initialized array.
+    static final Gb18030 INSTANCE = new Gb18030();
+
+    private Gb18030() {
+        super(NAME, LABELS);
+    }
+
+    /** Returns a new decoder from the JDK charset registered as NAME. */
+    @Override public CharsetDecoder newDecoder() {
+        final Charset jdkCharset = Charset.forName(NAME);
+        return jdkCharset.newDecoder();
+    }
+
+    /** Returns a new encoder from the JDK charset registered as NAME. */
+    @Override public CharsetEncoder newEncoder() {
+        final Charset jdkCharset = Charset.forName(NAME);
+        return jdkCharset.newEncoder();
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Gbk.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Gbk.java
new file mode 100644
index 000000000..2dc3694ed
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Gbk.java
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+class Gbk extends Encoding {
+    private static final String NAME = "gbk";
+    private static final String[] LABELS = {
+        "chinese", "csgb2312", "csiso58gb231280", "gb2312", "gb_2312",
+        "gb_2312-80", "gbk", "iso-ir-58", "x-gbk"
+    };
+
+    // Created after LABELS so the constructor sees the initialized array.
+    static final Gbk INSTANCE = new Gbk();
+
+    private Gbk() {
+        super(NAME, LABELS);
+    }
+
+    /**
+     * Returns a new decoder. The lookup names "gb18030" rather than NAME, so
+     * gbk input is decoded by the JDK's gb18030 decoder. NOTE(review):
+     * presumably mirrors the Encoding Standard's shared decoder; confirm.
+     */
+    @Override
+    public CharsetDecoder newDecoder() {
+        final Charset decodingCharset = Charset.forName("gb18030");
+        return decodingCharset.newDecoder();
+    }
+
+    /** Returns a new encoder from the JDK charset registered as NAME ("gbk"). */
+    @Override public CharsetEncoder newEncoder() {
+        return Charset.forName(NAME).newEncoder();
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Ibm866.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Ibm866.java
new file mode 100644
index 000000000..037e62835
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Ibm866.java
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Ibm866 extends Encoding {
+ // Singleton Encoding for "ibm866"; TABLE[i] is the character decoded from byte 0x80 + i.
+ private static final char[] TABLE = {
+ '\u0410',
+ '\u0411',
+ '\u0412',
+ '\u0413',
+ '\u0414',
+ '\u0415',
+ '\u0416',
+ '\u0417',
+ '\u0418',
+ '\u0419',
+ '\u041a',
+ '\u041b',
+ '\u041c',
+ '\u041d',
+ '\u041e',
+ '\u041f',
+ '\u0420',
+ '\u0421',
+ '\u0422',
+ '\u0423',
+ '\u0424',
+ '\u0425',
+ '\u0426',
+ '\u0427',
+ '\u0428',
+ '\u0429',
+ '\u042a',
+ '\u042b',
+ '\u042c',
+ '\u042d',
+ '\u042e',
+ '\u042f',
+ '\u0430',
+ '\u0431',
+ '\u0432',
+ '\u0433',
+ '\u0434',
+ '\u0435',
+ '\u0436',
+ '\u0437',
+ '\u0438',
+ '\u0439',
+ '\u043a',
+ '\u043b',
+ '\u043c',
+ '\u043d',
+ '\u043e',
+ '\u043f',
+ '\u2591',
+ '\u2592',
+ '\u2593',
+ '\u2502',
+ '\u2524',
+ '\u2561',
+ '\u2562',
+ '\u2556',
+ '\u2555',
+ '\u2563',
+ '\u2551',
+ '\u2557',
+ '\u255d',
+ '\u255c',
+ '\u255b',
+ '\u2510',
+ '\u2514',
+ '\u2534',
+ '\u252c',
+ '\u251c',
+ '\u2500',
+ '\u253c',
+ '\u255e',
+ '\u255f',
+ '\u255a',
+ '\u2554',
+ '\u2569',
+ '\u2566',
+ '\u2560',
+ '\u2550',
+ '\u256c',
+ '\u2567',
+ '\u2568',
+ '\u2564',
+ '\u2565',
+ '\u2559',
+ '\u2558',
+ '\u2552',
+ '\u2553',
+ '\u256b',
+ '\u256a',
+ '\u2518',
+ '\u250c',
+ '\u2588',
+ '\u2584',
+ '\u258c',
+ '\u2590',
+ '\u2580',
+ '\u0440',
+ '\u0441',
+ '\u0442',
+ '\u0443',
+ '\u0444',
+ '\u0445',
+ '\u0446',
+ '\u0447',
+ '\u0448',
+ '\u0449',
+ '\u044a',
+ '\u044b',
+ '\u044c',
+ '\u044d',
+ '\u044e',
+ '\u044f',
+ '\u0401',
+ '\u0451',
+ '\u0404',
+ '\u0454',
+ '\u0407',
+ '\u0457',
+ '\u040e',
+ '\u045e',
+ '\u00b0',
+ '\u2219',
+ '\u00b7',
+ '\u221a',
+ '\u2116',
+ '\u00a4',
+ '\u25a0',
+ '\u00a0'
+ };
+
+ private static final String[] LABELS = {
+ "866",
+ "cp866",
+ "csibm866",
+ "ibm866"
+ };
+
+ private static final String NAME = "ibm866";
+
+ static final Encoding INSTANCE = new Ibm866();
+
+ private Ibm866() {
+ super(NAME, LABELS);
+ }
+ // Uses the infallible decoder: every byte >= 0x80 is mapped through TABLE, none is reported as malformed.
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/InfallibleSingleByteDecoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/InfallibleSingleByteDecoder.java
new file mode 100644
index 000000000..7cc63072c
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/InfallibleSingleByteDecoder.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CoderResult;
+
+public class InfallibleSingleByteDecoder extends Decoder {
+
+    /** Characters for bytes 0x80..0xFF; entry i belongs to byte 0x80 + i. */
+    protected final char[] upperHalf;
+
+    /** Stores the table; passes 1.0f twice to the superclass (presumably chars-per-byte ratios). */
+    protected InfallibleSingleByteDecoder(Encoding cs, char[] upperHalf) {
+        super(cs, 1.0f, 1.0f);
+        this.upperHalf = upperHalf;
+    }
+
+    /**
+     * Copies bytes 0x00..0x7F through unchanged and maps the rest via
+     * {@code upperHalf}; only ever returns UNDERFLOW or OVERFLOW.
+     */
+    @Override
+    protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+        // TODO figure out if it's worthwhile to optimize the case where both
+        // buffers are array-backed.
+        while (in.hasRemaining()) {
+            if (!out.hasRemaining()) {
+                return CoderResult.OVERFLOW;
+            }
+            final byte raw = in.get();
+            out.put(raw >= 0 ? (char) raw : this.upperHalf[raw + 128]);
+        }
+        return CoderResult.UNDERFLOW;
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso10.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso10.java
new file mode 100644
index 000000000..895cb5eed
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso10.java
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso10 extends Encoding {
+ // Singleton Encoding for "iso-8859-10"; TABLE[i] is the character decoded from byte 0x80 + i.
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u0104',
+ '\u0112',
+ '\u0122',
+ '\u012a',
+ '\u0128',
+ '\u0136',
+ '\u00a7',
+ '\u013b',
+ '\u0110',
+ '\u0160',
+ '\u0166',
+ '\u017d',
+ '\u00ad',
+ '\u016a',
+ '\u014a',
+ '\u00b0',
+ '\u0105',
+ '\u0113',
+ '\u0123',
+ '\u012b',
+ '\u0129',
+ '\u0137',
+ '\u00b7',
+ '\u013c',
+ '\u0111',
+ '\u0161',
+ '\u0167',
+ '\u017e',
+ '\u2015',
+ '\u016b',
+ '\u014b',
+ '\u0100',
+ '\u00c1',
+ '\u00c2',
+ '\u00c3',
+ '\u00c4',
+ '\u00c5',
+ '\u00c6',
+ '\u012e',
+ '\u010c',
+ '\u00c9',
+ '\u0118',
+ '\u00cb',
+ '\u0116',
+ '\u00cd',
+ '\u00ce',
+ '\u00cf',
+ '\u00d0',
+ '\u0145',
+ '\u014c',
+ '\u00d3',
+ '\u00d4',
+ '\u00d5',
+ '\u00d6',
+ '\u0168',
+ '\u00d8',
+ '\u0172',
+ '\u00da',
+ '\u00db',
+ '\u00dc',
+ '\u00dd',
+ '\u00de',
+ '\u00df',
+ '\u0101',
+ '\u00e1',
+ '\u00e2',
+ '\u00e3',
+ '\u00e4',
+ '\u00e5',
+ '\u00e6',
+ '\u012f',
+ '\u010d',
+ '\u00e9',
+ '\u0119',
+ '\u00eb',
+ '\u0117',
+ '\u00ed',
+ '\u00ee',
+ '\u00ef',
+ '\u00f0',
+ '\u0146',
+ '\u014d',
+ '\u00f3',
+ '\u00f4',
+ '\u00f5',
+ '\u00f6',
+ '\u0169',
+ '\u00f8',
+ '\u0173',
+ '\u00fa',
+ '\u00fb',
+ '\u00fc',
+ '\u00fd',
+ '\u00fe',
+ '\u0138'
+ };
+
+ private static final String[] LABELS = {
+ "csisolatin6",
+ "iso-8859-10",
+ "iso-ir-157",
+ "iso8859-10",
+ "iso885910",
+ "l6",
+ "latin6"
+ };
+
+ private static final String NAME = "iso-8859-10";
+
+ static final Encoding INSTANCE = new Iso10();
+
+ private Iso10() {
+ super(NAME, LABELS);
+ }
+ // Uses the infallible decoder: every byte >= 0x80 is mapped through TABLE, none is reported as malformed.
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso13.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso13.java
new file mode 100644
index 000000000..60e6f5339
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso13.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso13 extends Encoding {
+ // Singleton Encoding for "iso-8859-13"; TABLE[i] is the character decoded from byte 0x80 + i.
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u201d',
+ '\u00a2',
+ '\u00a3',
+ '\u00a4',
+ '\u201e',
+ '\u00a6',
+ '\u00a7',
+ '\u00d8',
+ '\u00a9',
+ '\u0156',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\u00ae',
+ '\u00c6',
+ '\u00b0',
+ '\u00b1',
+ '\u00b2',
+ '\u00b3',
+ '\u201c',
+ '\u00b5',
+ '\u00b6',
+ '\u00b7',
+ '\u00f8',
+ '\u00b9',
+ '\u0157',
+ '\u00bb',
+ '\u00bc',
+ '\u00bd',
+ '\u00be',
+ '\u00e6',
+ '\u0104',
+ '\u012e',
+ '\u0100',
+ '\u0106',
+ '\u00c4',
+ '\u00c5',
+ '\u0118',
+ '\u0112',
+ '\u010c',
+ '\u00c9',
+ '\u0179',
+ '\u0116',
+ '\u0122',
+ '\u0136',
+ '\u012a',
+ '\u013b',
+ '\u0160',
+ '\u0143',
+ '\u0145',
+ '\u00d3',
+ '\u014c',
+ '\u00d5',
+ '\u00d6',
+ '\u00d7',
+ '\u0172',
+ '\u0141',
+ '\u015a',
+ '\u016a',
+ '\u00dc',
+ '\u017b',
+ '\u017d',
+ '\u00df',
+ '\u0105',
+ '\u012f',
+ '\u0101',
+ '\u0107',
+ '\u00e4',
+ '\u00e5',
+ '\u0119',
+ '\u0113',
+ '\u010d',
+ '\u00e9',
+ '\u017a',
+ '\u0117',
+ '\u0123',
+ '\u0137',
+ '\u012b',
+ '\u013c',
+ '\u0161',
+ '\u0144',
+ '\u0146',
+ '\u00f3',
+ '\u014d',
+ '\u00f5',
+ '\u00f6',
+ '\u00f7',
+ '\u0173',
+ '\u0142',
+ '\u015b',
+ '\u016b',
+ '\u00fc',
+ '\u017c',
+ '\u017e',
+ '\u2019'
+ };
+
+ private static final String[] LABELS = {
+ "iso-8859-13",
+ "iso8859-13",
+ "iso885913"
+ };
+
+ private static final String NAME = "iso-8859-13";
+
+ static final Encoding INSTANCE = new Iso13();
+
+ private Iso13() {
+ super(NAME, LABELS);
+ }
+ // Uses the infallible decoder: every byte >= 0x80 is mapped through TABLE, none is reported as malformed.
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso14.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso14.java
new file mode 100644
index 000000000..d4a180e6e
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso14.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso14 extends Encoding {
+ // Singleton Encoding for "iso-8859-14"; TABLE[i] is the character decoded from byte 0x80 + i.
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u1e02',
+ '\u1e03',
+ '\u00a3',
+ '\u010a',
+ '\u010b',
+ '\u1e0a',
+ '\u00a7',
+ '\u1e80',
+ '\u00a9',
+ '\u1e82',
+ '\u1e0b',
+ '\u1ef2',
+ '\u00ad',
+ '\u00ae',
+ '\u0178',
+ '\u1e1e',
+ '\u1e1f',
+ '\u0120',
+ '\u0121',
+ '\u1e40',
+ '\u1e41',
+ '\u00b6',
+ '\u1e56',
+ '\u1e81',
+ '\u1e57',
+ '\u1e83',
+ '\u1e60',
+ '\u1ef3',
+ '\u1e84',
+ '\u1e85',
+ '\u1e61',
+ '\u00c0',
+ '\u00c1',
+ '\u00c2',
+ '\u00c3',
+ '\u00c4',
+ '\u00c5',
+ '\u00c6',
+ '\u00c7',
+ '\u00c8',
+ '\u00c9',
+ '\u00ca',
+ '\u00cb',
+ '\u00cc',
+ '\u00cd',
+ '\u00ce',
+ '\u00cf',
+ '\u0174',
+ '\u00d1',
+ '\u00d2',
+ '\u00d3',
+ '\u00d4',
+ '\u00d5',
+ '\u00d6',
+ '\u1e6a',
+ '\u00d8',
+ '\u00d9',
+ '\u00da',
+ '\u00db',
+ '\u00dc',
+ '\u00dd',
+ '\u0176',
+ '\u00df',
+ '\u00e0',
+ '\u00e1',
+ '\u00e2',
+ '\u00e3',
+ '\u00e4',
+ '\u00e5',
+ '\u00e6',
+ '\u00e7',
+ '\u00e8',
+ '\u00e9',
+ '\u00ea',
+ '\u00eb',
+ '\u00ec',
+ '\u00ed',
+ '\u00ee',
+ '\u00ef',
+ '\u0175',
+ '\u00f1',
+ '\u00f2',
+ '\u00f3',
+ '\u00f4',
+ '\u00f5',
+ '\u00f6',
+ '\u1e6b',
+ '\u00f8',
+ '\u00f9',
+ '\u00fa',
+ '\u00fb',
+ '\u00fc',
+ '\u00fd',
+ '\u0177',
+ '\u00ff'
+ };
+
+ private static final String[] LABELS = {
+ "iso-8859-14",
+ "iso8859-14",
+ "iso885914"
+ };
+
+ private static final String NAME = "iso-8859-14";
+
+ static final Encoding INSTANCE = new Iso14();
+
+ private Iso14() {
+ super(NAME, LABELS);
+ }
+ // Uses the infallible decoder: every byte >= 0x80 is mapped through TABLE, none is reported as malformed.
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso15.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso15.java
new file mode 100644
index 000000000..a60e4b6ef
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso15.java
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso15 extends Encoding {
+ // Singleton Encoding for "iso-8859-15"; TABLE[i] is the character decoded from byte 0x80 + i.
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u00a1',
+ '\u00a2',
+ '\u00a3',
+ '\u20ac',
+ '\u00a5',
+ '\u0160',
+ '\u00a7',
+ '\u0161',
+ '\u00a9',
+ '\u00aa',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\u00ae',
+ '\u00af',
+ '\u00b0',
+ '\u00b1',
+ '\u00b2',
+ '\u00b3',
+ '\u017d',
+ '\u00b5',
+ '\u00b6',
+ '\u00b7',
+ '\u017e',
+ '\u00b9',
+ '\u00ba',
+ '\u00bb',
+ '\u0152',
+ '\u0153',
+ '\u0178',
+ '\u00bf',
+ '\u00c0',
+ '\u00c1',
+ '\u00c2',
+ '\u00c3',
+ '\u00c4',
+ '\u00c5',
+ '\u00c6',
+ '\u00c7',
+ '\u00c8',
+ '\u00c9',
+ '\u00ca',
+ '\u00cb',
+ '\u00cc',
+ '\u00cd',
+ '\u00ce',
+ '\u00cf',
+ '\u00d0',
+ '\u00d1',
+ '\u00d2',
+ '\u00d3',
+ '\u00d4',
+ '\u00d5',
+ '\u00d6',
+ '\u00d7',
+ '\u00d8',
+ '\u00d9',
+ '\u00da',
+ '\u00db',
+ '\u00dc',
+ '\u00dd',
+ '\u00de',
+ '\u00df',
+ '\u00e0',
+ '\u00e1',
+ '\u00e2',
+ '\u00e3',
+ '\u00e4',
+ '\u00e5',
+ '\u00e6',
+ '\u00e7',
+ '\u00e8',
+ '\u00e9',
+ '\u00ea',
+ '\u00eb',
+ '\u00ec',
+ '\u00ed',
+ '\u00ee',
+ '\u00ef',
+ '\u00f0',
+ '\u00f1',
+ '\u00f2',
+ '\u00f3',
+ '\u00f4',
+ '\u00f5',
+ '\u00f6',
+ '\u00f7',
+ '\u00f8',
+ '\u00f9',
+ '\u00fa',
+ '\u00fb',
+ '\u00fc',
+ '\u00fd',
+ '\u00fe',
+ '\u00ff'
+ };
+
+ private static final String[] LABELS = {
+ "csisolatin9",
+ "iso-8859-15",
+ "iso8859-15",
+ "iso885915",
+ "iso_8859-15",
+ "l9"
+ };
+
+ private static final String NAME = "iso-8859-15";
+
+ static final Encoding INSTANCE = new Iso15();
+
+ private Iso15() {
+ super(NAME, LABELS);
+ }
+ // Uses the infallible decoder: every byte >= 0x80 is mapped through TABLE, none is reported as malformed.
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso16.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso16.java
new file mode 100644
index 000000000..5eb1926db
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso16.java
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso16 extends Encoding {
+ // Singleton Encoding for "iso-8859-16"; TABLE[i] is the character decoded from byte 0x80 + i.
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u0104',
+ '\u0105',
+ '\u0141',
+ '\u20ac',
+ '\u201e',
+ '\u0160',
+ '\u00a7',
+ '\u0161',
+ '\u00a9',
+ '\u0218',
+ '\u00ab',
+ '\u0179',
+ '\u00ad',
+ '\u017a',
+ '\u017b',
+ '\u00b0',
+ '\u00b1',
+ '\u010c',
+ '\u0142',
+ '\u017d',
+ '\u201d',
+ '\u00b6',
+ '\u00b7',
+ '\u017e',
+ '\u010d',
+ '\u0219',
+ '\u00bb',
+ '\u0152',
+ '\u0153',
+ '\u0178',
+ '\u017c',
+ '\u00c0',
+ '\u00c1',
+ '\u00c2',
+ '\u0102',
+ '\u00c4',
+ '\u0106',
+ '\u00c6',
+ '\u00c7',
+ '\u00c8',
+ '\u00c9',
+ '\u00ca',
+ '\u00cb',
+ '\u00cc',
+ '\u00cd',
+ '\u00ce',
+ '\u00cf',
+ '\u0110',
+ '\u0143',
+ '\u00d2',
+ '\u00d3',
+ '\u00d4',
+ '\u0150',
+ '\u00d6',
+ '\u015a',
+ '\u0170',
+ '\u00d9',
+ '\u00da',
+ '\u00db',
+ '\u00dc',
+ '\u0118',
+ '\u021a',
+ '\u00df',
+ '\u00e0',
+ '\u00e1',
+ '\u00e2',
+ '\u0103',
+ '\u00e4',
+ '\u0107',
+ '\u00e6',
+ '\u00e7',
+ '\u00e8',
+ '\u00e9',
+ '\u00ea',
+ '\u00eb',
+ '\u00ec',
+ '\u00ed',
+ '\u00ee',
+ '\u00ef',
+ '\u0111',
+ '\u0144',
+ '\u00f2',
+ '\u00f3',
+ '\u00f4',
+ '\u0151',
+ '\u00f6',
+ '\u015b',
+ '\u0171',
+ '\u00f9',
+ '\u00fa',
+ '\u00fb',
+ '\u00fc',
+ '\u0119',
+ '\u021b',
+ '\u00ff'
+ };
+
+ private static final String[] LABELS = {
+ "iso-8859-16"
+ };
+
+ private static final String NAME = "iso-8859-16";
+
+ static final Encoding INSTANCE = new Iso16();
+
+ private Iso16() {
+ super(NAME, LABELS);
+ }
+ // Uses the infallible decoder: every byte >= 0x80 is mapped through TABLE, none is reported as malformed.
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2.java
new file mode 100644
index 000000000..7a5f6322a
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2.java
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso2 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u0104',
+ '\u02d8',
+ '\u0141',
+ '\u00a4',
+ '\u013d',
+ '\u015a',
+ '\u00a7',
+ '\u00a8',
+ '\u0160',
+ '\u015e',
+ '\u0164',
+ '\u0179',
+ '\u00ad',
+ '\u017d',
+ '\u017b',
+ '\u00b0',
+ '\u0105',
+ '\u02db',
+ '\u0142',
+ '\u00b4',
+ '\u013e',
+ '\u015b',
+ '\u02c7',
+ '\u00b8',
+ '\u0161',
+ '\u015f',
+ '\u0165',
+ '\u017a',
+ '\u02dd',
+ '\u017e',
+ '\u017c',
+ '\u0154',
+ '\u00c1',
+ '\u00c2',
+ '\u0102',
+ '\u00c4',
+ '\u0139',
+ '\u0106',
+ '\u00c7',
+ '\u010c',
+ '\u00c9',
+ '\u0118',
+ '\u00cb',
+ '\u011a',
+ '\u00cd',
+ '\u00ce',
+ '\u010e',
+ '\u0110',
+ '\u0143',
+ '\u0147',
+ '\u00d3',
+ '\u00d4',
+ '\u0150',
+ '\u00d6',
+ '\u00d7',
+ '\u0158',
+ '\u016e',
+ '\u00da',
+ '\u0170',
+ '\u00dc',
+ '\u00dd',
+ '\u0162',
+ '\u00df',
+ '\u0155',
+ '\u00e1',
+ '\u00e2',
+ '\u0103',
+ '\u00e4',
+ '\u013a',
+ '\u0107',
+ '\u00e7',
+ '\u010d',
+ '\u00e9',
+ '\u0119',
+ '\u00eb',
+ '\u011b',
+ '\u00ed',
+ '\u00ee',
+ '\u010f',
+ '\u0111',
+ '\u0144',
+ '\u0148',
+ '\u00f3',
+ '\u00f4',
+ '\u0151',
+ '\u00f6',
+ '\u00f7',
+ '\u0159',
+ '\u016f',
+ '\u00fa',
+ '\u0171',
+ '\u00fc',
+ '\u00fd',
+ '\u0163',
+ '\u02d9'
+ };
+
+ private static final String[] LABELS = {
+ "csisolatin2",
+ "iso-8859-2",
+ "iso-ir-101",
+ "iso8859-2",
+ "iso88592",
+ "iso_8859-2",
+ "iso_8859-2:1987",
+ "l2",
+ "latin2"
+ };
+
+ private static final String NAME = "iso-8859-2";
+
+ static final Encoding INSTANCE = new Iso2();
+
+ private Iso2() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2022Jp.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2022Jp.java
new file mode 100644
index 000000000..6ebadc947
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2022Jp.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+class Iso2022Jp extends Encoding {
+
+ private static final String[] LABELS = {
+ "csiso2022jp",
+ "iso-2022-jp"
+ };
+
+ private static final String NAME = "iso-2022-jp";
+
+ static final Iso2022Jp INSTANCE = new Iso2022Jp();
+
+ private Iso2022Jp() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return Charset.forName(NAME).newDecoder();
+ }
+
+ @Override public CharsetEncoder newEncoder() {
+ return Charset.forName(NAME).newEncoder();
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso3.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso3.java
new file mode 100644
index 000000000..0667a160c
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso3.java
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso3 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u0126',
+ '\u02d8',
+ '\u00a3',
+ '\u00a4',
+ '\ufffd',
+ '\u0124',
+ '\u00a7',
+ '\u00a8',
+ '\u0130',
+ '\u015e',
+ '\u011e',
+ '\u0134',
+ '\u00ad',
+ '\ufffd',
+ '\u017b',
+ '\u00b0',
+ '\u0127',
+ '\u00b2',
+ '\u00b3',
+ '\u00b4',
+ '\u00b5',
+ '\u0125',
+ '\u00b7',
+ '\u00b8',
+ '\u0131',
+ '\u015f',
+ '\u011f',
+ '\u0135',
+ '\u00bd',
+ '\ufffd',
+ '\u017c',
+ '\u00c0',
+ '\u00c1',
+ '\u00c2',
+ '\ufffd',
+ '\u00c4',
+ '\u010a',
+ '\u0108',
+ '\u00c7',
+ '\u00c8',
+ '\u00c9',
+ '\u00ca',
+ '\u00cb',
+ '\u00cc',
+ '\u00cd',
+ '\u00ce',
+ '\u00cf',
+ '\ufffd',
+ '\u00d1',
+ '\u00d2',
+ '\u00d3',
+ '\u00d4',
+ '\u0120',
+ '\u00d6',
+ '\u00d7',
+ '\u011c',
+ '\u00d9',
+ '\u00da',
+ '\u00db',
+ '\u00dc',
+ '\u016c',
+ '\u015c',
+ '\u00df',
+ '\u00e0',
+ '\u00e1',
+ '\u00e2',
+ '\ufffd',
+ '\u00e4',
+ '\u010b',
+ '\u0109',
+ '\u00e7',
+ '\u00e8',
+ '\u00e9',
+ '\u00ea',
+ '\u00eb',
+ '\u00ec',
+ '\u00ed',
+ '\u00ee',
+ '\u00ef',
+ '\ufffd',
+ '\u00f1',
+ '\u00f2',
+ '\u00f3',
+ '\u00f4',
+ '\u0121',
+ '\u00f6',
+ '\u00f7',
+ '\u011d',
+ '\u00f9',
+ '\u00fa',
+ '\u00fb',
+ '\u00fc',
+ '\u016d',
+ '\u015d',
+ '\u02d9'
+ };
+
+ private static final String[] LABELS = {
+ "csisolatin3",
+ "iso-8859-3",
+ "iso-ir-109",
+ "iso8859-3",
+ "iso88593",
+ "iso_8859-3",
+ "iso_8859-3:1988",
+ "l3",
+ "latin3"
+ };
+
+ private static final String NAME = "iso-8859-3";
+
+ static final Encoding INSTANCE = new Iso3();
+
+ private Iso3() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new FallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso4.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso4.java
new file mode 100644
index 000000000..b954869ab
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso4.java
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso4 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u0104',
+ '\u0138',
+ '\u0156',
+ '\u00a4',
+ '\u0128',
+ '\u013b',
+ '\u00a7',
+ '\u00a8',
+ '\u0160',
+ '\u0112',
+ '\u0122',
+ '\u0166',
+ '\u00ad',
+ '\u017d',
+ '\u00af',
+ '\u00b0',
+ '\u0105',
+ '\u02db',
+ '\u0157',
+ '\u00b4',
+ '\u0129',
+ '\u013c',
+ '\u02c7',
+ '\u00b8',
+ '\u0161',
+ '\u0113',
+ '\u0123',
+ '\u0167',
+ '\u014a',
+ '\u017e',
+ '\u014b',
+ '\u0100',
+ '\u00c1',
+ '\u00c2',
+ '\u00c3',
+ '\u00c4',
+ '\u00c5',
+ '\u00c6',
+ '\u012e',
+ '\u010c',
+ '\u00c9',
+ '\u0118',
+ '\u00cb',
+ '\u0116',
+ '\u00cd',
+ '\u00ce',
+ '\u012a',
+ '\u0110',
+ '\u0145',
+ '\u014c',
+ '\u0136',
+ '\u00d4',
+ '\u00d5',
+ '\u00d6',
+ '\u00d7',
+ '\u00d8',
+ '\u0172',
+ '\u00da',
+ '\u00db',
+ '\u00dc',
+ '\u0168',
+ '\u016a',
+ '\u00df',
+ '\u0101',
+ '\u00e1',
+ '\u00e2',
+ '\u00e3',
+ '\u00e4',
+ '\u00e5',
+ '\u00e6',
+ '\u012f',
+ '\u010d',
+ '\u00e9',
+ '\u0119',
+ '\u00eb',
+ '\u0117',
+ '\u00ed',
+ '\u00ee',
+ '\u012b',
+ '\u0111',
+ '\u0146',
+ '\u014d',
+ '\u0137',
+ '\u00f4',
+ '\u00f5',
+ '\u00f6',
+ '\u00f7',
+ '\u00f8',
+ '\u0173',
+ '\u00fa',
+ '\u00fb',
+ '\u00fc',
+ '\u0169',
+ '\u016b',
+ '\u02d9'
+ };
+
+ private static final String[] LABELS = {
+ "csisolatin4",
+ "iso-8859-4",
+ "iso-ir-110",
+ "iso8859-4",
+ "iso88594",
+ "iso_8859-4",
+ "iso_8859-4:1988",
+ "l4",
+ "latin4"
+ };
+
+ private static final String NAME = "iso-8859-4";
+
+ static final Encoding INSTANCE = new Iso4();
+
+ private Iso4() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso5.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso5.java
new file mode 100644
index 000000000..13946cdbb
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso5.java
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso5 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u0401',
+ '\u0402',
+ '\u0403',
+ '\u0404',
+ '\u0405',
+ '\u0406',
+ '\u0407',
+ '\u0408',
+ '\u0409',
+ '\u040a',
+ '\u040b',
+ '\u040c',
+ '\u00ad',
+ '\u040e',
+ '\u040f',
+ '\u0410',
+ '\u0411',
+ '\u0412',
+ '\u0413',
+ '\u0414',
+ '\u0415',
+ '\u0416',
+ '\u0417',
+ '\u0418',
+ '\u0419',
+ '\u041a',
+ '\u041b',
+ '\u041c',
+ '\u041d',
+ '\u041e',
+ '\u041f',
+ '\u0420',
+ '\u0421',
+ '\u0422',
+ '\u0423',
+ '\u0424',
+ '\u0425',
+ '\u0426',
+ '\u0427',
+ '\u0428',
+ '\u0429',
+ '\u042a',
+ '\u042b',
+ '\u042c',
+ '\u042d',
+ '\u042e',
+ '\u042f',
+ '\u0430',
+ '\u0431',
+ '\u0432',
+ '\u0433',
+ '\u0434',
+ '\u0435',
+ '\u0436',
+ '\u0437',
+ '\u0438',
+ '\u0439',
+ '\u043a',
+ '\u043b',
+ '\u043c',
+ '\u043d',
+ '\u043e',
+ '\u043f',
+ '\u0440',
+ '\u0441',
+ '\u0442',
+ '\u0443',
+ '\u0444',
+ '\u0445',
+ '\u0446',
+ '\u0447',
+ '\u0448',
+ '\u0449',
+ '\u044a',
+ '\u044b',
+ '\u044c',
+ '\u044d',
+ '\u044e',
+ '\u044f',
+ '\u2116',
+ '\u0451',
+ '\u0452',
+ '\u0453',
+ '\u0454',
+ '\u0455',
+ '\u0456',
+ '\u0457',
+ '\u0458',
+ '\u0459',
+ '\u045a',
+ '\u045b',
+ '\u045c',
+ '\u00a7',
+ '\u045e',
+ '\u045f'
+ };
+
+ private static final String[] LABELS = {
+ "csisolatincyrillic",
+ "cyrillic",
+ "iso-8859-5",
+ "iso-ir-144",
+ "iso8859-5",
+ "iso88595",
+ "iso_8859-5",
+ "iso_8859-5:1988"
+ };
+
+ private static final String NAME = "iso-8859-5";
+
+ static final Encoding INSTANCE = new Iso5();
+
+ private Iso5() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso6.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso6.java
new file mode 100644
index 000000000..02e6df8ba
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso6.java
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso6 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\u00a4',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\u060c',
+ '\u00ad',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\u061b',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\u061f',
+ '\ufffd',
+ '\u0621',
+ '\u0622',
+ '\u0623',
+ '\u0624',
+ '\u0625',
+ '\u0626',
+ '\u0627',
+ '\u0628',
+ '\u0629',
+ '\u062a',
+ '\u062b',
+ '\u062c',
+ '\u062d',
+ '\u062e',
+ '\u062f',
+ '\u0630',
+ '\u0631',
+ '\u0632',
+ '\u0633',
+ '\u0634',
+ '\u0635',
+ '\u0636',
+ '\u0637',
+ '\u0638',
+ '\u0639',
+ '\u063a',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\u0640',
+ '\u0641',
+ '\u0642',
+ '\u0643',
+ '\u0644',
+ '\u0645',
+ '\u0646',
+ '\u0647',
+ '\u0648',
+ '\u0649',
+ '\u064a',
+ '\u064b',
+ '\u064c',
+ '\u064d',
+ '\u064e',
+ '\u064f',
+ '\u0650',
+ '\u0651',
+ '\u0652',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd'
+ };
+
+ private static final String[] LABELS = {
+ "arabic",
+ "asmo-708",
+ "csiso88596e",
+ "csiso88596i",
+ "csisolatinarabic",
+ "ecma-114",
+ "iso-8859-6",
+ "iso-8859-6-e",
+ "iso-8859-6-i",
+ "iso-ir-127",
+ "iso8859-6",
+ "iso88596",
+ "iso_8859-6",
+ "iso_8859-6:1987"
+ };
+
+ private static final String NAME = "iso-8859-6";
+
+ static final Encoding INSTANCE = new Iso6();
+
+ private Iso6() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new FallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso7.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso7.java
new file mode 100644
index 000000000..630e702de
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso7.java
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso7 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u2018',
+ '\u2019',
+ '\u00a3',
+ '\u20ac',
+ '\u20af',
+ '\u00a6',
+ '\u00a7',
+ '\u00a8',
+ '\u00a9',
+ '\u037a',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\ufffd',
+ '\u2015',
+ '\u00b0',
+ '\u00b1',
+ '\u00b2',
+ '\u00b3',
+ '\u0384',
+ '\u0385',
+ '\u0386',
+ '\u00b7',
+ '\u0388',
+ '\u0389',
+ '\u038a',
+ '\u00bb',
+ '\u038c',
+ '\u00bd',
+ '\u038e',
+ '\u038f',
+ '\u0390',
+ '\u0391',
+ '\u0392',
+ '\u0393',
+ '\u0394',
+ '\u0395',
+ '\u0396',
+ '\u0397',
+ '\u0398',
+ '\u0399',
+ '\u039a',
+ '\u039b',
+ '\u039c',
+ '\u039d',
+ '\u039e',
+ '\u039f',
+ '\u03a0',
+ '\u03a1',
+ '\ufffd',
+ '\u03a3',
+ '\u03a4',
+ '\u03a5',
+ '\u03a6',
+ '\u03a7',
+ '\u03a8',
+ '\u03a9',
+ '\u03aa',
+ '\u03ab',
+ '\u03ac',
+ '\u03ad',
+ '\u03ae',
+ '\u03af',
+ '\u03b0',
+ '\u03b1',
+ '\u03b2',
+ '\u03b3',
+ '\u03b4',
+ '\u03b5',
+ '\u03b6',
+ '\u03b7',
+ '\u03b8',
+ '\u03b9',
+ '\u03ba',
+ '\u03bb',
+ '\u03bc',
+ '\u03bd',
+ '\u03be',
+ '\u03bf',
+ '\u03c0',
+ '\u03c1',
+ '\u03c2',
+ '\u03c3',
+ '\u03c4',
+ '\u03c5',
+ '\u03c6',
+ '\u03c7',
+ '\u03c8',
+ '\u03c9',
+ '\u03ca',
+ '\u03cb',
+ '\u03cc',
+ '\u03cd',
+ '\u03ce',
+ '\ufffd'
+ };
+
+ private static final String[] LABELS = {
+ "csisolatingreek",
+ "ecma-118",
+ "elot_928",
+ "greek",
+ "greek8",
+ "iso-8859-7",
+ "iso-ir-126",
+ "iso8859-7",
+ "iso88597",
+ "iso_8859-7",
+ "iso_8859-7:1987",
+ "sun_eu_greek"
+ };
+
+ private static final String NAME = "iso-8859-7";
+
+ static final Encoding INSTANCE = new Iso7();
+
+ private Iso7() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new FallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8.java
new file mode 100644
index 000000000..10ee33486
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8.java
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso8 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\ufffd',
+ '\u00a2',
+ '\u00a3',
+ '\u00a4',
+ '\u00a5',
+ '\u00a6',
+ '\u00a7',
+ '\u00a8',
+ '\u00a9',
+ '\u00d7',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\u00ae',
+ '\u00af',
+ '\u00b0',
+ '\u00b1',
+ '\u00b2',
+ '\u00b3',
+ '\u00b4',
+ '\u00b5',
+ '\u00b6',
+ '\u00b7',
+ '\u00b8',
+ '\u00b9',
+ '\u00f7',
+ '\u00bb',
+ '\u00bc',
+ '\u00bd',
+ '\u00be',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\u2017',
+ '\u05d0',
+ '\u05d1',
+ '\u05d2',
+ '\u05d3',
+ '\u05d4',
+ '\u05d5',
+ '\u05d6',
+ '\u05d7',
+ '\u05d8',
+ '\u05d9',
+ '\u05da',
+ '\u05db',
+ '\u05dc',
+ '\u05dd',
+ '\u05de',
+ '\u05df',
+ '\u05e0',
+ '\u05e1',
+ '\u05e2',
+ '\u05e3',
+ '\u05e4',
+ '\u05e5',
+ '\u05e6',
+ '\u05e7',
+ '\u05e8',
+ '\u05e9',
+ '\u05ea',
+ '\ufffd',
+ '\ufffd',
+ '\u200e',
+ '\u200f',
+ '\ufffd'
+ };
+
+ private static final String[] LABELS = {
+ "csiso88598e",
+ "csisolatinhebrew",
+ "hebrew",
+ "iso-8859-8",
+ "iso-8859-8-e",
+ "iso-ir-138",
+ "iso8859-8",
+ "iso88598",
+ "iso_8859-8",
+ "iso_8859-8:1988",
+ "visual"
+ };
+
+ private static final String NAME = "iso-8859-8";
+
+ static final Encoding INSTANCE = new Iso8();
+
+ private Iso8() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new FallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8I.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8I.java
new file mode 100644
index 000000000..732e1c952
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8I.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso8I extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\ufffd',
+ '\u00a2',
+ '\u00a3',
+ '\u00a4',
+ '\u00a5',
+ '\u00a6',
+ '\u00a7',
+ '\u00a8',
+ '\u00a9',
+ '\u00d7',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\u00ae',
+ '\u00af',
+ '\u00b0',
+ '\u00b1',
+ '\u00b2',
+ '\u00b3',
+ '\u00b4',
+ '\u00b5',
+ '\u00b6',
+ '\u00b7',
+ '\u00b8',
+ '\u00b9',
+ '\u00f7',
+ '\u00bb',
+ '\u00bc',
+ '\u00bd',
+ '\u00be',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\u2017',
+ '\u05d0',
+ '\u05d1',
+ '\u05d2',
+ '\u05d3',
+ '\u05d4',
+ '\u05d5',
+ '\u05d6',
+ '\u05d7',
+ '\u05d8',
+ '\u05d9',
+ '\u05da',
+ '\u05db',
+ '\u05dc',
+ '\u05dd',
+ '\u05de',
+ '\u05df',
+ '\u05e0',
+ '\u05e1',
+ '\u05e2',
+ '\u05e3',
+ '\u05e4',
+ '\u05e5',
+ '\u05e6',
+ '\u05e7',
+ '\u05e8',
+ '\u05e9',
+ '\u05ea',
+ '\ufffd',
+ '\ufffd',
+ '\u200e',
+ '\u200f',
+ '\ufffd'
+ };
+
+ private static final String[] LABELS = {
+ "csiso88598i",
+ "iso-8859-8-i",
+ "logical"
+ };
+
+ private static final String NAME = "iso-8859-8-i";
+
+ static final Encoding INSTANCE = new Iso8I();
+
+ private Iso8I() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new FallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8R.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8R.java
new file mode 100644
index 000000000..b6157bd8e
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8R.java
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+/**
+ * Single-byte encoding registered under the name {@code koi8-r}.
+ * Decoding is delegated to {@link InfallibleSingleByteDecoder}, driven by
+ * the 128-entry table below.
+ */
+class Koi8R extends Encoding {
+
+    private static final String NAME = "koi8-r";
+
+    private static final String[] LABELS = {
+        "cskoi8r", "koi", "koi8", "koi8-r", "koi8_r"
+    };
+
+    // 128 entries, eight per row, in the original order.
+    // NOTE(review): presumably indexed by (byte value - 0x80); confirm
+    // against InfallibleSingleByteDecoder.
+    private static final char[] TABLE = {
+        '\u2500', '\u2502', '\u250c', '\u2510', '\u2514', '\u2518', '\u251c', '\u2524',
+        '\u252c', '\u2534', '\u253c', '\u2580', '\u2584', '\u2588', '\u258c', '\u2590',
+        '\u2591', '\u2592', '\u2593', '\u2320', '\u25a0', '\u2219', '\u221a', '\u2248',
+        '\u2264', '\u2265', '\u00a0', '\u2321', '\u00b0', '\u00b2', '\u00b7', '\u00f7',
+        '\u2550', '\u2551', '\u2552', '\u0451', '\u2553', '\u2554', '\u2555', '\u2556',
+        '\u2557', '\u2558', '\u2559', '\u255a', '\u255b', '\u255c', '\u255d', '\u255e',
+        '\u255f', '\u2560', '\u2561', '\u0401', '\u2562', '\u2563', '\u2564', '\u2565',
+        '\u2566', '\u2567', '\u2568', '\u2569', '\u256a', '\u256b', '\u256c', '\u00a9',
+        '\u044e', '\u0430', '\u0431', '\u0446', '\u0434', '\u0435', '\u0444', '\u0433',
+        '\u0445', '\u0438', '\u0439', '\u043a', '\u043b', '\u043c', '\u043d', '\u043e',
+        '\u043f', '\u044f', '\u0440', '\u0441', '\u0442', '\u0443', '\u0436', '\u0432',
+        '\u044c', '\u044b', '\u0437', '\u0448', '\u044d', '\u0449', '\u0447', '\u044a',
+        '\u042e', '\u0410', '\u0411', '\u0426', '\u0414', '\u0415', '\u0424', '\u0413',
+        '\u0425', '\u0418', '\u0419', '\u041a', '\u041b', '\u041c', '\u041d', '\u041e',
+        '\u041f', '\u042f', '\u0420', '\u0421', '\u0422', '\u0423', '\u0416', '\u0412',
+        '\u042c', '\u042b', '\u0417', '\u0428', '\u042d', '\u0429', '\u0427', '\u042a'
+    };
+
+    static final Encoding INSTANCE = new Koi8R();
+
+    private Koi8R() {
+        super(NAME, LABELS);
+    }
+
+    /**
+     * Creates a fresh table-driven decoder bound to this encoding.
+     */
+    @Override
+    public CharsetDecoder newDecoder() {
+        return new InfallibleSingleByteDecoder(this, TABLE);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8U.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8U.java
new file mode 100644
index 000000000..8150838d3
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8U.java
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+/**
+ * Single-byte encoding registered under the name {@code koi8-u}.
+ * Decoding is delegated to {@link InfallibleSingleByteDecoder}, driven by
+ * the 128-entry table below.
+ */
+class Koi8U extends Encoding {
+
+    private static final String NAME = "koi8-u";
+
+    private static final String[] LABELS = { "koi8-ru", "koi8-u" };
+
+    // 128 entries, eight per row, in the original order.
+    // NOTE(review): presumably indexed by (byte value - 0x80); confirm
+    // against InfallibleSingleByteDecoder.
+    private static final char[] TABLE = {
+        '\u2500', '\u2502', '\u250c', '\u2510', '\u2514', '\u2518', '\u251c', '\u2524',
+        '\u252c', '\u2534', '\u253c', '\u2580', '\u2584', '\u2588', '\u258c', '\u2590',
+        '\u2591', '\u2592', '\u2593', '\u2320', '\u25a0', '\u2219', '\u221a', '\u2248',
+        '\u2264', '\u2265', '\u00a0', '\u2321', '\u00b0', '\u00b2', '\u00b7', '\u00f7',
+        '\u2550', '\u2551', '\u2552', '\u0451', '\u0454', '\u2554', '\u0456', '\u0457',
+        '\u2557', '\u2558', '\u2559', '\u255a', '\u255b', '\u0491', '\u045e', '\u255e',
+        '\u255f', '\u2560', '\u2561', '\u0401', '\u0404', '\u2563', '\u0406', '\u0407',
+        '\u2566', '\u2567', '\u2568', '\u2569', '\u256a', '\u0490', '\u040e', '\u00a9',
+        '\u044e', '\u0430', '\u0431', '\u0446', '\u0434', '\u0435', '\u0444', '\u0433',
+        '\u0445', '\u0438', '\u0439', '\u043a', '\u043b', '\u043c', '\u043d', '\u043e',
+        '\u043f', '\u044f', '\u0440', '\u0441', '\u0442', '\u0443', '\u0436', '\u0432',
+        '\u044c', '\u044b', '\u0437', '\u0448', '\u044d', '\u0449', '\u0447', '\u044a',
+        '\u042e', '\u0410', '\u0411', '\u0426', '\u0414', '\u0415', '\u0424', '\u0413',
+        '\u0425', '\u0418', '\u0419', '\u041a', '\u041b', '\u041c', '\u041d', '\u041e',
+        '\u041f', '\u042f', '\u0420', '\u0421', '\u0422', '\u0423', '\u0416', '\u0412',
+        '\u042c', '\u042b', '\u0417', '\u0428', '\u042d', '\u0429', '\u0427', '\u042a'
+    };
+
+    static final Encoding INSTANCE = new Koi8U();
+
+    private Koi8U() {
+        super(NAME, LABELS);
+    }
+
+    /**
+     * Creates a fresh table-driven decoder bound to this encoding.
+     */
+    @Override
+    public CharsetDecoder newDecoder() {
+        return new InfallibleSingleByteDecoder(this, TABLE);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/MacCyrillic.java b/parser/html/java/htmlparser/src/nu/validator/encoding/MacCyrillic.java
new file mode 100644
index 000000000..f46546ce2
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/MacCyrillic.java
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+/**
+ * Single-byte encoding registered under the name {@code x-mac-cyrillic}.
+ * Decoding is delegated to {@link InfallibleSingleByteDecoder}, driven by
+ * the 128-entry table below.
+ */
+class MacCyrillic extends Encoding {
+
+    private static final String NAME = "x-mac-cyrillic";
+
+    private static final String[] LABELS = { "x-mac-cyrillic", "x-mac-ukrainian" };
+
+    // 128 entries, eight per row, in the original order.
+    // NOTE(review): presumably indexed by (byte value - 0x80); confirm
+    // against InfallibleSingleByteDecoder.
+    private static final char[] TABLE = {
+        '\u0410', '\u0411', '\u0412', '\u0413', '\u0414', '\u0415', '\u0416', '\u0417',
+        '\u0418', '\u0419', '\u041a', '\u041b', '\u041c', '\u041d', '\u041e', '\u041f',
+        '\u0420', '\u0421', '\u0422', '\u0423', '\u0424', '\u0425', '\u0426', '\u0427',
+        '\u0428', '\u0429', '\u042a', '\u042b', '\u042c', '\u042d', '\u042e', '\u042f',
+        '\u2020', '\u00b0', '\u0490', '\u00a3', '\u00a7', '\u2022', '\u00b6', '\u0406',
+        '\u00ae', '\u00a9', '\u2122', '\u0402', '\u0452', '\u2260', '\u0403', '\u0453',
+        '\u221e', '\u00b1', '\u2264', '\u2265', '\u0456', '\u00b5', '\u0491', '\u0408',
+        '\u0404', '\u0454', '\u0407', '\u0457', '\u0409', '\u0459', '\u040a', '\u045a',
+        '\u0458', '\u0405', '\u00ac', '\u221a', '\u0192', '\u2248', '\u2206', '\u00ab',
+        '\u00bb', '\u2026', '\u00a0', '\u040b', '\u045b', '\u040c', '\u045c', '\u0455',
+        '\u2013', '\u2014', '\u201c', '\u201d', '\u2018', '\u2019', '\u00f7', '\u201e',
+        '\u040e', '\u045e', '\u040f', '\u045f', '\u2116', '\u0401', '\u0451', '\u044f',
+        '\u0430', '\u0431', '\u0432', '\u0433', '\u0434', '\u0435', '\u0436', '\u0437',
+        '\u0438', '\u0439', '\u043a', '\u043b', '\u043c', '\u043d', '\u043e', '\u043f',
+        '\u0440', '\u0441', '\u0442', '\u0443', '\u0444', '\u0445', '\u0446', '\u0447',
+        '\u0448', '\u0449', '\u044a', '\u044b', '\u044c', '\u044d', '\u044e', '\u20ac'
+    };
+
+    static final Encoding INSTANCE = new MacCyrillic();
+
+    private MacCyrillic() {
+        super(NAME, LABELS);
+    }
+
+    /**
+     * Creates a fresh table-driven decoder bound to this encoding.
+     */
+    @Override
+    public CharsetDecoder newDecoder() {
+        return new InfallibleSingleByteDecoder(this, TABLE);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Macintosh.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Macintosh.java
new file mode 100644
index 000000000..70e356f23
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Macintosh.java
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+/**
+ * Single-byte encoding registered under the name {@code macintosh}.
+ * Decoding is delegated to {@link InfallibleSingleByteDecoder}, driven by
+ * the 128-entry table below.
+ */
+class Macintosh extends Encoding {
+
+    private static final String NAME = "macintosh";
+
+    private static final String[] LABELS = {
+        "csmacintosh", "mac", "macintosh", "x-mac-roman"
+    };
+
+    // 128 entries, eight per row, in the original order.
+    // NOTE(review): presumably indexed by (byte value - 0x80); confirm
+    // against InfallibleSingleByteDecoder.
+    private static final char[] TABLE = {
+        '\u00c4', '\u00c5', '\u00c7', '\u00c9', '\u00d1', '\u00d6', '\u00dc', '\u00e1',
+        '\u00e0', '\u00e2', '\u00e4', '\u00e3', '\u00e5', '\u00e7', '\u00e9', '\u00e8',
+        '\u00ea', '\u00eb', '\u00ed', '\u00ec', '\u00ee', '\u00ef', '\u00f1', '\u00f3',
+        '\u00f2', '\u00f4', '\u00f6', '\u00f5', '\u00fa', '\u00f9', '\u00fb', '\u00fc',
+        '\u2020', '\u00b0', '\u00a2', '\u00a3', '\u00a7', '\u2022', '\u00b6', '\u00df',
+        '\u00ae', '\u00a9', '\u2122', '\u00b4', '\u00a8', '\u2260', '\u00c6', '\u00d8',
+        '\u221e', '\u00b1', '\u2264', '\u2265', '\u00a5', '\u00b5', '\u2202', '\u2211',
+        '\u220f', '\u03c0', '\u222b', '\u00aa', '\u00ba', '\u03a9', '\u00e6', '\u00f8',
+        '\u00bf', '\u00a1', '\u00ac', '\u221a', '\u0192', '\u2248', '\u2206', '\u00ab',
+        '\u00bb', '\u2026', '\u00a0', '\u00c0', '\u00c3', '\u00d5', '\u0152', '\u0153',
+        '\u2013', '\u2014', '\u201c', '\u201d', '\u2018', '\u2019', '\u00f7', '\u25ca',
+        '\u00ff', '\u0178', '\u2044', '\u20ac', '\u2039', '\u203a', '\ufb01', '\ufb02',
+        '\u2021', '\u00b7', '\u201a', '\u201e', '\u2030', '\u00c2', '\u00ca', '\u00c1',
+        '\u00cb', '\u00c8', '\u00cd', '\u00ce', '\u00cf', '\u00cc', '\u00d3', '\u00d4',
+        '\uf8ff', '\u00d2', '\u00da', '\u00db', '\u00d9', '\u0131', '\u02c6', '\u02dc',
+        '\u00af', '\u02d8', '\u02d9', '\u02da', '\u00b8', '\u02dd', '\u02db', '\u02c7'
+    };
+
+    static final Encoding INSTANCE = new Macintosh();
+
+    private Macintosh() {
+        super(NAME, LABELS);
+    }
+
+    /**
+     * Creates a fresh table-driven decoder bound to this encoding.
+     */
+    @Override
+    public CharsetDecoder newDecoder() {
+        return new InfallibleSingleByteDecoder(this, TABLE);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Replacement.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Replacement.java
new file mode 100644
index 000000000..abb6e24e7
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Replacement.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+/**
+ * The {@code replacement} encoding. The labels listed here all resolve to
+ * it, and decoding is handled by {@link ReplacementDecoder}.
+ */
+class Replacement extends Encoding {
+
+    private static final String[] LABELS = {
+        "csiso2022kr",
+        "hz-gb-2312",
+        "iso-2022-cn",
+        "iso-2022-cn-ext",
+        "iso-2022-kr"
+    };
+
+    private static final String NAME = "replacement";
+
+    static final Replacement INSTANCE = new Replacement();
+
+    private Replacement() {
+        super(NAME, LABELS);
+    }
+
+    /**
+     * Creates a new {@link ReplacementDecoder} bound to this encoding.
+     */
+    @Override public CharsetDecoder newDecoder() {
+        return new ReplacementDecoder(this);
+    }
+
+    /**
+     * Returns a UTF-8 encoder.
+     *
+     * "replacement" is not a charset name the Java platform provides, so
+     * looking the encoder up by this encoding's own name fails with
+     * UnsupportedCharsetException (or would recurse into this method if a
+     * provider mapped the name back to this class). The Encoding Standard
+     * designates UTF-8 as the output encoding for "replacement".
+     */
+    @Override public CharsetEncoder newEncoder() {
+        return Charset.forName("utf-8").newEncoder();
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/ReplacementDecoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/ReplacementDecoder.java
new file mode 100644
index 000000000..f6f2448f6
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/ReplacementDecoder.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CoderResult;
+
+/**
+ * Decoder for the "replacement" encoding: the first non-empty chunk of
+ * input yields a single U+FFFD (or one malformed-input result), and all
+ * further input is consumed without producing output.
+ */
+class ReplacementDecoder extends Decoder {
+
+    /** True once the single replacement/error has been produced for the current stream. */
+    private boolean haveEmitted = false;
+
+    /**
+     * @param cs the charset this decoder belongs to
+     */
+    ReplacementDecoder(Charset cs) {
+        super(cs, 1.0f, 1.0f);
+    }
+
+    /**
+     * Consumes all available input. On the first call that sees input,
+     * either returns a malformed-input result of length 1 or writes one
+     * U+FFFD to {@code out}; afterwards input is discarded silently.
+     *
+     * @param in the bytes to consume
+     * @param out receives at most one U+FFFD per stream
+     * @return UNDERFLOW when input is exhausted, OVERFLOW when the
+     *         replacement character cannot be written yet, or a
+     *         malformed-input result
+     */
+    @Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+        for (;;) {
+            if (!in.hasRemaining()) {
+                return CoderResult.UNDERFLOW;
+            }
+            if (haveEmitted) {
+                // Already signalled once: swallow the rest of the input.
+                in.position(in.limit());
+                return CoderResult.UNDERFLOW;
+            }
+            if (!out.hasRemaining()) {
+                // Nothing consumed yet, so the caller can retry with more room.
+                return CoderResult.OVERFLOW;
+            }
+            in.position(in.limit());
+            haveEmitted = true;
+            // NOTE(review): "report" is inherited from Decoder; presumably it
+            // is true when malformed input must be reported instead of
+            // replaced - confirm against Decoder.
+            if (this.report) {
+                return CoderResult.malformedForLength(1);
+            }
+            out.put('\uFFFD');
+        }
+    }
+
+    /**
+     * No buffered state needs flushing; defers to the superclass.
+     *
+     * @see java.nio.charset.CharsetDecoder#implFlush(java.nio.CharBuffer)
+     */
+    @Override protected CoderResult implFlush(CharBuffer out) {
+        return super.implFlush(out);
+    }
+
+    /**
+     * Clears the "already emitted" flag so that a decoder reused after
+     * {@code reset()} signals the replacement again for the next stream
+     * instead of silently swallowing it.
+     *
+     * @see java.nio.charset.CharsetDecoder#implReset()
+     */
+    @Override protected void implReset() {
+        haveEmitted = false;
+        super.implReset();
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/ShiftJis.java b/parser/html/java/htmlparser/src/nu/validator/encoding/ShiftJis.java
new file mode 100644
index 000000000..6638eab39
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/ShiftJis.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+/**
+ * Encoding registered under the name {@code shift_jis}. Both coders come
+ * from whatever charset {@link Charset#forName(String)} resolves for that
+ * name.
+ */
+class ShiftJis extends Encoding {
+
+    private static final String NAME = "shift_jis";
+
+    private static final String[] LABELS = {
+        "csshiftjis", "ms932", "ms_kanji", "shift-jis",
+        "shift_jis", "sjis", "windows-31j", "x-sjis"
+    };
+
+    static final ShiftJis INSTANCE = new ShiftJis();
+
+    private ShiftJis() {
+        super(NAME, LABELS);
+    }
+
+    /** Looks the charset up by name and hands back a new decoder from it. */
+    @Override
+    public CharsetDecoder newDecoder() {
+        final Charset resolved = Charset.forName(NAME);
+        return resolved.newDecoder();
+    }
+
+    /** Looks the charset up by name and hands back a new encoder from it. */
+    @Override
+    public CharsetEncoder newEncoder() {
+        final Charset resolved = Charset.forName(NAME);
+        return resolved.newEncoder();
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefined.java b/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefined.java
new file mode 100644
index 000000000..61534cb28
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefined.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+/**
+ * Encoding registered under the single label and name
+ * {@code x-user-defined}. Decoding is handled by
+ * {@link UserDefinedDecoder}.
+ */
+class UserDefined extends Encoding {
+
+    private static final String NAME = "x-user-defined";
+
+    private static final String[] LABELS = { "x-user-defined" };
+
+    static final UserDefined INSTANCE = new UserDefined();
+
+    private UserDefined() {
+        super(NAME, LABELS);
+    }
+
+    /** Creates a new {@link UserDefinedDecoder} bound to this encoding. */
+    @Override
+    public CharsetDecoder newDecoder() {
+        return new UserDefinedDecoder(this);
+    }
+
+    /**
+     * Looks a charset up by this encoding's name and returns its encoder.
+     *
+     * NOTE(review): x-user-defined is not among the charsets every Java
+     * platform must provide; unless one is registered elsewhere this
+     * lookup presumably fails with UnsupportedCharsetException - confirm.
+     */
+    @Override
+    public CharsetEncoder newEncoder() {
+        final Charset resolved = Charset.forName(NAME);
+        return resolved.newEncoder();
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefinedDecoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefinedDecoder.java
new file mode 100644
index 000000000..c14ca8627
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefinedDecoder.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * Decoder for x-user-defined: non-negative bytes (0x00-0x7F) map to the
+ * same code unit, and negative bytes (0x80-0xFF) map to U+F780-U+F7FF.
+ */
+class UserDefinedDecoder extends Decoder {
+
+    UserDefinedDecoder(Charset cs) {
+        super(cs, 1.0f, 1.0f);
+    }
+
+    /**
+     * Translates bytes one at a time until the input runs dry or the
+     * output fills up.
+     *
+     * @param in source bytes
+     * @param out destination for the decoded UTF-16 code units
+     * @return UNDERFLOW once all input is consumed, OVERFLOW when input
+     *         remains but {@code out} has no room
+     */
+    @Override
+    protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+        // An array-backed fast path has not been evaluated; buffers are
+        // accessed through their relative get/put only.
+        while (in.hasRemaining()) {
+            if (!out.hasRemaining()) {
+                return CoderResult.OVERFLOW;
+            }
+            final byte raw = in.get();
+            final char decoded;
+            if (raw < 0) {
+                // Low seven bits select the offset within U+F780..U+F7FF.
+                decoded = (char) (0xF780 + (raw & 0x7F));
+            } else {
+                decoded = (char) raw;
+            }
+            out.put(decoded);
+        }
+        return CoderResult.UNDERFLOW;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Be.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Be.java
new file mode 100644
index 000000000..16c0d2fd5
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Be.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+/**
+ * Encoding registered under the single label and name {@code utf-16be}.
+ * Both coders come from the charset {@link Charset#forName(String)}
+ * resolves for that name.
+ */
+class Utf16Be extends Encoding {
+
+    private static final String NAME = "utf-16be";
+
+    private static final String[] LABELS = { "utf-16be" };
+
+    static final Utf16Be INSTANCE = new Utf16Be();
+
+    private Utf16Be() {
+        super(NAME, LABELS);
+    }
+
+    /** Looks the charset up by name and hands back a new decoder from it. */
+    @Override
+    public CharsetDecoder newDecoder() {
+        final Charset resolved = Charset.forName(NAME);
+        return resolved.newDecoder();
+    }
+
+    /** Looks the charset up by name and hands back a new encoder from it. */
+    @Override
+    public CharsetEncoder newEncoder() {
+        final Charset resolved = Charset.forName(NAME);
+        return resolved.newEncoder();
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Le.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Le.java
new file mode 100644
index 000000000..7381235b5
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Le.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+/**
+ * Encoding registered under the name {@code utf-16le}; the labels
+ * {@code utf-16} and {@code utf-16le} both belong to it. Both coders come
+ * from the charset {@link Charset#forName(String)} resolves for the name.
+ */
+class Utf16Le extends Encoding {
+
+    private static final String NAME = "utf-16le";
+
+    private static final String[] LABELS = { "utf-16", "utf-16le" };
+
+    static final Utf16Le INSTANCE = new Utf16Le();
+
+    private Utf16Le() {
+        super(NAME, LABELS);
+    }
+
+    /** Looks the charset up by name and hands back a new decoder from it. */
+    @Override
+    public CharsetDecoder newDecoder() {
+        final Charset resolved = Charset.forName(NAME);
+        return resolved.newDecoder();
+    }
+
+    /** Looks the charset up by name and hands back a new encoder from it. */
+    @Override
+    public CharsetEncoder newEncoder() {
+        final Charset resolved = Charset.forName(NAME);
+        return resolved.newEncoder();
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Utf8.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf8.java
new file mode 100644
index 000000000..d6ea7b514
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf8.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+/**
+ * Encoding registered under the name {@code utf-8}, with the labels
+ * {@code unicode-1-1-utf-8}, {@code utf-8} and {@code utf8}. Both coders
+ * come from the charset {@link Charset#forName(String)} resolves for the
+ * name.
+ */
+class Utf8 extends Encoding {
+
+    private static final String NAME = "utf-8";
+
+    private static final String[] LABELS = { "unicode-1-1-utf-8", "utf-8", "utf8" };
+
+    static final Utf8 INSTANCE = new Utf8();
+
+    private Utf8() {
+        super(NAME, LABELS);
+    }
+
+    /** Looks the charset up by name and hands back a new decoder from it. */
+    @Override
+    public CharsetDecoder newDecoder() {
+        final Charset resolved = Charset.forName(NAME);
+        return resolved.newDecoder();
+    }
+
+    /** Looks the charset up by name and hands back a new encoder from it. */
+    @Override
+    public CharsetEncoder newEncoder() {
+        final Charset resolved = Charset.forName(NAME);
+        return resolved.newEncoder();
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1250.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1250.java
new file mode 100644
index 000000000..0b3f50875
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1250.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+/**
+ * The windows-1250 encoding: registers its canonical name and labels with
+ * {@link Encoding} and creates decoders backed by a 128-entry lookup table.
+ */
+class Windows1250 extends Encoding {
+
+    /** Name passed to the superclass. */
+    private static final String NAME = "windows-1250";
+
+    /** Labels passed to the superclass alongside {@link #NAME}. */
+    private static final String[] LABELS = { "cp1250", "windows-1250", "x-cp1250" };
+
+    /**
+     * 128 UTF-16 code units, eight per row; the comment on each row gives the
+     * array indexes it holds.
+     * NOTE(review): presumably entry i is the character for byte 0x80 + i;
+     * confirm against InfallibleSingleByteDecoder.
+     */
+    private static final char[] TABLE = {
+        '\u20ac', '\u0081', '\u201a', '\u0083', '\u201e', '\u2026', '\u2020', '\u2021', // 0..7
+        '\u0088', '\u2030', '\u0160', '\u2039', '\u015a', '\u0164', '\u017d', '\u0179', // 8..15
+        '\u0090', '\u2018', '\u2019', '\u201c', '\u201d', '\u2022', '\u2013', '\u2014', // 16..23
+        '\u0098', '\u2122', '\u0161', '\u203a', '\u015b', '\u0165', '\u017e', '\u017a', // 24..31
+        '\u00a0', '\u02c7', '\u02d8', '\u0141', '\u00a4', '\u0104', '\u00a6', '\u00a7', // 32..39
+        '\u00a8', '\u00a9', '\u015e', '\u00ab', '\u00ac', '\u00ad', '\u00ae', '\u017b', // 40..47
+        '\u00b0', '\u00b1', '\u02db', '\u0142', '\u00b4', '\u00b5', '\u00b6', '\u00b7', // 48..55
+        '\u00b8', '\u0105', '\u015f', '\u00bb', '\u013d', '\u02dd', '\u013e', '\u017c', // 56..63
+        '\u0154', '\u00c1', '\u00c2', '\u0102', '\u00c4', '\u0139', '\u0106', '\u00c7', // 64..71
+        '\u010c', '\u00c9', '\u0118', '\u00cb', '\u011a', '\u00cd', '\u00ce', '\u010e', // 72..79
+        '\u0110', '\u0143', '\u0147', '\u00d3', '\u00d4', '\u0150', '\u00d6', '\u00d7', // 80..87
+        '\u0158', '\u016e', '\u00da', '\u0170', '\u00dc', '\u00dd', '\u0162', '\u00df', // 88..95
+        '\u0155', '\u00e1', '\u00e2', '\u0103', '\u00e4', '\u013a', '\u0107', '\u00e7', // 96..103
+        '\u010d', '\u00e9', '\u0119', '\u00eb', '\u011b', '\u00ed', '\u00ee', '\u010f', // 104..111
+        '\u0111', '\u0144', '\u0148', '\u00f3', '\u00f4', '\u0151', '\u00f6', '\u00f7', // 112..119
+        '\u0159', '\u016f', '\u00fa', '\u0171', '\u00fc', '\u00fd', '\u0163', '\u02d9'  // 120..127
+    };
+
+    /** The one instance created by this class; the constructor is private. */
+    static final Encoding INSTANCE = new Windows1250();
+
+    private Windows1250() {
+        super(NAME, LABELS);
+    }
+
+    /**
+     * Creates a decoder for this encoding from {@link #TABLE}.
+     *
+     * @return a new InfallibleSingleByteDecoder built with this encoding and the table
+     */
+    @Override
+    public CharsetDecoder newDecoder() {
+        final CharsetDecoder decoder = new InfallibleSingleByteDecoder(this, TABLE);
+        return decoder;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1251.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1251.java
new file mode 100644
index 000000000..def5cf11e
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1251.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+/**
+ * The windows-1251 encoding: registers its canonical name and labels with
+ * {@link Encoding} and creates decoders backed by a 128-entry lookup table.
+ */
+class Windows1251 extends Encoding {
+
+    /** Name passed to the superclass. */
+    private static final String NAME = "windows-1251";
+
+    /** Labels passed to the superclass alongside {@link #NAME}. */
+    private static final String[] LABELS = { "cp1251", "windows-1251", "x-cp1251" };
+
+    /**
+     * 128 UTF-16 code units, eight per row; the comment on each row gives the
+     * array indexes it holds.
+     * NOTE(review): presumably entry i is the character for byte 0x80 + i;
+     * confirm against InfallibleSingleByteDecoder.
+     */
+    private static final char[] TABLE = {
+        '\u0402', '\u0403', '\u201a', '\u0453', '\u201e', '\u2026', '\u2020', '\u2021', // 0..7
+        '\u20ac', '\u2030', '\u0409', '\u2039', '\u040a', '\u040c', '\u040b', '\u040f', // 8..15
+        '\u0452', '\u2018', '\u2019', '\u201c', '\u201d', '\u2022', '\u2013', '\u2014', // 16..23
+        '\u0098', '\u2122', '\u0459', '\u203a', '\u045a', '\u045c', '\u045b', '\u045f', // 24..31
+        '\u00a0', '\u040e', '\u045e', '\u0408', '\u00a4', '\u0490', '\u00a6', '\u00a7', // 32..39
+        '\u0401', '\u00a9', '\u0404', '\u00ab', '\u00ac', '\u00ad', '\u00ae', '\u0407', // 40..47
+        '\u00b0', '\u00b1', '\u0406', '\u0456', '\u0491', '\u00b5', '\u00b6', '\u00b7', // 48..55
+        '\u0451', '\u2116', '\u0454', '\u00bb', '\u0458', '\u0405', '\u0455', '\u0457', // 56..63
+        '\u0410', '\u0411', '\u0412', '\u0413', '\u0414', '\u0415', '\u0416', '\u0417', // 64..71
+        '\u0418', '\u0419', '\u041a', '\u041b', '\u041c', '\u041d', '\u041e', '\u041f', // 72..79
+        '\u0420', '\u0421', '\u0422', '\u0423', '\u0424', '\u0425', '\u0426', '\u0427', // 80..87
+        '\u0428', '\u0429', '\u042a', '\u042b', '\u042c', '\u042d', '\u042e', '\u042f', // 88..95
+        '\u0430', '\u0431', '\u0432', '\u0433', '\u0434', '\u0435', '\u0436', '\u0437', // 96..103
+        '\u0438', '\u0439', '\u043a', '\u043b', '\u043c', '\u043d', '\u043e', '\u043f', // 104..111
+        '\u0440', '\u0441', '\u0442', '\u0443', '\u0444', '\u0445', '\u0446', '\u0447', // 112..119
+        '\u0448', '\u0449', '\u044a', '\u044b', '\u044c', '\u044d', '\u044e', '\u044f'  // 120..127
+    };
+
+    /** The one instance created by this class; the constructor is private. */
+    static final Encoding INSTANCE = new Windows1251();
+
+    private Windows1251() {
+        super(NAME, LABELS);
+    }
+
+    /**
+     * Creates a decoder for this encoding from {@link #TABLE}.
+     *
+     * @return a new InfallibleSingleByteDecoder built with this encoding and the table
+     */
+    @Override
+    public CharsetDecoder newDecoder() {
+        final CharsetDecoder decoder = new InfallibleSingleByteDecoder(this, TABLE);
+        return decoder;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1252.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1252.java
new file mode 100644
index 000000000..4b3fa1ffa
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1252.java
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+/**
+ * The windows-1252 encoding: registers its canonical name and labels with
+ * {@link Encoding} and creates decoders backed by a 128-entry lookup table.
+ * The label list also carries the ascii, latin1 and iso-8859-1 family of
+ * labels.
+ */
+class Windows1252 extends Encoding {
+
+    /** Name passed to the superclass. */
+    private static final String NAME = "windows-1252";
+
+    /** Labels passed to the superclass alongside {@link #NAME}. */
+    private static final String[] LABELS = {
+        "ansi_x3.4-1968", "ascii", "cp1252", "cp819", "csisolatin1", "ibm819",
+        "iso-8859-1", "iso-ir-100", "iso8859-1", "iso88591", "iso_8859-1",
+        "iso_8859-1:1987", "l1", "latin1", "us-ascii", "windows-1252", "x-cp1252"
+    };
+
+    /**
+     * 128 UTF-16 code units, eight per row; the comment on each row gives the
+     * array indexes it holds.
+     * NOTE(review): presumably entry i is the character for byte 0x80 + i;
+     * confirm against InfallibleSingleByteDecoder.
+     */
+    private static final char[] TABLE = {
+        '\u20ac', '\u0081', '\u201a', '\u0192', '\u201e', '\u2026', '\u2020', '\u2021', // 0..7
+        '\u02c6', '\u2030', '\u0160', '\u2039', '\u0152', '\u008d', '\u017d', '\u008f', // 8..15
+        '\u0090', '\u2018', '\u2019', '\u201c', '\u201d', '\u2022', '\u2013', '\u2014', // 16..23
+        '\u02dc', '\u2122', '\u0161', '\u203a', '\u0153', '\u009d', '\u017e', '\u0178', // 24..31
+        '\u00a0', '\u00a1', '\u00a2', '\u00a3', '\u00a4', '\u00a5', '\u00a6', '\u00a7', // 32..39
+        '\u00a8', '\u00a9', '\u00aa', '\u00ab', '\u00ac', '\u00ad', '\u00ae', '\u00af', // 40..47
+        '\u00b0', '\u00b1', '\u00b2', '\u00b3', '\u00b4', '\u00b5', '\u00b6', '\u00b7', // 48..55
+        '\u00b8', '\u00b9', '\u00ba', '\u00bb', '\u00bc', '\u00bd', '\u00be', '\u00bf', // 56..63
+        '\u00c0', '\u00c1', '\u00c2', '\u00c3', '\u00c4', '\u00c5', '\u00c6', '\u00c7', // 64..71
+        '\u00c8', '\u00c9', '\u00ca', '\u00cb', '\u00cc', '\u00cd', '\u00ce', '\u00cf', // 72..79
+        '\u00d0', '\u00d1', '\u00d2', '\u00d3', '\u00d4', '\u00d5', '\u00d6', '\u00d7', // 80..87
+        '\u00d8', '\u00d9', '\u00da', '\u00db', '\u00dc', '\u00dd', '\u00de', '\u00df', // 88..95
+        '\u00e0', '\u00e1', '\u00e2', '\u00e3', '\u00e4', '\u00e5', '\u00e6', '\u00e7', // 96..103
+        '\u00e8', '\u00e9', '\u00ea', '\u00eb', '\u00ec', '\u00ed', '\u00ee', '\u00ef', // 104..111
+        '\u00f0', '\u00f1', '\u00f2', '\u00f3', '\u00f4', '\u00f5', '\u00f6', '\u00f7', // 112..119
+        '\u00f8', '\u00f9', '\u00fa', '\u00fb', '\u00fc', '\u00fd', '\u00fe', '\u00ff'  // 120..127
+    };
+
+    /** The one instance created by this class; the constructor is private. */
+    static final Encoding INSTANCE = new Windows1252();
+
+    private Windows1252() {
+        super(NAME, LABELS);
+    }
+
+    /**
+     * Creates a decoder for this encoding from {@link #TABLE}.
+     *
+     * @return a new InfallibleSingleByteDecoder built with this encoding and the table
+     */
+    @Override
+    public CharsetDecoder newDecoder() {
+        final CharsetDecoder decoder = new InfallibleSingleByteDecoder(this, TABLE);
+        return decoder;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1253.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1253.java
new file mode 100644
index 000000000..c96e8630c
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1253.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+/**
+ * The windows-1253 encoding: registers its canonical name and labels with
+ * {@link Encoding} and creates decoders backed by a 128-entry lookup table.
+ */
+class Windows1253 extends Encoding {
+
+    /** Name passed to the superclass. */
+    private static final String NAME = "windows-1253";
+
+    /** Labels passed to the superclass alongside {@link #NAME}. */
+    private static final String[] LABELS = { "cp1253", "windows-1253", "x-cp1253" };
+
+    /**
+     * 128 UTF-16 code units, eight per row; the comment on each row gives the
+     * array indexes it holds. Entries 42, 82 and 127 are U+FFFD.
+     * NOTE(review): presumably entry i is the character for byte 0x80 + i and
+     * U+FFFD marks a byte with no mapping; confirm against
+     * FallibleSingleByteDecoder.
+     */
+    private static final char[] TABLE = {
+        '\u20ac', '\u0081', '\u201a', '\u0192', '\u201e', '\u2026', '\u2020', '\u2021', // 0..7
+        '\u0088', '\u2030', '\u008a', '\u2039', '\u008c', '\u008d', '\u008e', '\u008f', // 8..15
+        '\u0090', '\u2018', '\u2019', '\u201c', '\u201d', '\u2022', '\u2013', '\u2014', // 16..23
+        '\u0098', '\u2122', '\u009a', '\u203a', '\u009c', '\u009d', '\u009e', '\u009f', // 24..31
+        '\u00a0', '\u0385', '\u0386', '\u00a3', '\u00a4', '\u00a5', '\u00a6', '\u00a7', // 32..39
+        '\u00a8', '\u00a9', '\ufffd', '\u00ab', '\u00ac', '\u00ad', '\u00ae', '\u2015', // 40..47
+        '\u00b0', '\u00b1', '\u00b2', '\u00b3', '\u0384', '\u00b5', '\u00b6', '\u00b7', // 48..55
+        '\u0388', '\u0389', '\u038a', '\u00bb', '\u038c', '\u00bd', '\u038e', '\u038f', // 56..63
+        '\u0390', '\u0391', '\u0392', '\u0393', '\u0394', '\u0395', '\u0396', '\u0397', // 64..71
+        '\u0398', '\u0399', '\u039a', '\u039b', '\u039c', '\u039d', '\u039e', '\u039f', // 72..79
+        '\u03a0', '\u03a1', '\ufffd', '\u03a3', '\u03a4', '\u03a5', '\u03a6', '\u03a7', // 80..87
+        '\u03a8', '\u03a9', '\u03aa', '\u03ab', '\u03ac', '\u03ad', '\u03ae', '\u03af', // 88..95
+        '\u03b0', '\u03b1', '\u03b2', '\u03b3', '\u03b4', '\u03b5', '\u03b6', '\u03b7', // 96..103
+        '\u03b8', '\u03b9', '\u03ba', '\u03bb', '\u03bc', '\u03bd', '\u03be', '\u03bf', // 104..111
+        '\u03c0', '\u03c1', '\u03c2', '\u03c3', '\u03c4', '\u03c5', '\u03c6', '\u03c7', // 112..119
+        '\u03c8', '\u03c9', '\u03ca', '\u03cb', '\u03cc', '\u03cd', '\u03ce', '\ufffd'  // 120..127
+    };
+
+    /** The one instance created by this class; the constructor is private. */
+    static final Encoding INSTANCE = new Windows1253();
+
+    private Windows1253() {
+        super(NAME, LABELS);
+    }
+
+    /**
+     * Creates a decoder for this encoding from {@link #TABLE}.
+     *
+     * @return a new FallibleSingleByteDecoder built with this encoding and the table
+     */
+    @Override
+    public CharsetDecoder newDecoder() {
+        final CharsetDecoder decoder = new FallibleSingleByteDecoder(this, TABLE);
+        return decoder;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1254.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1254.java
new file mode 100644
index 000000000..fc3aa9839
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1254.java
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+/**
+ * The windows-1254 encoding: registers its canonical name and labels with
+ * {@link Encoding} and creates decoders backed by a 128-entry lookup table.
+ * The label list also carries the latin5 and iso-8859-9 family of labels.
+ */
+class Windows1254 extends Encoding {
+
+    /** Name passed to the superclass. */
+    private static final String NAME = "windows-1254";
+
+    /** Labels passed to the superclass alongside {@link #NAME}. */
+    private static final String[] LABELS = {
+        "cp1254", "csisolatin5", "iso-8859-9", "iso-ir-148", "iso8859-9", "iso88599",
+        "iso_8859-9", "iso_8859-9:1989", "l5", "latin5", "windows-1254", "x-cp1254"
+    };
+
+    /**
+     * 128 UTF-16 code units, eight per row; the comment on each row gives the
+     * array indexes it holds.
+     * NOTE(review): presumably entry i is the character for byte 0x80 + i;
+     * confirm against InfallibleSingleByteDecoder.
+     */
+    private static final char[] TABLE = {
+        '\u20ac', '\u0081', '\u201a', '\u0192', '\u201e', '\u2026', '\u2020', '\u2021', // 0..7
+        '\u02c6', '\u2030', '\u0160', '\u2039', '\u0152', '\u008d', '\u008e', '\u008f', // 8..15
+        '\u0090', '\u2018', '\u2019', '\u201c', '\u201d', '\u2022', '\u2013', '\u2014', // 16..23
+        '\u02dc', '\u2122', '\u0161', '\u203a', '\u0153', '\u009d', '\u009e', '\u0178', // 24..31
+        '\u00a0', '\u00a1', '\u00a2', '\u00a3', '\u00a4', '\u00a5', '\u00a6', '\u00a7', // 32..39
+        '\u00a8', '\u00a9', '\u00aa', '\u00ab', '\u00ac', '\u00ad', '\u00ae', '\u00af', // 40..47
+        '\u00b0', '\u00b1', '\u00b2', '\u00b3', '\u00b4', '\u00b5', '\u00b6', '\u00b7', // 48..55
+        '\u00b8', '\u00b9', '\u00ba', '\u00bb', '\u00bc', '\u00bd', '\u00be', '\u00bf', // 56..63
+        '\u00c0', '\u00c1', '\u00c2', '\u00c3', '\u00c4', '\u00c5', '\u00c6', '\u00c7', // 64..71
+        '\u00c8', '\u00c9', '\u00ca', '\u00cb', '\u00cc', '\u00cd', '\u00ce', '\u00cf', // 72..79
+        '\u011e', '\u00d1', '\u00d2', '\u00d3', '\u00d4', '\u00d5', '\u00d6', '\u00d7', // 80..87
+        '\u00d8', '\u00d9', '\u00da', '\u00db', '\u00dc', '\u0130', '\u015e', '\u00df', // 88..95
+        '\u00e0', '\u00e1', '\u00e2', '\u00e3', '\u00e4', '\u00e5', '\u00e6', '\u00e7', // 96..103
+        '\u00e8', '\u00e9', '\u00ea', '\u00eb', '\u00ec', '\u00ed', '\u00ee', '\u00ef', // 104..111
+        '\u011f', '\u00f1', '\u00f2', '\u00f3', '\u00f4', '\u00f5', '\u00f6', '\u00f7', // 112..119
+        '\u00f8', '\u00f9', '\u00fa', '\u00fb', '\u00fc', '\u0131', '\u015f', '\u00ff'  // 120..127
+    };
+
+    /** The one instance created by this class; the constructor is private. */
+    static final Encoding INSTANCE = new Windows1254();
+
+    private Windows1254() {
+        super(NAME, LABELS);
+    }
+
+    /**
+     * Creates a decoder for this encoding from {@link #TABLE}.
+     *
+     * @return a new InfallibleSingleByteDecoder built with this encoding and the table
+     */
+    @Override
+    public CharsetDecoder newDecoder() {
+        final CharsetDecoder decoder = new InfallibleSingleByteDecoder(this, TABLE);
+        return decoder;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1255.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1255.java
new file mode 100644
index 000000000..957203d80
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1255.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+/**
+ * The windows-1255 encoding: registers its canonical name and labels with
+ * {@link Encoding} and creates decoders backed by a 128-entry lookup table.
+ */
+class Windows1255 extends Encoding {
+
+    /** Name passed to the superclass. */
+    private static final String NAME = "windows-1255";
+
+    /** Labels passed to the superclass alongside {@link #NAME}. */
+    private static final String[] LABELS = { "cp1255", "windows-1255", "x-cp1255" };
+
+    /**
+     * 128 UTF-16 code units, eight per row; the comment on each row gives the
+     * array indexes it holds. Entries 74, 89 through 95, 123, 124 and 127 are
+     * U+FFFD.
+     * NOTE(review): presumably entry i is the character for byte 0x80 + i and
+     * U+FFFD marks a byte with no mapping; confirm against
+     * FallibleSingleByteDecoder.
+     */
+    private static final char[] TABLE = {
+        '\u20ac', '\u0081', '\u201a', '\u0192', '\u201e', '\u2026', '\u2020', '\u2021', // 0..7
+        '\u02c6', '\u2030', '\u008a', '\u2039', '\u008c', '\u008d', '\u008e', '\u008f', // 8..15
+        '\u0090', '\u2018', '\u2019', '\u201c', '\u201d', '\u2022', '\u2013', '\u2014', // 16..23
+        '\u02dc', '\u2122', '\u009a', '\u203a', '\u009c', '\u009d', '\u009e', '\u009f', // 24..31
+        '\u00a0', '\u00a1', '\u00a2', '\u00a3', '\u20aa', '\u00a5', '\u00a6', '\u00a7', // 32..39
+        '\u00a8', '\u00a9', '\u00d7', '\u00ab', '\u00ac', '\u00ad', '\u00ae', '\u00af', // 40..47
+        '\u00b0', '\u00b1', '\u00b2', '\u00b3', '\u00b4', '\u00b5', '\u00b6', '\u00b7', // 48..55
+        '\u00b8', '\u00b9', '\u00f7', '\u00bb', '\u00bc', '\u00bd', '\u00be', '\u00bf', // 56..63
+        '\u05b0', '\u05b1', '\u05b2', '\u05b3', '\u05b4', '\u05b5', '\u05b6', '\u05b7', // 64..71
+        '\u05b8', '\u05b9', '\ufffd', '\u05bb', '\u05bc', '\u05bd', '\u05be', '\u05bf', // 72..79
+        '\u05c0', '\u05c1', '\u05c2', '\u05c3', '\u05f0', '\u05f1', '\u05f2', '\u05f3', // 80..87
+        '\u05f4', '\ufffd', '\ufffd', '\ufffd', '\ufffd', '\ufffd', '\ufffd', '\ufffd', // 88..95
+        '\u05d0', '\u05d1', '\u05d2', '\u05d3', '\u05d4', '\u05d5', '\u05d6', '\u05d7', // 96..103
+        '\u05d8', '\u05d9', '\u05da', '\u05db', '\u05dc', '\u05dd', '\u05de', '\u05df', // 104..111
+        '\u05e0', '\u05e1', '\u05e2', '\u05e3', '\u05e4', '\u05e5', '\u05e6', '\u05e7', // 112..119
+        '\u05e8', '\u05e9', '\u05ea', '\ufffd', '\ufffd', '\u200e', '\u200f', '\ufffd'  // 120..127
+    };
+
+    /** The one instance created by this class; the constructor is private. */
+    static final Encoding INSTANCE = new Windows1255();
+
+    private Windows1255() {
+        super(NAME, LABELS);
+    }
+
+    /**
+     * Creates a decoder for this encoding from {@link #TABLE}.
+     *
+     * @return a new FallibleSingleByteDecoder built with this encoding and the table
+     */
+    @Override
+    public CharsetDecoder newDecoder() {
+        final CharsetDecoder decoder = new FallibleSingleByteDecoder(this, TABLE);
+        return decoder;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1256.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1256.java
new file mode 100644
index 000000000..87d805e1e
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1256.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+/**
+ * The windows-1256 encoding: registers its canonical name and labels with
+ * {@link Encoding} and creates decoders backed by a 128-entry lookup table.
+ */
+class Windows1256 extends Encoding {
+
+    /** Name passed to the superclass. */
+    private static final String NAME = "windows-1256";
+
+    /** Labels passed to the superclass alongside {@link #NAME}. */
+    private static final String[] LABELS = { "cp1256", "windows-1256", "x-cp1256" };
+
+    /**
+     * 128 UTF-16 code units, eight per row; the comment on each row gives the
+     * array indexes it holds.
+     * NOTE(review): presumably entry i is the character for byte 0x80 + i;
+     * confirm against InfallibleSingleByteDecoder.
+     */
+    private static final char[] TABLE = {
+        '\u20ac', '\u067e', '\u201a', '\u0192', '\u201e', '\u2026', '\u2020', '\u2021', // 0..7
+        '\u02c6', '\u2030', '\u0679', '\u2039', '\u0152', '\u0686', '\u0698', '\u0688', // 8..15
+        '\u06af', '\u2018', '\u2019', '\u201c', '\u201d', '\u2022', '\u2013', '\u2014', // 16..23
+        '\u06a9', '\u2122', '\u0691', '\u203a', '\u0153', '\u200c', '\u200d', '\u06ba', // 24..31
+        '\u00a0', '\u060c', '\u00a2', '\u00a3', '\u00a4', '\u00a5', '\u00a6', '\u00a7', // 32..39
+        '\u00a8', '\u00a9', '\u06be', '\u00ab', '\u00ac', '\u00ad', '\u00ae', '\u00af', // 40..47
+        '\u00b0', '\u00b1', '\u00b2', '\u00b3', '\u00b4', '\u00b5', '\u00b6', '\u00b7', // 48..55
+        '\u00b8', '\u00b9', '\u061b', '\u00bb', '\u00bc', '\u00bd', '\u00be', '\u061f', // 56..63
+        '\u06c1', '\u0621', '\u0622', '\u0623', '\u0624', '\u0625', '\u0626', '\u0627', // 64..71
+        '\u0628', '\u0629', '\u062a', '\u062b', '\u062c', '\u062d', '\u062e', '\u062f', // 72..79
+        '\u0630', '\u0631', '\u0632', '\u0633', '\u0634', '\u0635', '\u0636', '\u00d7', // 80..87
+        '\u0637', '\u0638', '\u0639', '\u063a', '\u0640', '\u0641', '\u0642', '\u0643', // 88..95
+        '\u00e0', '\u0644', '\u00e2', '\u0645', '\u0646', '\u0647', '\u0648', '\u00e7', // 96..103
+        '\u00e8', '\u00e9', '\u00ea', '\u00eb', '\u0649', '\u064a', '\u00ee', '\u00ef', // 104..111
+        '\u064b', '\u064c', '\u064d', '\u064e', '\u00f4', '\u064f', '\u0650', '\u00f7', // 112..119
+        '\u0651', '\u00f9', '\u0652', '\u00fb', '\u00fc', '\u200e', '\u200f', '\u06d2'  // 120..127
+    };
+
+    /** The one instance created by this class; the constructor is private. */
+    static final Encoding INSTANCE = new Windows1256();
+
+    private Windows1256() {
+        super(NAME, LABELS);
+    }
+
+    /**
+     * Creates a decoder for this encoding from {@link #TABLE}.
+     *
+     * @return a new InfallibleSingleByteDecoder built with this encoding and the table
+     */
+    @Override
+    public CharsetDecoder newDecoder() {
+        final CharsetDecoder decoder = new InfallibleSingleByteDecoder(this, TABLE);
+        return decoder;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1257.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1257.java
new file mode 100644
index 000000000..140e9b458
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1257.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+/**
+ * The windows-1257 encoding: registers its canonical name and labels with
+ * {@link Encoding} and creates decoders backed by a 128-entry lookup table.
+ */
+class Windows1257 extends Encoding {
+
+    /** Name passed to the superclass. */
+    private static final String NAME = "windows-1257";
+
+    /** Labels passed to the superclass alongside {@link #NAME}. */
+    private static final String[] LABELS = { "cp1257", "windows-1257", "x-cp1257" };
+
+    /**
+     * 128 UTF-16 code units, eight per row; the comment on each row gives the
+     * array indexes it holds. Entries 33 and 37 are U+FFFD.
+     * NOTE(review): presumably entry i is the character for byte 0x80 + i and
+     * U+FFFD marks a byte with no mapping; confirm against
+     * FallibleSingleByteDecoder.
+     */
+    private static final char[] TABLE = {
+        '\u20ac', '\u0081', '\u201a', '\u0083', '\u201e', '\u2026', '\u2020', '\u2021', // 0..7
+        '\u0088', '\u2030', '\u008a', '\u2039', '\u008c', '\u00a8', '\u02c7', '\u00b8', // 8..15
+        '\u0090', '\u2018', '\u2019', '\u201c', '\u201d', '\u2022', '\u2013', '\u2014', // 16..23
+        '\u0098', '\u2122', '\u009a', '\u203a', '\u009c', '\u00af', '\u02db', '\u009f', // 24..31
+        '\u00a0', '\ufffd', '\u00a2', '\u00a3', '\u00a4', '\ufffd', '\u00a6', '\u00a7', // 32..39
+        '\u00d8', '\u00a9', '\u0156', '\u00ab', '\u00ac', '\u00ad', '\u00ae', '\u00c6', // 40..47
+        '\u00b0', '\u00b1', '\u00b2', '\u00b3', '\u00b4', '\u00b5', '\u00b6', '\u00b7', // 48..55
+        '\u00f8', '\u00b9', '\u0157', '\u00bb', '\u00bc', '\u00bd', '\u00be', '\u00e6', // 56..63
+        '\u0104', '\u012e', '\u0100', '\u0106', '\u00c4', '\u00c5', '\u0118', '\u0112', // 64..71
+        '\u010c', '\u00c9', '\u0179', '\u0116', '\u0122', '\u0136', '\u012a', '\u013b', // 72..79
+        '\u0160', '\u0143', '\u0145', '\u00d3', '\u014c', '\u00d5', '\u00d6', '\u00d7', // 80..87
+        '\u0172', '\u0141', '\u015a', '\u016a', '\u00dc', '\u017b', '\u017d', '\u00df', // 88..95
+        '\u0105', '\u012f', '\u0101', '\u0107', '\u00e4', '\u00e5', '\u0119', '\u0113', // 96..103
+        '\u010d', '\u00e9', '\u017a', '\u0117', '\u0123', '\u0137', '\u012b', '\u013c', // 104..111
+        '\u0161', '\u0144', '\u0146', '\u00f3', '\u014d', '\u00f5', '\u00f6', '\u00f7', // 112..119
+        '\u0173', '\u0142', '\u015b', '\u016b', '\u00fc', '\u017c', '\u017e', '\u02d9'  // 120..127
+    };
+
+    /** The one instance created by this class; the constructor is private. */
+    static final Encoding INSTANCE = new Windows1257();
+
+    private Windows1257() {
+        super(NAME, LABELS);
+    }
+
+    /**
+     * Creates a decoder for this encoding from {@link #TABLE}.
+     *
+     * @return a new FallibleSingleByteDecoder built with this encoding and the table
+     */
+    @Override
+    public CharsetDecoder newDecoder() {
+        final CharsetDecoder decoder = new FallibleSingleByteDecoder(this, TABLE);
+        return decoder;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1258.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1258.java
new file mode 100644
index 000000000..130107789
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1258.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+/**
+ * The windows-1258 encoding: registers its canonical name and labels with
+ * {@link Encoding} and creates decoders backed by a 128-entry lookup table.
+ */
+class Windows1258 extends Encoding {
+
+    /** Name passed to the superclass. */
+    private static final String NAME = "windows-1258";
+
+    /** Labels passed to the superclass alongside {@link #NAME}. */
+    private static final String[] LABELS = { "cp1258", "windows-1258", "x-cp1258" };
+
+    /**
+     * 128 UTF-16 code units, eight per row; the comment on each row gives the
+     * array indexes it holds.
+     * NOTE(review): presumably entry i is the character for byte 0x80 + i;
+     * confirm against InfallibleSingleByteDecoder.
+     */
+    private static final char[] TABLE = {
+        '\u20ac', '\u0081', '\u201a', '\u0192', '\u201e', '\u2026', '\u2020', '\u2021', // 0..7
+        '\u02c6', '\u2030', '\u008a', '\u2039', '\u0152', '\u008d', '\u008e', '\u008f', // 8..15
+        '\u0090', '\u2018', '\u2019', '\u201c', '\u201d', '\u2022', '\u2013', '\u2014', // 16..23
+        '\u02dc', '\u2122', '\u009a', '\u203a', '\u0153', '\u009d', '\u009e', '\u0178', // 24..31
+        '\u00a0', '\u00a1', '\u00a2', '\u00a3', '\u00a4', '\u00a5', '\u00a6', '\u00a7', // 32..39
+        '\u00a8', '\u00a9', '\u00aa', '\u00ab', '\u00ac', '\u00ad', '\u00ae', '\u00af', // 40..47
+        '\u00b0', '\u00b1', '\u00b2', '\u00b3', '\u00b4', '\u00b5', '\u00b6', '\u00b7', // 48..55
+        '\u00b8', '\u00b9', '\u00ba', '\u00bb', '\u00bc', '\u00bd', '\u00be', '\u00bf', // 56..63
+        '\u00c0', '\u00c1', '\u00c2', '\u0102', '\u00c4', '\u00c5', '\u00c6', '\u00c7', // 64..71
+        '\u00c8', '\u00c9', '\u00ca', '\u00cb', '\u0300', '\u00cd', '\u00ce', '\u00cf', // 72..79
+        '\u0110', '\u00d1', '\u0309', '\u00d3', '\u00d4', '\u01a0', '\u00d6', '\u00d7', // 80..87
+        '\u00d8', '\u00d9', '\u00da', '\u00db', '\u00dc', '\u01af', '\u0303', '\u00df', // 88..95
+        '\u00e0', '\u00e1', '\u00e2', '\u0103', '\u00e4', '\u00e5', '\u00e6', '\u00e7', // 96..103
+        '\u00e8', '\u00e9', '\u00ea', '\u00eb', '\u0301', '\u00ed', '\u00ee', '\u00ef', // 104..111
+        '\u0111', '\u00f1', '\u0323', '\u00f3', '\u00f4', '\u01a1', '\u00f6', '\u00f7', // 112..119
+        '\u00f8', '\u00f9', '\u00fa', '\u00fb', '\u00fc', '\u01b0', '\u20ab', '\u00ff'  // 120..127
+    };
+
+    /** The one instance created by this class; the constructor is private. */
+    static final Encoding INSTANCE = new Windows1258();
+
+    private Windows1258() {
+        super(NAME, LABELS);
+    }
+
+    /**
+     * Creates a decoder for this encoding from {@link #TABLE}.
+     *
+     * @return a new InfallibleSingleByteDecoder built with this encoding and the table
+     */
+    @Override
+    public CharsetDecoder newDecoder() {
+        final CharsetDecoder decoder = new InfallibleSingleByteDecoder(this, TABLE);
+        return decoder;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows874.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows874.java
new file mode 100644
index 000000000..f93be0175
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows874.java
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Windows874 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u20ac',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u2026',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u2018',
+ '\u2019',
+ '\u201c',
+ '\u201d',
+ '\u2022',
+ '\u2013',
+ '\u2014',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u0e01',
+ '\u0e02',
+ '\u0e03',
+ '\u0e04',
+ '\u0e05',
+ '\u0e06',
+ '\u0e07',
+ '\u0e08',
+ '\u0e09',
+ '\u0e0a',
+ '\u0e0b',
+ '\u0e0c',
+ '\u0e0d',
+ '\u0e0e',
+ '\u0e0f',
+ '\u0e10',
+ '\u0e11',
+ '\u0e12',
+ '\u0e13',
+ '\u0e14',
+ '\u0e15',
+ '\u0e16',
+ '\u0e17',
+ '\u0e18',
+ '\u0e19',
+ '\u0e1a',
+ '\u0e1b',
+ '\u0e1c',
+ '\u0e1d',
+ '\u0e1e',
+ '\u0e1f',
+ '\u0e20',
+ '\u0e21',
+ '\u0e22',
+ '\u0e23',
+ '\u0e24',
+ '\u0e25',
+ '\u0e26',
+ '\u0e27',
+ '\u0e28',
+ '\u0e29',
+ '\u0e2a',
+ '\u0e2b',
+ '\u0e2c',
+ '\u0e2d',
+ '\u0e2e',
+ '\u0e2f',
+ '\u0e30',
+ '\u0e31',
+ '\u0e32',
+ '\u0e33',
+ '\u0e34',
+ '\u0e35',
+ '\u0e36',
+ '\u0e37',
+ '\u0e38',
+ '\u0e39',
+ '\u0e3a',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\u0e3f',
+ '\u0e40',
+ '\u0e41',
+ '\u0e42',
+ '\u0e43',
+ '\u0e44',
+ '\u0e45',
+ '\u0e46',
+ '\u0e47',
+ '\u0e48',
+ '\u0e49',
+ '\u0e4a',
+ '\u0e4b',
+ '\u0e4c',
+ '\u0e4d',
+ '\u0e4e',
+ '\u0e4f',
+ '\u0e50',
+ '\u0e51',
+ '\u0e52',
+ '\u0e53',
+ '\u0e54',
+ '\u0e55',
+ '\u0e56',
+ '\u0e57',
+ '\u0e58',
+ '\u0e59',
+ '\u0e5a',
+ '\u0e5b',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd'
+ };
+
+ private static final String[] LABELS = {
+ "dos-874",
+ "iso-8859-11",
+ "iso8859-11",
+ "iso885911",
+ "tis-620",
+ "windows-874"
+ };
+
+ private static final String NAME = "windows-874";
+
+ static final Encoding INSTANCE = new Windows874();
+
+ private Windows874() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new FallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Auto.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Auto.java
new file mode 100644
index 000000000..0967a5814
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Auto.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+public @interface Auto {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/CharacterName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/CharacterName.java
new file mode 100644
index 000000000..bcb8a2b00
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/CharacterName.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+public @interface CharacterName {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Const.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Const.java
new file mode 100644
index 000000000..2ba7f418a
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Const.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * Marker for translating into the C++ const keyword on the declaration in
+ * question.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface Const {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/IdType.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/IdType.java
new file mode 100644
index 000000000..117da8d3c
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/IdType.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * The type for attribute IDness. (In Java, an interned string
+ * <code>"CDATA"</code> or <code>"ID"</code>.)
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface IdType {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Inline.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Inline.java
new file mode 100644
index 000000000..cc0728b1b
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Inline.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2009-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * Translates into the C++ inline keyword.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface Inline {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Literal.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Literal.java
new file mode 100644
index 000000000..44444d525
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Literal.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2009-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * Marks a string type as being the literal string type (typically const char*)
+ * in C++.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface Literal {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Local.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Local.java
new file mode 100644
index 000000000..1f91ba93b
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Local.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * The local name of an element or attribute. Must be comparable with
+ * <code>==</code> (interned <code>String</code> in Java).
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface Local {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NoLength.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NoLength.java
new file mode 100644
index 000000000..cf011d33e
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NoLength.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * The array type marked with this annotation won't have its
+ * <code>.length</code> read.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface NoLength {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NsUri.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NsUri.java
new file mode 100644
index 000000000..03baa75f5
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NsUri.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * The namespace URI type. (In Java, an interned <code>String</code>.)
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface NsUri {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Prefix.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Prefix.java
new file mode 100644
index 000000000..268e531a3
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Prefix.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * The type for namespace prefixes. (In Java, an interned <code>String</code>.)
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface Prefix {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/QName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/QName.java
new file mode 100644
index 000000000..e6d4807b6
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/QName.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * The type for qualified names. (In Java, an interned <code>String</code>.)
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface QName {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Virtual.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Virtual.java
new file mode 100644
index 000000000..e293e1af5
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Virtual.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * Marks a method as virtual in C++.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface Virtual {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/package.html
new file mode 100644
index 000000000..af15d3827
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/package.html
@@ -0,0 +1,30 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2008 Mozilla Foundation
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>This package provides annotations for facilitating automated translation
+of the source code into other programming languages.</p>
+</body>
+</html> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/ByteReadable.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/ByteReadable.java
new file mode 100644
index 000000000..f3b3e74ca
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/ByteReadable.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+import java.io.IOException;
+
+/**
+ * An interface for providing a method for reading a stream of bytes one byte at
+ * a time.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface ByteReadable {
+ /**
+ * Returns the value of the next byte as an integer from 0 to 0xFF or -1 if
+ * the stream has ended.
+ *
+ * @return integer from 0 to 0xFF or -1 on EOF
+ * @throws IOException
+ */
+ public int readByte() throws IOException;
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/CharacterHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/CharacterHandler.java
new file mode 100644
index 000000000..4a5769f54
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/CharacterHandler.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2007-2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+import org.xml.sax.SAXException;
+
+/**
+ * An interface for receiving notifications of UTF-16 code units read from a character stream.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface CharacterHandler {
+
+ /**
+ * Receive notification of a run of UTF-16 code units.
+ * @param ch the buffer
+ * @param start start index in the buffer
+ * @param length the number of characters to process starting from <code>start</code>
+ * @throws SAXException if things go wrong
+ */
+ public void characters(char[] ch, int start, int length)
+ throws SAXException;
+
+ /**
+ * Signals the end of the stream. Can be used for cleanup. Doesn't mean that the stream ended successfully.
+ *
+ * @throws SAXException if things go wrong
+ */
+ public void end() throws SAXException;
+
+ /**
+ * Signals the start of the stream. Can be used for setup.
+ *
+ * @throws SAXException if things go wrong
+ */
+ public void start() throws SAXException;
+
+} \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DoctypeExpectation.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DoctypeExpectation.java
new file mode 100644
index 000000000..a34af51fa
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DoctypeExpectation.java
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+/**
+ * Used for indicating desired behavior with legacy doctypes.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public enum DoctypeExpectation {
+ /**
+ * Be a pure HTML5 parser.
+ */
+ HTML,
+
+ /**
+ * Require the HTML 4.01 Transitional public id. Turn on HTML4-specific
+ * additional errors regardless of doctype.
+ */
+ HTML401_TRANSITIONAL,
+
+ /**
+ * Require the HTML 4.01 Strict public id. Turn on HTML4-specific
+ * additional errors regardless of doctype.
+ */
+ HTML401_STRICT,
+
+ /**
+ * Treat the doctype required by HTML 5, doctypes with the HTML 4.01 Strict
+ * public id and doctypes with the HTML 4.01 Transitional public id and a
+ * system id as non-errors. Turn on HTML4-specific additional errors if the
+ * public id is the HTML 4.01 Strict or Transitional public id.
+ */
+ AUTO,
+
+ /**
+ * Never enable HTML4-specific error checks. Never report any doctype
+ * condition as an error. (Doctype tokens in wrong places will be
+ * reported as errors, though.) The application may decide what to log
+ * in response to calls to <code>DocumentModeHandler</code>. This mode
+ * is meant for doing surveys on existing content.
+ */
+ NO_DOCTYPE_ERRORS
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentMode.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentMode.java
new file mode 100644
index 000000000..e30eddd87
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentMode.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+/**
+ * Represents the HTML document compatibility mode.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public enum DocumentMode {
+ /**
+ * The Standards Mode
+ */
+ STANDARDS_MODE,
+
+ /**
+ * The Limited Quirks Mode aka. The Almost Standards Mode
+ */
+ ALMOST_STANDARDS_MODE,
+
+ /**
+ * The Quirks Mode
+ */
+ QUIRKS_MODE
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentModeHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentModeHandler.java
new file mode 100644
index 000000000..55377e0e4
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentModeHandler.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+
+import org.xml.sax.SAXException;
+
+/**
+ * A callback interface for receiving notification about the document mode.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface DocumentModeHandler {
+
+ /**
+ * Receive notification of the document mode.
+ *
+ * @param mode the document mode
+ * @param publicIdentifier the public id of the doctype or <code>null</code> if unavailable
+ * @param systemIdentifier the system id of the doctype or <code>null</code> if unavailable
+ * @param html4SpecificAdditionalErrorChecks <code>true</code> if HTML 4-specific checks were enabled, <code>false</code> otherwise
+ * @throws SAXException if things go wrong
+ */
+ public void documentMode(DocumentMode mode, String publicIdentifier, String systemIdentifier, boolean html4SpecificAdditionalErrorChecks) throws SAXException;
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java
new file mode 100644
index 000000000..6f185aeaf
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2008-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+import org.xml.sax.SAXException;
+
+/**
+ * An interface for communicating about character encoding names with the
+ * environment of the parser.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface EncodingDeclarationHandler {
+
+ /**
+ * Indicates that the parser has found an internal encoding declaration with
+ * the charset value <code>charset</code>.
+ *
+ * @param charset
+ * the charset name found.
+ * @return <code>true</code> if the value of <code>charset</code> was an
+ * encoding name for a supported ASCII-superset encoding.
+ * @throws SAXException
+ * if something went wrong
+ */
+ public boolean internalEncodingDeclaration(String charset) throws SAXException;
+
+ /**
+ * Queries the environment for the encoding in use (for error reporting).
+ *
+ * @return the encoding in use
+ * @throws SAXException
+ * if something went wrong
+ */
+ public String getCharacterEncoding() throws SAXException;
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Heuristics.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Heuristics.java
new file mode 100644
index 000000000..40f15ce7d
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Heuristics.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+/**
+ * Indicates a request for character encoding sniffer choice.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public enum Heuristics {
+
+ /**
+ * Perform no heuristic sniffing.
+ */
+ NONE,
+
+ /**
+ * Use both jchardet and ICU4J.
+ */
+ ALL,
+
+ /**
+ * Use jchardet only.
+ */
+ CHARDET,
+
+ /**
+ * Use ICU4J only.
+ */
+ ICU
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Interner.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Interner.java
new file mode 100644
index 000000000..deab4c60f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Interner.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2009-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+/**
+ * A placeholder type that translates into the type of the C++ class that
+ * implements an interning service for local names (<code>@Local</code> in
+ * Java).
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface Interner {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TokenHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TokenHandler.java
new file mode 100644
index 000000000..18f49e99d
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TokenHandler.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+import nu.validator.htmlparser.annotation.Const;
+import nu.validator.htmlparser.annotation.NoLength;
+import nu.validator.htmlparser.impl.ElementName;
+import nu.validator.htmlparser.impl.HtmlAttributes;
+import nu.validator.htmlparser.impl.Tokenizer;
+
+import org.xml.sax.SAXException;
+
+/**
+ * <code>Tokenizer</code> reports tokens through this interface.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface TokenHandler {
+
+ /**
+ * This method is called at the start of tokenization before any other
+ * methods on this interface are called. Implementations should hold the
+ * reference to the <code>Tokenizer</code> in order to set the content
+ * model flag and in order to be able to query for <code>Locator</code>
+ * data.
+ *
+ * @param self
+ * the <code>Tokenizer</code>.
+ * @throws SAXException
+ * if something went wrong
+ */
+ public void startTokenization(Tokenizer self) throws SAXException;
+
+ /**
+ * If this handler implementation cares about comments, return
+ * <code>true</code>. If not, return <code>false</code>.
+ *
+ * @return whether this handler wants comments
+ * @throws SAXException
+ * if something went wrong
+ */
+ public boolean wantsComments() throws SAXException;
+
+ /**
+ * Receive a doctype token.
+ *
+ * @param name
+ * the name
+ * @param publicIdentifier
+ * the public id
+ * @param systemIdentifier
+ * the system id
+ * @param forceQuirks
+ * whether the force-quirks flag of the doctype token is set
+ * @throws SAXException
+ * if something went wrong
+ */
+ public void doctype(String name, String publicIdentifier,
+ String systemIdentifier, boolean forceQuirks) throws SAXException;
+
+ /**
+ * Receive a start tag token.
+ *
+ * @param eltName
+ * the tag name
+ * @param attributes
+ * the attributes
+ * @param selfClosing
+ * whether the start tag was marked as self-closing
+ * @throws SAXException
+ * if something went wrong
+ */
+ public void startTag(ElementName eltName, HtmlAttributes attributes,
+ boolean selfClosing) throws SAXException;
+
+ /**
+ * Receive an end tag token.
+ *
+ * @param eltName
+ * the tag name
+ * @throws SAXException
+ * if something went wrong
+ */
+ public void endTag(ElementName eltName) throws SAXException;
+
+ /**
+ * Receive a comment token. The data is junk if
+ * <code>wantsComments()</code> returned <code>false</code>.
+ *
+ * @param buf
+ * a buffer holding the data
+ * @param start the offset into the buffer
+ * @param length
+ * the number of code units to read
+ * @throws SAXException
+ * if something went wrong
+ */
+ public void comment(@NoLength char[] buf, int start, int length) throws SAXException;
+
+ /**
+ * Receive character tokens. This method has the same semantics as the SAX
+ * method of the same name.
+ *
+ * @param buf
+ * a buffer holding the data
+ * @param start
+ * offset into the buffer
+ * @param length
+ * the number of code units to read
+ * @throws SAXException
+ * if something went wrong
+ * @see org.xml.sax.ContentHandler#characters(char[], int, int)
+ */
+ public void characters(@Const @NoLength char[] buf, int start, int length)
+ throws SAXException;
+
+ /**
+ * Reports a U+0000 that's being turned into a U+FFFD.
+ *
+ * @throws SAXException
+ * if something went wrong
+ */
+ public void zeroOriginatingReplacementCharacter() throws SAXException;
+
+ /**
+ * The end-of-file token.
+ *
+ * @throws SAXException
+ * if something went wrong
+ */
+ public void eof() throws SAXException;
+
+ /**
+ * Perform final cleanup.
+ *
+ * @throws SAXException
+ * if something went wrong
+ */
+ public void endTokenization() throws SAXException;
+
+ /**
+ * Checks if the CDATA sections are allowed.
+ *
+ * @return <code>true</code> if CDATA sections are allowed
+ * @throws SAXException
+ * if something went wrong
+ */
+ public boolean cdataSectionAllowed() throws SAXException;
+
+ /**
+ * Notifies the token handler of the worst case amount of data to be
+ * reported via <code>characters()</code> and
+ * <code>zeroOriginatingReplacementCharacter()</code>.
+ *
+ * @param inputLength the maximum number of chars that can be reported
+ * via <code>characters()</code> and
+ * <code>zeroOriginatingReplacementCharacter()</code> before a new call to
+ * this method.
+ */
+ public void ensureBufferSpace(int inputLength) throws SAXException;
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TransitionHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TransitionHandler.java
new file mode 100644
index 000000000..eec23c71c
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TransitionHandler.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+import org.xml.sax.SAXException;
+
+/**
+ * An interface for intercepting information about the state transitions that
+ * the tokenizer is making.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface TransitionHandler {
+
+ /**
+ * This method is called for every tokenizer state transition.
+ *
+ * @param from
+ * the state the tokenizer is transitioning from
+ * @param to
+ * the state being transitioned to
+ * @param reconsume
+ * <code>true</code> if the current input character is going to
+ * be reconsumed in the new state
+ * @param pos
+ * the current index into the input stream
+ * @throws SAXException
+ * if something went wrong
+ */
+ void transition(int from, int to, boolean reconsume, int pos)
+ throws SAXException;
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/XmlViolationPolicy.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/XmlViolationPolicy.java
new file mode 100644
index 000000000..c959df655
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/XmlViolationPolicy.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+/**
+ * Policy for XML 1.0 violations.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public enum XmlViolationPolicy {
+ /**
+ * Conform to HTML 5, allow XML 1.0 to be violated.
+ */
+ ALLOW,
+
+ /**
+ * Halt when something cannot be mapped to XML 1.0.
+ */
+ FATAL,
+
+ /**
+ * Be non-conforming and alter the infoset to fit
+ * XML 1.0 when something would otherwise not be
+ * mappable to XML 1.0.
+ */
+ ALTER_INFOSET
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/package.html
new file mode 100644
index 000000000..43f141cd8
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/package.html
@@ -0,0 +1,29 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>This package provides common interfaces and enumerations.</p>
+</body>
+</html>
\ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/DOMTreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/DOMTreeBuilder.java
new file mode 100644
index 000000000..2b8eff230
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/DOMTreeBuilder.java
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.dom;
+
+import nu.validator.htmlparser.common.DocumentMode;
+import nu.validator.htmlparser.impl.CoalescingTreeBuilder;
+import nu.validator.htmlparser.impl.HtmlAttributes;
+
+import org.w3c.dom.DOMException;
+import org.w3c.dom.DOMImplementation;
+import org.w3c.dom.Document;
+import org.w3c.dom.DocumentFragment;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.Text;
+import org.xml.sax.SAXException;
+
+/**
+ * The tree builder glue for building a tree through the public DOM APIs.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+class DOMTreeBuilder extends CoalescingTreeBuilder<Element> {
+
+ /**
+ * The DOM impl.
+ */
+ private DOMImplementation implementation;
+
+ /**
+ * The current doc.
+ */
+ private Document document;
+
+ /**
+ * The constructor.
+ *
+ * @param implementation
+ * the DOM impl.
+ */
+ protected DOMTreeBuilder(DOMImplementation implementation) {
+ super();
+ this.implementation = implementation;
+ }
+
+ /**
+ *
+ * @see nu.validator.htmlparser.impl.TreeBuilder#addAttributesToElement(java.lang.Object,
+ * nu.validator.htmlparser.impl.HtmlAttributes)
+ */
+ @Override protected void addAttributesToElement(Element element,
+ HtmlAttributes attributes) throws SAXException {
+ try {
+ for (int i = 0; i < attributes.getLength(); i++) {
+ String localName = attributes.getLocalNameNoBoundsCheck(i);
+ String uri = attributes.getURINoBoundsCheck(i);
+ if (!element.hasAttributeNS(uri, localName)) {
+ element.setAttributeNS(uri, localName,
+ attributes.getValueNoBoundsCheck(i));
+ }
+ }
+ } catch (DOMException e) {
+ fatal(e);
+ }
+ }
+
+ /**
+ *
+ * @see nu.validator.htmlparser.impl.CoalescingTreeBuilder#appendCharacters(java.lang.Object,
+ * java.lang.String)
+ */
+ @Override protected void appendCharacters(Element parent, String text)
+ throws SAXException {
+ try {
+ Node lastChild = parent.getLastChild();
+ if (lastChild != null && lastChild.getNodeType() == Node.TEXT_NODE) {
+ Text lastAsText = (Text) lastChild;
+ lastAsText.setData(lastAsText.getData() + text);
+ return;
+ }
+ parent.appendChild(document.createTextNode(text));
+ } catch (DOMException e) {
+ fatal(e);
+ }
+ }
+
+ /**
+ *
+ * @see nu.validator.htmlparser.impl.TreeBuilder#appendChildrenToNewParent(java.lang.Object,
+ * java.lang.Object)
+ */
+ @Override protected void appendChildrenToNewParent(Element oldParent,
+ Element newParent) throws SAXException {
+ try {
+ while (oldParent.hasChildNodes()) {
+ newParent.appendChild(oldParent.getFirstChild());
+ }
+ } catch (DOMException e) {
+ fatal(e);
+ }
+ }
+
+ /**
+ *
+ * @see nu.validator.htmlparser.impl.CoalescingTreeBuilder#appendComment(java.lang.Object,
+ * java.lang.String)
+ */
+ @Override protected void appendComment(Element parent, String comment)
+ throws SAXException {
+ try {
+ parent.appendChild(document.createComment(comment));
+ } catch (DOMException e) {
+ fatal(e);
+ }
+ }
+
+ /**
+ *
+ * @see nu.validator.htmlparser.impl.CoalescingTreeBuilder#appendCommentToDocument(java.lang.String)
+ */
+ @Override protected void appendCommentToDocument(String comment)
+ throws SAXException {
+ try {
+ document.appendChild(document.createComment(comment));
+ } catch (DOMException e) {
+ fatal(e);
+ }
+ }
+
+ /**
+ *
+ * @see nu.validator.htmlparser.impl.TreeBuilder#createElement(String, String, nu.validator.htmlparser.impl.HtmlAttributes, Object)
+ */
+ @Override protected Element createElement(String ns, String name,
+ HtmlAttributes attributes, Element intendedParent) throws SAXException {
+ try {
+ Element rv = document.createElementNS(ns, name);
+ for (int i = 0; i < attributes.getLength(); i++) {
+ rv.setAttributeNS(attributes.getURINoBoundsCheck(i),
+ attributes.getLocalNameNoBoundsCheck(i),
+ attributes.getValueNoBoundsCheck(i));
+ if (attributes.getTypeNoBoundsCheck(i) == "ID") {
+ rv.setIdAttributeNS(null, attributes.getLocalName(i), true);
+ }
+ }
+ return rv;
+ } catch (DOMException e) {
+ fatal(e);
+ throw new RuntimeException("Unreachable");
+ }
+ }
+
+ /**
+ *
+ * @see nu.validator.htmlparser.impl.TreeBuilder#createHtmlElementSetAsRoot(nu.validator.htmlparser.impl.HtmlAttributes)
+ */
+ @Override protected Element createHtmlElementSetAsRoot(
+ HtmlAttributes attributes) throws SAXException {
+ try {
+ Element rv = document.createElementNS(
+ "http://www.w3.org/1999/xhtml", "html");
+ for (int i = 0; i < attributes.getLength(); i++) {
+ rv.setAttributeNS(attributes.getURINoBoundsCheck(i),
+ attributes.getLocalNameNoBoundsCheck(i),
+ attributes.getValueNoBoundsCheck(i));
+ }
+ document.appendChild(rv);
+ return rv;
+ } catch (DOMException e) {
+ fatal(e);
+ throw new RuntimeException("Unreachable");
+ }
+ }
+
+ /**
+ *
+ * @see nu.validator.htmlparser.impl.TreeBuilder#appendElement(java.lang.Object,
+ * java.lang.Object)
+ */
+ @Override protected void appendElement(Element child, Element newParent)
+ throws SAXException {
+ try {
+ newParent.appendChild(child);
+ } catch (DOMException e) {
+ fatal(e);
+ }
+ }
+
+ /**
+ *
+ * @see nu.validator.htmlparser.impl.TreeBuilder#hasChildren(java.lang.Object)
+ */
+ @Override protected boolean hasChildren(Element element)
+ throws SAXException {
+ try {
+ return element.hasChildNodes();
+ } catch (DOMException e) {
+ fatal(e);
+ throw new RuntimeException("Unreachable");
+ }
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#createElement(String,
+ * String, nu.validator.htmlparser.impl.HtmlAttributes, Object, Object)
+ */
+ @Override protected Element createElement(String ns, String name,
+ HtmlAttributes attributes, Element form, Element intendedParent) throws SAXException {
+ try {
+ Element rv = createElement(ns, name, attributes, intendedParent);
+ rv.setUserData("nu.validator.form-pointer", form, null);
+ return rv;
+ } catch (DOMException e) {
+ fatal(e);
+ return null;
+ }
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#start(boolean)
+ */
+ @Override protected void start(boolean fragment) throws SAXException {
+ document = implementation.createDocument(null, null, null);
+ }
+
+ /**
+ *
+ * @see nu.validator.htmlparser.impl.TreeBuilder#documentMode(nu.validator.htmlparser.common.DocumentMode,
+ * java.lang.String, java.lang.String, boolean)
+ */
+ protected void documentMode(DocumentMode mode, String publicIdentifier,
+ String systemIdentifier, boolean html4SpecificAdditionalErrorChecks)
+ throws SAXException {
+ document.setUserData("nu.validator.document-mode", mode, null);
+ }
+
+ /**
+ * Returns the document and clears this builder's reference to it.
+ *
+ * @return the document
+ */
+ Document getDocument() {
+ Document rv = document;
+ document = null;
+ return rv;
+ }
+
+ /**
+ * Return a document fragment holding the children of the document's first child; clears the document reference.
+ *
+ * @return the document fragment
+ */
+ DocumentFragment getDocumentFragment() {
+ DocumentFragment rv = document.createDocumentFragment();
+ Node rootElt = document.getFirstChild();
+ while (rootElt.hasChildNodes()) {
+ rv.appendChild(rootElt.getFirstChild());
+ }
+ document = null;
+ return rv;
+ }
+
+ @Override
+ protected Element createAndInsertFosterParentedElement(String ns, String name,
+ HtmlAttributes attributes, Element table, Element stackParent) throws SAXException {
+ try {
+ Node parent = table.getParentNode();
+ Element child = createElement(ns, name, attributes, parent != null ? (Element) parent : stackParent);
+
+ if (parent != null) { // always an element if not null
+ parent.insertBefore(child, table);
+ } else {
+ stackParent.appendChild(child);
+ }
+
+ return child;
+ } catch (DOMException e) {
+ fatal(e);
+ throw new RuntimeException("Unreachable");
+ }
+ }
+
+ @Override protected void insertFosterParentedCharacters(String text,
+ Element table, Element stackParent) throws SAXException {
+ try {
+ Node parent = table.getParentNode();
+ if (parent != null) { // always an element if not null
+ Node previousSibling = table.getPreviousSibling();
+ if (previousSibling != null
+ && previousSibling.getNodeType() == Node.TEXT_NODE) {
+ Text lastAsText = (Text) previousSibling;
+ lastAsText.setData(lastAsText.getData() + text);
+ return;
+ }
+ parent.insertBefore(document.createTextNode(text), table);
+ return;
+ }
+ Node lastChild = stackParent.getLastChild();
+ if (lastChild != null && lastChild.getNodeType() == Node.TEXT_NODE) {
+ Text lastAsText = (Text) lastChild;
+ lastAsText.setData(lastAsText.getData() + text);
+ return;
+ }
+ stackParent.appendChild(document.createTextNode(text));
+ } catch (DOMException e) {
+ fatal(e);
+ }
+ }
+
+ @Override protected void insertFosterParentedChild(Element child,
+ Element table, Element stackParent) throws SAXException {
+ try {
+ Node parent = table.getParentNode();
+ if (parent != null) { // always an element if not null
+ parent.insertBefore(child, table);
+ } else {
+ stackParent.appendChild(child);
+ }
+ } catch (DOMException e) {
+ fatal(e);
+ }
+ }
+
+ @Override protected void detachFromParent(Element element)
+ throws SAXException {
+ try {
+ Node parent = element.getParentNode();
+ if (parent != null) {
+ parent.removeChild(element);
+ }
+ } catch (DOMException e) {
+ fatal(e);
+ }
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/Dom2Sax.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/Dom2Sax.java
new file mode 100644
index 000000000..5e366be7b
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/Dom2Sax.java
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.dom;
+
+import org.w3c.dom.DocumentType;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+ /**
+ * Walks a DOM subtree and reports it as SAX events to a
+ * <code>ContentHandler</code> and, optionally, a <code>LexicalHandler</code>.
+ */
+ public class Dom2Sax {
+
+ /**
+ * Maps a <code>null</code> namespace URI to the empty string.
+ *
+ * @param namespaceURI the namespace URI, possibly <code>null</code>
+ * @return the URI, or <code>""</code> if it was <code>null</code>
+ */
+ private static String emptyIfNull(String namespaceURI) {
+ return namespaceURI == null ? "" : namespaceURI;
+ }
+
+ /**
+ * Returns the local name of a node, falling back to the node name when
+ * the DOM implementation reports a <code>null</code> local name.
+ *
+ * @param node the node
+ * @return the local name, or the node name if the local name is
+ * <code>null</code>
+ */
+ private static String localNameOf(Node node) {
+ String localName = node.getLocalName();
+ return localName == null ? node.getNodeName() : localName;
+ }
+
+ /**
+ * Reusable <code>Attributes</code> view; it is pointed at the current
+ * element's attribute map for the duration of <code>startElement</code>.
+ */
+ private final NamedNodeMapAttributes attributes = new NamedNodeMapAttributes();
+
+ private final ContentHandler contentHandler;
+
+ private final LexicalHandler lexicalHandler;
+
+ /**
+ * Creates a converter that reports to the given handlers.
+ *
+ * @param contentHandler
+ * the handler receiving content events; must not be
+ * <code>null</code>
+ * @param lexicalHandler
+ * the handler receiving comment, CDATA and DTD events; may be
+ * <code>null</code>, in which case those events are not
+ * reported
+ * @throws IllegalArgumentException
+ * if <code>contentHandler</code> is <code>null</code>
+ */
+ public Dom2Sax(ContentHandler contentHandler, LexicalHandler lexicalHandler) {
+ if (contentHandler == null) {
+ throw new IllegalArgumentException("ContentHandler must not be null.");
+ }
+ this.contentHandler = contentHandler;
+ this.lexicalHandler = lexicalHandler;
+ }
+
+ /**
+ * Reports the subtree rooted at <code>node</code> as SAX events.
+ *
+ * <p>The tree is walked iteratively in document order: the "start" event
+ * of a node is emitted when the node is first visited and the "end" event
+ * once all of its descendants have been reported. The walk ends after the
+ * end event of <code>node</code> itself. <code>startDocument</code> and
+ * <code>endDocument</code> are emitted only for document nodes.
+ *
+ * @param node the root of the subtree to report
+ * @throws SAXException if a handler throws
+ */
+ public void parse(Node node) throws SAXException {
+ Node current = node;
+ Node next;
+ char[] buf;
+ for (;;) {
+ // Going down: emit the event that opens the current node.
+ switch (current.getNodeType()) {
+ case Node.ELEMENT_NODE:
+ attributes.setNamedNodeMap(current.getAttributes());
+ // To work around severe bogosity in the default DOM
+ // impl, the node name is used if the local name is null.
+ // The qualified name is passed as null.
+ contentHandler.startElement(
+ emptyIfNull(current.getNamespaceURI()),
+ localNameOf(current), null, attributes);
+ attributes.clear();
+ break;
+ case Node.TEXT_NODE:
+ buf = current.getNodeValue().toCharArray();
+ contentHandler.characters(buf, 0, buf.length);
+ break;
+ case Node.CDATA_SECTION_NODE:
+ if (lexicalHandler != null) {
+ lexicalHandler.startCDATA();
+ }
+ buf = current.getNodeValue().toCharArray();
+ contentHandler.characters(buf, 0, buf.length);
+ if (lexicalHandler != null) {
+ lexicalHandler.endCDATA();
+ }
+ break;
+ case Node.COMMENT_NODE:
+ if (lexicalHandler != null) {
+ buf = current.getNodeValue().toCharArray();
+ lexicalHandler.comment(buf, 0, buf.length);
+ }
+ break;
+ case Node.DOCUMENT_NODE:
+ contentHandler.startDocument();
+ break;
+ case Node.DOCUMENT_TYPE_NODE:
+ if (lexicalHandler != null) {
+ DocumentType doctype = (DocumentType) current;
+ lexicalHandler.startDTD(doctype.getName(),
+ doctype.getPublicId(), doctype.getSystemId());
+ lexicalHandler.endDTD();
+ }
+ break;
+ case Node.PROCESSING_INSTRUCTION_NODE:
+ contentHandler.processingInstruction(current.getNodeName(), current.getNodeValue());
+ break;
+ case Node.ENTITY_REFERENCE_NODE:
+ contentHandler.skippedEntity(current.getNodeName());
+ break;
+ default:
+ // Other node types produce no event.
+ break;
+ }
+ if ((next = current.getFirstChild()) != null) {
+ current = next;
+ continue;
+ }
+ // Going up: close nodes until one with a next sibling is found or
+ // the starting node has been closed.
+ for (;;) {
+ switch (current.getNodeType()) {
+ case Node.ELEMENT_NODE:
+ contentHandler.endElement(
+ emptyIfNull(current.getNamespaceURI()),
+ localNameOf(current), null);
+ break;
+ case Node.DOCUMENT_NODE:
+ contentHandler.endDocument();
+ break;
+ default:
+ // Only elements and documents have an end event.
+ break;
+ }
+ if (current == node) {
+ return;
+ }
+ if ((next = current.getNextSibling()) != null) {
+ current = next;
+ break;
+ }
+ current = current.getParentNode();
+ }
+ }
+ }
+
+ /**
+ * A SAX <code>Attributes</code> view over a DOM <code>NamedNodeMap</code>.
+ * The view is only populated between <code>setNamedNodeMap</code> and
+ * <code>clear</code>; after <code>clear</code> it behaves as an empty
+ * attribute list.
+ */
+ private static final class NamedNodeMapAttributes implements Attributes {
+
+ private NamedNodeMap map;
+
+ private int length;
+
+ /**
+ * Points this view at the given attribute map.
+ *
+ * @param attributes the attribute map to expose
+ */
+ public void setNamedNodeMap(NamedNodeMap attributes) {
+ this.map = attributes;
+ this.length = attributes.getLength();
+ }
+
+ /**
+ * Releases the attribute map. The length is reset as well so that a
+ * handler that kept a reference to this object sees an empty list
+ * instead of hitting a null map.
+ */
+ public void clear() {
+ this.map = null;
+ this.length = 0;
+ }
+
+ /**
+ * Returns the attribute node at the index, or <code>null</code> if the
+ * index is out of range.
+ */
+ private Node itemOrNull(int index) {
+ return (index >= 0 && index < length) ? map.item(index) : null;
+ }
+
+ public int getIndex(String qName) {
+ for (int i = 0; i < length; i++) {
+ Node n = map.item(i);
+ if (n.getNodeName().equals(qName)) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ public int getIndex(String uri, String localName) {
+ for (int i = 0; i < length; i++) {
+ Node n = map.item(i);
+ // localNameOf never dereferences a null local name.
+ if (localNameOf(n).equals(localName) && emptyIfNull(n.getNamespaceURI()).equals(uri)) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ public int getLength() {
+ return length;
+ }
+
+ public String getLocalName(int index) {
+ Node n = itemOrNull(index);
+ return n == null ? null : localNameOf(n);
+ }
+
+ public String getQName(int index) {
+ Node n = itemOrNull(index);
+ return n == null ? null : n.getNodeName();
+ }
+
+ /**
+ * Reports <code>"ID"</code> for attributes whose local name is
+ * <code>id</code> and <code>"CDATA"</code> for every other attribute.
+ */
+ public String getType(int index) {
+ Node n = itemOrNull(index);
+ if (n == null) {
+ return null;
+ }
+ return "id".equals(localNameOf(n)) ? "ID" : "CDATA";
+ }
+
+ public String getType(String qName) {
+ int index = getIndex(qName);
+ return index == -1 ? null : getType(index);
+ }
+
+ public String getType(String uri, String localName) {
+ int index = getIndex(uri, localName);
+ return index == -1 ? null : getType(index);
+ }
+
+ public String getURI(int index) {
+ Node n = itemOrNull(index);
+ return n == null ? null : emptyIfNull(n.getNamespaceURI());
+ }
+
+ public String getValue(int index) {
+ Node n = itemOrNull(index);
+ return n == null ? null : n.getNodeValue();
+ }
+
+ public String getValue(String qName) {
+ int index = getIndex(qName);
+ return index == -1 ? null : getValue(index);
+ }
+
+ public String getValue(String uri, String localName) {
+ int index = getIndex(uri, localName);
+ return index == -1 ? null : getValue(index);
+ }
+
+ }
+ }
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java
new file mode 100644
index 000000000..f4a307c9f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java
@@ -0,0 +1,736 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007-2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.dom;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.LinkedList;
+import java.util.List;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import nu.validator.htmlparser.common.CharacterHandler;
+import nu.validator.htmlparser.common.DoctypeExpectation;
+import nu.validator.htmlparser.common.DocumentModeHandler;
+import nu.validator.htmlparser.common.Heuristics;
+import nu.validator.htmlparser.common.TokenHandler;
+import nu.validator.htmlparser.common.TransitionHandler;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
+import nu.validator.htmlparser.impl.Tokenizer;
+import nu.validator.htmlparser.io.Driver;
+
+import org.w3c.dom.DOMImplementation;
+import org.w3c.dom.Document;
+import org.w3c.dom.DocumentFragment;
+import org.xml.sax.EntityResolver;
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * This class implements an HTML5 parser that exposes data through the DOM
+ * interface.
+ *
+ * <p>By default, when using the constructor without arguments, the
+ * this parser coerces XML 1.0-incompatible infosets into XML 1.0-compatible
+ * infosets. This corresponds to <code>ALTER_INFOSET</code> as the general
+ * XML violation policy. To make the parser support non-conforming HTML fully
+ * per the HTML 5 spec while on the other hand potentially violating the SAX2
+ * API contract, set the general XML violation policy to <code>ALLOW</code>.
+ * This does not work with a standard DOM implementation.
+ * It is possible to treat XML 1.0 infoset violations as fatal by setting
+ * the general XML violation policy to <code>FATAL</code>.
+ *
+ * <p>The doctype is not represented in the tree.
+ *
+ * <p>The document mode is represented as user data <code>DocumentMode</code>
+ * object with the key <code>nu.validator.document-mode</code> on the document
+ * node.
+ *
+ * <p>The form pointer is also stored as user data with the key
+ * <code>nu.validator.form-pointer</code>.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class HtmlDocumentBuilder extends DocumentBuilder {
+
+ /**
+ * Returns the JAXP DOM implementation.
+ *
+ * @return the JAXP DOM implementation
+ */
+ private static DOMImplementation jaxpDOMImplementation() {
+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+ factory.setNamespaceAware(true);
+ DocumentBuilder builder;
+ try {
+ builder = factory.newDocumentBuilder();
+ } catch (ParserConfigurationException e) {
+ throw new RuntimeException(e);
+ }
+ return builder.getDOMImplementation();
+ }
+
+ /**
+ * The tokenizer.
+ */
+ private Driver driver;
+
+ /**
+ * The tree builder.
+ */
+ private final DOMTreeBuilder treeBuilder;
+
+ /**
+ * The DOM impl.
+ */
+ private final DOMImplementation implementation;
+
+ /**
+ * The entity resolver.
+ */
+ private EntityResolver entityResolver;
+
+ private ErrorHandler errorHandler = null;
+
+ private DocumentModeHandler documentModeHandler = null;
+
+ private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;
+
+ private boolean checkingNormalization = false;
+
+ private boolean scriptingEnabled = false;
+
+ private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>();
+
+ private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW;
+
+ private boolean html4ModeCompatibleWithXhtml1Schemata = false;
+
+ private boolean mappingLangToXmlLang = false;
+
+ private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL;
+
+ private boolean reportingDoctype = true;
+
+ private ErrorHandler treeBuilderErrorHandler = null;
+
+ private Heuristics heuristics = Heuristics.NONE;
+
+ private TransitionHandler transitionHandler = null;
+
+ /**
+ * Instantiates the document builder with a specific DOM
+ * implementation and XML violation policy.
+ *
+ * @param implementation
+ * the DOM implementation
+ * @param xmlPolicy the policy
+ */
+ public HtmlDocumentBuilder(DOMImplementation implementation,
+ XmlViolationPolicy xmlPolicy) {
+ this.implementation = implementation;
+ this.treeBuilder = new DOMTreeBuilder(implementation);
+ this.driver = null;
+ setXmlPolicy(xmlPolicy);
+ }
+
+ /**
+ * Instantiates the document builder with a specific DOM implementation
+ * and the infoset-altering XML violation policy.
+ *
+ * @param implementation
+ * the DOM implementation
+ */
+ public HtmlDocumentBuilder(DOMImplementation implementation) {
+ this(implementation, XmlViolationPolicy.ALTER_INFOSET);
+ }
+
+ /**
+ * Instantiates the document builder with the JAXP DOM implementation
+ * and the infoset-altering XML violation policy.
+ */
+ public HtmlDocumentBuilder() {
+ this(XmlViolationPolicy.ALTER_INFOSET);
+ }
+
+ /**
+ * Instantiates the document builder with the JAXP DOM implementation
+ * and a specific XML violation policy.
+ * @param xmlPolicy the policy
+ */
+ public HtmlDocumentBuilder(XmlViolationPolicy xmlPolicy) {
+ this(jaxpDOMImplementation(), xmlPolicy);
+ }
+
+
+ private Tokenizer newTokenizer(TokenHandler handler,
+ boolean newAttributesEachTime) {
+ if (errorHandler == null && transitionHandler == null
+ && contentNonXmlCharPolicy == XmlViolationPolicy.ALLOW) {
+ return new Tokenizer(handler, newAttributesEachTime);
+ } else {
+ return new ErrorReportingTokenizer(handler, newAttributesEachTime);
+ }
+ }
+
+ /**
+ * This class wraps different tree builders depending on configuration. This
+ * method does the work of hiding this from the user of the class.
+ */
+ private void lazyInit() {
+ if (driver == null) {
+ this.driver = new Driver(newTokenizer(treeBuilder, false));
+ this.driver.setErrorHandler(errorHandler);
+ this.driver.setTransitionHandler(transitionHandler);
+ this.treeBuilder.setErrorHandler(treeBuilderErrorHandler);
+ this.driver.setCheckingNormalization(checkingNormalization);
+ this.driver.setCommentPolicy(commentPolicy);
+ this.driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
+ this.driver.setContentSpacePolicy(contentSpacePolicy);
+ this.driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+ this.driver.setMappingLangToXmlLang(mappingLangToXmlLang);
+ this.driver.setXmlnsPolicy(xmlnsPolicy);
+ this.driver.setHeuristics(heuristics);
+ for (CharacterHandler characterHandler : characterHandlers) {
+ this.driver.addCharacterHandler(characterHandler);
+ }
+ this.treeBuilder.setDoctypeExpectation(doctypeExpectation);
+ this.treeBuilder.setDocumentModeHandler(documentModeHandler);
+ this.treeBuilder.setScriptingEnabled(scriptingEnabled);
+ this.treeBuilder.setReportingDoctype(reportingDoctype);
+ this.treeBuilder.setNamePolicy(namePolicy);
+ }
+ }
+
+ /**
+ * Tokenizes the input source.
+ *
+ * @param is the source
+ * @throws SAXException if stuff goes wrong
+ * @throws IOException if IO goes wrong
+ * @throws MalformedURLException if the system ID is malformed and the entity resolver is <code>null</code>
+ */
+ private void tokenize(InputSource is) throws SAXException, IOException,
+ MalformedURLException {
+ if (is == null) {
+ throw new IllegalArgumentException("Null input.");
+ }
+ if (is.getByteStream() == null && is.getCharacterStream() == null) {
+ String systemId = is.getSystemId();
+ if (systemId == null) {
+ throw new IllegalArgumentException(
+ "No byte stream, no character stream nor URI.");
+ }
+ if (entityResolver != null) {
+ is = entityResolver.resolveEntity(is.getPublicId(), systemId);
+ }
+ if (is.getByteStream() == null || is.getCharacterStream() == null) {
+ is = new InputSource();
+ is.setSystemId(systemId);
+ is.setByteStream(new URL(systemId).openStream());
+ }
+ }
+ if (driver == null) lazyInit();
+ driver.tokenize(is);
+ }
+
+ /**
+ * Returns the DOM implementation
+ * @return the DOM implementation
+ * @see javax.xml.parsers.DocumentBuilder#getDOMImplementation()
+ */
+ @Override public DOMImplementation getDOMImplementation() {
+ return implementation;
+ }
+
+ /**
+ * Returns <code>true</code>.
+ * @return <code>true</code>
+ * @see javax.xml.parsers.DocumentBuilder#isNamespaceAware()
+ */
+ @Override public boolean isNamespaceAware() {
+ return true;
+ }
+
+ /**
+ * Returns <code>false</code>
+ * @return <code>false</code>
+ * @see javax.xml.parsers.DocumentBuilder#isValidating()
+ */
+ @Override public boolean isValidating() {
+ return false;
+ }
+
+ /**
+ * For API compatibility.
+ * @see javax.xml.parsers.DocumentBuilder#newDocument()
+ */
+ @Override public Document newDocument() {
+ return implementation.createDocument(null, null, null);
+ }
+
+ /**
+ * Parses a document from a SAX <code>InputSource</code>.
+ * @param is the source
+ * @return the doc
+ * @throws SAXException if stuff goes wrong
+ * @throws IOException if IO goes wrong
+ * @see javax.xml.parsers.DocumentBuilder#parse(org.xml.sax.InputSource)
+ */
+ @Override public Document parse(InputSource is) throws SAXException,
+ IOException {
+ treeBuilder.setFragmentContext(null);
+ tokenize(is);
+ return treeBuilder.getDocument();
+ }
+
+ /**
+ * Parses a document fragment from a SAX <code>InputSource</code> with
+ * an HTML element as the fragment context.
+ * @param is the source
+ * @param context the context element name (HTML namespace assumed)
+ * @return the document fragment
+ * @throws SAXException if stuff goes wrong
+ * @throws IOException if IO goes wrong
+ */
+ public DocumentFragment parseFragment(InputSource is, String context)
+ throws IOException, SAXException {
+ treeBuilder.setFragmentContext(context.intern());
+ tokenize(is);
+ return treeBuilder.getDocumentFragment();
+ }
+
+ /**
+ * Parses a document fragment from a SAX <code>InputSource</code>.
+ * @param is the source
+ * @param contextLocal the local name of the context element
+ * @param contextNamespace the namespace of the context element
+ * @return the document fragment
+ * @throws SAXException if stuff goes wrong
+ * @throws IOException if IO goes wrong
+ */
+ public DocumentFragment parseFragment(InputSource is, String contextLocal,
+ String contextNamespace) throws IOException, SAXException {
+ treeBuilder.setFragmentContext(contextLocal.intern(),
+ contextNamespace.intern(), null, false);
+ tokenize(is);
+ return treeBuilder.getDocumentFragment();
+ }
+
+ /**
+ * Sets the entity resolver for URI-only inputs.
+ * @param resolver the resolver
+ * @see javax.xml.parsers.DocumentBuilder#setEntityResolver(org.xml.sax.EntityResolver)
+ */
+ @Override public void setEntityResolver(EntityResolver resolver) {
+ this.entityResolver = resolver;
+ }
+
+ /**
+ * Sets the error handler.
+ * @param errorHandler the handler
+ * @see javax.xml.parsers.DocumentBuilder#setErrorHandler(org.xml.sax.ErrorHandler)
+ */
+ @Override public void setErrorHandler(ErrorHandler errorHandler) {
+ treeBuilder.setErrorHandler(errorHandler);
+ if (driver != null) {
+ driver.setErrorHandler(errorHandler);
+ }
+ }
+
+ public void setTransitionHander(TransitionHandler handler) {
+ transitionHandler = handler;
+ driver = null;
+ }
+
+ /**
+ * Indicates whether NFC normalization of source is being checked.
+ * @return <code>true</code> if NFC normalization of source is being checked.
+ * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization()
+ */
+ public boolean isCheckingNormalization() {
+ return checkingNormalization;
+ }
+
+ /**
+ * Toggles the checking of the NFC normalization of source.
+ * @param enable <code>true</code> to check normalization
+ * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean)
+ */
+ public void setCheckingNormalization(boolean enable) {
+ this.checkingNormalization = enable;
+ if (driver != null) {
+ driver.setCheckingNormalization(checkingNormalization);
+ }
+ }
+
+ /**
+ * Sets the policy for consecutive hyphens in comments.
+ * @param commentPolicy the policy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
+ this.commentPolicy = commentPolicy;
+ if (driver != null) {
+ driver.setCommentPolicy(commentPolicy);
+ }
+ }
+
+ /**
+ * Sets the policy for non-XML characters except white space.
+ * @param contentNonXmlCharPolicy the policy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setContentNonXmlCharPolicy(
+ XmlViolationPolicy contentNonXmlCharPolicy) {
+ this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
+ driver = null;
+ }
+
+ /**
+ * Sets the policy for non-XML white space.
+ * @param contentSpacePolicy the policy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
+ this.contentSpacePolicy = contentSpacePolicy;
+ if (driver != null) {
+ driver.setContentSpacePolicy(contentSpacePolicy);
+ }
+ }
+
+ /**
+ * Whether the parser considers scripting to be enabled for noscript treatment.
+ *
+ * @return <code>true</code> if enabled
+ * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled()
+ */
+ public boolean isScriptingEnabled() {
+ return scriptingEnabled;
+ }
+
+ /**
+ * Sets whether the parser considers scripting to be enabled for noscript treatment.
+ * @param scriptingEnabled <code>true</code> to enable
+ * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean)
+ */
+ public void setScriptingEnabled(boolean scriptingEnabled) {
+ this.scriptingEnabled = scriptingEnabled;
+ if (treeBuilder != null) {
+ treeBuilder.setScriptingEnabled(scriptingEnabled);
+ }
+ }
+
+ /**
+ * Returns the doctype expectation.
+ *
+ * @return the doctypeExpectation
+ */
+ public DoctypeExpectation getDoctypeExpectation() {
+ return doctypeExpectation;
+ }
+
+ /**
+ * Sets the doctype expectation.
+ *
+ * @param doctypeExpectation
+ * the doctypeExpectation to set
+ * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation)
+ */
+ public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
+ this.doctypeExpectation = doctypeExpectation;
+ if (treeBuilder != null) {
+ treeBuilder.setDoctypeExpectation(doctypeExpectation);
+ }
+ }
+
+ /**
+ * Returns the document mode handler.
+ *
+ * @return the documentModeHandler
+ */
+ public DocumentModeHandler getDocumentModeHandler() {
+ return documentModeHandler;
+ }
+
+ /**
+ * Sets the document mode handler.
+ *
+ * @param documentModeHandler
+ * the documentModeHandler to set
+ * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler)
+ */
+ public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
+ this.documentModeHandler = documentModeHandler;
+ }
+
+ /**
+ * Returns the streamabilityViolationPolicy.
+ *
+ * @return the streamabilityViolationPolicy
+ */
+ public XmlViolationPolicy getStreamabilityViolationPolicy() {
+ return streamabilityViolationPolicy;
+ }
+
+ /**
+ * Sets the streamabilityViolationPolicy.
+ *
+ * @param streamabilityViolationPolicy
+ * the streamabilityViolationPolicy to set
+ */
+ public void setStreamabilityViolationPolicy(
+ XmlViolationPolicy streamabilityViolationPolicy) {
+ this.streamabilityViolationPolicy = streamabilityViolationPolicy;
+ driver = null;
+ }
+
+ /**
+ * Whether the HTML 4 mode reports boolean attributes in a way that repeats
+ * the name in the value.
+ * @param html4ModeCompatibleWithXhtml1Schemata
+ */
+ public void setHtml4ModeCompatibleWithXhtml1Schemata(
+ boolean html4ModeCompatibleWithXhtml1Schemata) {
+ this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
+ if (driver != null) {
+ driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+ }
+ }
+
+ /**
+ * Returns the <code>Locator</code> during parse.
+ * @return the <code>Locator</code>
+ */
+ public Locator getDocumentLocator() {
+ return driver.getDocumentLocator();
+ }
+
+ /**
+ * Whether the HTML 4 mode reports boolean attributes in a way that repeats
+ * the name in the value.
+ *
+ * @return the html4ModeCompatibleWithXhtml1Schemata
+ */
+ public boolean isHtml4ModeCompatibleWithXhtml1Schemata() {
+ return html4ModeCompatibleWithXhtml1Schemata;
+ }
+
+ /**
+ * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
+ * @param mappingLangToXmlLang
+ * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
+ */
+ public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
+ this.mappingLangToXmlLang = mappingLangToXmlLang;
+ if (driver != null) {
+ driver.setMappingLangToXmlLang(mappingLangToXmlLang);
+ }
+ }
+
+ /**
+ * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
+ *
+ * @return the mappingLangToXmlLang
+ */
+ public boolean isMappingLangToXmlLang() {
+ return mappingLangToXmlLang;
+ }
+
+ /**
+ * Whether the <code>xmlns</code> attribute on the root element is
+ * passed to through. (FATAL not allowed.)
+ * @param xmlnsPolicy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
+ if (xmlnsPolicy == XmlViolationPolicy.FATAL) {
+ throw new IllegalArgumentException("Can't use FATAL here.");
+ }
+ this.xmlnsPolicy = xmlnsPolicy;
+ if (driver != null) {
+ driver.setXmlnsPolicy(xmlnsPolicy);
+ }
+ }
+
+ /**
+ * Returns the xmlnsPolicy.
+ *
+ * @return the xmlnsPolicy
+ */
+ public XmlViolationPolicy getXmlnsPolicy() {
+ return xmlnsPolicy;
+ }
+
+ /**
+ * Returns the commentPolicy.
+ *
+ * @return the commentPolicy
+ */
+ public XmlViolationPolicy getCommentPolicy() {
+ return commentPolicy;
+ }
+
+ /**
+ * Returns the contentNonXmlCharPolicy.
+ *
+ * @return the contentNonXmlCharPolicy
+ */
+ public XmlViolationPolicy getContentNonXmlCharPolicy() {
+ return contentNonXmlCharPolicy;
+ }
+
+ /**
+ * Returns the contentSpacePolicy.
+ *
+ * @return the contentSpacePolicy
+ */
+ public XmlViolationPolicy getContentSpacePolicy() {
+ return contentSpacePolicy;
+ }
+
+ /**
+ * @param reportingDoctype
+ * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean)
+ */
+ public void setReportingDoctype(boolean reportingDoctype) {
+ this.reportingDoctype = reportingDoctype;
+ if (treeBuilder != null) {
+ treeBuilder.setReportingDoctype(reportingDoctype);
+ }
+ }
+
+ /**
+ * Returns the reportingDoctype.
+ *
+ * @return the reportingDoctype
+ */
+ public boolean isReportingDoctype() {
+ return reportingDoctype;
+ }
+
+ /**
+ * The policy for non-NCName element and attribute names.
+ * @param namePolicy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setNamePolicy(XmlViolationPolicy namePolicy) {
+ this.namePolicy = namePolicy;
+ if (driver != null) {
+ driver.setNamePolicy(namePolicy);
+ treeBuilder.setNamePolicy(namePolicy);
+ }
+ }
+
+ /**
+ * Sets the encoding sniffing heuristics.
+ *
+ * @param heuristics the heuristics to set
+ * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics)
+ */
+ public void setHeuristics(Heuristics heuristics) {
+ this.heuristics = heuristics;
+ if (driver != null) {
+ driver.setHeuristics(heuristics);
+ }
+ }
+
+ public Heuristics getHeuristics() {
+ return this.heuristics;
+ }
+
+ /**
+ * This is a catch-all convenience method for setting name, xmlns, content space,
+ * content non-XML char and comment policies in one go. This does not affect the
+ * streamability policy or doctype reporting.
+ *
+ * @param xmlPolicy
+ */
+ public void setXmlPolicy(XmlViolationPolicy xmlPolicy) {
+ setNamePolicy(xmlPolicy);
+ setXmlnsPolicy(xmlPolicy == XmlViolationPolicy.FATAL ? XmlViolationPolicy.ALTER_INFOSET : xmlPolicy);
+ setContentSpacePolicy(xmlPolicy);
+ setContentNonXmlCharPolicy(xmlPolicy);
+ setCommentPolicy(xmlPolicy);
+ }
+
+ /**
+ * The policy for non-NCName element and attribute names.
+ *
+ * @return the namePolicy
+ */
+ public XmlViolationPolicy getNamePolicy() {
+ return namePolicy;
+ }
+
+ /**
+ * Does nothing.
+ * @deprecated
+ */
+ public void setBogusXmlnsPolicy(
+ XmlViolationPolicy bogusXmlnsPolicy) {
+ }
+
+ /**
+ * Returns <code>XmlViolationPolicy.ALTER_INFOSET</code>.
+ * @deprecated
+ * @return <code>XmlViolationPolicy.ALTER_INFOSET</code>
+ */
+ public XmlViolationPolicy getBogusXmlnsPolicy() {
+ return XmlViolationPolicy.ALTER_INFOSET;
+ }
+
+ public void addCharacterHandler(CharacterHandler characterHandler) {
+ this.characterHandlers.add(characterHandler);
+ if (driver != null) {
+ driver.addCharacterHandler(characterHandler);
+ }
+ }
+
+
+ /**
+ * Sets whether comment nodes appear in the tree.
+ * @param ignoreComments <code>true</code> to ignore comments
+ * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean)
+ */
+ public void setIgnoringComments(boolean ignoreComments) {
+ treeBuilder.setIgnoringComments(ignoreComments);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/package.html
new file mode 100644
index 000000000..d793bcf86
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/package.html
@@ -0,0 +1,29 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>This package provides an HTML5 parser that exposes the document using the DOM API.</p>
+</body>
+</html> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/ChardetSniffer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/ChardetSniffer.java
new file mode 100644
index 000000000..a75750398
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/ChardetSniffer.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.extra;
+
+import java.io.IOException;
+import java.nio.charset.UnsupportedCharsetException;
+
+import nu.validator.htmlparser.io.Encoding;
+
+import org.mozilla.intl.chardet.nsDetector;
+import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
+import org.mozilla.intl.chardet.nsPSMDetector;
+
+import com.ibm.icu.text.CharsetDetector;
+
+public class ChardetSniffer implements nsICharsetDetectionObserver {
+
+ /** The bytes handed to the detector by <code>sniff()</code>. */
+ private final byte[] source;
+
+ /** The byte count handed to the detector together with <code>source</code>. */
+ private final int length;
+
+ /** The encoding last recorded by <code>Notify</code>, or <code>null</code>. */
+ private Encoding returnValue = null;
+
+ /**
+ * @param source the buffer to sniff
+ * @param length the byte count passed to the detector with <code>source</code>
+ */
+ public ChardetSniffer(final byte[] source, final int length) {
+ this.length = length;
+ this.source = source;
+ }
+
+ /** Runs the detector once; yields <code>null</code> unless a usable encoding was recorded. */
+ public Encoding sniff() throws IOException {
+ nsDetector chardet = new nsDetector(nsPSMDetector.ALL);
+ chardet.Init(this);
+ chardet.DoIt(source, length, false);
+ chardet.DataEnd();
+ if (returnValue == null || returnValue == Encoding.WINDOWS1252 || !returnValue.isAsciiSuperset()) {
+ return null;
+ }
+ return returnValue;
+ }
+
+ public static void main(String[] args) {
+ for (String charset : CharsetDetector.getAllDetectableCharsets()) {
+ System.out.println(charset);
+ }
+ }
+
+ /** Detector callback: records the named encoding, or <code>null</code> if it is unsupported. */
+ public void Notify(String charsetName) {
+ try {
+ Encoding named = Encoding.forName(charsetName);
+ Encoding actual = named.getActualHtmlEncoding();
+ returnValue = (actual != null) ? actual : named;
+ } catch (UnsupportedCharsetException e) {
+ returnValue = null;
+ }
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java
new file mode 100644
index 000000000..f3caab5c4
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.extra;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import nu.validator.htmlparser.common.ByteReadable;
+import nu.validator.htmlparser.io.Encoding;
+
+import com.ibm.icu.text.CharsetDetector;
+import com.ibm.icu.text.CharsetMatch;
+
+public class IcuDetectorSniffer extends InputStream {
+
+ /** Supplies the bytes this stream hands out. */
+ private final ByteReadable source;
+
+ /**
+ * @param source the byte source that <code>read()</code> delegates to
+ */
+ public IcuDetectorSniffer(final ByteReadable source) {
+ this.source = source;
+ }
+
+ /** Reads the next byte from <code>source</code>. */
+ @Override
+ public int read() throws IOException {
+ return source.readByte();
+ }
+
+ /**
+ * Feeds this stream to ICU's detector. Returns <code>null</code> if anything throws, or if the
+ * resulting encoding is <code>Encoding.WINDOWS1252</code> or not an ASCII superset.
+ */
+ public Encoding sniff() throws IOException {
+ try {
+ CharsetDetector icu = new CharsetDetector();
+ icu.setText(this);
+ CharsetMatch best = icu.detect();
+ Encoding named = Encoding.forName(best.getName());
+ Encoding actual = named.getActualHtmlEncoding();
+ Encoding effective = (actual != null) ? actual : named;
+ boolean usable = effective != Encoding.WINDOWS1252 && effective.isAsciiSuperset();
+ return usable ? effective : null;
+ } catch (Exception e) {
+ return null;
+ }
+ }
+
+ /** Prints every charset name returned by <code>CharsetDetector.getAllDetectableCharsets()</code>. */
+ public static void main(String[] args) {
+ for (String charset : CharsetDetector.getAllDetectableCharsets()) {
+ System.out.println(charset);
+ }
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/NormalizationChecker.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/NormalizationChecker.java
new file mode 100644
index 000000000..45df62fb7
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/NormalizationChecker.java
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2006, 2007 Henri Sivonen
+ * Copyright (c) 2007 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.extra;
+
+import nu.validator.htmlparser.common.CharacterHandler;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.Normalizer;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class NormalizationChecker implements CharacterHandler {
+
+ private ErrorHandler errorHandler;
+
+ private Locator locator;
+
+ /**
+ * A thread-safe set of composing characters as per Charmod Norm.
+ */
+ @SuppressWarnings("deprecation")
+ private static final UnicodeSet COMPOSING_CHARACTERS = (UnicodeSet) new UnicodeSet(
+ "[[:nfc_qc=maybe:][:^ccc=0:]]").freeze();
+
+ // see http://sourceforge.net/mailarchive/message.php?msg_id=37279908
+
+ /**
+ * A buffer for holding sequences that overlap the SAX buffer boundary.
+ */
+ private char[] buf = new char[128];
+
+ /**
+ * A holder for the original buffer (for the memory leak prevention
+ * mechanism).
+ */
+ private char[] bufHolder = null;
+
+ /**
+ * The current used length of the buffer, i.e. the index of the first slot
+ * that does not hold current data.
+ */
+ private int pos;
+
+ /**
+ * Indicates whether the next call to <code>characters()</code>
+ * is the first call in a run.
+ */
+ private boolean atStartOfRun;
+
+ /**
+ * Indicates whether the current run has already caused an error.
+ */
+ private boolean alreadyComplainedAboutThisRun;
+
+ /**
+ * Reports an error (positioned by the locator) to the <code>ErrorHandler</code>, if one is set.
+ *
+ * @param message the error message
+ * @throws SAXException if the handler throws
+ */
+ public void err(String message) throws SAXException {
+ if (errorHandler == null) {
+ return;
+ }
+ errorHandler.error(new SAXParseException(message, locator));
+ }
+
+ /**
+ * Tests a single UTF-16 code unit: surrogates of either kind always
+ * count, and any other unit counts if it is in the composing set.
+ *
+ * @param c a UTF-16 code unit
+ * @return <code>true</code> if the argument is a composing BMP character
+ * or a surrogate and <code>false</code> otherwise
+ */
+ private static boolean isComposingCharOrSurrogate(char c) {
+ boolean surrogate = UCharacter.isHighSurrogate(c)
+ || UCharacter.isLowSurrogate(c);
+ // short-circuit: the set lookup only runs for non-surrogates
+ return surrogate || isComposingChar(c);
+ }
+
+ /**
+ * Returns <code>true</code> if the argument is in the
+ * <code>COMPOSING_CHARACTERS</code> set and <code>false</code> otherwise.
+ *
+ * @param c a Unicode code point
+ * @return <code>true</code> if the argument is a composing character
+ * and <code>false</code> otherwise
+ */
+ private static boolean isComposingChar(int c) {
+ return COMPOSING_CHARACTERS.contains(c);
+ }
+
+ /**
+ * Creates a checker whose errors are positioned using the given locator.
+ * The per-run state is reset via <code>start()</code>.
+ *
+ * @param locator the locator passed to each reported <code>SAXParseException</code>
+ */
+ public NormalizationChecker(Locator locator) {
+ this.locator = locator; // previously dropped, so err() always used a null locator
+ start();
+ }
+
+ /**
+ * @see nu.validator.htmlparser.common.CharacterHandler#start()
+ */
+ public void start() { // resets all per-run state
+ pos = 0;
+ alreadyComplainedAboutThisRun = false;
+ atStartOfRun = true;
+ }
+
+ /**
+ * Checks one chunk of the current text run; empty chunks are ignored. See {@link nu.validator.htmlparser.common.CharacterHandler#characters(char[], int, int)}.
+ */
+ public void characters(char[] ch, int start, int length)
+ throws SAXException {
+ if (alreadyComplainedAboutThisRun || length <= 0) {
+ return; // already reported, or there is no ch[start] to look at
+ }
+ if (atStartOfRun) {
+ char c = ch[start];
+ if (pos == 1) {
+ // there's a single high surrogate in buf
+ if (isComposingChar(UCharacter.getCodePoint(buf[0], c))) {
+ err("Text run starts with a composing character.");
+ }
+ atStartOfRun = false;
+ } else {
+ if (length == 1 && UCharacter.isHighSurrogate(c)) {
+ buf[0] = c; // hold it until the next chunk supplies the other half
+ pos = 1;
+ return;
+ } else {
+ if (UCharacter.isHighSurrogate(c)) {
+ if (isComposingChar(UCharacter.getCodePoint(c,
+ ch[start + 1]))) {
+ err("Text run starts with a composing character.");
+ }
+ } else {
+ if (isComposingCharOrSurrogate(c)) {
+ err("Text run starts with a composing character.");
+ }
+ }
+ atStartOfRun = false;
+ }
+ }
+ }
+ int i = start;
+ int stop = start + length;
+ if (pos > 0) {
+ // there's stuff in buf
+ while (i < stop && isComposingCharOrSurrogate(ch[i])) {
+ i++;
+ }
+ appendToBuf(ch, start, i);
+ if (i == stop) {
+ return;
+ } else {
+ if (!Normalizer.isNormalized(buf, 0, pos, Normalizer.NFC, 0)) {
+ errAboutTextRun();
+ }
+ pos = 0;
+ }
+ }
+ if (i < stop) {
+ start = i;
+ i = stop - 1;
+ while (i > start && isComposingCharOrSurrogate(ch[i])) {
+ i--;
+ }
+ // report at most once per run, even if the buffered part above already complained
+ if (i > start && !alreadyComplainedAboutThisRun
+ && !Normalizer.isNormalized(ch, start, i, Normalizer.NFC, 0)) {
+ errAboutTextRun();
+ }
+ appendToBuf(ch, i, stop);
+ }
+ }
+
+ /**
+ * Emits an error stating that the source text is not in NFC and
+ * marks the current run as already complained about.
+ *
+ * @throws SAXException if the <code>ErrorHandler</code> throws
+ */
+ private void errAboutTextRun() throws SAXException {
+ err("Source text is not in Unicode Normalization Form C.");
+ alreadyComplainedAboutThisRun = true; // only reached if err() did not throw
+ }
+
+ /**
+ * Copies <code>ch[start..end)</code> onto the end of the internal buffer, growing it if needed.
+ *
+ * @param ch the array from which to copy
+ * @param start the index of the first element that is copied
+ * @param end the index of the first element that is not copied
+ */
+ private void appendToBuf(char[] ch, int start, int end) {
+ final int count = end - start;
+ if (count == 0) {
+ return;
+ }
+ final int required = pos + count;
+ if (required > buf.length) {
+ char[] grown = new char[required];
+ System.arraycopy(buf, 0, grown, 0, pos);
+ if (bufHolder == null) {
+ bufHolder = buf; // keep the original around
+ }
+ buf = grown;
+ }
+ System.arraycopy(ch, start, buf, pos, count);
+ pos = required;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.common.CharacterHandler#end()
+ */
+ public void end() throws SAXException { // checks whatever is still buffered, then shrinks the buffer
+ if (!alreadyComplainedAboutThisRun
+ && !Normalizer.isNormalized(buf, 0, pos, Normalizer.NFC, 0)) {
+ errAboutTextRun();
+ }
+ if (bufHolder != null) {
+ // restore the original small buffer to avoid leaking
+ // memory if this checker is recycled
+ buf = bufHolder;
+ bufHolder = null; // pos is left as-is here; start() is what resets it
+ }
+ }
+
+ public void setErrorHandler(ErrorHandler errorHandler) { // err() reports nothing while this is null
+ this.errorHandler = errorHandler;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java
new file mode 100644
index 000000000..48d82036c
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java
@@ -0,0 +1,2475 @@
+/*
+ * Copyright (c) 2008-2011 Mozilla Foundation
+ * Copyright (c) 2018-2020 Moonchild Productions
+ * Copyright (c) 2020 Binary Outcast
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import java.util.Arrays;
+
+import nu.validator.htmlparser.annotation.Local;
+import nu.validator.htmlparser.annotation.NoLength;
+import nu.validator.htmlparser.annotation.NsUri;
+import nu.validator.htmlparser.annotation.Prefix;
+import nu.validator.htmlparser.annotation.QName;
+import nu.validator.htmlparser.annotation.Virtual;
+import nu.validator.htmlparser.common.Interner;
+
+public final class AttributeName
+// Uncomment to regenerate
+// implements Comparable<AttributeName>
+{
+ // [NOCPP[
+
+ public static final int NCNAME_HTML = 1;
+
+ public static final int NCNAME_FOREIGN = (1 << 1) | (1 << 2);
+
+ public static final int NCNAME_LANG = (1 << 3);
+
+ public static final int IS_XMLNS = (1 << 4);
+
+ public static final int CASE_FOLDED = (1 << 5);
+
+ public static final int BOOLEAN = (1 << 6);
+
+ // ]NOCPP]
+
+ /**
+ * An array representing no namespace regardless of namespace mode (HTML,
+ * SVG, MathML, lang-mapping HTML) used.
+ */
+ static final @NoLength @NsUri String[] ALL_NO_NS = { "", "", "",
+ // [NOCPP[
+ ""
+ // ]NOCPP]
+ };
+
+ /**
+ * An array that has no namespace for the HTML mode but the XMLNS namespace
+ * for the SVG and MathML modes.
+ */
+ private static final @NoLength @NsUri String[] XMLNS_NS = { "",
+ "http://www.w3.org/2000/xmlns/", "http://www.w3.org/2000/xmlns/",
+ // [NOCPP[
+ ""
+ // ]NOCPP]
+ };
+
+ /**
+ * An array that has no namespace for the HTML mode but the XML namespace
+ * for the SVG and MathML modes.
+ */
+ private static final @NoLength @NsUri String[] XML_NS = { "",
+ "http://www.w3.org/XML/1998/namespace",
+ "http://www.w3.org/XML/1998/namespace",
+ // [NOCPP[
+ ""
+ // ]NOCPP]
+ };
+
+ /**
+ * An array that has no namespace for the HTML mode but the XLink namespace
+ * for the SVG and MathML modes.
+ */
+ private static final @NoLength @NsUri String[] XLINK_NS = { "",
+ "http://www.w3.org/1999/xlink", "http://www.w3.org/1999/xlink",
+ // [NOCPP[
+ ""
+ // ]NOCPP]
+ };
+
+ // [NOCPP[
+ /**
+ * An array that has no namespace for the HTML, SVG and MathML modes but has
+ * the XML namespace for the lang-mapping HTML mode.
+ */
+ private static final @NoLength @NsUri String[] LANG_NS = { "", "", "",
+ "http://www.w3.org/XML/1998/namespace" };
+
+ // ]NOCPP]
+
+ /**
+ * An array for no prefixes in any mode.
+ */
+ static final @NoLength @Prefix String[] ALL_NO_PREFIX = { null, null, null,
+ // [NOCPP[
+ null
+ // ]NOCPP]
+ };
+
+ /**
+ * An array for no prefix in the HTML mode and the <code>xmlns</code>
+ * prefix in the SVG and MathML modes.
+ */
+ private static final @NoLength @Prefix String[] XMLNS_PREFIX = { null,
+ "xmlns", "xmlns",
+ // [NOCPP[
+ null
+ // ]NOCPP]
+ };
+
+ /**
+ * An array for no prefix in the HTML mode and the <code>xlink</code>
+ * prefix in the SVG and MathML modes.
+ */
+ private static final @NoLength @Prefix String[] XLINK_PREFIX = { null,
+ "xlink", "xlink",
+ // [NOCPP[
+ null
+ // ]NOCPP]
+ };
+
+ /**
+ * An array for no prefix in the HTML mode and the <code>xml</code> prefix
+ * in the SVG and MathML modes.
+ */
+ private static final @NoLength @Prefix String[] XML_PREFIX = { null, "xml",
+ "xml",
+ // [NOCPP[
+ null
+ // ]NOCPP]
+ };
+
+ // [NOCPP[
+
+ private static final @NoLength @Prefix String[] LANG_PREFIX = { null, null,
+ null, "xml" };
+
+ private static @QName String[] COMPUTE_QNAME(String[] local, String[] prefix) {
+ // One qname per mode: "prefix:local" (interned) when the mode has a
+ // prefix, otherwise the local name itself.
+ @QName String[] qNames = new String[4];
+ for (int mode = 0; mode < qNames.length; mode++) {
+ String p = prefix[mode];
+ qNames[mode] = (p == null) ? local[mode]
+ : (p + ':' + local[mode]).intern();
+ }
+ return qNames;
+ }
+
+ // ]NOCPP]
+
+ /**
+ * An initialization helper for having a one name in the SVG mode and
+ * another name in the other modes.
+ *
+ * @param name
+ * the name for the non-SVG modes
+ * @param camel
+ * the name for the SVG mode
+ * @return the initialized name array
+ */
+ private static @NoLength @Local String[] SVG_DIFFERENT(@Local String name,
+ @Local String camel) {
+ @NoLength @Local String[] arr = new String[4];
+ arr[0] = name;
+ arr[1] = name;
+ arr[2] = camel;
+ // [NOCPP[
+ arr[3] = name;
+ // ]NOCPP]
+ return arr;
+ }
+
+ /**
+ * An initialization helper for having a one name in the MathML mode and
+ * another name in the other modes.
+ *
+ * @param name
+ * the name for the non-MathML modes
+ * @param camel
+ * the name for the MathML mode
+ * @return the initialized name array
+ */
+ private static @NoLength @Local String[] MATH_DIFFERENT(@Local String name,
+ @Local String camel) {
+ @NoLength @Local String[] arr = new String[4];
+ arr[0] = name;
+ arr[1] = camel;
+ arr[2] = name;
+ // [NOCPP[
+ arr[3] = name;
+ // ]NOCPP]
+ return arr;
+ }
+
+ /**
+ * An initialization helper for having a different local name in the HTML
+ * mode and the SVG and MathML modes.
+ *
+ * @param name
+ * the name for the HTML mode
+ * @param suffix
+ * the name for the SVG and MathML modes
+ * @return the initialized name array
+ */
+ private static @NoLength @Local String[] COLONIFIED_LOCAL(
+ @Local String name, @Local String suffix) {
+ @NoLength @Local String[] arr = new String[4];
+ arr[0] = name;
+ arr[1] = suffix;
+ arr[2] = suffix;
+ // [NOCPP[
+ arr[3] = name;
+ // ]NOCPP]
+ return arr;
+ }
+
+ /**
+ * An initialization helper for having the same local name in all modes.
+ *
+ * @param name
+ * the name
+ * @return the initialized name array
+ */
+ static @NoLength @Local String[] SAME_LOCAL(@Local String name) {
+ @NoLength @Local String[] arr = new String[4];
+ arr[0] = name;
+ arr[1] = name;
+ arr[2] = name;
+ // [NOCPP[
+ arr[3] = name;
+ // ]NOCPP]
+ return arr;
+ }
+
+ /**
+ * Returns an attribute name by buffer.
+ *
+ * <p>
+ * C++ ownership: The return value is either released by the caller if the
+ * attribute is a duplicate or the ownership is transferred to
+ * HtmlAttributes and released upon clearing or destroying that object.
+ *
+ * @param buf
+ * the buffer
+ * @param offset
+ * start of the data for the Portability helpers; not used by the hash
+ * @param length
+ * length of data
+ * @param checkNcName whether to check ncnameness
+ * @param interner passed on to <code>Portability.newLocalNameFromBuffer</code>
+ * @return an <code>AttributeName</code> corresponding to the argument data
+ */
+ static AttributeName nameByBuffer(@NoLength char[] buf, int offset,
+ int length
+ // [NOCPP[
+ , boolean checkNcName
+ // ]NOCPP]
+ , Interner interner) {
+ // XXX deal with offset: the hash below reads buf from index 0, not from offset
+ int hash = AttributeName.bufToHash(buf, length);
+ int index = Arrays.binarySearch(AttributeName.ATTRIBUTE_HASHES, hash);
+ if (index < 0) { // hash not in the table: build a new name
+ return AttributeName.createAttributeName(
+ Portability.newLocalNameFromBuffer(buf, offset, length,
+ interner)
+ // [NOCPP[
+ , checkNcName
+ // ]NOCPP]
+ );
+ } else {
+ AttributeName attributeName = AttributeName.ATTRIBUTE_NAMES[index];
+ @Local String name = attributeName.getLocal(AttributeName.HTML);
+ if (!Portability.localEqualsBuffer(name, buf, offset, length)) { // same hash, different text
+ return AttributeName.createAttributeName(
+ Portability.newLocalNameFromBuffer(buf, offset, length,
+ interner)
+ // [NOCPP[
+ , checkNcName
+ // ]NOCPP]
+ );
+ }
+ return attributeName;
+ }
+ }
+
+ /**
+ * This method has to return a unique integer for each well-known
+ * lower-cased attribute name.
+ *
+ * @param buf the characters of the name, read starting at index 0
+ * @param len the number of characters of <code>buf</code> that make up the name
+ * @return a hash mixing <code>len</code>, <code>buf[0]</code> and up to four characters from each end
+ */
+ private static int bufToHash(@NoLength char[] buf, int len) {
+ int hash2 = 0;
+ int hash = len;
+ hash <<= 5;
+ hash += buf[0] - 0x60; // buf[0] is read unconditionally, even when len is 0
+ int j = len;
+ for (int i = 0; i < 4 && j > 0; i++) { // at most four rounds
+ j--;
+ hash <<= 5;
+ hash += buf[j] - 0x60; // walks backwards from the last character
+ hash2 <<= 6;
+ hash2 += buf[i] - 0x5F; // walks forwards from the first character
+ }
+ return hash ^ hash2;
+ }
+
+ /**
+ * The mode value for HTML.
+ */
+ public static final int HTML = 0;
+
+ /**
+ * The mode value for MathML.
+ */
+ public static final int MATHML = 1;
+
+ /**
+ * The mode value for SVG.
+ */
+ public static final int SVG = 2;
+
+ // [NOCPP[
+
+ /**
+ * The mode value for lang-mapping HTML.
+ */
+ public static final int HTML_LANG = 3;
+
+ // ]NOCPP]
+
+ /**
+ * The namespaces indexable by mode.
+ */
+ private final @NsUri @NoLength String[] uri;
+
+ /**
+ * The local names indexable by mode.
+ */
+ private final @Local @NoLength String[] local;
+
+ /**
+ * The prefixes indexable by mode.
+ */
+ private final @Prefix @NoLength String[] prefix;
+
+ // [NOCPP[
+
+ private final int flags;
+
+ /**
+ * The qnames indexable by mode.
+ */
+ private final @QName @NoLength String[] qName;
+
+ // ]NOCPP]
+
+ /**
+ * The startup-time constructor.
+ *
+ * @param uri
+ * the namespaces, indexed by mode
+ * @param local
+ * the local names, indexed by mode
+ * @param prefix
+ * the prefixes, indexed by mode
+ * @param flags
+ * bitwise OR of the <code>NCNAME_HTML</code>, <code>NCNAME_FOREIGN</code>,
+ * <code>NCNAME_LANG</code>, <code>IS_XMLNS</code>, <code>CASE_FOLDED</code>
+ * and <code>BOOLEAN</code> constants that apply to this name
+ */
+ protected AttributeName(@NsUri @NoLength String[] uri,
+ @Local @NoLength String[] local, @Prefix @NoLength String[] prefix
+ // [NOCPP[
+ , int flags
+ // ]NOCPP]
+ ) {
+ this.uri = uri;
+ this.local = local;
+ this.prefix = prefix;
+ // [NOCPP[
+ this.qName = COMPUTE_QNAME(local, prefix);
+ this.flags = flags;
+ // ]NOCPP]
+ }
+
+ /**
+ * Creates a no-namespace, no-prefix <code>AttributeName</code> for a local name.
+ *
+ * @param name
+ * the name, used as the local name in every mode
+ * @param checkNcName
+ * whether to check ncnameness
+ * @return an <code>AttributeName</code>
+ */
+ private static AttributeName createAttributeName(@Local String name
+ // [NOCPP[
+ , boolean checkNcName
+ // ]NOCPP]
+ ) {
+ // [NOCPP[
+ int flags = NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG;
+ if (name.startsWith("xmlns:")) {
+ flags = IS_XMLNS; // replaces the NCName bits rather than adding to them
+ } else if (checkNcName && !NCName.isNCName(name)) {
+ flags = 0; // not an NCName in any mode
+ }
+ // ]NOCPP]
+ return new AttributeName(AttributeName.ALL_NO_NS,
+ AttributeName.SAME_LOCAL(name), ALL_NO_PREFIX, flags);
+ }
+
+ /**
+ * Deletes runtime-allocated instances in C++.
+ */
+ @Virtual void release() {
+ // No-op in Java.
+ // Implement as |delete this;| in subclass.
+ }
+
+ /**
+ * The C++ destructor.
+ */
+ @SuppressWarnings("unused") @Virtual private void destructor() {
+ Portability.deleteArray(local);
+ }
+
+ /**
+ * Clones the attribute using an interner. Returns <code>this</code> in Java
+ * and for non-dynamic instances in C++.
+ *
+ * @param interner
+ * an interner
+ * @return a clone
+ */
+ @Virtual public AttributeName cloneAttributeName(Interner interner) {
+ return this;
+ }
+
+ // [NOCPP[
+ /**
+ * Creator for use when the XML violation policy requires an attribute name
+ * to be changed.
+ *
+ * @param name
+ * the name of the attribute to create
+ */
+ static AttributeName create(@Local String name) {
+ return new AttributeName(AttributeName.ALL_NO_NS,
+ AttributeName.SAME_LOCAL(name), ALL_NO_PREFIX,
+ NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ }
+
+ /**
+ * Queries whether this name is an XML 1.0 4th ed. NCName.
+ *
+ * @param mode
+ * one of HTML, MATHML, SVG or HTML_LANG; used as a bit index into the flags
+ * @return <code>true</code> if this is an NCName in the given mode
+ */
+ public boolean isNcName(int mode) {
+ return (flags & (1 << mode)) != 0; // bit 0 = NCNAME_HTML, bits 1-2 = NCNAME_FOREIGN, bit 3 = NCNAME_LANG
+ }
+
+ /**
+ * Queries whether this is an <code>xmlns</code> attribute.
+ *
+ * @return <code>true</code> if this is an <code>xmlns</code> attribute
+ */
+ public boolean isXmlns() {
+ return (flags & IS_XMLNS) != 0;
+ }
+
+ /**
+ * Queries whether this attribute has a case-folded value in the HTML4 mode
+ * of the parser.
+ *
+ * @return <code>true</code> if the value is case-folded
+ */
+ boolean isCaseFolded() {
+ return (flags & CASE_FOLDED) != 0;
+ }
+
+ boolean isBoolean() {
+ return (flags & BOOLEAN) != 0;
+ }
+
+ public @QName String getQName(int mode) {
+ return qName[mode];
+ }
+
+ // ]NOCPP]
+
+ public @NsUri String getUri(int mode) {
+ return uri[mode];
+ }
+
+ public @Local String getLocal(int mode) {
+ return local[mode];
+ }
+
+ public @Prefix String getPrefix(int mode) {
+ return prefix[mode];
+ }
+
+ boolean equalsAnother(AttributeName another) { // reference (==) comparison of the HTML-mode local names; presumably relies on interned names - confirm
+ return this.getLocal(AttributeName.HTML) == another.getLocal(AttributeName.HTML);
+ }
+
+ // START CODE ONLY USED FOR GENERATING CODE uncomment to regenerate
+
+// /**
+// * @see java.lang.Object#toString()
+// */
+// @Override public String toString() {
+// return "(" + formatNs() + ", " + formatLocal() + ", " + formatPrefix()
+// + ", " + formatFlags() + ")";
+// }
+//
+// private String formatFlags() {
+// StringBuilder builder = new StringBuilder();
+// if ((flags & NCNAME_HTML) != 0) {
+// if (builder.length() != 0) {
+// builder.append(" | ");
+// }
+// builder.append("NCNAME_HTML");
+// }
+// if ((flags & NCNAME_FOREIGN) != 0) {
+// if (builder.length() != 0) {
+// builder.append(" | ");
+// }
+// builder.append("NCNAME_FOREIGN");
+// }
+// if ((flags & NCNAME_LANG) != 0) {
+// if (builder.length() != 0) {
+// builder.append(" | ");
+// }
+// builder.append("NCNAME_LANG");
+// }
+// if (isXmlns()) {
+// if (builder.length() != 0) {
+// builder.append(" | ");
+// }
+// builder.append("IS_XMLNS");
+// }
+// if (isCaseFolded()) {
+// if (builder.length() != 0) {
+// builder.append(" | ");
+// }
+// builder.append("CASE_FOLDED");
+// }
+// if (isBoolean()) {
+// if (builder.length() != 0) {
+// builder.append(" | ");
+// }
+// builder.append("BOOLEAN");
+// }
+// if (builder.length() == 0) {
+// return "0";
+// }
+// return builder.toString();
+// }
+//
+// public int compareTo(AttributeName other) {
+// int thisHash = this.hash();
+// int otherHash = other.hash();
+// if (thisHash < otherHash) {
+// return -1;
+// } else if (thisHash == otherHash) {
+// return 0;
+// } else {
+// return 1;
+// }
+// }
+//
+// private String formatPrefix() {
+// if (prefix[0] == null && prefix[1] == null && prefix[2] == null
+// && prefix[3] == null) {
+// return "ALL_NO_PREFIX";
+// } else if (prefix[0] == null && prefix[1] == prefix[2]
+// && prefix[3] == null) {
+// if ("xmlns".equals(prefix[1])) {
+// return "XMLNS_PREFIX";
+// } else if ("xml".equals(prefix[1])) {
+// return "XML_PREFIX";
+// } else if ("xlink".equals(prefix[1])) {
+// return "XLINK_PREFIX";
+// } else {
+// throw new IllegalStateException();
+// }
+// } else if (prefix[0] == null && prefix[1] == null && prefix[2] == null
+// && prefix[3] == "xml") {
+// return "LANG_PREFIX";
+// } else {
+// throw new IllegalStateException();
+// }
+// }
+//
+// private String formatLocal() {
+// if (local[0] == local[1] && local[0] == local[3]
+// && local[0] != local[2]) {
+// return "SVG_DIFFERENT(\"" + local[0] + "\", \"" + local[2] + "\")";
+// }
+// if (local[0] == local[2] && local[0] == local[3]
+// && local[0] != local[1]) {
+// return "MATH_DIFFERENT(\"" + local[0] + "\", \"" + local[1] + "\")";
+// }
+// if (local[0] == local[3] && local[1] == local[2]
+// && local[0] != local[1]) {
+// return "COLONIFIED_LOCAL(\"" + local[0] + "\", \"" + local[1]
+// + "\")";
+// }
+// for (int i = 1; i < local.length; i++) {
+// if (local[0] != local[i]) {
+// throw new IllegalStateException();
+// }
+// }
+// return "SAME_LOCAL(\"" + local[0] + "\")";
+// }
+//
+// private String formatNs() {
+// if (uri[0] == "" && uri[1] == "" && uri[2] == "" && uri[3] == "") {
+// return "ALL_NO_NS";
+// } else if (uri[0] == "" && uri[1] == uri[2] && uri[3] == "") {
+// if ("http://www.w3.org/2000/xmlns/".equals(uri[1])) {
+// return "XMLNS_NS";
+// } else if ("http://www.w3.org/XML/1998/namespace".equals(uri[1])) {
+// return "XML_NS";
+// } else if ("http://www.w3.org/1999/xlink".equals(uri[1])) {
+// return "XLINK_NS";
+// } else {
+// throw new IllegalStateException();
+// }
+// } else if (uri[0] == "" && uri[1] == "" && uri[2] == ""
+// && uri[3] == "http://www.w3.org/XML/1998/namespace") {
+// return "LANG_NS";
+// } else {
+// throw new IllegalStateException();
+// }
+// }
+//
+// private String constName() {
+// String name = getLocal(HTML);
+// char[] buf = new char[name.length()];
+// for (int i = 0; i < name.length(); i++) {
+// char c = name.charAt(i);
+// if (c == '-' || c == ':') {
+// buf[i] = '_';
+// } else if (c >= 'a' && c <= 'z') {
+// buf[i] = (char) (c - 0x20);
+// } else {
+// buf[i] = c;
+// }
+// }
+// return new String(buf);
+// }
+//
+// private int hash() {
+// String name = getLocal(HTML);
+// return bufToHash(name.toCharArray(), name.length());
+// }
+//
+// /**
+// * Regenerate self
+// *
+// * @param args
+// */
+// public static void main(String[] args) {
+// Arrays.sort(ATTRIBUTE_NAMES);
+// for (int i = 1; i < ATTRIBUTE_NAMES.length; i++) {
+// if (ATTRIBUTE_NAMES[i].hash() == ATTRIBUTE_NAMES[i - 1].hash()) {
+// System.err.println("Hash collision: "
+// + ATTRIBUTE_NAMES[i].getLocal(HTML) + ", "
+// + ATTRIBUTE_NAMES[i - 1].getLocal(HTML));
+// return;
+// }
+// }
+// for (int i = 0; i < ATTRIBUTE_NAMES.length; i++) {
+// AttributeName att = ATTRIBUTE_NAMES[i];
+// System.out.println("public static final AttributeName "
+// + att.constName() + " = new AttributeName" + att.toString()
+// + ";");
+// }
+// System.out.println("private final static @NoLength AttributeName[] ATTRIBUTE_NAMES = {");
+// for (int i = 0; i < ATTRIBUTE_NAMES.length; i++) {
+// AttributeName att = ATTRIBUTE_NAMES[i];
+// System.out.println(att.constName() + ",");
+// }
+// System.out.println("};");
+// System.out.println("private final static int[] ATTRIBUTE_HASHES = {");
+// for (int i = 0; i < ATTRIBUTE_NAMES.length; i++) {
+// AttributeName att = ATTRIBUTE_NAMES[i];
+// System.out.println(Integer.toString(att.hash()) + ",");
+// }
+// System.out.println("};");
+// }
+
+ // START GENERATED CODE (appears to be the output of the commented-out main() above; regenerate rather than hand-edit, and keep the declaration order as emitted)
+ public static final AttributeName D = new AttributeName(ALL_NO_NS, SAME_LOCAL("d"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName K = new AttributeName(ALL_NO_NS, SAME_LOCAL("k"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName R = new AttributeName(ALL_NO_NS, SAME_LOCAL("r"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName X = new AttributeName(ALL_NO_NS, SAME_LOCAL("x"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName Y = new AttributeName(ALL_NO_NS, SAME_LOCAL("y"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName Z = new AttributeName(ALL_NO_NS, SAME_LOCAL("z"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BY = new AttributeName(ALL_NO_NS, SAME_LOCAL("by"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CX = new AttributeName(ALL_NO_NS, SAME_LOCAL("cx"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CY = new AttributeName(ALL_NO_NS, SAME_LOCAL("cy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DX = new AttributeName(ALL_NO_NS, SAME_LOCAL("dx"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DY = new AttributeName(ALL_NO_NS, SAME_LOCAL("dy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName G2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("g2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName G1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("g1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FX = new AttributeName(ALL_NO_NS, SAME_LOCAL("fx"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FY = new AttributeName(ALL_NO_NS, SAME_LOCAL("fy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName K4 = new AttributeName(ALL_NO_NS, SAME_LOCAL("k4"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName K2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("k2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName K3 = new AttributeName(ALL_NO_NS, SAME_LOCAL("k3"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName K1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("k1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ID = new AttributeName(ALL_NO_NS, SAME_LOCAL("id"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName IN = new AttributeName(ALL_NO_NS, SAME_LOCAL("in"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName U2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("u2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName U1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("u1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RT = new AttributeName(ALL_NO_NS, SAME_LOCAL("rt"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RX = new AttributeName(ALL_NO_NS, SAME_LOCAL("rx"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RY = new AttributeName(ALL_NO_NS, SAME_LOCAL("ry"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TO = new AttributeName(ALL_NO_NS, SAME_LOCAL("to"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName Y2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("y2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName Y1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("y1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName X1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("x1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName X2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("x2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ALT = new AttributeName(ALL_NO_NS, SAME_LOCAL("alt"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DIR = new AttributeName(ALL_NO_NS, SAME_LOCAL("dir"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName DUR = new AttributeName(ALL_NO_NS, SAME_LOCAL("dur"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName END = new AttributeName(ALL_NO_NS, SAME_LOCAL("end"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("for"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName IN2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("in2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MAX = new AttributeName(ALL_NO_NS, SAME_LOCAL("max"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("min"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LOW = new AttributeName(ALL_NO_NS, SAME_LOCAL("low"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REL = new AttributeName(ALL_NO_NS, SAME_LOCAL("rel"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REV = new AttributeName(ALL_NO_NS, SAME_LOCAL("rev"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SRC = new AttributeName(ALL_NO_NS, SAME_LOCAL("src"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName AXIS = new AttributeName(ALL_NO_NS, SAME_LOCAL("axis"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ABBR = new AttributeName(ALL_NO_NS, SAME_LOCAL("abbr"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BBOX = new AttributeName(ALL_NO_NS, SAME_LOCAL("bbox"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CITE = new AttributeName(ALL_NO_NS, SAME_LOCAL("cite"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("code"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BIAS = new AttributeName(ALL_NO_NS, SAME_LOCAL("bias"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLS = new AttributeName(ALL_NO_NS, SAME_LOCAL("cols"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CLIP = new AttributeName(ALL_NO_NS, SAME_LOCAL("clip"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CHAR = new AttributeName(ALL_NO_NS, SAME_LOCAL("char"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BASE = new AttributeName(ALL_NO_NS, SAME_LOCAL("base"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName EDGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("edge"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DATA = new AttributeName(ALL_NO_NS, SAME_LOCAL("data"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FILL = new AttributeName(ALL_NO_NS, SAME_LOCAL("fill"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FROM = new AttributeName(ALL_NO_NS, SAME_LOCAL("from"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FORM = new AttributeName(ALL_NO_NS, SAME_LOCAL("form"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("face"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HIGH = new AttributeName(ALL_NO_NS, SAME_LOCAL("high"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HREF = new AttributeName(ALL_NO_NS, SAME_LOCAL("href"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OPEN = new AttributeName(ALL_NO_NS, SAME_LOCAL("open"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ICON = new AttributeName(ALL_NO_NS, SAME_LOCAL("icon"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName NAME = new AttributeName(ALL_NO_NS, SAME_LOCAL("name"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("mode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MASK = new AttributeName(ALL_NO_NS, SAME_LOCAL("mask"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LINK = new AttributeName(ALL_NO_NS, SAME_LOCAL("link"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LANG = new AttributeName(LANG_NS, SAME_LOCAL("lang"), LANG_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // uses LANG_NS / LANG_PREFIX rather than the ALL_NO_NS / ALL_NO_PREFIX used by neighbouring entries
+ public static final AttributeName LOOP = new AttributeName(ALL_NO_NS, SAME_LOCAL("loop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LIST = new AttributeName(ALL_NO_NS, SAME_LOCAL("list"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("type"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName WHEN = new AttributeName(ALL_NO_NS, SAME_LOCAL("when"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName WRAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("wrap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TEXT = new AttributeName(ALL_NO_NS, SAME_LOCAL("text"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PATH = new AttributeName(ALL_NO_NS, SAME_LOCAL("path"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PING = new AttributeName(ALL_NO_NS, SAME_LOCAL("ping"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REFX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("refx", "refX"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // SVG_DIFFERENT: presumably the SVG local name keeps the camel-cased spelling ("refX"); confirm against the helper
+ public static final AttributeName REFY = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("refy", "refY"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("size"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SEED = new AttributeName(ALL_NO_NS, SAME_LOCAL("seed"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ROWS = new AttributeName(ALL_NO_NS, SAME_LOCAL("rows"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SPAN = new AttributeName(ALL_NO_NS, SAME_LOCAL("span"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STEP = new AttributeName(ALL_NO_NS, SAME_LOCAL("step"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName ROLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("role"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName XREF = new AttributeName(ALL_NO_NS, SAME_LOCAL("xref"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ASYNC = new AttributeName(ALL_NO_NS, SAME_LOCAL("async"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName ALINK = new AttributeName(ALL_NO_NS, SAME_LOCAL("alink"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("align"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName CLOSE = new AttributeName(ALL_NO_NS, SAME_LOCAL("close"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("color"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CLASS = new AttributeName(ALL_NO_NS, SAME_LOCAL("class"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CLEAR = new AttributeName(ALL_NO_NS, SAME_LOCAL("clear"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName BEGIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("begin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DEPTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("depth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DEFER = new AttributeName(ALL_NO_NS, SAME_LOCAL("defer"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName FENCE = new AttributeName(ALL_NO_NS, SAME_LOCAL("fence"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FRAME = new AttributeName(ALL_NO_NS, SAME_LOCAL("frame"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName ISMAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("ismap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName ONEND = new AttributeName(ALL_NO_NS, SAME_LOCAL("onend"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName INDEX = new AttributeName(ALL_NO_NS, SAME_LOCAL("index"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ORDER = new AttributeName(ALL_NO_NS, SAME_LOCAL("order"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OTHER = new AttributeName(ALL_NO_NS, SAME_LOCAL("other"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONCUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("oncut"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName NARGS = new AttributeName(ALL_NO_NS, SAME_LOCAL("nargs"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MEDIA = new AttributeName(ALL_NO_NS, SAME_LOCAL("media"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LABEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("label"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LOCAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("local"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName WIDTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("width"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TITLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("title"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VLINK = new AttributeName(ALL_NO_NS, SAME_LOCAL("vlink"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VALUE = new AttributeName(ALL_NO_NS, SAME_LOCAL("value"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SLOPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("slope"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SHAPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("shape"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName SCOPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("scope"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName SCALE = new AttributeName(ALL_NO_NS, SAME_LOCAL("scale"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SPEED = new AttributeName(ALL_NO_NS, SAME_LOCAL("speed"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STYLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("style"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RULES = new AttributeName(ALL_NO_NS, SAME_LOCAL("rules"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName STEMH = new AttributeName(ALL_NO_NS, SAME_LOCAL("stemh"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SIZES = new AttributeName(ALL_NO_NS, SAME_LOCAL("sizes"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STEMV = new AttributeName(ALL_NO_NS, SAME_LOCAL("stemv"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName START = new AttributeName(ALL_NO_NS, SAME_LOCAL("start"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName XMLNS = new AttributeName(XMLNS_NS, SAME_LOCAL("xmlns"), ALL_NO_PREFIX, IS_XMLNS); // uses XMLNS_NS and carries only the IS_XMLNS flag (no NCNAME_* flags)
+ public static final AttributeName ACCEPT = new AttributeName(ALL_NO_NS, SAME_LOCAL("accept"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ACCENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("accent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ASCENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("ascent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ACTIVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("active"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName ALTIMG = new AttributeName(ALL_NO_NS, SAME_LOCAL("altimg"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ACTION = new AttributeName(ALL_NO_NS, SAME_LOCAL("action"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BORDER = new AttributeName(ALL_NO_NS, SAME_LOCAL("border"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CURSOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("cursor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COORDS = new AttributeName(ALL_NO_NS, SAME_LOCAL("coords"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FILTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("filter"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FORMAT = new AttributeName(ALL_NO_NS, SAME_LOCAL("format"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HIDDEN = new AttributeName(ALL_NO_NS, SAME_LOCAL("hidden"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("hspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("height"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmove"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONLOAD = new AttributeName(ALL_NO_NS, SAME_LOCAL("onload"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDRAG = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondrag"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ORIGIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("origin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONZOOM = new AttributeName(ALL_NO_NS, SAME_LOCAL("onzoom"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONHELP = new AttributeName(ALL_NO_NS, SAME_LOCAL("onhelp"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONSTOP = new AttributeName(ALL_NO_NS, SAME_LOCAL("onstop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDROP = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondrop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBLUR = new AttributeName(ALL_NO_NS, SAME_LOCAL("onblur"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OBJECT = new AttributeName(ALL_NO_NS, SAME_LOCAL("object"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OFFSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("offset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ORIENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("orient"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONCOPY = new AttributeName(ALL_NO_NS, SAME_LOCAL("oncopy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName NOWRAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("nowrap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName NOHREF = new AttributeName(ALL_NO_NS, SAME_LOCAL("nohref"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName MACROS = new AttributeName(ALL_NO_NS, SAME_LOCAL("macros"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName METHOD = new AttributeName(ALL_NO_NS, SAME_LOCAL("method"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName LOWSRC = new AttributeName(ALL_NO_NS, SAME_LOCAL("lowsrc"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("lspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LQUOTE = new AttributeName(ALL_NO_NS, SAME_LOCAL("lquote"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName USEMAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("usemap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName WIDTHS = new AttributeName(ALL_NO_NS, SAME_LOCAL("widths"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TARGET = new AttributeName(ALL_NO_NS, SAME_LOCAL("target"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VALUES = new AttributeName(ALL_NO_NS, SAME_LOCAL("values"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("valign"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName VSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("vspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName POSTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("poster"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName POINTS = new AttributeName(ALL_NO_NS, SAME_LOCAL("points"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PROMPT = new AttributeName(ALL_NO_NS, SAME_LOCAL("prompt"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SRCDOC = new AttributeName(ALL_NO_NS, SAME_LOCAL("srcdoc"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SCOPED = new AttributeName(ALL_NO_NS, SAME_LOCAL("scoped"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STRING = new AttributeName(ALL_NO_NS, SAME_LOCAL("string"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SCHEME = new AttributeName(ALL_NO_NS, SAME_LOCAL("scheme"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STROKE = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RADIUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("radius"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RESULT = new AttributeName(ALL_NO_NS, SAME_LOCAL("result"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REPEAT = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SRCSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("srcset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("rspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ROTATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("rotate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RQUOTE = new AttributeName(ALL_NO_NS, SAME_LOCAL("rquote"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ALTTEXT = new AttributeName(ALL_NO_NS, SAME_LOCAL("alttext"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARCHIVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("archive"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName AZIMUTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("azimuth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CLOSURE = new AttributeName(ALL_NO_NS, SAME_LOCAL("closure"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CHECKED = new AttributeName(ALL_NO_NS, SAME_LOCAL("checked"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName CLASSID = new AttributeName(ALL_NO_NS, SAME_LOCAL("classid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CHAROFF = new AttributeName(ALL_NO_NS, SAME_LOCAL("charoff"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BGCOLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("bgcolor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLSPAN = new AttributeName(ALL_NO_NS, SAME_LOCAL("colspan"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CHARSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("charset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COMPACT = new AttributeName(ALL_NO_NS, SAME_LOCAL("compact"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName CONTENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("content"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ENCTYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("enctype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName DATASRC = new AttributeName(ALL_NO_NS, SAME_LOCAL("datasrc"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DATAFLD = new AttributeName(ALL_NO_NS, SAME_LOCAL("datafld"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DECLARE = new AttributeName(ALL_NO_NS, SAME_LOCAL("declare"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName DISPLAY = new AttributeName(ALL_NO_NS, SAME_LOCAL("display"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DIVISOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("divisor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DEFAULT = new AttributeName(ALL_NO_NS, SAME_LOCAL("default"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName DESCENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("descent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName KERNING = new AttributeName(ALL_NO_NS, SAME_LOCAL("kerning"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HANGING = new AttributeName(ALL_NO_NS, SAME_LOCAL("hanging"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HEADERS = new AttributeName(ALL_NO_NS, SAME_LOCAL("headers"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONPASTE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onpaste"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONCLICK = new AttributeName(ALL_NO_NS, SAME_LOCAL("onclick"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OPTIMUM = new AttributeName(ALL_NO_NS, SAME_LOCAL("optimum"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBEGIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbegin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONKEYUP = new AttributeName(ALL_NO_NS, SAME_LOCAL("onkeyup"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONFOCUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("onfocus"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONERROR = new AttributeName(ALL_NO_NS, SAME_LOCAL("onerror"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONINPUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("oninput"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONABORT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onabort"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONSTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("onstart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONRESET = new AttributeName(ALL_NO_NS, SAME_LOCAL("onreset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName NOSHADE = new AttributeName(ALL_NO_NS, SAME_LOCAL("noshade"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName MINSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("minsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MAXSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("maxsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LARGEOP = new AttributeName(ALL_NO_NS, SAME_LOCAL("largeop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName UNICODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("unicode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TARGETX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("targetx", "targetX"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TARGETY = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("targety", "targetY"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VIEWBOX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("viewbox", "viewBox"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VERSION = new AttributeName(ALL_NO_NS, SAME_LOCAL("version"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PATTERN = new AttributeName(ALL_NO_NS, SAME_LOCAL("pattern"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PROFILE = new AttributeName(ALL_NO_NS, SAME_LOCAL("profile"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("spacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RESTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("restart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ROWSPAN = new AttributeName(ALL_NO_NS, SAME_LOCAL("rowspan"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SANDBOX = new AttributeName(ALL_NO_NS, SAME_LOCAL("sandbox"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SUMMARY = new AttributeName(ALL_NO_NS, SAME_LOCAL("summary"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STANDBY = new AttributeName(ALL_NO_NS, SAME_LOCAL("standby"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REPLACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("replace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName AUTOPLAY = new AttributeName(ALL_NO_NS, SAME_LOCAL("autoplay"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Constants from here through X_HEIGHT all have 8-character attribute names (counting the qualified form for the xml:* entries); the table appears to be ordered by name length (TODO confirm).
+ public static final AttributeName ADDITIVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("additive"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CALCMODE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("calcmode", "calcMode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // SVG_DIFFERENT pairs the lowercase name with a camelCase spelling; presumably the latter is the local name used for SVG elements (confirm against the helper, which is outside this chunk).
+ public static final AttributeName CODETYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("codetype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CODEBASE = new AttributeName(ALL_NO_NS, SAME_LOCAL("codebase"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CONTROLS = new AttributeName(ALL_NO_NS, SAME_LOCAL("controls"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BEVELLED = new AttributeName(ALL_NO_NS, SAME_LOCAL("bevelled"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BASELINE = new AttributeName(ALL_NO_NS, SAME_LOCAL("baseline"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName EXPONENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("exponent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName EDGEMODE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("edgemode", "edgeMode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ENCODING = new AttributeName(ALL_NO_NS, SAME_LOCAL("encoding"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName GLYPHREF = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("glyphref", "glyphRef"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DATETIME = new AttributeName(ALL_NO_NS, SAME_LOCAL("datetime"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DISABLED = new AttributeName(ALL_NO_NS, SAME_LOCAL("disabled"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName FONTSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("fontsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName KEYTIMES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("keytimes", "keyTimes"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PANOSE_1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("panose-1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HREFLANG = new AttributeName(ALL_NO_NS, SAME_LOCAL("hreflang"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONRESIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onresize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONCHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onchange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBOUNCE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbounce"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONUNLOAD = new AttributeName(ALL_NO_NS, SAME_LOCAL("onunload"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONFINISH = new AttributeName(ALL_NO_NS, SAME_LOCAL("onfinish"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONSCROLL = new AttributeName(ALL_NO_NS, SAME_LOCAL("onscroll"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OPERATOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("operator"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OVERFLOW = new AttributeName(ALL_NO_NS, SAME_LOCAL("overflow"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONSUBMIT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onsubmit"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONREPEAT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrepeat"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONSELECT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onselect"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName NOTATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("notation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName NORESIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("noresize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName MANIFEST = new AttributeName(ALL_NO_NS, SAME_LOCAL("manifest"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MATHSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MULTIPLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("multiple"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName LONGDESC = new AttributeName(ALL_NO_NS, SAME_LOCAL("longdesc"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LANGUAGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("language"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TEMPLATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("template"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TABINDEX = new AttributeName(ALL_NO_NS, SAME_LOCAL("tabindex"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PROPERTY = new AttributeName(ALL_NO_NS, SAME_LOCAL("property"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName READONLY = new AttributeName(ALL_NO_NS, SAME_LOCAL("readonly"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName SELECTED = new AttributeName(ALL_NO_NS, SAME_LOCAL("selected"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName ROWLINES = new AttributeName(ALL_NO_NS, SAME_LOCAL("rowlines"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SEAMLESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("seamless"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ROWALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("rowalign"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STRETCHY = new AttributeName(ALL_NO_NS, SAME_LOCAL("stretchy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REQUIRED = new AttributeName(ALL_NO_NS, SAME_LOCAL("required"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName XML_BASE = new AttributeName(XML_NS, COLONIFIED_LOCAL("xml:base", "base"), XML_PREFIX, NCNAME_FOREIGN); // Namespaced entry: uses XML_NS / XML_PREFIX, gives both the qualified name "xml:base" and the bare local name "base", and sets only the NCNAME_FOREIGN flag.
+ public static final AttributeName XML_LANG = new AttributeName(XML_NS, COLONIFIED_LOCAL("xml:lang", "lang"), XML_PREFIX, NCNAME_FOREIGN);
+ public static final AttributeName X_HEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("x-height"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_OWNS = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-owns"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Constants from here through XML_SPACE all have 9-character attribute names (counting the qualified form "xml:space"); the table appears to be ordered by name length (TODO confirm).
+ public static final AttributeName AUTOFOCUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("autofocus"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName ARIA_SORT = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-sort"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ACCESSKEY = new AttributeName(ALL_NO_NS, SAME_LOCAL("accesskey"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_BUSY = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-busy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_GRAB = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-grab"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName AMPLITUDE = new AttributeName(ALL_NO_NS, SAME_LOCAL("amplitude"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_LIVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-live"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CLIP_RULE = new AttributeName(ALL_NO_NS, SAME_LOCAL("clip-rule"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CLIP_PATH = new AttributeName(ALL_NO_NS, SAME_LOCAL("clip-path"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName EQUALROWS = new AttributeName(ALL_NO_NS, SAME_LOCAL("equalrows"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ELEVATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("elevation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DIRECTION = new AttributeName(ALL_NO_NS, SAME_LOCAL("direction"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DRAGGABLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("draggable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FILL_RULE = new AttributeName(ALL_NO_NS, SAME_LOCAL("fill-rule"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FONTSTYLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("fontstyle"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FONT_SIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-size"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName KEYSYSTEM = new AttributeName(ALL_NO_NS, SAME_LOCAL("keysystem"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName KEYPOINTS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("keypoints", "keyPoints"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // SVG_DIFFERENT pairs the lowercase name with a camelCase spelling; presumably the latter is the local name used for SVG elements (confirm against the helper, which is outside this chunk).
+ public static final AttributeName HIDEFOCUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("hidefocus"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMESSAGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmessage"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName INTERCEPT = new AttributeName(ALL_NO_NS, SAME_LOCAL("intercept"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDRAGEND = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragend"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOVEEND = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmoveend"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONINVALID = new AttributeName(ALL_NO_NS, SAME_LOCAL("oninvalid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName INTEGRITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("integrity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONKEYDOWN = new AttributeName(ALL_NO_NS, SAME_LOCAL("onkeydown"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONFOCUSIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("onfocusin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOUSEUP = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseup"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName INPUTMODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("inputmode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONROWEXIT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrowexit"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MATHCOLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathcolor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MASKUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("maskunits", "maskUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MAXLENGTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("maxlength"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LINEBREAK = new AttributeName(ALL_NO_NS, SAME_LOCAL("linebreak"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TRANSFORM = new AttributeName(ALL_NO_NS, SAME_LOCAL("transform"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName V_HANGING = new AttributeName(ALL_NO_NS, SAME_LOCAL("v-hanging"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VALUETYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("valuetype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); // Sets CASE_FOLDED without BOOLEAN (as SCROLLING does below), unlike AUTOFOCUS in this run, which sets both.
+ public static final AttributeName POINTSATZ = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pointsatz", "pointsAtZ"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName POINTSATX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pointsatx", "pointsAtX"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName POINTSATY = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pointsaty", "pointsAtY"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SYMMETRIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("symmetric"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SCROLLING = new AttributeName(ALL_NO_NS, SAME_LOCAL("scrolling"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName REPEATDUR = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("repeatdur", "repeatDur"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SELECTION = new AttributeName(ALL_NO_NS, SAME_LOCAL("selection"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SEPARATOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("separator"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName XML_SPACE = new AttributeName(XML_NS, COLONIFIED_LOCAL("xml:space", "space"), XML_PREFIX, NCNAME_FOREIGN); // Namespaced entry: uses XML_NS / XML_PREFIX, gives both the qualified name "xml:space" and the bare local name "space", and sets only the NCNAME_FOREIGN flag.
+ public static final AttributeName AUTOSUBMIT = new AttributeName(ALL_NO_NS, SAME_LOCAL("autosubmit"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); // Constants from here through XLINK_SHOW all have 10-character attribute names (counting the qualified form for the xlink:* entries); the table appears to be ordered by name length (TODO confirm).
+ public static final AttributeName ALPHABETIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("alphabetic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ACTIONTYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("actiontype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ACCUMULATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("accumulate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_LEVEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-level"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLUMNSPAN = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnspan"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CAP_HEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("cap-height"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BACKGROUND = new AttributeName(ALL_NO_NS, SAME_LOCAL("background"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName GLYPH_NAME = new AttributeName(ALL_NO_NS, SAME_LOCAL("glyph-name"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName GROUPALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("groupalign"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FONTFAMILY = new AttributeName(ALL_NO_NS, SAME_LOCAL("fontfamily"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FONTWEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("fontweight"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FONT_STYLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-style"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName KEYSPLINES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("keysplines", "keySplines"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // SVG_DIFFERENT pairs the lowercase name with a camelCase spelling; presumably the latter is the local name used for SVG elements (confirm against the helper, which is outside this chunk).
+ public static final AttributeName HTTP_EQUIV = new AttributeName(ALL_NO_NS, SAME_LOCAL("http-equiv"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONACTIVATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onactivate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OCCURRENCE = new AttributeName(ALL_NO_NS, SAME_LOCAL("occurrence"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName IRRELEVANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("irrelevant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDBLCLICK = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondblclick"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDRAGDROP = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragdrop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONKEYPRESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("onkeypress"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONROWENTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrowenter"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDRAGOVER = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragover"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONFOCUSOUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onfocusout"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOUSEOUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseout"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName NUMOCTAVES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("numoctaves", "numOctaves"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MARKER_MID = new AttributeName(ALL_NO_NS, SAME_LOCAL("marker-mid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MARKER_END = new AttributeName(ALL_NO_NS, SAME_LOCAL("marker-end"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TEXTLENGTH = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("textlength", "textLength"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VISIBILITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("visibility"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VIEWTARGET = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("viewtarget", "viewTarget"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VERT_ADV_Y = new AttributeName(ALL_NO_NS, SAME_LOCAL("vert-adv-y"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PATHLENGTH = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pathlength", "pathLength"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REPEAT_MAX = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat-max"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RADIOGROUP = new AttributeName(ALL_NO_NS, SAME_LOCAL("radiogroup"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STOP_COLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("stop-color"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SEPARATORS = new AttributeName(ALL_NO_NS, SAME_LOCAL("separators"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REPEAT_MIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat-min"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ROWSPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("rowspacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ZOOMANDPAN = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("zoomandpan", "zoomAndPan"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName XLINK_TYPE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:type", "type"), XLINK_PREFIX, NCNAME_FOREIGN); // XLINK_TYPE through XLINK_SHOW are namespaced entries: they use XLINK_NS / XLINK_PREFIX, give both the "xlink:"-qualified name and the bare local name, and set only the NCNAME_FOREIGN flag.
+ public static final AttributeName XLINK_ROLE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:role", "role"), XLINK_PREFIX, NCNAME_FOREIGN);
+ public static final AttributeName XLINK_HREF = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:href", "href"), XLINK_PREFIX, NCNAME_FOREIGN);
+ public static final AttributeName XLINK_SHOW = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:show", "show"), XLINK_PREFIX, NCNAME_FOREIGN);
+ public static final AttributeName ACCENTUNDER = new AttributeName(ALL_NO_NS, SAME_LOCAL("accentunder"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Constants from here through at least SCROLLDELAY all have 11-character attribute names; the table appears to be ordered by name length (TODO confirm).
+ public static final AttributeName ARIA_SECRET = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-secret"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_ATOMIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-atomic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_HIDDEN = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-hidden"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_FLOWTO = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-flowto"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARABIC_FORM = new AttributeName(ALL_NO_NS, SAME_LOCAL("arabic-form"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CELLPADDING = new AttributeName(ALL_NO_NS, SAME_LOCAL("cellpadding"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CELLSPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("cellspacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLUMNWIDTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnwidth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CROSSORIGIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("crossorigin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLUMNALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnalign"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLUMNLINES = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnlines"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CONTEXTMENU = new AttributeName(ALL_NO_NS, SAME_LOCAL("contextmenu"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BASEPROFILE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("baseprofile", "baseProfile"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // SVG_DIFFERENT pairs the lowercase name with a camelCase spelling; presumably the latter is the local name used for SVG elements (confirm against the helper, which is outside this chunk).
+ public static final AttributeName FONT_FAMILY = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-family"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FRAMEBORDER = new AttributeName(ALL_NO_NS, SAME_LOCAL("frameborder"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FILTERUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("filterunits", "filterUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FLOOD_COLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("flood-color"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FONT_WEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-weight"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HORIZ_ADV_X = new AttributeName(ALL_NO_NS, SAME_LOCAL("horiz-adv-x"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDRAGLEAVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragleave"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOUSEMOVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmousemove"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ORIENTATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("orientation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOUSEDOWN = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmousedown"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOUSEOVER = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseover"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDRAGENTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragenter"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName IDEOGRAPHIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("ideographic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBEFORECUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforecut"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONFORMINPUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onforminput"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDRAGSTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragstart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOVESTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmovestart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MARKERUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("markerunits", "markerUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MATHVARIANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathvariant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MARGINWIDTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("marginwidth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MARKERWIDTH = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("markerwidth", "markerWidth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TEXT_ANCHOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("text-anchor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TABLEVALUES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("tablevalues", "tableValues"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SCRIPTLEVEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("scriptlevel"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REPEATCOUNT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("repeatcount", "repeatCount"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STITCHTILES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("stitchtiles", "stitchTiles"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STARTOFFSET = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("startoffset", "startOffset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SCROLLDELAY = new AttributeName(ALL_NO_NS, SAME_LOCAL("scrolldelay"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName XMLNS_XLINK = new AttributeName(XMLNS_NS, COLONIFIED_LOCAL("xmlns:xlink", "xlink"), XMLNS_PREFIX, IS_XMLNS);
+ public static final AttributeName XLINK_TITLE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:title", "title"), XLINK_PREFIX, NCNAME_FOREIGN);
+ public static final AttributeName ARIA_INVALID = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-invalid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // first of the 12-character attribute names; the constants in this run are grouped by name length
+ public static final AttributeName ARIA_PRESSED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-pressed"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_CHECKED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-checked"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName AUTOCOMPLETE = new AttributeName(ALL_NO_NS, SAME_LOCAL("autocomplete"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); // AUTOCOMPLETE and DATAFORMATAS are the only 12-character entries with the extra CASE_FOLDED flag; the meaning of the flag is not visible here - TODO confirm
+ public static final AttributeName ARIA_SETSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-setsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_CHANNEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-channel"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName EQUALCOLUMNS = new AttributeName(ALL_NO_NS, SAME_LOCAL("equalcolumns"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DISPLAYSTYLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("displaystyle"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DATAFORMATAS = new AttributeName(ALL_NO_NS, SAME_LOCAL("dataformatas"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); // also carries CASE_FOLDED
+ public static final AttributeName FILL_OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("fill-opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FONT_VARIANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-variant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FONT_STRETCH = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-stretch"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FRAMESPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("framespacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName KERNELMATRIX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("kernelmatrix", "kernelMatrix"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // SVG_DIFFERENT takes a lowercase and a camelCase spelling; presumably the camelCase one is the local name used on SVG elements - TODO confirm against the helper
+ public static final AttributeName ONDEACTIVATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondeactivate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONROWSDELETE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrowsdelete"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOUSELEAVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseleave"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONFORMCHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onformchange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONCELLCHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("oncellchange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOUSEWHEEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmousewheel"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOUSEENTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseenter"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONAFTERPRINT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onafterprint"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBEFORECOPY = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforecopy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MARGINHEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("marginheight"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MARKERHEIGHT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("markerheight", "markerHeight"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MARKER_START = new AttributeName(ALL_NO_NS, SAME_LOCAL("marker-start"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MATHEMATICAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathematical"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LENGTHADJUST = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("lengthadjust", "lengthAdjust"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName UNSELECTABLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("unselectable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName UNICODE_BIDI = new AttributeName(ALL_NO_NS, SAME_LOCAL("unicode-bidi"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName UNITS_PER_EM = new AttributeName(ALL_NO_NS, SAME_LOCAL("units-per-em"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName WORD_SPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("word-spacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName WRITING_MODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("writing-mode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName V_ALPHABETIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("v-alphabetic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PATTERNUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("patternunits", "patternUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SPREADMETHOD = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("spreadmethod", "spreadMethod"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SURFACESCALE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("surfacescale", "surfaceScale"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STROKE_WIDTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-width"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REPEAT_START = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat-start"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STDDEVIATION = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("stddeviation", "stdDeviation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STOP_OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("stop-opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_CONTROLS = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-controls"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // first of the 13-character attribute names; the constants in this run are grouped by name length
+ public static final AttributeName ARIA_HASPOPUP = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-haspopup"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ACCENT_HEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("accent-height"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_VALUENOW = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-valuenow"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_RELEVANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-relevant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_POSINSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-posinset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_VALUEMAX = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-valuemax"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_READONLY = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-readonly"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_SELECTED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-selected"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_REQUIRED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-required"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_EXPANDED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-expanded"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_DISABLED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-disabled"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ATTRIBUTETYPE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("attributetype", "attributeType"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // SVG_DIFFERENT takes a lowercase and a camelCase spelling; presumably the camelCase one is the local name used on SVG elements - TODO confirm against the helper
+ public static final AttributeName ATTRIBUTENAME = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("attributename", "attributeName"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_DATATYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-datatype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_VALUEMIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-valuemin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BASEFREQUENCY = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("basefrequency", "baseFrequency"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLUMNSPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnspacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLOR_PROFILE = new AttributeName(ALL_NO_NS, SAME_LOCAL("color-profile"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CLIPPATHUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("clippathunits", "clipPathUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DEFINITIONURL = new AttributeName(ALL_NO_NS, MATH_DIFFERENT("definitionurl", "definitionURL"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // only MATH_DIFFERENT entry among the 13-character names; presumably the second spelling is the local name used on MathML elements - TODO confirm against the helper
+ public static final AttributeName GRADIENTUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("gradientunits", "gradientUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FLOOD_OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("flood-opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONAFTERUPDATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onafterupdate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONERRORUPDATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onerrorupdate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBEFOREPASTE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforepaste"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONLOSECAPTURE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onlosecapture"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONCONTEXTMENU = new AttributeName(ALL_NO_NS, SAME_LOCAL("oncontextmenu"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONSELECTSTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("onselectstart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBEFOREPRINT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeprint"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MOVABLELIMITS = new AttributeName(ALL_NO_NS, SAME_LOCAL("movablelimits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LINETHICKNESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("linethickness"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName UNICODE_RANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("unicode-range"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName THINMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("thinmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VERT_ORIGIN_X = new AttributeName(ALL_NO_NS, SAME_LOCAL("vert-origin-x"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VERT_ORIGIN_Y = new AttributeName(ALL_NO_NS, SAME_LOCAL("vert-origin-y"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName V_IDEOGRAPHIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("v-ideographic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PRESERVEALPHA = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("preservealpha", "preserveAlpha"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SCRIPTMINSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("scriptminsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SPECIFICATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("specification"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName XLINK_ACTUATE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:actuate", "actuate"), XLINK_PREFIX, NCNAME_FOREIGN); // XLINK_NS entries: COLONIFIED_LOCAL is given the qualified name plus the part after the colon; flagged NCNAME_FOREIGN only
+ public static final AttributeName XLINK_ARCROLE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:arcrole", "arcrole"), XLINK_PREFIX, NCNAME_FOREIGN);
+ public static final AttributeName ACCEPT_CHARSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("accept-charset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // first of the 14-character attribute names; the constants in this run are grouped by name length
+ public static final AttributeName ALIGNMENTSCOPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("alignmentscope"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_MULTILINE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-multiline"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BASELINE_SHIFT = new AttributeName(ALL_NO_NS, SAME_LOCAL("baseline-shift"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HORIZ_ORIGIN_X = new AttributeName(ALL_NO_NS, SAME_LOCAL("horiz-origin-x"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HORIZ_ORIGIN_Y = new AttributeName(ALL_NO_NS, SAME_LOCAL("horiz-origin-y"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBEFOREUPDATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeupdate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONFILTERCHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onfilterchange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONROWSINSERTED = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrowsinserted"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBEFOREUNLOAD = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeunload"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MATHBACKGROUND = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathbackground"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LETTER_SPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("letter-spacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LIGHTING_COLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("lighting-color"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName THICKMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("thickmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TEXT_RENDERING = new AttributeName(ALL_NO_NS, SAME_LOCAL("text-rendering"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName V_MATHEMATICAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("v-mathematical"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName POINTER_EVENTS = new AttributeName(ALL_NO_NS, SAME_LOCAL("pointer-events"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PRIMITIVEUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("primitiveunits", "primitiveUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // SVG_DIFFERENT takes a lowercase and a camelCase spelling; presumably the camelCase one is the local name used on SVG elements - TODO confirm against the helper
+ public static final AttributeName REFERRERPOLICY = new AttributeName(ALL_NO_NS, SAME_LOCAL("referrerpolicy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SYSTEMLANGUAGE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("systemlanguage", "systemLanguage"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STROKE_LINECAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-linecap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SUBSCRIPTSHIFT = new AttributeName(ALL_NO_NS, SAME_LOCAL("subscriptshift"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STROKE_OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_DROPEFFECT = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-dropeffect"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // first of the 15-character attribute names; the constants in this run are grouped by name length
+ public static final AttributeName ARIA_LABELLEDBY = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-labelledby"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_TEMPLATEID = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-templateid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLOR_RENDERING = new AttributeName(ALL_NO_NS, SAME_LOCAL("color-rendering"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CONTENTEDITABLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("contenteditable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DIFFUSECONSTANT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("diffuseconstant", "diffuseConstant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // only SVG_DIFFERENT entry among the 15-character names; presumably the camelCase spelling is the local name used on SVG elements - TODO confirm against the helper
+ public static final AttributeName ONDATAAVAILABLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondataavailable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONCONTROLSELECT = new AttributeName(ALL_NO_NS, SAME_LOCAL("oncontrolselect"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName IMAGE_RENDERING = new AttributeName(ALL_NO_NS, SAME_LOCAL("image-rendering"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MEDIUMMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("mediummathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TEXT_DECORATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("text-decoration"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SHAPE_RENDERING = new AttributeName(ALL_NO_NS, SAME_LOCAL("shape-rendering"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STROKE_LINEJOIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-linejoin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REPEAT_TEMPLATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat-template"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_DESCRIBEDBY = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-describedby"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // first of the 16-character attribute names; the constants in this run are grouped by name length
+ public static final AttributeName FONT_SIZE_ADJUST = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-size-adjust"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName KERNELUNITLENGTH = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("kernelunitlength", "kernelUnitLength"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // SVG_DIFFERENT takes a lowercase and a camelCase spelling; presumably the camelCase one is the local name used on SVG elements - TODO confirm against the helper
+ public static final AttributeName ONBEFOREACTIVATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeactivate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONPROPERTYCHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onpropertychange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDATASETCHANGED = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondatasetchanged"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MASKCONTENTUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("maskcontentunits", "maskContentUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PATTERNTRANSFORM = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("patterntransform", "patternTransform"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REQUIREDFEATURES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("requiredfeatures", "requiredFeatures"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RENDERING_INTENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("rendering-intent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SPECULAREXPONENT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("specularexponent", "specularExponent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SPECULARCONSTANT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("specularconstant", "specularConstant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SUPERSCRIPTSHIFT = new AttributeName(ALL_NO_NS, SAME_LOCAL("superscriptshift"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STROKE_DASHARRAY = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-dasharray"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName XCHANNELSELECTOR = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("xchannelselector", "xChannelSelector"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName YCHANNELSELECTOR = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("ychannelselector", "yChannelSelector"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_AUTOCOMPLETE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-autocomplete"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // first of the 17-character attribute names; the constants in this run are grouped by name length
+ public static final AttributeName ENABLE_BACKGROUND = new AttributeName(ALL_NO_NS, SAME_LOCAL("enable-background"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DOMINANT_BASELINE = new AttributeName(ALL_NO_NS, SAME_LOCAL("dominant-baseline"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName GRADIENTTRANSFORM = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("gradienttransform", "gradientTransform"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // SVG_DIFFERENT takes a lowercase and a camelCase spelling; presumably the camelCase one is the local name used on SVG elements - TODO confirm against the helper
+ public static final AttributeName ONBEFORDEACTIVATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbefordeactivate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // NOTE(review): constant and literal both read onbefordeactivate (no e after befor), unlike ONBEFOREACTIVATE and ONDEACTIVATE elsewhere in this table; looks like a misspelling of onbeforedeactivate, which would be 18 characters and so would not match this entry - left unchanged because a fix renames a public constant and probably requires regenerating the lookup tables; confirm upstream
+ public static final AttributeName ONDATASETCOMPLETE = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondatasetcomplete"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OVERLINE_POSITION = new AttributeName(ALL_NO_NS, SAME_LOCAL("overline-position"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBEFOREEDITFOCUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeeditfocus"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LIMITINGCONEANGLE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("limitingconeangle", "limitingConeAngle"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VERYTHINMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("verythinmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STROKE_DASHOFFSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-dashoffset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STROKE_MITERLIMIT = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-miterlimit"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ALIGNMENT_BASELINE = new AttributeName(ALL_NO_NS, SAME_LOCAL("alignment-baseline"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // attribute names of 18 to 28 characters follow, still in order of increasing name length; this line starts the 18-character names
+ public static final AttributeName ONREADYSTATECHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onreadystatechange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OVERLINE_THICKNESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("overline-thickness"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName UNDERLINE_POSITION = new AttributeName(ALL_NO_NS, SAME_LOCAL("underline-position"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VERYTHICKMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("verythickmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REQUIREDEXTENSIONS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("requiredextensions", "requiredExtensions"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // SVG_DIFFERENT takes a lowercase and a camelCase spelling; presumably the camelCase one is the local name used on SVG elements - TODO confirm against the helper
+ public static final AttributeName COLOR_INTERPOLATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("color-interpolation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // 19-character names start here
+ public static final AttributeName UNDERLINE_THICKNESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("underline-thickness"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PRESERVEASPECTRATIO = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("preserveaspectratio", "preserveAspectRatio"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PATTERNCONTENTUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("patterncontentunits", "patternContentUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_MULTISELECTABLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-multiselectable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // 20-character names start here
+ public static final AttributeName SCRIPTSIZEMULTIPLIER = new AttributeName(ALL_NO_NS, SAME_LOCAL("scriptsizemultiplier"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_ACTIVEDESCENDANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-activedescendant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // 21-character names start here
+ public static final AttributeName VERYVERYTHINMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("veryverythinmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VERYVERYTHICKMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("veryverythickmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // 22-character names start here
+ public static final AttributeName STRIKETHROUGH_POSITION = new AttributeName(ALL_NO_NS, SAME_LOCAL("strikethrough-position"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STRIKETHROUGH_THICKNESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("strikethrough-thickness"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // the remaining four names are 23, 26, 27 and 28 characters long
+ public static final AttributeName GLYPH_ORIENTATION_VERTICAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("glyph-orientation-vertical"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLOR_INTERPOLATION_FILTERS = new AttributeName(ALL_NO_NS, SAME_LOCAL("color-interpolation-filters"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName GLYPH_ORIENTATION_HORIZONTAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("glyph-orientation-horizontal"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // last constant before the ATTRIBUTE_NAMES array
+ private final static @NoLength AttributeName[] ATTRIBUTE_NAMES = {
+ D,
+ K,
+ R,
+ X,
+ Y,
+ Z,
+ BY,
+ CX,
+ CY,
+ DX,
+ DY,
+ G2,
+ G1,
+ FX,
+ FY,
+ K4,
+ K2,
+ K3,
+ K1,
+ ID,
+ IN,
+ U2,
+ U1,
+ RT,
+ RX,
+ RY,
+ TO,
+ Y2,
+ Y1,
+ X1,
+ X2,
+ ALT,
+ DIR,
+ DUR,
+ END,
+ FOR,
+ IN2,
+ MAX,
+ MIN,
+ LOW,
+ REL,
+ REV,
+ SRC,
+ AXIS,
+ ABBR,
+ BBOX,
+ CITE,
+ CODE,
+ BIAS,
+ COLS,
+ CLIP,
+ CHAR,
+ BASE,
+ EDGE,
+ DATA,
+ FILL,
+ FROM,
+ FORM,
+ FACE,
+ HIGH,
+ HREF,
+ OPEN,
+ ICON,
+ NAME,
+ MODE,
+ MASK,
+ LINK,
+ LANG,
+ LOOP,
+ LIST,
+ TYPE,
+ WHEN,
+ WRAP,
+ TEXT,
+ PATH,
+ PING,
+ REFX,
+ REFY,
+ SIZE,
+ SEED,
+ ROWS,
+ SPAN,
+ STEP,
+ ROLE,
+ XREF,
+ ASYNC,
+ ALINK,
+ ALIGN,
+ CLOSE,
+ COLOR,
+ CLASS,
+ CLEAR,
+ BEGIN,
+ DEPTH,
+ DEFER,
+ FENCE,
+ FRAME,
+ ISMAP,
+ ONEND,
+ INDEX,
+ ORDER,
+ OTHER,
+ ONCUT,
+ NARGS,
+ MEDIA,
+ LABEL,
+ LOCAL,
+ WIDTH,
+ TITLE,
+ VLINK,
+ VALUE,
+ SLOPE,
+ SHAPE,
+ SCOPE,
+ SCALE,
+ SPEED,
+ STYLE,
+ RULES,
+ STEMH,
+ SIZES,
+ STEMV,
+ START,
+ XMLNS,
+ ACCEPT,
+ ACCENT,
+ ASCENT,
+ ACTIVE,
+ ALTIMG,
+ ACTION,
+ BORDER,
+ CURSOR,
+ COORDS,
+ FILTER,
+ FORMAT,
+ HIDDEN,
+ HSPACE,
+ HEIGHT,
+ ONMOVE,
+ ONLOAD,
+ ONDRAG,
+ ORIGIN,
+ ONZOOM,
+ ONHELP,
+ ONSTOP,
+ ONDROP,
+ ONBLUR,
+ OBJECT,
+ OFFSET,
+ ORIENT,
+ ONCOPY,
+ NOWRAP,
+ NOHREF,
+ MACROS,
+ METHOD,
+ LOWSRC,
+ LSPACE,
+ LQUOTE,
+ USEMAP,
+ WIDTHS,
+ TARGET,
+ VALUES,
+ VALIGN,
+ VSPACE,
+ POSTER,
+ POINTS,
+ PROMPT,
+ SRCDOC,
+ SCOPED,
+ STRING,
+ SCHEME,
+ STROKE,
+ RADIUS,
+ RESULT,
+ REPEAT,
+ SRCSET,
+ RSPACE,
+ ROTATE,
+ RQUOTE,
+ ALTTEXT,
+ ARCHIVE,
+ AZIMUTH,
+ CLOSURE,
+ CHECKED,
+ CLASSID,
+ CHAROFF,
+ BGCOLOR,
+ COLSPAN,
+ CHARSET,
+ COMPACT,
+ CONTENT,
+ ENCTYPE,
+ DATASRC,
+ DATAFLD,
+ DECLARE,
+ DISPLAY,
+ DIVISOR,
+ DEFAULT,
+ DESCENT,
+ KERNING,
+ HANGING,
+ HEADERS,
+ ONPASTE,
+ ONCLICK,
+ OPTIMUM,
+ ONBEGIN,
+ ONKEYUP,
+ ONFOCUS,
+ ONERROR,
+ ONINPUT,
+ ONABORT,
+ ONSTART,
+ ONRESET,
+ OPACITY,
+ NOSHADE,
+ MINSIZE,
+ MAXSIZE,
+ LARGEOP,
+ UNICODE,
+ TARGETX,
+ TARGETY,
+ VIEWBOX,
+ VERSION,
+ PATTERN,
+ PROFILE,
+ SPACING,
+ RESTART,
+ ROWSPAN,
+ SANDBOX,
+ SUMMARY,
+ STANDBY,
+ REPLACE,
+ AUTOPLAY,
+ ADDITIVE,
+ CALCMODE,
+ CODETYPE,
+ CODEBASE,
+ CONTROLS,
+ BEVELLED,
+ BASELINE,
+ EXPONENT,
+ EDGEMODE,
+ ENCODING,
+ GLYPHREF,
+ DATETIME,
+ DISABLED,
+ FONTSIZE,
+ KEYTIMES,
+ PANOSE_1,
+ HREFLANG,
+ ONRESIZE,
+ ONCHANGE,
+ ONBOUNCE,
+ ONUNLOAD,
+ ONFINISH,
+ ONSCROLL,
+ OPERATOR,
+ OVERFLOW,
+ ONSUBMIT,
+ ONREPEAT,
+ ONSELECT,
+ NOTATION,
+ NORESIZE,
+ MANIFEST,
+ MATHSIZE,
+ MULTIPLE,
+ LONGDESC,
+ LANGUAGE,
+ TEMPLATE,
+ TABINDEX,
+ PROPERTY,
+ READONLY,
+ SELECTED,
+ ROWLINES,
+ SEAMLESS,
+ ROWALIGN,
+ STRETCHY,
+ REQUIRED,
+ XML_BASE,
+ XML_LANG,
+ X_HEIGHT,
+ ARIA_OWNS,
+ AUTOFOCUS,
+ ARIA_SORT,
+ ACCESSKEY,
+ ARIA_BUSY,
+ ARIA_GRAB,
+ AMPLITUDE,
+ ARIA_LIVE,
+ CLIP_RULE,
+ CLIP_PATH,
+ EQUALROWS,
+ ELEVATION,
+ DIRECTION,
+ DRAGGABLE,
+ FILL_RULE,
+ FONTSTYLE,
+ FONT_SIZE,
+ KEYSYSTEM,
+ KEYPOINTS,
+ HIDEFOCUS,
+ ONMESSAGE,
+ INTERCEPT,
+ ONDRAGEND,
+ ONMOVEEND,
+ ONINVALID,
+ INTEGRITY,
+ ONKEYDOWN,
+ ONFOCUSIN,
+ ONMOUSEUP,
+ INPUTMODE,
+ ONROWEXIT,
+ MATHCOLOR,
+ MASKUNITS,
+ MAXLENGTH,
+ LINEBREAK,
+ TRANSFORM,
+ V_HANGING,
+ VALUETYPE,
+ POINTSATZ,
+ POINTSATX,
+ POINTSATY,
+ SYMMETRIC,
+ SCROLLING,
+ REPEATDUR,
+ SELECTION,
+ SEPARATOR,
+ XML_SPACE,
+ AUTOSUBMIT,
+ ALPHABETIC,
+ ACTIONTYPE,
+ ACCUMULATE,
+ ARIA_LEVEL,
+ COLUMNSPAN,
+ CAP_HEIGHT,
+ BACKGROUND,
+ GLYPH_NAME,
+ GROUPALIGN,
+ FONTFAMILY,
+ FONTWEIGHT,
+ FONT_STYLE,
+ KEYSPLINES,
+ HTTP_EQUIV,
+ ONACTIVATE,
+ OCCURRENCE,
+ IRRELEVANT,
+ ONDBLCLICK,
+ ONDRAGDROP,
+ ONKEYPRESS,
+ ONROWENTER,
+ ONDRAGOVER,
+ ONFOCUSOUT,
+ ONMOUSEOUT,
+ NUMOCTAVES,
+ MARKER_MID,
+ MARKER_END,
+ TEXTLENGTH,
+ VISIBILITY,
+ VIEWTARGET,
+ VERT_ADV_Y,
+ PATHLENGTH,
+ REPEAT_MAX,
+ RADIOGROUP,
+ STOP_COLOR,
+ SEPARATORS,
+ REPEAT_MIN,
+ ROWSPACING,
+ ZOOMANDPAN,
+ XLINK_TYPE,
+ XLINK_ROLE,
+ XLINK_HREF,
+ XLINK_SHOW,
+ ACCENTUNDER,
+ ARIA_SECRET,
+ ARIA_ATOMIC,
+ ARIA_HIDDEN,
+ ARIA_FLOWTO,
+ ARABIC_FORM,
+ CELLPADDING,
+ CELLSPACING,
+ COLUMNWIDTH,
+ CROSSORIGIN,
+ COLUMNALIGN,
+ COLUMNLINES,
+ CONTEXTMENU,
+ BASEPROFILE,
+ FONT_FAMILY,
+ FRAMEBORDER,
+ FILTERUNITS,
+ FLOOD_COLOR,
+ FONT_WEIGHT,
+ HORIZ_ADV_X,
+ ONDRAGLEAVE,
+ ONMOUSEMOVE,
+ ORIENTATION,
+ ONMOUSEDOWN,
+ ONMOUSEOVER,
+ ONDRAGENTER,
+ IDEOGRAPHIC,
+ ONBEFORECUT,
+ ONFORMINPUT,
+ ONDRAGSTART,
+ ONMOVESTART,
+ MARKERUNITS,
+ MATHVARIANT,
+ MARGINWIDTH,
+ MARKERWIDTH,
+ TEXT_ANCHOR,
+ TABLEVALUES,
+ SCRIPTLEVEL,
+ REPEATCOUNT,
+ STITCHTILES,
+ STARTOFFSET,
+ SCROLLDELAY,
+ XMLNS_XLINK,
+ XLINK_TITLE,
+ ARIA_INVALID,
+ ARIA_PRESSED,
+ ARIA_CHECKED,
+ AUTOCOMPLETE,
+ ARIA_SETSIZE,
+ ARIA_CHANNEL,
+ EQUALCOLUMNS,
+ DISPLAYSTYLE,
+ DATAFORMATAS,
+ FILL_OPACITY,
+ FONT_VARIANT,
+ FONT_STRETCH,
+ FRAMESPACING,
+ KERNELMATRIX,
+ ONDEACTIVATE,
+ ONROWSDELETE,
+ ONMOUSELEAVE,
+ ONFORMCHANGE,
+ ONCELLCHANGE,
+ ONMOUSEWHEEL,
+ ONMOUSEENTER,
+ ONAFTERPRINT,
+ ONBEFORECOPY,
+ MARGINHEIGHT,
+ MARKERHEIGHT,
+ MARKER_START,
+ MATHEMATICAL,
+ LENGTHADJUST,
+ UNSELECTABLE,
+ UNICODE_BIDI,
+ UNITS_PER_EM,
+ WORD_SPACING,
+ WRITING_MODE,
+ V_ALPHABETIC,
+ PATTERNUNITS,
+ SPREADMETHOD,
+ SURFACESCALE,
+ STROKE_WIDTH,
+ REPEAT_START,
+ STDDEVIATION,
+ STOP_OPACITY,
+ ARIA_CONTROLS,
+ ARIA_HASPOPUP,
+ ACCENT_HEIGHT,
+ ARIA_VALUENOW,
+ ARIA_RELEVANT,
+ ARIA_POSINSET,
+ ARIA_VALUEMAX,
+ ARIA_READONLY,
+ ARIA_SELECTED,
+ ARIA_REQUIRED,
+ ARIA_EXPANDED,
+ ARIA_DISABLED,
+ ATTRIBUTETYPE,
+ ATTRIBUTENAME,
+ ARIA_DATATYPE,
+ ARIA_VALUEMIN,
+ BASEFREQUENCY,
+ COLUMNSPACING,
+ COLOR_PROFILE,
+ CLIPPATHUNITS,
+ DEFINITIONURL,
+ GRADIENTUNITS,
+ FLOOD_OPACITY,
+ ONAFTERUPDATE,
+ ONERRORUPDATE,
+ ONBEFOREPASTE,
+ ONLOSECAPTURE,
+ ONCONTEXTMENU,
+ ONSELECTSTART,
+ ONBEFOREPRINT,
+ MOVABLELIMITS,
+ LINETHICKNESS,
+ UNICODE_RANGE,
+ THINMATHSPACE,
+ VERT_ORIGIN_X,
+ VERT_ORIGIN_Y,
+ V_IDEOGRAPHIC,
+ PRESERVEALPHA,
+ SCRIPTMINSIZE,
+ SPECIFICATION,
+ XLINK_ACTUATE,
+ XLINK_ARCROLE,
+ ACCEPT_CHARSET,
+ ALIGNMENTSCOPE,
+ ARIA_MULTILINE,
+ BASELINE_SHIFT,
+ HORIZ_ORIGIN_X,
+ HORIZ_ORIGIN_Y,
+ ONBEFOREUPDATE,
+ ONFILTERCHANGE,
+ ONROWSINSERTED,
+ ONBEFOREUNLOAD,
+ MATHBACKGROUND,
+ LETTER_SPACING,
+ LIGHTING_COLOR,
+ THICKMATHSPACE,
+ TEXT_RENDERING,
+ V_MATHEMATICAL,
+ POINTER_EVENTS,
+ PRIMITIVEUNITS,
+ REFERRERPOLICY,
+ SYSTEMLANGUAGE,
+ STROKE_LINECAP,
+ SUBSCRIPTSHIFT,
+ STROKE_OPACITY,
+ ARIA_DROPEFFECT,
+ ARIA_LABELLEDBY,
+ ARIA_TEMPLATEID,
+ COLOR_RENDERING,
+ CONTENTEDITABLE,
+ DIFFUSECONSTANT,
+ ONDATAAVAILABLE,
+ ONCONTROLSELECT,
+ IMAGE_RENDERING,
+ MEDIUMMATHSPACE,
+ TEXT_DECORATION,
+ SHAPE_RENDERING,
+ STROKE_LINEJOIN,
+ REPEAT_TEMPLATE,
+ ARIA_DESCRIBEDBY,
+ FONT_SIZE_ADJUST,
+ KERNELUNITLENGTH,
+ ONBEFOREACTIVATE,
+ ONPROPERTYCHANGE,
+ ONDATASETCHANGED,
+ MASKCONTENTUNITS,
+ PATTERNTRANSFORM,
+ REQUIREDFEATURES,
+ RENDERING_INTENT,
+ SPECULAREXPONENT,
+ SPECULARCONSTANT,
+ SUPERSCRIPTSHIFT,
+ STROKE_DASHARRAY,
+ XCHANNELSELECTOR,
+ YCHANNELSELECTOR,
+ ARIA_AUTOCOMPLETE,
+ ENABLE_BACKGROUND,
+ DOMINANT_BASELINE,
+ GRADIENTTRANSFORM,
+ ONBEFORDEACTIVATE,
+ ONDATASETCOMPLETE,
+ OVERLINE_POSITION,
+ ONBEFOREEDITFOCUS,
+ LIMITINGCONEANGLE,
+ VERYTHINMATHSPACE,
+ STROKE_DASHOFFSET,
+ STROKE_MITERLIMIT,
+ ALIGNMENT_BASELINE,
+ ONREADYSTATECHANGE,
+ OVERLINE_THICKNESS,
+ UNDERLINE_POSITION,
+ VERYTHICKMATHSPACE,
+ REQUIREDEXTENSIONS,
+ COLOR_INTERPOLATION,
+ UNDERLINE_THICKNESS,
+ PRESERVEASPECTRATIO,
+ PATTERNCONTENTUNITS,
+ ARIA_MULTISELECTABLE,
+ SCRIPTSIZEMULTIPLIER,
+ ARIA_ACTIVEDESCENDANT,
+ VERYVERYTHINMATHSPACE,
+ VERYVERYTHICKMATHSPACE,
+ STRIKETHROUGH_POSITION,
+ STRIKETHROUGH_THICKNESS,
+ GLYPH_ORIENTATION_VERTICAL,
+ COLOR_INTERPOLATION_FILTERS,
+ GLYPH_ORIENTATION_HORIZONTAL,
+ };
+ private final static int[] ATTRIBUTE_HASHES = {
+ 1153,
+ 1383,
+ 1601,
+ 1793,
+ 1827,
+ 1857,
+ 68600,
+ 69146,
+ 69177,
+ 70237,
+ 70270,
+ 71572,
+ 71669,
+ 72415,
+ 72444,
+ 74846,
+ 74904,
+ 74943,
+ 75001,
+ 75276,
+ 75590,
+ 84742,
+ 84839,
+ 85575,
+ 85963,
+ 85992,
+ 87204,
+ 88074,
+ 88171,
+ 89130,
+ 89163,
+ 3207892,
+ 3283895,
+ 3284791,
+ 3338752,
+ 3358197,
+ 3369562,
+ 3539124,
+ 3562402,
+ 3574260,
+ 3670335,
+ 3696933,
+ 3721879,
+ 135280021,
+ 135346322,
+ 136317019,
+ 136475749,
+ 136548517,
+ 136652214,
+ 136884919,
+ 136902418,
+ 136942992,
+ 137292068,
+ 139120259,
+ 139785574,
+ 142250603,
+ 142314056,
+ 142331176,
+ 142519584,
+ 144752417,
+ 145106895,
+ 146147200,
+ 146765926,
+ 148805544,
+ 149655723,
+ 149809441,
+ 150018784,
+ 150445028,
+ 150813181,
+ 150923321,
+ 152528754,
+ 152536216,
+ 152647366,
+ 152962785,
+ 155219321,
+ 155654904,
+ 157317483,
+ 157350248,
+ 157437941,
+ 157447478,
+ 157604838,
+ 157685404,
+ 157894402,
+ 158315188,
+ 166078431,
+ 169409980,
+ 169700259,
+ 169856932,
+ 170007032,
+ 170409695,
+ 170466488,
+ 170513710,
+ 170608367,
+ 173028944,
+ 173896963,
+ 176090625,
+ 176129212,
+ 179390001,
+ 179489057,
+ 179627464,
+ 179840468,
+ 179849042,
+ 180004216,
+ 181779081,
+ 183027151,
+ 183645319,
+ 183698797,
+ 185922012,
+ 185997252,
+ 188312483,
+ 188675799,
+ 190977533,
+ 190992569,
+ 191006194,
+ 191033518,
+ 191038774,
+ 191096249,
+ 191166163,
+ 191194426,
+ 191443343,
+ 191522106,
+ 191568039,
+ 200104642,
+ 202506661,
+ 202537381,
+ 202602917,
+ 203070590,
+ 203120766,
+ 203389054,
+ 203690071,
+ 203971238,
+ 203986524,
+ 209040857,
+ 209125756,
+ 212055489,
+ 212322418,
+ 212746849,
+ 213002877,
+ 213055164,
+ 213088023,
+ 213259873,
+ 213273386,
+ 213435118,
+ 213437318,
+ 213438231,
+ 213493071,
+ 213532268,
+ 213542834,
+ 213584431,
+ 213659891,
+ 215285828,
+ 215880731,
+ 216112976,
+ 216684637,
+ 217369699,
+ 217565298,
+ 217576549,
+ 218186795,
+ 219743185,
+ 220082234,
+ 221623802,
+ 221986406,
+ 222283890,
+ 223089542,
+ 223138630,
+ 223311265,
+ 224431494,
+ 224547358,
+ 224587256,
+ 224589550,
+ 224655650,
+ 224785518,
+ 224810917,
+ 224813302,
+ 225126263,
+ 225429618,
+ 225432950,
+ 225440869,
+ 236107233,
+ 236709921,
+ 236838947,
+ 237117095,
+ 237143271,
+ 237172455,
+ 237209953,
+ 237354143,
+ 237372743,
+ 237668065,
+ 237703073,
+ 237714273,
+ 239743521,
+ 240512803,
+ 240522627,
+ 240560417,
+ 240656513,
+ 241015715,
+ 241062755,
+ 241065383,
+ 243523041,
+ 245865199,
+ 246261793,
+ 246556195,
+ 246774817,
+ 246923491,
+ 246928419,
+ 246981667,
+ 247014847,
+ 247058369,
+ 247112833,
+ 247118177,
+ 247119137,
+ 247128739,
+ 247316903,
+ 249533729,
+ 250235623,
+ 250269543,
+ 251402351,
+ 252339047,
+ 253260911,
+ 253293679,
+ 254844367,
+ 255547879,
+ 256077281,
+ 256345377,
+ 258124199,
+ 258354465,
+ 258605063,
+ 258744193,
+ 258845603,
+ 258856961,
+ 258926689,
+ 269869248,
+ 270174334,
+ 270709417,
+ 270778994,
+ 270781796,
+ 271102503,
+ 271478858,
+ 271490090,
+ 272870654,
+ 273335275,
+ 273369140,
+ 273924313,
+ 274108530,
+ 274116736,
+ 276818662,
+ 277476156,
+ 279156579,
+ 279349675,
+ 280108533,
+ 280128712,
+ 280132869,
+ 280162403,
+ 280280292,
+ 280413430,
+ 280506130,
+ 280677397,
+ 280678580,
+ 280686710,
+ 280689066,
+ 282736758,
+ 283110901,
+ 283275116,
+ 283823226,
+ 283890012,
+ 284479340,
+ 284606461,
+ 286700477,
+ 286798916,
+ 290055764,
+ 291557706,
+ 291665349,
+ 291804100,
+ 292138018,
+ 292166446,
+ 292418738,
+ 292451039,
+ 300298041,
+ 300374839,
+ 300597935,
+ 303073389,
+ 303083839,
+ 303266673,
+ 303354997,
+ 303430688,
+ 303576261,
+ 303724281,
+ 303819694,
+ 304242723,
+ 304382625,
+ 306247792,
+ 307227811,
+ 307468786,
+ 307724489,
+ 310252031,
+ 310358241,
+ 310373094,
+ 310833159,
+ 311015256,
+ 313357609,
+ 313683893,
+ 313701861,
+ 313706996,
+ 313707317,
+ 313710350,
+ 313795700,
+ 314027746,
+ 314038181,
+ 314091299,
+ 314205627,
+ 314233813,
+ 316741830,
+ 316797986,
+ 317486755,
+ 317794164,
+ 320076137,
+ 322657125,
+ 322887778,
+ 323506876,
+ 323572412,
+ 323605180,
+ 325060058,
+ 325320188,
+ 325398738,
+ 325541490,
+ 325671619,
+ 333868843,
+ 336806130,
+ 337212108,
+ 337282686,
+ 337285434,
+ 337585223,
+ 338036037,
+ 338298087,
+ 338566051,
+ 340943551,
+ 341190970,
+ 342995704,
+ 343352124,
+ 343912673,
+ 344585053,
+ 346977248,
+ 347218098,
+ 347262163,
+ 347278576,
+ 347438191,
+ 347655959,
+ 347684788,
+ 347726430,
+ 347727772,
+ 347776035,
+ 347776629,
+ 349500753,
+ 350880161,
+ 350887073,
+ 353384123,
+ 355496998,
+ 355906922,
+ 355979793,
+ 356545959,
+ 358637867,
+ 358905016,
+ 359164318,
+ 359247286,
+ 359350571,
+ 359579447,
+ 365560330,
+ 367399355,
+ 367420285,
+ 367510727,
+ 368013212,
+ 370234760,
+ 370353345,
+ 370710317,
+ 371074566,
+ 371122285,
+ 371194213,
+ 371448425,
+ 371448430,
+ 371545055,
+ 371593469,
+ 371596922,
+ 371758751,
+ 371964792,
+ 372151328,
+ 376550136,
+ 376710172,
+ 376795771,
+ 376826271,
+ 376906556,
+ 380514830,
+ 380774774,
+ 380775037,
+ 381030322,
+ 381136500,
+ 381281631,
+ 381282269,
+ 381285504,
+ 381330595,
+ 381331422,
+ 381335911,
+ 381336484,
+ 383907298,
+ 383917408,
+ 384595009,
+ 384595013,
+ 387799894,
+ 387823201,
+ 392581647,
+ 392584937,
+ 392742684,
+ 392906485,
+ 393003349,
+ 400644707,
+ 400973830,
+ 404428547,
+ 404432113,
+ 404432865,
+ 404469244,
+ 404478897,
+ 404694860,
+ 406887479,
+ 408294949,
+ 408789955,
+ 410022510,
+ 410467324,
+ 410586448,
+ 410945965,
+ 411845275,
+ 414327152,
+ 414327932,
+ 414329781,
+ 414346257,
+ 414346439,
+ 414639928,
+ 414835998,
+ 414894517,
+ 414986533,
+ 417465377,
+ 417465381,
+ 417492216,
+ 418259232,
+ 419310946,
+ 420103495,
+ 420242342,
+ 420380455,
+ 420658662,
+ 420717432,
+ 423183880,
+ 424539259,
+ 425929170,
+ 425972964,
+ 426050649,
+ 426126450,
+ 426142833,
+ 426607922,
+ 437289840,
+ 437347469,
+ 437412335,
+ 437423943,
+ 437455540,
+ 437462252,
+ 437597991,
+ 437617485,
+ 437986305,
+ 437986507,
+ 437986828,
+ 437987072,
+ 438015591,
+ 438034813,
+ 438038966,
+ 438179623,
+ 438347971,
+ 438483573,
+ 438547062,
+ 438895551,
+ 441592676,
+ 442032555,
+ 443548979,
+ 447881379,
+ 447881655,
+ 447881895,
+ 447887844,
+ 448416189,
+ 448445746,
+ 448449012,
+ 450942191,
+ 452816744,
+ 453668677,
+ 454434495,
+ 456610076,
+ 456642844,
+ 456738709,
+ 457544600,
+ 459451897,
+ 459680944,
+ 468058810,
+ 468083581,
+ 470964084,
+ 471470955,
+ 471567278,
+ 472267822,
+ 481177859,
+ 481210627,
+ 481435874,
+ 481455115,
+ 481485378,
+ 481490218,
+ 485105638,
+ 486005878,
+ 486383494,
+ 487988916,
+ 488103783,
+ 490661867,
+ 491574090,
+ 491578272,
+ 492891370,
+ 493041952,
+ 493441205,
+ 493582844,
+ 493716979,
+ 504577572,
+ 504740359,
+ 505091638,
+ 505592418,
+ 505656212,
+ 509516275,
+ 514998531,
+ 515571132,
+ 515594682,
+ 518712698,
+ 521362273,
+ 526592419,
+ 526807354,
+ 527348842,
+ 538294791,
+ 544689535,
+ 545535009,
+ 548544752,
+ 548563346,
+ 548595116,
+ 551679010,
+ 558034099,
+ 560329411,
+ 560356209,
+ 560671018,
+ 560671152,
+ 560692590,
+ 560845442,
+ 569212097,
+ 569474241,
+ 572252718,
+ 575326764,
+ 576174758,
+ 576190819,
+ 582099184,
+ 582099438,
+ 582372519,
+ 582558889,
+ 586552164,
+ 591325418,
+ 594231990,
+ 594243961,
+ 605711268,
+ 615672071,
+ 616086845,
+ 621792370,
+ 624879850,
+ 627432831,
+ 640040548,
+ 654392808,
+ 658675477,
+ 659420283,
+ 672891587,
+ 694768102,
+ 705890982,
+ 725543146,
+ 759097578,
+ 761686526,
+ 795383908,
+ 878105336,
+ 908643300,
+ 945213471,
+ };
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java
new file mode 100644
index 000000000..01d76d700
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2008-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.NoLength;
+
+import org.xml.sax.SAXException;
+
+/**
+ * A common superclass for tree builders that coalesce their text nodes.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public abstract class CoalescingTreeBuilder<T> extends TreeBuilder<T> {
+
+ protected final void accumulateCharacters(@NoLength char[] buf, int start,
+ int length) throws SAXException {
+ System.arraycopy(buf, start, charBuffer, charBufferLen, length);
+ charBufferLen += length;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#appendCharacters(java.lang.Object, char[], int, int)
+ */
+ @Override protected final void appendCharacters(T parent, char[] buf, int start,
+ int length) throws SAXException {
+ appendCharacters(parent, new String(buf, start, length));
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#appendIsindexPrompt(java.lang.Object)
+ */
+ @Override protected void appendIsindexPrompt(T parent) throws SAXException {
+ appendCharacters(parent, "This is a searchable index. Enter search keywords: ");
+ }
+
+ protected abstract void appendCharacters(T parent, String text) throws SAXException;
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#appendComment(java.lang.Object, char[], int, int)
+ */
+ @Override final protected void appendComment(T parent, char[] buf, int start,
+ int length) throws SAXException {
+ appendComment(parent, new String(buf, start, length));
+ }
+
+ protected abstract void appendComment(T parent, String comment) throws SAXException;
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#appendCommentToDocument(char[], int, int)
+ */
+ @Override protected final void appendCommentToDocument(char[] buf, int start,
+ int length) throws SAXException {
+ // TODO Auto-generated method stub
+ appendCommentToDocument(new String(buf, start, length));
+ }
+
+ protected abstract void appendCommentToDocument(String comment) throws SAXException;
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#insertFosterParentedCharacters(char[], int, int, java.lang.Object, java.lang.Object)
+ */
+ @Override protected final void insertFosterParentedCharacters(char[] buf, int start,
+ int length, T table, T stackParent) throws SAXException {
+ insertFosterParentedCharacters(new String(buf, start, length), table, stackParent);
+ }
+
+ protected abstract void insertFosterParentedCharacters(String text, T table, T stackParent) throws SAXException;
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java
new file mode 100644
index 000000000..ee0493318
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java
@@ -0,0 +1,1614 @@
+/*
+ * Copyright (c) 2008-2016 Mozilla Foundation
+ * Copyright (c) 2018-2020 Moonchild Productions
+ * Copyright (c) 2020 Binary Outcast
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import java.util.Arrays;
+
+import nu.validator.htmlparser.annotation.Inline;
+import nu.validator.htmlparser.annotation.Local;
+import nu.validator.htmlparser.annotation.NoLength;
+import nu.validator.htmlparser.annotation.Virtual;
+import nu.validator.htmlparser.common.Interner;
+
+public final class ElementName
+// uncomment when regenerating self
+// implements Comparable<ElementName>
+{
+
+ /**
+ * The mask for extracting the dispatch group (127, i.e. the lowest
+ * seven bits of <code>flags</code>).
+ */
+ public static final int GROUP_MASK = 127;
+
+ /**
+ * Indicates that the element is not a pre-interned element. Forbidden
+ * on preinterned elements.
+ */
+ public static final int CUSTOM = (1 << 30);
+
+ /**
+ * Indicates that the element is in the "special" category. This bit
+ * should not be pre-set on MathML or SVG specials--only on HTML specials.
+ */
+ public static final int SPECIAL = (1 << 29);
+
+ /**
+ * The element is foster-parenting. This bit should be pre-set on elements
+ * that are foster-parenting as HTML.
+ */
+ public static final int FOSTER_PARENTING = (1 << 28);
+
+ /**
+ * The element is scoping. This bit should be pre-set on elements
+ * that are scoping as HTML.
+ */
+ public static final int SCOPING = (1 << 27);
+
+ /**
+ * The element is scoping as SVG.
+ */
+ public static final int SCOPING_AS_SVG = (1 << 26);
+
+ /**
+ * The element is scoping as MathML.
+ */
+ public static final int SCOPING_AS_MATHML = (1 << 25);
+
+ /**
+ * The element is an HTML integration point.
+ */
+ public static final int HTML_INTEGRATION_POINT = (1 << 24);
+
+ /**
+ * The element has an optional end tag.
+ */
+ public static final int OPTIONAL_END_TAG = (1 << 23);
+
+ /**
+ * An instance created by the one-argument constructor with a
+ * <code>null</code> name. Its <code>name</code> and
+ * <code>camelCaseName</code> are therefore <code>null</code> and its
+ * flags are <code>TreeBuilder.OTHER | CUSTOM</code>.
+ */
+ public static final ElementName NULL_ELEMENT_NAME = new ElementName(null);
+
+ /**
+ * The local name of the element. This is the string that
+ * <code>elementNameByBuffer</code> compares against the input buffer.
+ */
+ public final @Local String name;
+
+ /**
+ * A second form of the name. For names created by the one-argument
+ * constructor it is the same string as <code>name</code>.
+ * NOTE(review): presumably the mixed-case spelling used for foreign
+ * (e.g. SVG) elements -- confirm against the generated constants.
+ */
+ public final @Local String camelCaseName;
+
+ /**
+ * The lowest 7 bits are the dispatch group. The high bits are flags.
+ */
+ public final int flags;
+
+ /**
+ * Returns the raw <code>flags</code> word: the dispatch group in the
+ * lowest 7 bits plus the flag bits above it.
+ *
+ * @return the unmodified value of <code>flags</code>
+ */
+ @Inline public int getFlags() {
+ return flags;
+ }
+
+ /**
+ * Returns the dispatch group of this element name.
+ *
+ * @return <code>flags</code> masked with <code>GROUP_MASK</code>
+ */
+ public int getGroup() {
+ return flags & GROUP_MASK;
+ }
+
+ /**
+ * Tells whether the <code>CUSTOM</code> bit is set in <code>flags</code>.
+ * The one-argument constructor sets that bit.
+ *
+ * @return <code>true</code> if this is not a pre-interned element name
+ */
+ public boolean isCustom() {
+ return (flags & CUSTOM) != 0;
+ }
+
+ /**
+  * Returns the element name for the characters
+  * <code>buf[offset]</code> .. <code>buf[offset + length - 1]</code>.
+  *
+  * <p>The range is hashed and the hash is binary-searched in
+  * <code>ELEMENT_HASHES</code>, which therefore has to be sorted in
+  * ascending order and index-aligned with <code>ELEMENT_NAMES</code>.
+  * A hit is only trusted after the candidate's name has been compared
+  * with the buffer contents; on a miss or a mismatch a new custom
+  * <code>ElementName</code> is created from the buffer.
+  *
+  * @param buf the buffer holding the name
+  * @param offset index of the first character of the name in <code>buf</code>
+  * @param length number of characters in the name; must be at least 1
+  *        because the first character is always read for hashing
+  * @param interner passed through to
+  *        <code>Portability.newLocalNameFromBuffer</code>
+  * @return a pre-interned constant, or a newly created custom name
+  */
+ static ElementName elementNameByBuffer(@NoLength char[] buf, int offset, int length, Interner interner) {
+     // Hash exactly the range that is compared and copied below. Hashing
+     // from index 0 regardless of offset would make well-known names miss
+     // the table (and come back as CUSTOM) whenever offset != 0.
+     int hash = ElementName.bufToHash(buf, offset, length);
+     int index = Arrays.binarySearch(ElementName.ELEMENT_HASHES, hash);
+     if (index < 0) {
+         return new ElementName(Portability.newLocalNameFromBuffer(buf, offset, length, interner));
+     } else {
+         ElementName elementName = ElementName.ELEMENT_NAMES[index];
+         @Local String name = elementName.name;
+         // Equal hashes do not prove equal names, so verify the characters.
+         if (!Portability.localEqualsBuffer(name, buf, offset, length)) {
+             return new ElementName(Portability.newLocalNameFromBuffer(buf,
+                     offset, length, interner));
+         }
+         return elementName;
+     }
+ }
+
+ /**
+  * Hashes the name stored in <code>buf[0]</code> ..
+  * <code>buf[len - 1]</code>. Equivalent to
+  * <code>bufToHash(buf, 0, len)</code>; kept because the commented-out
+  * regeneration code in this class calls this two-argument form.
+  *
+  * @param buf the buffer holding the name, starting at index 0
+  * @param len the length of the name; must be at least 1
+  * @return the hash of the name
+  */
+ private static int bufToHash(@NoLength char[] buf, int len) {
+     return bufToHash(buf, 0, len);
+ }
+
+ /**
+  * This method has to return a unique integer for each well-known
+  * lower-cased element name.
+  *
+  * <p>The hash is built in 5-bit steps from the length, the first
+  * character and then up to four characters taken from the end of the
+  * name backwards, each reduced by 0x60.
+  *
+  * @param buf the buffer holding the name
+  * @param offset index of the first character of the name in <code>buf</code>
+  * @param len the length of the name; must be at least 1
+  * @return the hash of the name
+  */
+ private static int bufToHash(@NoLength char[] buf, int offset, int len) {
+     int hash = len;
+     hash <<= 5;
+     hash += buf[offset] - 0x60;
+     int j = len;
+     for (int i = 0; i < 4 && j > 0; i++) {
+         j--;
+         hash <<= 5;
+         hash += buf[offset + j] - 0x60;
+     }
+     return hash;
+ }
+
+ /**
+ * Creates an element name with explicitly given flags. The generated
+ * constants in this class are built with this constructor.
+ *
+ * @param name the value for <code>name</code>
+ * @param camelCaseName the value for <code>camelCaseName</code>
+ * @param flags the value for <code>flags</code>: a dispatch group,
+ * optionally OR-ed with flag bits such as <code>SPECIAL</code>
+ */
+ private ElementName(@Local String name, @Local String camelCaseName,
+ int flags) {
+ this.name = name;
+ this.camelCaseName = camelCaseName;
+ this.flags = flags;
+ }
+
+ /**
+ * Creates a custom (not pre-interned) element name. The given string is
+ * used for both <code>name</code> and <code>camelCaseName</code>, and
+ * the flags are set to <code>TreeBuilder.OTHER | CUSTOM</code>.
+ * Used by <code>elementNameByBuffer</code> for names that are not in the
+ * lookup table and for <code>NULL_ELEMENT_NAME</code>.
+ *
+ * @param name the local name; <code>NULL_ELEMENT_NAME</code> passes
+ * <code>null</code>
+ */
+ protected ElementName(@Local String name) {
+ this.name = name;
+ this.camelCaseName = name;
+ this.flags = TreeBuilder.OTHER | CUSTOM;
+ }
+
+ /**
+ * Release hook for this element name. The Java implementation does
+ * nothing.
+ * NOTE(review): the comments below describe what a non-Java
+ * implementation is expected to do -- confirm against the translator.
+ */
+ @Virtual void release() {
+ // No-op in Java.
+ // Implement as delete this in subclass.
+ // Be sure to release the local name
+ }
+
+ /**
+ * Empty private method that nothing in the visible Java code calls,
+ * which is why the "unused" warning is suppressed.
+ * NOTE(review): presumably a placeholder for a destructor in translated
+ * code -- confirm.
+ */
+ @SuppressWarnings("unused") @Virtual private void destructor() {
+ }
+
+ /**
+ * Returns this same instance; no copy is made and
+ * <code>interner</code> is not used by this implementation.
+ *
+ * @param interner unused here
+ * @return <code>this</code>
+ */
+ @Virtual public ElementName cloneElementName(Interner interner) {
+ return this;
+ }
+
+ // START CODE ONLY USED FOR GENERATING CODE uncomment and run to regenerate
+
+// /**
+// * @see java.lang.Object#toString()
+// */
+// @Override public String toString() {
+// return "(\"" + name + "\", \"" + camelCaseName + "\", " + decomposedFlags() + ")";
+// }
+//
+// private String decomposedFlags() {
+// StringBuilder buf = new StringBuilder("TreeBuilder.");
+// buf.append(treeBuilderGroupToName());
+// if ((flags & SPECIAL) != 0) {
+// buf.append(" | SPECIAL");
+// }
+// if ((flags & FOSTER_PARENTING) != 0) {
+// buf.append(" | FOSTER_PARENTING");
+// }
+// if ((flags & SCOPING) != 0) {
+// buf.append(" | SCOPING");
+// }
+// if ((flags & SCOPING_AS_MATHML) != 0) {
+// buf.append(" | SCOPING_AS_MATHML");
+// }
+// if ((flags & SCOPING_AS_SVG) != 0) {
+// buf.append(" | SCOPING_AS_SVG");
+// }
+// if ((flags & OPTIONAL_END_TAG) != 0) {
+// buf.append(" | OPTIONAL_END_TAG");
+// }
+// return buf.toString();
+// }
+//
+// private String constName() {
+// char[] buf = new char[name.length()];
+// for (int i = 0; i < name.length(); i++) {
+// char c = name.charAt(i);
+// if (c == '-') {
+// buf[i] = '_';
+// } else if (c >= '0' && c <= '9') {
+// buf[i] = c;
+// } else {
+// buf[i] = (char) (c - 0x20);
+// }
+// }
+// return new String(buf);
+// }
+//
+// private int hash() {
+// return bufToHash(name.toCharArray(), name.length());
+// }
+//
+// public int compareTo(ElementName other) {
+// int thisHash = this.hash();
+// int otherHash = other.hash();
+// if (thisHash < otherHash) {
+// return -1;
+// } else if (thisHash == otherHash) {
+// return 0;
+// } else {
+// return 1;
+// }
+// }
+//
+// private String treeBuilderGroupToName() {
+// switch (getGroup()) {
+// case TreeBuilder.OTHER:
+// return "OTHER";
+// case TreeBuilder.A:
+// return "A";
+// case TreeBuilder.BASE:
+// return "BASE";
+// case TreeBuilder.BODY:
+// return "BODY";
+// case TreeBuilder.BR:
+// return "BR";
+// case TreeBuilder.BUTTON:
+// return "BUTTON";
+// case TreeBuilder.CAPTION:
+// return "CAPTION";
+// case TreeBuilder.COL:
+// return "COL";
+// case TreeBuilder.COLGROUP:
+// return "COLGROUP";
+// case TreeBuilder.FONT:
+// return "FONT";
+// case TreeBuilder.FORM:
+// return "FORM";
+// case TreeBuilder.FRAME:
+// return "FRAME";
+// case TreeBuilder.FRAMESET:
+// return "FRAMESET";
+// case TreeBuilder.IMAGE:
+// return "IMAGE";
+// case TreeBuilder.INPUT:
+// return "INPUT";
+// case TreeBuilder.ISINDEX:
+// return "ISINDEX";
+// case TreeBuilder.LI:
+// return "LI";
+// case TreeBuilder.LINK_OR_BASEFONT_OR_BGSOUND:
+// return "LINK_OR_BASEFONT_OR_BGSOUND";
+// case TreeBuilder.MATH:
+// return "MATH";
+// case TreeBuilder.META:
+// return "META";
+// case TreeBuilder.SVG:
+// return "SVG";
+// case TreeBuilder.HEAD:
+// return "HEAD";
+// case TreeBuilder.HR:
+// return "HR";
+// case TreeBuilder.HTML:
+// return "HTML";
+// case TreeBuilder.KEYGEN:
+// return "KEYGEN";
+// case TreeBuilder.NOBR:
+// return "NOBR";
+// case TreeBuilder.NOFRAMES:
+// return "NOFRAMES";
+// case TreeBuilder.NOSCRIPT:
+// return "NOSCRIPT";
+// case TreeBuilder.OPTGROUP:
+// return "OPTGROUP";
+// case TreeBuilder.OPTION:
+// return "OPTION";
+// case TreeBuilder.P:
+// return "P";
+// case TreeBuilder.PLAINTEXT:
+// return "PLAINTEXT";
+// case TreeBuilder.SCRIPT:
+// return "SCRIPT";
+// case TreeBuilder.SELECT:
+// return "SELECT";
+// case TreeBuilder.STYLE:
+// return "STYLE";
+// case TreeBuilder.TABLE:
+// return "TABLE";
+// case TreeBuilder.TEXTAREA:
+// return "TEXTAREA";
+// case TreeBuilder.TITLE:
+// return "TITLE";
+// case TreeBuilder.TEMPLATE:
+// return "TEMPLATE";
+// case TreeBuilder.TR:
+// return "TR";
+// case TreeBuilder.XMP:
+// return "XMP";
+// case TreeBuilder.TBODY_OR_THEAD_OR_TFOOT:
+// return "TBODY_OR_THEAD_OR_TFOOT";
+// case TreeBuilder.TD_OR_TH:
+// return "TD_OR_TH";
+// case TreeBuilder.DD_OR_DT:
+// return "DD_OR_DT";
+// case TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6:
+// return "H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6";
+// case TreeBuilder.OBJECT:
+// return "OBJECT";
+// case TreeBuilder.OUTPUT:
+// return "OUTPUT";
+// case TreeBuilder.MARQUEE_OR_APPLET:
+// return "MARQUEE_OR_APPLET";
+// case TreeBuilder.PRE_OR_LISTING:
+// return "PRE_OR_LISTING";
+// case TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U:
+// return "B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U";
+// case TreeBuilder.UL_OR_OL_OR_DL:
+// return "UL_OR_OL_OR_DL";
+// case TreeBuilder.IFRAME:
+// return "IFRAME";
+// case TreeBuilder.NOEMBED:
+// return "NOEMBED";
+// case TreeBuilder.EMBED:
+// return "EMBED";
+// case TreeBuilder.IMG:
+// return "IMG";
+// case TreeBuilder.AREA_OR_WBR:
+// return "AREA_OR_WBR";
+// case TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU:
+// return "DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU";
+// case TreeBuilder.FIELDSET:
+// return "FIELDSET";
+// case TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY:
+// return "ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY";
+// case TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR:
+// return "RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR";
+// case TreeBuilder.RB_OR_RTC:
+// return "RB_OR_RTC";
+// case TreeBuilder.RT_OR_RP:
+// return "RT_OR_RP";
+// case TreeBuilder.PARAM_OR_SOURCE_OR_TRACK:
+// return "PARAM_OR_SOURCE_OR_TRACK";
+// case TreeBuilder.MGLYPH_OR_MALIGNMARK:
+// return "MGLYPH_OR_MALIGNMARK";
+// case TreeBuilder.MI_MO_MN_MS_MTEXT:
+// return "MI_MO_MN_MS_MTEXT";
+// case TreeBuilder.ANNOTATION_XML:
+// return "ANNOTATION_XML";
+// case TreeBuilder.FOREIGNOBJECT_OR_DESC:
+// return "FOREIGNOBJECT_OR_DESC";
+// case TreeBuilder.MENUITEM:
+// return "MENUITEM";
+// }
+// return null;
+// }
+//
+// /**
+// * Regenerate self
+// *
+// * @param args
+// */
+// public static void main(String[] args) {
+// Arrays.sort(ELEMENT_NAMES);
+// for (int i = 1; i < ELEMENT_NAMES.length; i++) {
+// if (ELEMENT_NAMES[i].hash() == ELEMENT_NAMES[i - 1].hash()) {
+// System.err.println("Hash collision: " + ELEMENT_NAMES[i].name
+// + ", " + ELEMENT_NAMES[i - 1].name);
+// return;
+// }
+// }
+// for (int i = 0; i < ELEMENT_NAMES.length; i++) {
+// ElementName el = ELEMENT_NAMES[i];
+// System.out.println("public static final ElementName "
+// + el.constName() + " = new ElementName" + el.toString()
+// + ";");
+// }
+// System.out.println("private final static @NoLength ElementName[] ELEMENT_NAMES = {");
+// for (int i = 0; i < ELEMENT_NAMES.length; i++) {
+// ElementName el = ELEMENT_NAMES[i];
+// System.out.println(el.constName() + ",");
+// }
+// System.out.println("};");
+// System.out.println("private final static int[] ELEMENT_HASHES = {");
+// for (int i = 0; i < ELEMENT_NAMES.length; i++) {
+// ElementName el = ELEMENT_NAMES[i];
+// System.out.println(Integer.toString(el.hash()) + ",");
+// }
+// System.out.println("};");
+// }
+
+ // START GENERATED CODE
+ public static final ElementName A = new ElementName("a", "a", TreeBuilder.A);
+ public static final ElementName B = new ElementName("b", "b", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName G = new ElementName("g", "g", TreeBuilder.OTHER);
+ public static final ElementName I = new ElementName("i", "i", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName P = new ElementName("p", "p", TreeBuilder.P | SPECIAL | OPTIONAL_END_TAG);
+ public static final ElementName Q = new ElementName("q", "q", TreeBuilder.OTHER);
+ public static final ElementName S = new ElementName("s", "s", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName U = new ElementName("u", "u", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName BR = new ElementName("br", "br", TreeBuilder.BR | SPECIAL);
+ public static final ElementName CI = new ElementName("ci", "ci", TreeBuilder.OTHER);
+ public static final ElementName CN = new ElementName("cn", "cn", TreeBuilder.OTHER);
+ public static final ElementName DD = new ElementName("dd", "dd", TreeBuilder.DD_OR_DT | SPECIAL | OPTIONAL_END_TAG);
+ public static final ElementName DL = new ElementName("dl", "dl", TreeBuilder.UL_OR_OL_OR_DL | SPECIAL);
+ public static final ElementName DT = new ElementName("dt", "dt", TreeBuilder.DD_OR_DT | SPECIAL | OPTIONAL_END_TAG);
+ public static final ElementName EM = new ElementName("em", "em", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName EQ = new ElementName("eq", "eq", TreeBuilder.OTHER);
+ public static final ElementName FN = new ElementName("fn", "fn", TreeBuilder.OTHER);
+ public static final ElementName H1 = new ElementName("h1", "h1", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL);
+ public static final ElementName H2 = new ElementName("h2", "h2", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL);
+ public static final ElementName H3 = new ElementName("h3", "h3", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL);
+ public static final ElementName H4 = new ElementName("h4", "h4", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL);
+ public static final ElementName H5 = new ElementName("h5", "h5", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL);
+ public static final ElementName H6 = new ElementName("h6", "h6", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL);
+ public static final ElementName GT = new ElementName("gt", "gt", TreeBuilder.OTHER);
+ public static final ElementName HR = new ElementName("hr", "hr", TreeBuilder.HR | SPECIAL);
+ public static final ElementName IN = new ElementName("in", "in", TreeBuilder.OTHER);
+ public static final ElementName LI = new ElementName("li", "li", TreeBuilder.LI | SPECIAL | OPTIONAL_END_TAG);
+ public static final ElementName LN = new ElementName("ln", "ln", TreeBuilder.OTHER);
+ public static final ElementName LT = new ElementName("lt", "lt", TreeBuilder.OTHER);
+ public static final ElementName MI = new ElementName("mi", "mi", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML);
+ public static final ElementName MN = new ElementName("mn", "mn", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML);
+ public static final ElementName MO = new ElementName("mo", "mo", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML);
+ public static final ElementName MS = new ElementName("ms", "ms", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML);
+ public static final ElementName OL = new ElementName("ol", "ol", TreeBuilder.UL_OR_OL_OR_DL | SPECIAL);
+ public static final ElementName OR = new ElementName("or", "or", TreeBuilder.OTHER);
+ public static final ElementName PI = new ElementName("pi", "pi", TreeBuilder.OTHER);
+ public static final ElementName RB = new ElementName("rb", "rb", TreeBuilder.RB_OR_RTC | OPTIONAL_END_TAG);
+ public static final ElementName RP = new ElementName("rp", "rp", TreeBuilder.RT_OR_RP | OPTIONAL_END_TAG);
+ public static final ElementName RT = new ElementName("rt", "rt", TreeBuilder.RT_OR_RP | OPTIONAL_END_TAG);
+ public static final ElementName TD = new ElementName("td", "td", TreeBuilder.TD_OR_TH | SPECIAL | SCOPING | OPTIONAL_END_TAG);
+ public static final ElementName TH = new ElementName("th", "th", TreeBuilder.TD_OR_TH | SPECIAL | SCOPING | OPTIONAL_END_TAG);
+ public static final ElementName TR = new ElementName("tr", "tr", TreeBuilder.TR | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG);
+ public static final ElementName TT = new ElementName("tt", "tt", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName UL = new ElementName("ul", "ul", TreeBuilder.UL_OR_OL_OR_DL | SPECIAL);
+ public static final ElementName AND = new ElementName("and", "and", TreeBuilder.OTHER);
+ public static final ElementName ARG = new ElementName("arg", "arg", TreeBuilder.OTHER);
+ public static final ElementName ABS = new ElementName("abs", "abs", TreeBuilder.OTHER);
+ public static final ElementName BIG = new ElementName("big", "big", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName BDO = new ElementName("bdo", "bdo", TreeBuilder.OTHER);
+ public static final ElementName CSC = new ElementName("csc", "csc", TreeBuilder.OTHER);
+ public static final ElementName COL = new ElementName("col", "col", TreeBuilder.COL | SPECIAL);
+ public static final ElementName COS = new ElementName("cos", "cos", TreeBuilder.OTHER);
+ public static final ElementName COT = new ElementName("cot", "cot", TreeBuilder.OTHER);
+ public static final ElementName DEL = new ElementName("del", "del", TreeBuilder.OTHER);
+ public static final ElementName DFN = new ElementName("dfn", "dfn", TreeBuilder.OTHER);
+ public static final ElementName DIR = new ElementName("dir", "dir", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName DIV = new ElementName("div", "div", TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL);
+ public static final ElementName EXP = new ElementName("exp", "exp", TreeBuilder.OTHER);
+ public static final ElementName GCD = new ElementName("gcd", "gcd", TreeBuilder.OTHER);
+ public static final ElementName GEQ = new ElementName("geq", "geq", TreeBuilder.OTHER);
+ public static final ElementName IMG = new ElementName("img", "img", TreeBuilder.IMG | SPECIAL);
+ public static final ElementName INS = new ElementName("ins", "ins", TreeBuilder.OTHER);
+ public static final ElementName INT = new ElementName("int", "int", TreeBuilder.OTHER);
+ public static final ElementName KBD = new ElementName("kbd", "kbd", TreeBuilder.OTHER);
+ public static final ElementName LOG = new ElementName("log", "log", TreeBuilder.OTHER);
+ public static final ElementName LCM = new ElementName("lcm", "lcm", TreeBuilder.OTHER);
+ public static final ElementName LEQ = new ElementName("leq", "leq", TreeBuilder.OTHER);
+ public static final ElementName MTD = new ElementName("mtd", "mtd", TreeBuilder.OTHER);
+ public static final ElementName MIN = new ElementName("min", "min", TreeBuilder.OTHER);
+ public static final ElementName MAP = new ElementName("map", "map", TreeBuilder.OTHER);
+ public static final ElementName MTR = new ElementName("mtr", "mtr", TreeBuilder.OTHER);
+ public static final ElementName MAX = new ElementName("max", "max", TreeBuilder.OTHER);
+ public static final ElementName NEQ = new ElementName("neq", "neq", TreeBuilder.OTHER);
+ public static final ElementName NOT = new ElementName("not", "not", TreeBuilder.OTHER);
+ public static final ElementName NAV = new ElementName("nav", "nav", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName PRE = new ElementName("pre", "pre", TreeBuilder.PRE_OR_LISTING | SPECIAL);
+ public static final ElementName RTC = new ElementName("rtc", "rtc", TreeBuilder.RB_OR_RTC | OPTIONAL_END_TAG);
+ public static final ElementName REM = new ElementName("rem", "rem", TreeBuilder.OTHER);
+ public static final ElementName SUB = new ElementName("sub", "sub", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR);
+ public static final ElementName SEC = new ElementName("sec", "sec", TreeBuilder.OTHER);
+ public static final ElementName SVG = new ElementName("svg", "svg", TreeBuilder.SVG);
+ public static final ElementName SUM = new ElementName("sum", "sum", TreeBuilder.OTHER);
+ public static final ElementName SIN = new ElementName("sin", "sin", TreeBuilder.OTHER);
+ public static final ElementName SEP = new ElementName("sep", "sep", TreeBuilder.OTHER);
+ public static final ElementName SUP = new ElementName("sup", "sup", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR);
+ public static final ElementName SET = new ElementName("set", "set", TreeBuilder.OTHER);
+ public static final ElementName TAN = new ElementName("tan", "tan", TreeBuilder.OTHER);
+ public static final ElementName USE = new ElementName("use", "use", TreeBuilder.OTHER);
+ public static final ElementName VAR = new ElementName("var", "var", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR);
+ public static final ElementName WBR = new ElementName("wbr", "wbr", TreeBuilder.AREA_OR_WBR | SPECIAL);
+ public static final ElementName XMP = new ElementName("xmp", "xmp", TreeBuilder.XMP | SPECIAL);
+ public static final ElementName XOR = new ElementName("xor", "xor", TreeBuilder.OTHER);
+ public static final ElementName AREA = new ElementName("area", "area", TreeBuilder.AREA_OR_WBR | SPECIAL);
+ public static final ElementName ABBR = new ElementName("abbr", "abbr", TreeBuilder.OTHER);
+ public static final ElementName BASE = new ElementName("base", "base", TreeBuilder.BASE | SPECIAL);
+ public static final ElementName BVAR = new ElementName("bvar", "bvar", TreeBuilder.OTHER);
+ public static final ElementName BODY = new ElementName("body", "body", TreeBuilder.BODY | SPECIAL | OPTIONAL_END_TAG);
+ public static final ElementName CARD = new ElementName("card", "card", TreeBuilder.OTHER);
+ public static final ElementName CODE = new ElementName("code", "code", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName CITE = new ElementName("cite", "cite", TreeBuilder.OTHER);
+ public static final ElementName CSCH = new ElementName("csch", "csch", TreeBuilder.OTHER);
+ public static final ElementName COSH = new ElementName("cosh", "cosh", TreeBuilder.OTHER);
+ public static final ElementName COTH = new ElementName("coth", "coth", TreeBuilder.OTHER);
+ public static final ElementName CURL = new ElementName("curl", "curl", TreeBuilder.OTHER);
+ public static final ElementName DESC = new ElementName("desc", "desc", TreeBuilder.FOREIGNOBJECT_OR_DESC | SCOPING_AS_SVG);
+ public static final ElementName DIFF = new ElementName("diff", "diff", TreeBuilder.OTHER);
+ public static final ElementName DEFS = new ElementName("defs", "defs", TreeBuilder.OTHER);
+ public static final ElementName FORM = new ElementName("form", "form", TreeBuilder.FORM | SPECIAL);
+ public static final ElementName FONT = new ElementName("font", "font", TreeBuilder.FONT);
+ public static final ElementName GRAD = new ElementName("grad", "grad", TreeBuilder.OTHER);
+ public static final ElementName HEAD = new ElementName("head", "head", TreeBuilder.HEAD | SPECIAL | OPTIONAL_END_TAG);
+ public static final ElementName HTML = new ElementName("html", "html", TreeBuilder.HTML | SPECIAL | SCOPING | OPTIONAL_END_TAG);
+ public static final ElementName LINE = new ElementName("line", "line", TreeBuilder.OTHER);
+ public static final ElementName LINK = new ElementName("link", "link", TreeBuilder.LINK_OR_BASEFONT_OR_BGSOUND | SPECIAL);
+ public static final ElementName LIST = new ElementName("list", "list", TreeBuilder.OTHER);
+ public static final ElementName META = new ElementName("meta", "meta", TreeBuilder.META | SPECIAL);
+ public static final ElementName MSUB = new ElementName("msub", "msub", TreeBuilder.OTHER);
+ public static final ElementName MODE = new ElementName("mode", "mode", TreeBuilder.OTHER);
+ public static final ElementName MATH = new ElementName("math", "math", TreeBuilder.MATH);
+ public static final ElementName MARK = new ElementName("mark", "mark", TreeBuilder.OTHER);
+ public static final ElementName MASK = new ElementName("mask", "mask", TreeBuilder.OTHER);
+ public static final ElementName MEAN = new ElementName("mean", "mean", TreeBuilder.OTHER);
+ public static final ElementName MAIN = new ElementName("main", "main", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName MSUP = new ElementName("msup", "msup", TreeBuilder.OTHER);
+ public static final ElementName MENU = new ElementName("menu", "menu", TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL);
+ public static final ElementName MROW = new ElementName("mrow", "mrow", TreeBuilder.OTHER);
+ public static final ElementName NONE = new ElementName("none", "none", TreeBuilder.OTHER);
+ public static final ElementName NOBR = new ElementName("nobr", "nobr", TreeBuilder.NOBR);
+ public static final ElementName NEST = new ElementName("nest", "nest", TreeBuilder.OTHER);
+ public static final ElementName PATH = new ElementName("path", "path", TreeBuilder.OTHER);
+ public static final ElementName PLUS = new ElementName("plus", "plus", TreeBuilder.OTHER);
+ public static final ElementName RULE = new ElementName("rule", "rule", TreeBuilder.OTHER);
+ public static final ElementName REAL = new ElementName("real", "real", TreeBuilder.OTHER);
+ public static final ElementName RELN = new ElementName("reln", "reln", TreeBuilder.OTHER);
+ public static final ElementName RECT = new ElementName("rect", "rect", TreeBuilder.OTHER);
+ public static final ElementName ROOT = new ElementName("root", "root", TreeBuilder.OTHER);
+ public static final ElementName RUBY = new ElementName("ruby", "ruby", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR);
+ public static final ElementName SECH = new ElementName("sech", "sech", TreeBuilder.OTHER);
+ public static final ElementName SINH = new ElementName("sinh", "sinh", TreeBuilder.OTHER);
+ public static final ElementName SPAN = new ElementName("span", "span", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR);
+ public static final ElementName SAMP = new ElementName("samp", "samp", TreeBuilder.OTHER);
+ public static final ElementName STOP = new ElementName("stop", "stop", TreeBuilder.OTHER);
+ public static final ElementName SDEV = new ElementName("sdev", "sdev", TreeBuilder.OTHER);
+ public static final ElementName TIME = new ElementName("time", "time", TreeBuilder.OTHER);
+ public static final ElementName TRUE = new ElementName("true", "true", TreeBuilder.OTHER);
+ public static final ElementName TREF = new ElementName("tref", "tref", TreeBuilder.OTHER);
+ public static final ElementName TANH = new ElementName("tanh", "tanh", TreeBuilder.OTHER);
+ public static final ElementName TEXT = new ElementName("text", "text", TreeBuilder.OTHER);
+ public static final ElementName VIEW = new ElementName("view", "view", TreeBuilder.OTHER);
+ public static final ElementName ASIDE = new ElementName("aside", "aside", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName AUDIO = new ElementName("audio", "audio", TreeBuilder.OTHER);
+ public static final ElementName APPLY = new ElementName("apply", "apply", TreeBuilder.OTHER);
+ public static final ElementName EMBED = new ElementName("embed", "embed", TreeBuilder.EMBED | SPECIAL);
+ public static final ElementName FRAME = new ElementName("frame", "frame", TreeBuilder.FRAME | SPECIAL);
+ public static final ElementName FALSE = new ElementName("false", "false", TreeBuilder.OTHER);
+ public static final ElementName FLOOR = new ElementName("floor", "floor", TreeBuilder.OTHER);
+ public static final ElementName GLYPH = new ElementName("glyph", "glyph", TreeBuilder.OTHER);
+ public static final ElementName HKERN = new ElementName("hkern", "hkern", TreeBuilder.OTHER);
+ public static final ElementName IMAGE = new ElementName("image", "image", TreeBuilder.IMAGE);
+ public static final ElementName IDENT = new ElementName("ident", "ident", TreeBuilder.OTHER);
+ public static final ElementName INPUT = new ElementName("input", "input", TreeBuilder.INPUT | SPECIAL);
+ public static final ElementName LABEL = new ElementName("label", "label", TreeBuilder.OTHER);
+ public static final ElementName LIMIT = new ElementName("limit", "limit", TreeBuilder.OTHER);
+ public static final ElementName MFRAC = new ElementName("mfrac", "mfrac", TreeBuilder.OTHER);
+ public static final ElementName MPATH = new ElementName("mpath", "mpath", TreeBuilder.OTHER);
+ public static final ElementName METER = new ElementName("meter", "meter", TreeBuilder.OTHER);
+ public static final ElementName MOVER = new ElementName("mover", "mover", TreeBuilder.OTHER);
+ public static final ElementName MINUS = new ElementName("minus", "minus", TreeBuilder.OTHER);
+ public static final ElementName MROOT = new ElementName("mroot", "mroot", TreeBuilder.OTHER);
+ public static final ElementName MSQRT = new ElementName("msqrt", "msqrt", TreeBuilder.OTHER);
+ public static final ElementName MTEXT = new ElementName("mtext", "mtext", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML);
+ public static final ElementName NOTIN = new ElementName("notin", "notin", TreeBuilder.OTHER);
+ public static final ElementName PIECE = new ElementName("piece", "piece", TreeBuilder.OTHER);
+ public static final ElementName PARAM = new ElementName("param", "param", TreeBuilder.PARAM_OR_SOURCE_OR_TRACK | SPECIAL);
+ public static final ElementName POWER = new ElementName("power", "power", TreeBuilder.OTHER);
+ public static final ElementName REALS = new ElementName("reals", "reals", TreeBuilder.OTHER);
+ public static final ElementName STYLE = new ElementName("style", "style", TreeBuilder.STYLE | SPECIAL);
+ public static final ElementName SMALL = new ElementName("small", "small", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName THEAD = new ElementName("thead", "thead", TreeBuilder.TBODY_OR_THEAD_OR_TFOOT | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG);
+ public static final ElementName TABLE = new ElementName("table", "table", TreeBuilder.TABLE | SPECIAL | FOSTER_PARENTING | SCOPING);
+ public static final ElementName TITLE = new ElementName("title", "title", TreeBuilder.TITLE | SPECIAL | SCOPING_AS_SVG);
+ public static final ElementName TRACK = new ElementName("track", "track", TreeBuilder.PARAM_OR_SOURCE_OR_TRACK | SPECIAL);
+ public static final ElementName TSPAN = new ElementName("tspan", "tspan", TreeBuilder.OTHER);
+ public static final ElementName TIMES = new ElementName("times", "times", TreeBuilder.OTHER);
+ public static final ElementName TFOOT = new ElementName("tfoot", "tfoot", TreeBuilder.TBODY_OR_THEAD_OR_TFOOT | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG);
+ public static final ElementName TBODY = new ElementName("tbody", "tbody", TreeBuilder.TBODY_OR_THEAD_OR_TFOOT | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG);
+ public static final ElementName UNION = new ElementName("union", "union", TreeBuilder.OTHER);
+ public static final ElementName VKERN = new ElementName("vkern", "vkern", TreeBuilder.OTHER);
+ public static final ElementName VIDEO = new ElementName("video", "video", TreeBuilder.OTHER);
+ public static final ElementName ARCSEC = new ElementName("arcsec", "arcsec", TreeBuilder.OTHER);
+ public static final ElementName ARCCSC = new ElementName("arccsc", "arccsc", TreeBuilder.OTHER);
+ public static final ElementName ARCTAN = new ElementName("arctan", "arctan", TreeBuilder.OTHER);
+ public static final ElementName ARCSIN = new ElementName("arcsin", "arcsin", TreeBuilder.OTHER);
+ public static final ElementName ARCCOS = new ElementName("arccos", "arccos", TreeBuilder.OTHER);
+ public static final ElementName APPLET = new ElementName("applet", "applet", TreeBuilder.MARQUEE_OR_APPLET | SPECIAL | SCOPING);
+ public static final ElementName ARCCOT = new ElementName("arccot", "arccot", TreeBuilder.OTHER);
+ public static final ElementName APPROX = new ElementName("approx", "approx", TreeBuilder.OTHER);
+ public static final ElementName BUTTON = new ElementName("button", "button", TreeBuilder.BUTTON | SPECIAL);
+ public static final ElementName CIRCLE = new ElementName("circle", "circle", TreeBuilder.OTHER);
+ public static final ElementName CENTER = new ElementName("center", "center", TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL);
+ public static final ElementName CURSOR = new ElementName("cursor", "cursor", TreeBuilder.OTHER);
+ public static final ElementName CANVAS = new ElementName("canvas", "canvas", TreeBuilder.OTHER);
+ public static final ElementName DIVIDE = new ElementName("divide", "divide", TreeBuilder.OTHER);
+ public static final ElementName DEGREE = new ElementName("degree", "degree", TreeBuilder.OTHER);
+ public static final ElementName DIALOG = new ElementName("dialog", "dialog", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName DOMAIN = new ElementName("domain", "domain", TreeBuilder.OTHER);
+ public static final ElementName EXISTS = new ElementName("exists", "exists", TreeBuilder.OTHER);
+ public static final ElementName FETILE = new ElementName("fetile", "feTile", TreeBuilder.OTHER);
+ public static final ElementName FIGURE = new ElementName("figure", "figure", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName FORALL = new ElementName("forall", "forall", TreeBuilder.OTHER);
+ public static final ElementName FILTER = new ElementName("filter", "filter", TreeBuilder.OTHER);
+ public static final ElementName FOOTER = new ElementName("footer", "footer", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName HGROUP = new ElementName("hgroup", "hgroup", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName HEADER = new ElementName("header", "header", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName IFRAME = new ElementName("iframe", "iframe", TreeBuilder.IFRAME | SPECIAL);
+ public static final ElementName KEYGEN = new ElementName("keygen", "keygen", TreeBuilder.KEYGEN);
+ public static final ElementName LAMBDA = new ElementName("lambda", "lambda", TreeBuilder.OTHER);
+ public static final ElementName LEGEND = new ElementName("legend", "legend", TreeBuilder.OTHER);
+ public static final ElementName MSPACE = new ElementName("mspace", "mspace", TreeBuilder.OTHER);
+ public static final ElementName MTABLE = new ElementName("mtable", "mtable", TreeBuilder.OTHER);
+ public static final ElementName MSTYLE = new ElementName("mstyle", "mstyle", TreeBuilder.OTHER);
+ public static final ElementName MGLYPH = new ElementName("mglyph", "mglyph", TreeBuilder.MGLYPH_OR_MALIGNMARK);
+ public static final ElementName MEDIAN = new ElementName("median", "median", TreeBuilder.OTHER);
+ public static final ElementName MUNDER = new ElementName("munder", "munder", TreeBuilder.OTHER);
+ public static final ElementName MARKER = new ElementName("marker", "marker", TreeBuilder.OTHER);
+ public static final ElementName MERROR = new ElementName("merror", "merror", TreeBuilder.OTHER);
+ public static final ElementName MOMENT = new ElementName("moment", "moment", TreeBuilder.OTHER);
+ public static final ElementName MATRIX = new ElementName("matrix", "matrix", TreeBuilder.OTHER);
+ public static final ElementName OPTION = new ElementName("option", "option", TreeBuilder.OPTION | OPTIONAL_END_TAG);
+ public static final ElementName OBJECT = new ElementName("object", "object", TreeBuilder.OBJECT | SPECIAL | SCOPING);
+ public static final ElementName OUTPUT = new ElementName("output", "output", TreeBuilder.OUTPUT);
+ public static final ElementName PRIMES = new ElementName("primes", "primes", TreeBuilder.OTHER);
+ public static final ElementName SOURCE = new ElementName("source", "source", TreeBuilder.PARAM_OR_SOURCE_OR_TRACK);
+ public static final ElementName STRIKE = new ElementName("strike", "strike", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName STRONG = new ElementName("strong", "strong", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName SWITCH = new ElementName("switch", "switch", TreeBuilder.OTHER);
+ public static final ElementName SYMBOL = new ElementName("symbol", "symbol", TreeBuilder.OTHER);
+ public static final ElementName SELECT = new ElementName("select", "select", TreeBuilder.SELECT | SPECIAL);
+ public static final ElementName SUBSET = new ElementName("subset", "subset", TreeBuilder.OTHER);
+ public static final ElementName SCRIPT = new ElementName("script", "script", TreeBuilder.SCRIPT | SPECIAL);
+ public static final ElementName TBREAK = new ElementName("tbreak", "tbreak", TreeBuilder.OTHER);
+ public static final ElementName VECTOR = new ElementName("vector", "vector", TreeBuilder.OTHER);
+ public static final ElementName ARTICLE = new ElementName("article", "article", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName ANIMATE = new ElementName("animate", "animate", TreeBuilder.OTHER);
+ public static final ElementName ARCSECH = new ElementName("arcsech", "arcsech", TreeBuilder.OTHER);
+ public static final ElementName ARCCSCH = new ElementName("arccsch", "arccsch", TreeBuilder.OTHER);
+ public static final ElementName ARCTANH = new ElementName("arctanh", "arctanh", TreeBuilder.OTHER);
+ public static final ElementName ARCSINH = new ElementName("arcsinh", "arcsinh", TreeBuilder.OTHER);
+ public static final ElementName ARCCOSH = new ElementName("arccosh", "arccosh", TreeBuilder.OTHER);
+ public static final ElementName ARCCOTH = new ElementName("arccoth", "arccoth", TreeBuilder.OTHER);
+ public static final ElementName ACRONYM = new ElementName("acronym", "acronym", TreeBuilder.OTHER);
+ public static final ElementName ADDRESS = new ElementName("address", "address", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName BGSOUND = new ElementName("bgsound", "bgsound", TreeBuilder.LINK_OR_BASEFONT_OR_BGSOUND | SPECIAL);
+ public static final ElementName COMPOSE = new ElementName("compose", "compose", TreeBuilder.OTHER);
+ public static final ElementName CEILING = new ElementName("ceiling", "ceiling", TreeBuilder.OTHER);
+ public static final ElementName CSYMBOL = new ElementName("csymbol", "csymbol", TreeBuilder.OTHER);
+ public static final ElementName CAPTION = new ElementName("caption", "caption", TreeBuilder.CAPTION | SPECIAL | SCOPING);
+ public static final ElementName DISCARD = new ElementName("discard", "discard", TreeBuilder.OTHER);
+ public static final ElementName DECLARE = new ElementName("declare", "declare", TreeBuilder.OTHER);
+ public static final ElementName DETAILS = new ElementName("details", "details", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName ELLIPSE = new ElementName("ellipse", "ellipse", TreeBuilder.OTHER);
+ public static final ElementName FEFUNCA = new ElementName("fefunca", "feFuncA", TreeBuilder.OTHER);
+ public static final ElementName FEFUNCB = new ElementName("fefuncb", "feFuncB", TreeBuilder.OTHER);
+ public static final ElementName FEBLEND = new ElementName("feblend", "feBlend", TreeBuilder.OTHER);
+ public static final ElementName FEFLOOD = new ElementName("feflood", "feFlood", TreeBuilder.OTHER);
+ public static final ElementName FEIMAGE = new ElementName("feimage", "feImage", TreeBuilder.OTHER);
+ public static final ElementName FEMERGE = new ElementName("femerge", "feMerge", TreeBuilder.OTHER);
+ public static final ElementName FEFUNCG = new ElementName("fefuncg", "feFuncG", TreeBuilder.OTHER);
+ public static final ElementName FEFUNCR = new ElementName("fefuncr", "feFuncR", TreeBuilder.OTHER);
+ public static final ElementName HANDLER = new ElementName("handler", "handler", TreeBuilder.OTHER);
+ public static final ElementName INVERSE = new ElementName("inverse", "inverse", TreeBuilder.OTHER);
+ public static final ElementName IMPLIES = new ElementName("implies", "implies", TreeBuilder.OTHER);
+ public static final ElementName ISINDEX = new ElementName("isindex", "isindex", TreeBuilder.ISINDEX | SPECIAL);
+ public static final ElementName LOGBASE = new ElementName("logbase", "logbase", TreeBuilder.OTHER);
+ public static final ElementName LISTING = new ElementName("listing", "listing", TreeBuilder.PRE_OR_LISTING | SPECIAL);
+ public static final ElementName MFENCED = new ElementName("mfenced", "mfenced", TreeBuilder.OTHER);
+ public static final ElementName MPADDED = new ElementName("mpadded", "mpadded", TreeBuilder.OTHER);
+ public static final ElementName MARQUEE = new ElementName("marquee", "marquee", TreeBuilder.MARQUEE_OR_APPLET | SPECIAL | SCOPING);
+ public static final ElementName MACTION = new ElementName("maction", "maction", TreeBuilder.OTHER);
+ public static final ElementName MSUBSUP = new ElementName("msubsup", "msubsup", TreeBuilder.OTHER);
+ public static final ElementName NOEMBED = new ElementName("noembed", "noembed", TreeBuilder.NOEMBED | SPECIAL);
+ public static final ElementName PICTURE = new ElementName("picture", "picture", TreeBuilder.OTHER);
+ public static final ElementName POLYGON = new ElementName("polygon", "polygon", TreeBuilder.OTHER);
+ public static final ElementName PATTERN = new ElementName("pattern", "pattern", TreeBuilder.OTHER);
+ public static final ElementName PRODUCT = new ElementName("product", "product", TreeBuilder.OTHER);
+ public static final ElementName SETDIFF = new ElementName("setdiff", "setdiff", TreeBuilder.OTHER);
+ public static final ElementName SECTION = new ElementName("section", "section", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName SUMMARY = new ElementName("summary", "summary", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName TENDSTO = new ElementName("tendsto", "tendsto", TreeBuilder.OTHER);
+ public static final ElementName UPLIMIT = new ElementName("uplimit", "uplimit", TreeBuilder.OTHER);
+ public static final ElementName ALTGLYPH = new ElementName("altglyph", "altGlyph", TreeBuilder.OTHER);
+ public static final ElementName BASEFONT = new ElementName("basefont", "basefont", TreeBuilder.LINK_OR_BASEFONT_OR_BGSOUND | SPECIAL);
+ public static final ElementName CLIPPATH = new ElementName("clippath", "clipPath", TreeBuilder.OTHER);
+ public static final ElementName CODOMAIN = new ElementName("codomain", "codomain", TreeBuilder.OTHER);
+ public static final ElementName COLGROUP = new ElementName("colgroup", "colgroup", TreeBuilder.COLGROUP | SPECIAL | OPTIONAL_END_TAG);
+ public static final ElementName EMPTYSET = new ElementName("emptyset", "emptyset", TreeBuilder.OTHER);
+ public static final ElementName FACTOROF = new ElementName("factorof", "factorof", TreeBuilder.OTHER);
+ public static final ElementName FIELDSET = new ElementName("fieldset", "fieldset", TreeBuilder.FIELDSET | SPECIAL);
+ public static final ElementName FRAMESET = new ElementName("frameset", "frameset", TreeBuilder.FRAMESET | SPECIAL);
+ public static final ElementName FEOFFSET = new ElementName("feoffset", "feOffset", TreeBuilder.OTHER);
+ public static final ElementName GLYPHREF = new ElementName("glyphref", "glyphRef", TreeBuilder.OTHER);
+ public static final ElementName INTERVAL = new ElementName("interval", "interval", TreeBuilder.OTHER);
+ public static final ElementName INTEGERS = new ElementName("integers", "integers", TreeBuilder.OTHER);
+ public static final ElementName INFINITY = new ElementName("infinity", "infinity", TreeBuilder.OTHER);
+ public static final ElementName LISTENER = new ElementName("listener", "listener", TreeBuilder.OTHER);
+ public static final ElementName LOWLIMIT = new ElementName("lowlimit", "lowlimit", TreeBuilder.OTHER);
+ public static final ElementName METADATA = new ElementName("metadata", "metadata", TreeBuilder.OTHER);
+ public static final ElementName MENCLOSE = new ElementName("menclose", "menclose", TreeBuilder.OTHER);
+ public static final ElementName MENUITEM = new ElementName("menuitem", "menuitem", TreeBuilder.MENUITEM);
+ public static final ElementName MPHANTOM = new ElementName("mphantom", "mphantom", TreeBuilder.OTHER);
+ public static final ElementName NOFRAMES = new ElementName("noframes", "noframes", TreeBuilder.NOFRAMES | SPECIAL);
+ public static final ElementName NOSCRIPT = new ElementName("noscript", "noscript", TreeBuilder.NOSCRIPT | SPECIAL);
+ public static final ElementName OPTGROUP = new ElementName("optgroup", "optgroup", TreeBuilder.OPTGROUP | OPTIONAL_END_TAG);
+ public static final ElementName POLYLINE = new ElementName("polyline", "polyline", TreeBuilder.OTHER);
+ public static final ElementName PREFETCH = new ElementName("prefetch", "prefetch", TreeBuilder.OTHER);
+ public static final ElementName PROGRESS = new ElementName("progress", "progress", TreeBuilder.OTHER);
+ public static final ElementName PRSUBSET = new ElementName("prsubset", "prsubset", TreeBuilder.OTHER);
+ public static final ElementName QUOTIENT = new ElementName("quotient", "quotient", TreeBuilder.OTHER);
+ public static final ElementName SELECTOR = new ElementName("selector", "selector", TreeBuilder.OTHER);
+ public static final ElementName TEXTAREA = new ElementName("textarea", "textarea", TreeBuilder.TEXTAREA | SPECIAL);
+ public static final ElementName TEMPLATE = new ElementName("template", "template", TreeBuilder.TEMPLATE | SPECIAL | SCOPING);
+ public static final ElementName TEXTPATH = new ElementName("textpath", "textPath", TreeBuilder.OTHER);
+ public static final ElementName VARIANCE = new ElementName("variance", "variance", TreeBuilder.OTHER);
+ public static final ElementName ANIMATION = new ElementName("animation", "animation", TreeBuilder.OTHER);
+ public static final ElementName CONJUGATE = new ElementName("conjugate", "conjugate", TreeBuilder.OTHER);
+ public static final ElementName CONDITION = new ElementName("condition", "condition", TreeBuilder.OTHER);
+ public static final ElementName COMPLEXES = new ElementName("complexes", "complexes", TreeBuilder.OTHER);
+ public static final ElementName FONT_FACE = new ElementName("font-face", "font-face", TreeBuilder.OTHER);
+ public static final ElementName FACTORIAL = new ElementName("factorial", "factorial", TreeBuilder.OTHER);
+ public static final ElementName INTERSECT = new ElementName("intersect", "intersect", TreeBuilder.OTHER);
+ public static final ElementName IMAGINARY = new ElementName("imaginary", "imaginary", TreeBuilder.OTHER);
+ public static final ElementName LAPLACIAN = new ElementName("laplacian", "laplacian", TreeBuilder.OTHER);
+ public static final ElementName MATRIXROW = new ElementName("matrixrow", "matrixrow", TreeBuilder.OTHER);
+ public static final ElementName NOTSUBSET = new ElementName("notsubset", "notsubset", TreeBuilder.OTHER);
+ public static final ElementName OTHERWISE = new ElementName("otherwise", "otherwise", TreeBuilder.OTHER);
+ public static final ElementName PIECEWISE = new ElementName("piecewise", "piecewise", TreeBuilder.OTHER);
+ public static final ElementName PLAINTEXT = new ElementName("plaintext", "plaintext", TreeBuilder.PLAINTEXT | SPECIAL);
+ public static final ElementName RATIONALS = new ElementName("rationals", "rationals", TreeBuilder.OTHER);
+ public static final ElementName SEMANTICS = new ElementName("semantics", "semantics", TreeBuilder.OTHER);
+ public static final ElementName TRANSPOSE = new ElementName("transpose", "transpose", TreeBuilder.OTHER);
+ public static final ElementName ANNOTATION = new ElementName("annotation", "annotation", TreeBuilder.OTHER);
+ public static final ElementName BLOCKQUOTE = new ElementName("blockquote", "blockquote", TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL);
+ public static final ElementName DIVERGENCE = new ElementName("divergence", "divergence", TreeBuilder.OTHER);
+ public static final ElementName EULERGAMMA = new ElementName("eulergamma", "eulergamma", TreeBuilder.OTHER);
+ public static final ElementName EQUIVALENT = new ElementName("equivalent", "equivalent", TreeBuilder.OTHER);
+ public static final ElementName FIGCAPTION = new ElementName("figcaption", "figcaption", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName IMAGINARYI = new ElementName("imaginaryi", "imaginaryi", TreeBuilder.OTHER);
+ public static final ElementName MALIGNMARK = new ElementName("malignmark", "malignmark", TreeBuilder.MGLYPH_OR_MALIGNMARK);
+ public static final ElementName MUNDEROVER = new ElementName("munderover", "munderover", TreeBuilder.OTHER);
+ public static final ElementName MLABELEDTR = new ElementName("mlabeledtr", "mlabeledtr", TreeBuilder.OTHER);
+ public static final ElementName NOTANUMBER = new ElementName("notanumber", "notanumber", TreeBuilder.OTHER);
+ public static final ElementName SOLIDCOLOR = new ElementName("solidcolor", "solidcolor", TreeBuilder.OTHER);
+ public static final ElementName ALTGLYPHDEF = new ElementName("altglyphdef", "altGlyphDef", TreeBuilder.OTHER);
+ public static final ElementName DETERMINANT = new ElementName("determinant", "determinant", TreeBuilder.OTHER);
+ public static final ElementName FEMERGENODE = new ElementName("femergenode", "feMergeNode", TreeBuilder.OTHER);
+ public static final ElementName FECOMPOSITE = new ElementName("fecomposite", "feComposite", TreeBuilder.OTHER);
+ public static final ElementName FESPOTLIGHT = new ElementName("fespotlight", "feSpotLight", TreeBuilder.OTHER);
+ public static final ElementName MALIGNGROUP = new ElementName("maligngroup", "maligngroup", TreeBuilder.OTHER);
+ public static final ElementName MPRESCRIPTS = new ElementName("mprescripts", "mprescripts", TreeBuilder.OTHER);
+ public static final ElementName MOMENTABOUT = new ElementName("momentabout", "momentabout", TreeBuilder.OTHER);
+ public static final ElementName NOTPRSUBSET = new ElementName("notprsubset", "notprsubset", TreeBuilder.OTHER);
+ public static final ElementName PARTIALDIFF = new ElementName("partialdiff", "partialdiff", TreeBuilder.OTHER);
+ public static final ElementName ALTGLYPHITEM = new ElementName("altglyphitem", "altGlyphItem", TreeBuilder.OTHER);
+ public static final ElementName ANIMATECOLOR = new ElementName("animatecolor", "animateColor", TreeBuilder.OTHER);
+ public static final ElementName DATATEMPLATE = new ElementName("datatemplate", "datatemplate", TreeBuilder.OTHER);
+ public static final ElementName EXPONENTIALE = new ElementName("exponentiale", "exponentiale", TreeBuilder.OTHER);
+ public static final ElementName FETURBULENCE = new ElementName("feturbulence", "feTurbulence", TreeBuilder.OTHER);
+ public static final ElementName FEPOINTLIGHT = new ElementName("fepointlight", "fePointLight", TreeBuilder.OTHER);
+ public static final ElementName FEDROPSHADOW = new ElementName("fedropshadow", "feDropShadow", TreeBuilder.OTHER);
+ public static final ElementName FEMORPHOLOGY = new ElementName("femorphology", "feMorphology", TreeBuilder.OTHER);
+ public static final ElementName OUTERPRODUCT = new ElementName("outerproduct", "outerproduct", TreeBuilder.OTHER);
+ public static final ElementName ANIMATEMOTION = new ElementName("animatemotion", "animateMotion", TreeBuilder.OTHER);
+ public static final ElementName COLOR_PROFILE = new ElementName("color-profile", "color-profile", TreeBuilder.OTHER);
+ public static final ElementName FONT_FACE_SRC = new ElementName("font-face-src", "font-face-src", TreeBuilder.OTHER);
+ public static final ElementName FONT_FACE_URI = new ElementName("font-face-uri", "font-face-uri", TreeBuilder.OTHER);
+ public static final ElementName FOREIGNOBJECT = new ElementName("foreignobject", "foreignObject", TreeBuilder.FOREIGNOBJECT_OR_DESC | SCOPING_AS_SVG);
+ public static final ElementName FECOLORMATRIX = new ElementName("fecolormatrix", "feColorMatrix", TreeBuilder.OTHER);
+ public static final ElementName MISSING_GLYPH = new ElementName("missing-glyph", "missing-glyph", TreeBuilder.OTHER);
+ public static final ElementName MMULTISCRIPTS = new ElementName("mmultiscripts", "mmultiscripts", TreeBuilder.OTHER);
+ public static final ElementName SCALARPRODUCT = new ElementName("scalarproduct", "scalarproduct", TreeBuilder.OTHER);
+ public static final ElementName VECTORPRODUCT = new ElementName("vectorproduct", "vectorproduct", TreeBuilder.OTHER);
+ public static final ElementName ANNOTATION_XML = new ElementName("annotation-xml", "annotation-xml", TreeBuilder.ANNOTATION_XML | SCOPING_AS_MATHML);
+ public static final ElementName DEFINITION_SRC = new ElementName("definition-src", "definition-src", TreeBuilder.OTHER);
+ public static final ElementName FONT_FACE_NAME = new ElementName("font-face-name", "font-face-name", TreeBuilder.OTHER);
+ public static final ElementName FEGAUSSIANBLUR = new ElementName("fegaussianblur", "feGaussianBlur", TreeBuilder.OTHER);
+ public static final ElementName FEDISTANTLIGHT = new ElementName("fedistantlight", "feDistantLight", TreeBuilder.OTHER);
+ public static final ElementName LINEARGRADIENT = new ElementName("lineargradient", "linearGradient", TreeBuilder.OTHER);
+ public static final ElementName NATURALNUMBERS = new ElementName("naturalnumbers", "naturalnumbers", TreeBuilder.OTHER);
+ public static final ElementName RADIALGRADIENT = new ElementName("radialgradient", "radialGradient", TreeBuilder.OTHER);
+ public static final ElementName ANIMATETRANSFORM = new ElementName("animatetransform", "animateTransform", TreeBuilder.OTHER);
+ public static final ElementName CARTESIANPRODUCT = new ElementName("cartesianproduct", "cartesianproduct", TreeBuilder.OTHER);
+ public static final ElementName FONT_FACE_FORMAT = new ElementName("font-face-format", "font-face-format", TreeBuilder.OTHER);
+ public static final ElementName FECONVOLVEMATRIX = new ElementName("feconvolvematrix", "feConvolveMatrix", TreeBuilder.OTHER);
+ public static final ElementName FEDIFFUSELIGHTING = new ElementName("fediffuselighting", "feDiffuseLighting", TreeBuilder.OTHER);
+ public static final ElementName FEDISPLACEMENTMAP = new ElementName("fedisplacementmap", "feDisplacementMap", TreeBuilder.OTHER);
+ public static final ElementName FESPECULARLIGHTING = new ElementName("fespecularlighting", "feSpecularLighting", TreeBuilder.OTHER);
+ public static final ElementName DOMAINOFAPPLICATION = new ElementName("domainofapplication", "domainofapplication", TreeBuilder.OTHER);
+ public static final ElementName FECOMPONENTTRANSFER = new ElementName("fecomponenttransfer", "feComponentTransfer", TreeBuilder.OTHER);
+ private final static @NoLength ElementName[] ELEMENT_NAMES = {
+ A,
+ B,
+ G,
+ I,
+ P,
+ Q,
+ S,
+ U,
+ BR,
+ CI,
+ CN,
+ DD,
+ DL,
+ DT,
+ EM,
+ EQ,
+ FN,
+ H1,
+ H2,
+ H3,
+ H4,
+ H5,
+ H6,
+ GT,
+ HR,
+ IN,
+ LI,
+ LN,
+ LT,
+ MI,
+ MN,
+ MO,
+ MS,
+ OL,
+ OR,
+ PI,
+ RB,
+ RP,
+ RT,
+ TD,
+ TH,
+ TR,
+ TT,
+ UL,
+ AND,
+ ARG,
+ ABS,
+ BIG,
+ BDO,
+ CSC,
+ COL,
+ COS,
+ COT,
+ DEL,
+ DFN,
+ DIR,
+ DIV,
+ EXP,
+ GCD,
+ GEQ,
+ IMG,
+ INS,
+ INT,
+ KBD,
+ LOG,
+ LCM,
+ LEQ,
+ MTD,
+ MIN,
+ MAP,
+ MTR,
+ MAX,
+ NEQ,
+ NOT,
+ NAV,
+ PRE,
+ RTC,
+ REM,
+ SUB,
+ SEC,
+ SVG,
+ SUM,
+ SIN,
+ SEP,
+ SUP,
+ SET,
+ TAN,
+ USE,
+ VAR,
+ WBR,
+ XMP,
+ XOR,
+ AREA,
+ ABBR,
+ BASE,
+ BVAR,
+ BODY,
+ CARD,
+ CODE,
+ CITE,
+ CSCH,
+ COSH,
+ COTH,
+ CURL,
+ DESC,
+ DIFF,
+ DEFS,
+ FORM,
+ FONT,
+ GRAD,
+ HEAD,
+ HTML,
+ LINE,
+ LINK,
+ LIST,
+ META,
+ MSUB,
+ MODE,
+ MATH,
+ MARK,
+ MASK,
+ MEAN,
+ MAIN,
+ MSUP,
+ MENU,
+ MROW,
+ NONE,
+ NOBR,
+ NEST,
+ PATH,
+ PLUS,
+ RULE,
+ REAL,
+ RELN,
+ RECT,
+ ROOT,
+ RUBY,
+ SECH,
+ SINH,
+ SPAN,
+ SAMP,
+ STOP,
+ SDEV,
+ TIME,
+ TRUE,
+ TREF,
+ TANH,
+ TEXT,
+ VIEW,
+ ASIDE,
+ AUDIO,
+ APPLY,
+ EMBED,
+ FRAME,
+ FALSE,
+ FLOOR,
+ GLYPH,
+ HKERN,
+ IMAGE,
+ IDENT,
+ INPUT,
+ LABEL,
+ LIMIT,
+ MFRAC,
+ MPATH,
+ METER,
+ MOVER,
+ MINUS,
+ MROOT,
+ MSQRT,
+ MTEXT,
+ NOTIN,
+ PIECE,
+ PARAM,
+ POWER,
+ REALS,
+ STYLE,
+ SMALL,
+ THEAD,
+ TABLE,
+ TITLE,
+ TRACK,
+ TSPAN,
+ TIMES,
+ TFOOT,
+ TBODY,
+ UNION,
+ VKERN,
+ VIDEO,
+ ARCSEC,
+ ARCCSC,
+ ARCTAN,
+ ARCSIN,
+ ARCCOS,
+ APPLET,
+ ARCCOT,
+ APPROX,
+ BUTTON,
+ CIRCLE,
+ CENTER,
+ CURSOR,
+ CANVAS,
+ DIVIDE,
+ DEGREE,
+ DIALOG,
+ DOMAIN,
+ EXISTS,
+ FETILE,
+ FIGURE,
+ FORALL,
+ FILTER,
+ FOOTER,
+ HGROUP,
+ HEADER,
+ IFRAME,
+ KEYGEN,
+ LAMBDA,
+ LEGEND,
+ MSPACE,
+ MTABLE,
+ MSTYLE,
+ MGLYPH,
+ MEDIAN,
+ MUNDER,
+ MARKER,
+ MERROR,
+ MOMENT,
+ MATRIX,
+ OPTION,
+ OBJECT,
+ OUTPUT,
+ PRIMES,
+ SOURCE,
+ STRIKE,
+ STRONG,
+ SWITCH,
+ SYMBOL,
+ SELECT,
+ SUBSET,
+ SCRIPT,
+ TBREAK,
+ VECTOR,
+ ARTICLE,
+ ANIMATE,
+ ARCSECH,
+ ARCCSCH,
+ ARCTANH,
+ ARCSINH,
+ ARCCOSH,
+ ARCCOTH,
+ ACRONYM,
+ ADDRESS,
+ BGSOUND,
+ COMPOSE,
+ CEILING,
+ CSYMBOL,
+ CAPTION,
+ DISCARD,
+ DECLARE,
+ DETAILS,
+ ELLIPSE,
+ FEFUNCA,
+ FEFUNCB,
+ FEBLEND,
+ FEFLOOD,
+ FEIMAGE,
+ FEMERGE,
+ FEFUNCG,
+ FEFUNCR,
+ HANDLER,
+ INVERSE,
+ IMPLIES,
+ ISINDEX,
+ LOGBASE,
+ LISTING,
+ MFENCED,
+ MPADDED,
+ MARQUEE,
+ MACTION,
+ MSUBSUP,
+ NOEMBED,
+ PICTURE,
+ POLYGON,
+ PATTERN,
+ PRODUCT,
+ SETDIFF,
+ SECTION,
+ SUMMARY,
+ TENDSTO,
+ UPLIMIT,
+ ALTGLYPH,
+ BASEFONT,
+ CLIPPATH,
+ CODOMAIN,
+ COLGROUP,
+ EMPTYSET,
+ FACTOROF,
+ FIELDSET,
+ FRAMESET,
+ FEOFFSET,
+ GLYPHREF,
+ INTERVAL,
+ INTEGERS,
+ INFINITY,
+ LISTENER,
+ LOWLIMIT,
+ METADATA,
+ MENCLOSE,
+ MENUITEM,
+ MPHANTOM,
+ NOFRAMES,
+ NOSCRIPT,
+ OPTGROUP,
+ POLYLINE,
+ PREFETCH,
+ PROGRESS,
+ PRSUBSET,
+ QUOTIENT,
+ SELECTOR,
+ TEXTAREA,
+ TEMPLATE,
+ TEXTPATH,
+ VARIANCE,
+ ANIMATION,
+ CONJUGATE,
+ CONDITION,
+ COMPLEXES,
+ FONT_FACE,
+ FACTORIAL,
+ INTERSECT,
+ IMAGINARY,
+ LAPLACIAN,
+ MATRIXROW,
+ NOTSUBSET,
+ OTHERWISE,
+ PIECEWISE,
+ PLAINTEXT,
+ RATIONALS,
+ SEMANTICS,
+ TRANSPOSE,
+ ANNOTATION,
+ BLOCKQUOTE,
+ DIVERGENCE,
+ EULERGAMMA,
+ EQUIVALENT,
+ FIGCAPTION,
+ IMAGINARYI,
+ MALIGNMARK,
+ MUNDEROVER,
+ MLABELEDTR,
+ NOTANUMBER,
+ SOLIDCOLOR,
+ ALTGLYPHDEF,
+ DETERMINANT,
+ FEMERGENODE,
+ FECOMPOSITE,
+ FESPOTLIGHT,
+ MALIGNGROUP,
+ MPRESCRIPTS,
+ MOMENTABOUT,
+ NOTPRSUBSET,
+ PARTIALDIFF,
+ ALTGLYPHITEM,
+ ANIMATECOLOR,
+ DATATEMPLATE,
+ EXPONENTIALE,
+ FETURBULENCE,
+ FEPOINTLIGHT,
+ FEDROPSHADOW,
+ FEMORPHOLOGY,
+ OUTERPRODUCT,
+ ANIMATEMOTION,
+ COLOR_PROFILE,
+ FONT_FACE_SRC,
+ FONT_FACE_URI,
+ FOREIGNOBJECT,
+ FECOLORMATRIX,
+ MISSING_GLYPH,
+ MMULTISCRIPTS,
+ SCALARPRODUCT,
+ VECTORPRODUCT,
+ ANNOTATION_XML,
+ DEFINITION_SRC,
+ FONT_FACE_NAME,
+ FEGAUSSIANBLUR,
+ FEDISTANTLIGHT,
+ LINEARGRADIENT,
+ NATURALNUMBERS,
+ RADIALGRADIENT,
+ ANIMATETRANSFORM,
+ CARTESIANPRODUCT,
+ FONT_FACE_FORMAT,
+ FECONVOLVEMATRIX,
+ FEDIFFUSELIGHTING,
+ FEDISPLACEMENTMAP,
+ FESPECULARLIGHTING,
+ DOMAINOFAPPLICATION,
+ FECOMPONENTTRANSFER,
+ };
+ private final static int[] ELEMENT_HASHES = {
+ 1057,
+ 1090,
+ 1255,
+ 1321,
+ 1552,
+ 1585,
+ 1651,
+ 1717,
+ 68162,
+ 68899,
+ 69059,
+ 69764,
+ 70020,
+ 70276,
+ 71077,
+ 71205,
+ 72134,
+ 72232,
+ 72264,
+ 72296,
+ 72328,
+ 72360,
+ 72392,
+ 73351,
+ 74312,
+ 75209,
+ 78124,
+ 78284,
+ 78476,
+ 79149,
+ 79309,
+ 79341,
+ 79469,
+ 81295,
+ 81487,
+ 82224,
+ 84050,
+ 84498,
+ 84626,
+ 86164,
+ 86292,
+ 86612,
+ 86676,
+ 87445,
+ 3183041,
+ 3186241,
+ 3198017,
+ 3218722,
+ 3226754,
+ 3247715,
+ 3256803,
+ 3263971,
+ 3264995,
+ 3289252,
+ 3291332,
+ 3295524,
+ 3299620,
+ 3326725,
+ 3379303,
+ 3392679,
+ 3448233,
+ 3460553,
+ 3461577,
+ 3510347,
+ 3546604,
+ 3552364,
+ 3556524,
+ 3576461,
+ 3586349,
+ 3588141,
+ 3590797,
+ 3596333,
+ 3622062,
+ 3625454,
+ 3627054,
+ 3675728,
+ 3739282,
+ 3749042,
+ 3771059,
+ 3771571,
+ 3776211,
+ 3782323,
+ 3782963,
+ 3784883,
+ 3785395,
+ 3788979,
+ 3815476,
+ 3839605,
+ 3885110,
+ 3917911,
+ 3948984,
+ 3951096,
+ 135304769,
+ 135858241,
+ 136498210,
+ 136906434,
+ 137138658,
+ 137512995,
+ 137531875,
+ 137548067,
+ 137629283,
+ 137645539,
+ 137646563,
+ 137775779,
+ 138529956,
+ 138615076,
+ 139040932,
+ 140954086,
+ 141179366,
+ 141690439,
+ 142738600,
+ 143013512,
+ 146979116,
+ 147175724,
+ 147475756,
+ 147902637,
+ 147936877,
+ 148017645,
+ 148131885,
+ 148228141,
+ 148229165,
+ 148309165,
+ 148317229,
+ 148395629,
+ 148551853,
+ 148618829,
+ 149076462,
+ 149490158,
+ 149572782,
+ 151277616,
+ 151639440,
+ 153268914,
+ 153486514,
+ 153563314,
+ 153750706,
+ 153763314,
+ 153914034,
+ 154406067,
+ 154417459,
+ 154600979,
+ 154678323,
+ 154680979,
+ 154866835,
+ 155366708,
+ 155375188,
+ 155391572,
+ 155465780,
+ 155869364,
+ 158045494,
+ 168988979,
+ 169321621,
+ 169652752,
+ 173151309,
+ 174240818,
+ 174247297,
+ 174669292,
+ 175391532,
+ 176638123,
+ 177380397,
+ 177879204,
+ 177886734,
+ 180753473,
+ 181020073,
+ 181503558,
+ 181686320,
+ 181999237,
+ 181999311,
+ 182048201,
+ 182074866,
+ 182078003,
+ 182083764,
+ 182920847,
+ 184716457,
+ 184976961,
+ 185145071,
+ 187281445,
+ 187872052,
+ 188100653,
+ 188875944,
+ 188919873,
+ 188920457,
+ 189107250,
+ 189203987,
+ 189371817,
+ 189414886,
+ 189567458,
+ 190266670,
+ 191318187,
+ 191337609,
+ 202479203,
+ 202493027,
+ 202835587,
+ 202843747,
+ 203013219,
+ 203036048,
+ 203045987,
+ 203177552,
+ 203898516,
+ 204648562,
+ 205067918,
+ 205078130,
+ 205096654,
+ 205689142,
+ 205690439,
+ 205766017,
+ 205988909,
+ 207213161,
+ 207794484,
+ 207800999,
+ 208023602,
+ 208213644,
+ 208213647,
+ 210261490,
+ 210310273,
+ 210940978,
+ 213325049,
+ 213946445,
+ 214055079,
+ 215125040,
+ 215134273,
+ 215135028,
+ 215237420,
+ 215418148,
+ 215553166,
+ 215553394,
+ 215563858,
+ 215627949,
+ 215754324,
+ 217529652,
+ 217713834,
+ 217732628,
+ 218731945,
+ 221417045,
+ 221424946,
+ 221493746,
+ 221515401,
+ 221658189,
+ 221908140,
+ 221910626,
+ 221921586,
+ 222659762,
+ 225001091,
+ 236105833,
+ 236113965,
+ 236194995,
+ 236195427,
+ 236206132,
+ 236206387,
+ 236211683,
+ 236212707,
+ 236381647,
+ 236571826,
+ 237124271,
+ 238210544,
+ 238270764,
+ 238435405,
+ 238501172,
+ 239224867,
+ 239257644,
+ 239710497,
+ 240307721,
+ 241208789,
+ 241241557,
+ 241318060,
+ 241319404,
+ 241343533,
+ 241344069,
+ 241405397,
+ 241765845,
+ 243864964,
+ 244502085,
+ 244946220,
+ 245109902,
+ 247647266,
+ 247707956,
+ 248648814,
+ 248648836,
+ 248682161,
+ 248986932,
+ 249058914,
+ 249697357,
+ 251841204,
+ 252132601,
+ 252135604,
+ 252317348,
+ 255007012,
+ 255278388,
+ 255641645,
+ 256365156,
+ 257566121,
+ 269763372,
+ 271202790,
+ 271863856,
+ 272049197,
+ 272127474,
+ 274339449,
+ 274939471,
+ 275388004,
+ 275388005,
+ 275388006,
+ 275977800,
+ 278267602,
+ 278513831,
+ 278712622,
+ 281613765,
+ 281683369,
+ 282120228,
+ 282250732,
+ 282498697,
+ 282508942,
+ 283743649,
+ 283787570,
+ 284710386,
+ 285391148,
+ 285478533,
+ 285854898,
+ 285873762,
+ 286931113,
+ 288964227,
+ 289445441,
+ 289591340,
+ 289689648,
+ 291671489,
+ 303512884,
+ 305319975,
+ 305610036,
+ 305764101,
+ 308448294,
+ 308675890,
+ 312085683,
+ 312264750,
+ 315032867,
+ 316391000,
+ 317331042,
+ 317902135,
+ 318950711,
+ 319447220,
+ 321499182,
+ 322538804,
+ 323145200,
+ 337067316,
+ 337826293,
+ 339905989,
+ 340833697,
+ 341457068,
+ 342310196,
+ 345302593,
+ 349554733,
+ 349771471,
+ 349786245,
+ 350819405,
+ 356072847,
+ 370349192,
+ 373962798,
+ 375558638,
+ 375574835,
+ 376053993,
+ 383276530,
+ 383373833,
+ 383407586,
+ 384439906,
+ 386079012,
+ 404133513,
+ 404307343,
+ 407031852,
+ 408072233,
+ 409112005,
+ 409608425,
+ 409713793,
+ 409771500,
+ 419040932,
+ 437730612,
+ 439529766,
+ 442616365,
+ 442813037,
+ 443157674,
+ 443295316,
+ 450118444,
+ 450482697,
+ 456789668,
+ 459935396,
+ 471217869,
+ 474073645,
+ 476230702,
+ 476665218,
+ 476717289,
+ 483014825,
+ 485083298,
+ 489306281,
+ 538364390,
+ 540675748,
+ 543819186,
+ 543958612,
+ 576960820,
+ 577242548,
+ 610515252,
+ 642202932,
+ 644420819,
+ };
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java
new file mode 100644
index 000000000..f1749e0b3
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java
@@ -0,0 +1,772 @@
+/*
+ * Copyright (c) 2009-2013 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.Inline;
+import nu.validator.htmlparser.annotation.NoLength;
+import nu.validator.htmlparser.common.TokenHandler;
+import nu.validator.htmlparser.common.TransitionHandler;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+
+import java.util.HashMap;
+
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+public class ErrorReportingTokenizer extends Tokenizer {
+
+ /**
+ * Magic value for UTF-16 operations. Adding it to
+ * <code>(high << 10) + low</code> yields the code point of a surrogate
+ * pair (see <code>checkChar</code>).
+ */
+ private static final int SURROGATE_OFFSET = (0x10000 - (0xD800 << 10) - 0xDC00);
+
+ /**
+ * The policy for non-space non-XML characters. Consulted by
+ * <code>checkChar</code>; the initial value is
+ * <code>ALTER_INFOSET</code>.
+ */
+ private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.ALTER_INFOSET;
+
+ /**
+ * Keeps track of PUA warnings: set once the private use warning has been
+ * emitted so that it is emitted only once, and cleared again by
+ * <code>startErrorReporting</code>.
+ */
+ private boolean alreadyWarnedAboutPrivateUseCharacters;
+
+ /**
+ * The current line number in the current resource being parsed. (First line
+ * is 1.) Passed on as locator data.
+ */
+ private int line;
+
+ /**
+ * The value <code>line</code> had before the most recent
+ * <code>checkChar</code> call updated it. <code>flushChars</code>
+ * temporarily installs it as <code>line</code> while characters are
+ * delivered to the token handler.
+ */
+ private int linePrev;
+
+ /**
+ * The current column number in the current resource being tokenized. (First
+ * column is 1, counted by UTF-16 code units.) Passed on as locator data.
+ */
+ private int col;
+
+ /**
+ * The value <code>col</code> had before the most recent
+ * <code>checkChar</code> call updated it; used by <code>flushChars</code>
+ * in the same way as <code>linePrev</code>.
+ */
+ private int colPrev;
+
+ /**
+ * When <code>true</code>, the next <code>checkChar</code> call increments
+ * <code>line</code> and resets <code>col</code> to 1 instead of
+ * incrementing <code>col</code>.
+ */
+ private boolean nextCharOnNewLine;
+
+ /**
+ * The character returned by the previous <code>checkChar</code> call; used
+ * to recognize a high surrogate preceding a low surrogate.
+ */
+ private char prev;
+
+ /**
+ * Maps a profile name to a reporting level (<code>"warn"</code> or
+ * <code>"err"</code> are the levels acted upon by <code>note</code>).
+ * <code>null</code> makes <code>note</code> a no-op.
+ */
+ private HashMap<String, String> errorProfileMap = null;
+
+ /**
+ * Receiver of state transition notifications from
+ * <code>transition</code>; nothing is notified while this is
+ * <code>null</code>.
+ */
+ private TransitionHandler transitionHandler = null;
+
+ /**
+ * Offset added to the buffer position when a transition is reported to the
+ * transition handler. Reset to 0 by <code>startErrorReporting</code>.
+ */
+ private int transitionBaseOffset = 0;
+
+ /**
+ * Creates an error-reporting tokenizer by passing both arguments straight
+ * to the <code>Tokenizer</code> constructor.
+ *
+ * @param tokenHandler
+ * the token handler, forwarded to the superclass
+ * @param newAttributesEachTime
+ * forwarded to the superclass unchanged
+ */
+ public ErrorReportingTokenizer(TokenHandler tokenHandler,
+ boolean newAttributesEachTime) {
+ super(tokenHandler, newAttributesEachTime);
+ }
+
+ /**
+ * Creates an error-reporting tokenizer by passing the handler straight to
+ * the single-argument <code>Tokenizer</code> constructor.
+ *
+ * @param tokenHandler
+ * the token handler, forwarded to the superclass
+ */
+ public ErrorReportingTokenizer(TokenHandler tokenHandler) {
+ super(tokenHandler);
+ }
+
+ /**
+ * Reports the current line for locator purposes.
+ *
+ * @return the current line number when it is positive, otherwise
+ * <code>-1</code>
+ * @see org.xml.sax.Locator#getLineNumber()
+ */
+ public int getLineNumber() {
+ return (line > 0) ? line : -1;
+ }
+
+ /**
+ * Reports the current column for locator purposes.
+ *
+ * @return the current column number when it is positive, otherwise
+ * <code>-1</code>
+ * @see org.xml.sax.Locator#getColumnNumber()
+ */
+ public int getColumnNumber() {
+ return (col > 0) ? col : -1;
+ }
+
+ /**
+ * Sets the contentNonXmlCharPolicy, i.e. the policy that
+ * <code>checkChar</code> consults when it sees a form feed or a forbidden
+ * code point.
+ *
+ * @param contentNonXmlCharPolicy
+ * the contentNonXmlCharPolicy to set
+ */
+ public void setContentNonXmlCharPolicy(
+ XmlViolationPolicy contentNonXmlCharPolicy) {
+ this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
+ }
+
+ /**
+ * Sets the error profile map used by <code>note</code>.
+ *
+ * @param errorProfileMap
+ * map from profile name to reporting level; <code>null</code>
+ * makes <code>note</code> do nothing
+ */
+ public void setErrorProfile(HashMap<String, String> errorProfileMap) {
+ this.errorProfileMap = errorProfileMap;
+ }
+
+ /**
+ * Reports a message at the level that the error profile map assigns to the
+ * given profile. Nothing happens when no map has been set or when the
+ * level is neither <code>"warn"</code> nor <code>"err"</code>.
+ *
+ * @param profile
+ * the profile this message belongs to
+ * @param message
+ * the message itself
+ * @throws SAXException
+ * if reporting the warning or error throws
+ */
+ public void note(String profile, String message) throws SAXException {
+ if (errorProfileMap != null) {
+ final String level = errorProfileMap.get(profile);
+ if ("err".equals(level)) {
+ err(message);
+ } else if ("warn".equals(level)) {
+ warn(message);
+ }
+ // Every other level (for example "info") and every unmapped profile
+ // is deliberately ignored.
+ }
+ }
+
+ /**
+ * Resets the position tracking and warning state of this tokenizer:
+ * lines to 0, columns to 1, the next character pending on a new line, no
+ * previous character, no private use warning issued, and a transition
+ * base offset of 0.
+ *
+ * @throws SAXException
+ * declared by the signature; nothing in this body throws it
+ */
+ protected void startErrorReporting() throws SAXException {
+ alreadyWarnedAboutPrivateUseCharacters = false;
+ transitionBaseOffset = 0;
+ prev = '\u0000';
+ nextCharOnNewLine = true;
+ linePrev = 0;
+ line = 0;
+ colPrev = 1;
+ col = 1;
+ }
+
+ /**
+ * Notes a carriage return without reporting anything: remembers that the
+ * last character was a CR and that the next checked character starts a
+ * new line.
+ */
+ @Inline protected void silentCarriageReturn() {
+ lastCR = true;
+ nextCharOnNewLine = true;
+ }
+
+ /**
+ * Notes a line feed without reporting anything: the next checked
+ * character is counted as the first one on a new line.
+ */
+ @Inline protected void silentLineFeed() {
+ nextCharOnNewLine = true;
+ }
+
+ /**
+ * Returns the raw line counter. Unlike <code>getLineNumber</code>, a
+ * non-positive value is returned as is rather than replaced by -1.
+ *
+ * @return the line
+ */
+ public int getLine() {
+ return line;
+ }
+
+ /**
+ * Returns the raw column counter. Unlike <code>getColumnNumber</code>, a
+ * non-positive value is returned as is rather than replaced by -1.
+ *
+ * @return the col
+ */
+ public int getCol() {
+ return col;
+ }
+
+ /**
+ * Tells whether the next character to be checked will be counted as the
+ * first character of a new line.
+ *
+ * @return the nextCharOnNewLine
+ */
+ public boolean isNextCharOnNewLine() {
+ return nextCharOnNewLine;
+ }
+
+ /**
+ * Flushes coalesced character tokens. When <code>pos</code> lies beyond
+ * <code>cstart</code>, the characters from <code>cstart</code> up to (but
+ * not including) <code>pos</code> are handed to the token handler. For the
+ * duration of that call the line and column are set to the values they
+ * had before the most recently checked character; they are put back
+ * afterwards. In every case <code>cstart</code> is then set to the largest
+ * <code>int</code> value.
+ *
+ * @param buf
+ * the buffer holding the characters
+ * @param pos
+ * the index just past the last character to flush
+ *
+ * @throws SAXException
+ * if the token handler throws
+ */
+ @Override protected void flushChars(char[] buf, int pos)
+ throws SAXException {
+ if (cstart < pos) {
+ final int savedLine = line;
+ final int savedCol = col;
+ line = linePrev;
+ col = colPrev;
+ tokenHandler.characters(buf, cstart, pos - cstart);
+ col = savedCol;
+ line = savedLine;
+ }
+ cstart = Integer.MAX_VALUE;
+ }
+
+ /**
+ * Advances the line/column bookkeeping by one UTF-16 code unit and
+ * inspects the code unit at <code>buf[pos]</code>, reporting characters
+ * that are problematic.
+ *
+ * <ul>
+ * <li>U+0000 is reported as an error; tab, CR and LF pass silently.</li>
+ * <li>Form feed is fatal under the <code>FATAL</code> policy; otherwise a
+ * warning is issued and, under <code>ALTER_INFOSET</code>, the form feed
+ * is replaced by a space in the buffer.</li>
+ * <li>A low surrogate preceded by a high surrogate is combined into a
+ * code point, which is checked for being a non-character or an astral
+ * private use character.</li>
+ * <li>Other code units below U+0020 and code units U+FFFE/U+FFFF are
+ * handled according to the policy; under <code>ALTER_INFOSET</code> they
+ * are replaced by U+FFFD in the buffer.</li>
+ * <li>U+007F..U+009F and U+FDD0..U+FDEF are reported as errors, and BMP
+ * private use characters trigger the one-time private use warning.</li>
+ * </ul>
+ *
+ * Diagnostics always name the code unit that was actually read, even when
+ * the policy replaces it in the buffer.
+ *
+ * @param buf
+ * the buffer being tokenized; may be modified at
+ * <code>pos</code>
+ * @param pos
+ * the index of the code unit to check
+ * @return the code unit, or its replacement if it was altered
+ * @throws SAXException
+ * if reporting throws
+ */
+ @Override protected char checkChar(@NoLength char[] buf, int pos)
+ throws SAXException {
+ linePrev = line;
+ colPrev = col;
+ if (nextCharOnNewLine) {
+ line++;
+ col = 1;
+ nextCharOnNewLine = false;
+ } else {
+ col++;
+ }
+
+ char c = buf[pos];
+ // Remember what was read: c may be overwritten by a replacement below,
+ // but the messages must still identify the offending character.
+ final char original = c;
+ switch (c) {
+ case '\u0000':
+ err("Saw U+0000 in stream.");
+ // fall through
+ case '\t':
+ case '\r':
+ case '\n':
+ break;
+ case '\u000C':
+ if (contentNonXmlCharPolicy == XmlViolationPolicy.FATAL) {
+ fatal("This document is not mappable to XML 1.0 without data loss due to "
+ + toUPlusString(original)
+ + " which is not a legal XML 1.0 character.");
+ } else {
+ if (contentNonXmlCharPolicy == XmlViolationPolicy.ALTER_INFOSET) {
+ c = buf[pos] = ' ';
+ }
+ warn("This document is not mappable to XML 1.0 without data loss due to "
+ + toUPlusString(original)
+ + " which is not a legal XML 1.0 character.");
+ }
+ break;
+ default:
+ if ((c & 0xFC00) == 0xDC00) {
+ // Got a low surrogate. See if prev was high
+ // surrogate
+ if ((prev & 0xFC00) == 0xD800) {
+ int intVal = (prev << 10) + c + SURROGATE_OFFSET;
+ if ((intVal & 0xFFFE) == 0xFFFE) {
+ err("Astral non-character.");
+ }
+ if (isAstralPrivateUse(intVal)) {
+ warnAboutPrivateUseChar();
+ }
+ }
+ } else if ((c < ' ' || ((c & 0xFFFE) == 0xFFFE))) {
+ switch (contentNonXmlCharPolicy) {
+ case FATAL:
+ fatal("Forbidden code point " + toUPlusString(original)
+ + ".");
+ break;
+ case ALTER_INFOSET:
+ c = buf[pos] = '\uFFFD';
+ // fall through
+ case ALLOW:
+ err("Forbidden code point " + toUPlusString(original)
+ + ".");
+ }
+ } else if ((c >= '\u007F') && (c <= '\u009F')
+ || (c >= '\uFDD0') && (c <= '\uFDEF')) {
+ err("Forbidden code point " + toUPlusString(original) + ".");
+ } else if (isPrivateUse(c)) {
+ warnAboutPrivateUseChar();
+ }
+ }
+ prev = c;
+ return c;
+ }
+
+ /**
+ * Notifies the transition handler, if one is present, of a state
+ * transition. The position passed on is <code>pos</code> shifted by the
+ * transition base offset.
+ *
+ * @return <code>to</code>, unchanged
+ * @throws SAXException
+ * if the transition handler throws
+ * @see nu.validator.htmlparser.impl.Tokenizer#transition(int, int, boolean,
+ * int)
+ */
+ @Override protected int transition(int from, int to, boolean reconsume,
+ int pos) throws SAXException {
+ if (transitionHandler == null) {
+ return to;
+ }
+ final int offset = transitionBaseOffset + pos;
+ transitionHandler.transition(from, to, reconsume, offset);
+ return to;
+ }
+
+ /**
+ * Formats a code point as <code>U+</code> followed by its hexadecimal
+ * value as produced by <code>Integer.toHexString</code>, left-padded with
+ * zeros to at least four digits.
+ *
+ * @param c
+ * the value to format
+ * @return the formatted string
+ */
+ private String toUPlusString(int c) {
+ final String hex = Integer.toHexString(c);
+ final StringBuilder out = new StringBuilder("U+");
+ for (int digits = hex.length(); digits < 4; digits++) {
+ out.append('0');
+ }
+ return out.append(hex).toString();
+ }
+
+ /**
+ * Warns about use of the Unicode Private Use Area(s), unless that warning
+ * has already been issued. The flag is set only after the warning has
+ * been emitted.
+ *
+ * @throws SAXException
+ * if emitting the warning throws
+ */
+ private void warnAboutPrivateUseChar() throws SAXException {
+ if (alreadyWarnedAboutPrivateUseCharacters) {
+ return;
+ }
+ warn("Document uses the Unicode Private Use Area(s), which should not be used in publicly exchanged documents. (Charmod C073)");
+ alreadyWarnedAboutPrivateUseCharacters = true;
+ }
+
+ /**
+ * Checks whether a UTF-16 code unit lies in the range U+E000..U+F8FF
+ * (inclusive), i.e. is a BMP PUA character.
+ *
+ * @param c
+ * the UTF-16 code unit to check
+ * @return <code>true</code> if PUA character
+ */
+ private boolean isPrivateUse(char c) {
+ return '\uE000' <= c && c <= '\uF8FF';
+ }
+
+ /**
+ * Checks whether a code point is an astral PUA character, i.e. lies in
+ * U+F0000..U+FFFFD or in U+100000..U+10FFFD (both inclusive).
+ *
+ * @param c
+ * the code point to check
+ * @return <code>true</code> if astral private use
+ */
+ private boolean isAstralPrivateUse(int c) {
+ final boolean inFirstRange = 0xF0000 <= c && c <= 0xFFFFD;
+ final boolean inSecondRange = 0x100000 <= c && c <= 0x10FFFD;
+ return inFirstRange || inSecondRange;
+ }
+
+ /**
+ * Reports an error about garbage following <code>&lt;/</code>.
+ *
+ * @throws SAXException
+ * if reporting the error throws
+ */
+ @Override protected void errGarbageAfterLtSlash() throws SAXException {
+ err("Garbage after \u201C</\u201D.");
+ }
+
+ /**
+ * Reports an error about the sequence <code>&lt;/&gt;</code>, naming the
+ * probable causes in the message.
+ *
+ * @throws SAXException
+ * if reporting the error throws
+ */
+ @Override protected void errLtSlashGt() throws SAXException {
+ err("Saw \u201C</>\u201D. Probable causes: Unescaped \u201C<\u201D (escape as \u201C&lt;\u201D) or mistyped end tag.");
+ }
+
+ /**
+ * Reports that a CDATA or RCDATA element contained <code>&lt;/</code>
+ * that did not end the element: as an error when <code>html4</code> is
+ * set, otherwise as a warning. The element kind in the message is "CDATA"
+ * when the saved state is <code>Tokenizer.DATA</code> and "RCDATA"
+ * otherwise.
+ *
+ * The message uses the name of the expected end tag element
+ * (<code>endTagExpectation.name</code>), as
+ * <code>errHtml4LtSlashInRcdata</code> does, rather than the string
+ * conversion of the <code>ElementName</code> object itself.
+ *
+ * @throws SAXException
+ * if reporting throws
+ */
+ @Override protected void errWarnLtSlashInRcdata() throws SAXException {
+ String kind = (stateSave == Tokenizer.DATA) ? "CDATA" : "RCDATA";
+ // Keep the old tolerance of a missing expectation: concatenating a null
+ // reference yields "null" instead of throwing.
+ String expectedName = (endTagExpectation == null) ? null
+ : endTagExpectation.name;
+ if (html4) {
+ err(kind
+ + " element \u201C"
+ + expectedName
+ + "\u201D contained the string \u201C</\u201D, but it was not the start of the end tag. (HTML4-only error)");
+ } else {
+ warn(kind
+ + " element \u201C"
+ + expectedName
+ + "\u201D contained the string \u201C</\u201D, but this did not close the element.");
+ }
+ }
+
+ /**
+ * Reports the HTML4-only error about <code>&lt;/</code> inside a CDATA or
+ * RCDATA element. The error is raised only when <code>html4</code> is set,
+ * the expected end tag is not <code>iframe</code>, and either
+ * <code>index</code> is positive or <code>folded</code> is a lowercase
+ * ASCII letter.
+ *
+ * @param folded
+ * the character to test for being in the range a..z
+ * @throws SAXException
+ * if reporting the error throws
+ */
+ @Override protected void errHtml4LtSlashInRcdata(char folded)
+ throws SAXException {
+ if (!html4) {
+ return;
+ }
+ if (ElementName.IFRAME == endTagExpectation) {
+ return;
+ }
+ final boolean lowerCaseLetter = 'a' <= folded && folded <= 'z';
+ if (index <= 0 && !lowerCaseLetter) {
+ return;
+ }
+ final String kind = (stateSave == Tokenizer.DATA) ? "CDATA" : "RCDATA";
+ err(kind
+ + " element \u201C"
+ + endTagExpectation.name
+ + "\u201D contained the string \u201C</\u201D, but it was not the start of the end tag. (HTML4-only error)");
+ }
+
+ /**
+ * Reports an error about a character reference that was not terminated by
+ * a semicolon.
+ *
+ * @throws SAXException
+ * if reporting the error throws
+ */
+ @Override protected void errCharRefLacksSemicolon() throws SAXException {
+ err("Character reference was not terminated by a semicolon.");
+ }
+
+ /**
+  * Reports a numeric character reference without digits; the message quotes
+  * the result of <code>strBufToString()</code>.
+  */
+ @Override
+ protected void errNoDigitsInNCR() throws SAXException {
+     String message = "No digits after \u201C" + strBufToString() + "\u201D.";
+     err(message);
+ }
+
+ /** Reports a "&gt;" inside a system identifier. */
+ @Override
+ protected void errGtInSystemId() throws SAXException {
+     err("\u201C>\u201D in system identifier.");
+ }
+
+ /** Reports a "&gt;" inside a public identifier. */
+ @Override
+ protected void errGtInPublicId() throws SAXException {
+     err("\u201C>\u201D in public identifier.");
+ }
+
+ /** Reports a doctype without a name. */
+ @Override
+ protected void errNamelessDoctype() throws SAXException {
+     err("Nameless doctype.");
+ }
+
+ /** Reports consecutive hyphens inside a comment that did not end it. */
+ @Override
+ protected void errConsecutiveHyphens() throws SAXException {
+     err("Consecutive hyphens did not terminate a comment. \u201C--\u201D is not permitted inside a comment, but e.g. \u201C- -\u201D is.");
+ }
+
+ /** Reports a comment that ended prematurely. */
+ @Override
+ protected void errPrematureEndOfComment() throws SAXException {
+     err("Premature end of comment. Use \u201C-->\u201D to end a comment properly.");
+ }
+
+ /** Reports a bogus comment. */
+ @Override
+ protected void errBogusComment() throws SAXException {
+     err("Bogus comment.");
+ }
+
+ /**
+  * Reports a problematic character in an unquoted attribute value. "&lt;"
+  * and "`" get dedicated messages, U+FFFD is silently ignored, and any other
+  * character is quoted in a generic message.
+  *
+  * @param c
+  *            the offending character
+  * @throws SAXException
+  *             propagated from <code>err</code>
+  */
+ @Override
+ protected void errUnquotedAttributeValOrNull(char c) throws SAXException {
+     if (c == '\uFFFD') {
+         return;
+     }
+     if (c == '<') {
+         err("\u201C<\u201D in an unquoted attribute value. Probable cause: Missing \u201C>\u201D immediately before.");
+     } else if (c == '`') {
+         err("\u201C`\u201D in an unquoted attribute value. Probable cause: Using the wrong character as a quote.");
+     } else {
+         err("\u201C"
+                 + c
+                 + "\u201D in an unquoted attribute value. Probable causes: Attributes running together or a URL query string in an unquoted attribute value.");
+     }
+ }
+
+ /** Reports a slash that was not immediately followed by "&gt;". */
+ @Override
+ protected void errSlashNotFollowedByGt() throws SAXException {
+     err("A slash was not immediately followed by \u201C>\u201D.");
+ }
+
+ /**
+  * Reports the "/&gt;" syntax on a void element; does nothing unless
+  * <code>html4</code> is set.
+  */
+ @Override
+ protected void errHtml4XmlVoidSyntax() throws SAXException {
+     if (!html4) {
+         return;
+     }
+     err("The \u201C/>\u201D syntax on void elements is not allowed. (This is an HTML4-only error.)");
+ }
+
+ /** Reports attributes that are not separated by space. */
+ @Override
+ protected void errNoSpaceBetweenAttributes() throws SAXException {
+     err("No space between attributes.");
+ }
+
+ /**
+  * HTML4-only check: reports an error when <code>c</code> is not one of the
+  * characters a-z, A-Z, 0-9, ".", "-", "_" or ":". Does nothing unless
+  * <code>html4</code> is set.
+  *
+  * @param c
+  *            the character seen in the unquoted attribute value
+  * @throws SAXException
+  *             propagated from <code>err</code>
+  */
+ @Override
+ protected void errHtml4NonNameInUnquotedAttribute(char c)
+         throws SAXException {
+     if (!html4) {
+         return;
+     }
+     boolean asciiLetter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+     boolean asciiDigit = c >= '0' && c <= '9';
+     boolean namePunctuation = c == '.' || c == '-' || c == '_' || c == ':';
+     if (asciiLetter || asciiDigit || namePunctuation) {
+         return;
+     }
+     err("Non-name character in an unquoted attribute value. (This is an HTML4-only error.)");
+ }
+
+ /**
+  * Reports "=", "&lt;" or "`" at the start of an unquoted attribute value.
+  * Any other character is ignored.
+  *
+  * @param c
+  *            the character at the start of the value
+  * @throws SAXException
+  *             propagated from <code>err</code>
+  */
+ @Override
+ protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c)
+         throws SAXException {
+     if (c == '=') {
+         err("\u201C=\u201D at the start of an unquoted attribute value. Probable cause: Stray duplicate equals sign.");
+     } else if (c == '<') {
+         err("\u201C<\u201D at the start of an unquoted attribute value. Probable cause: Missing \u201C>\u201D immediately before.");
+     } else if (c == '`') {
+         err("\u201C`\u201D at the start of an unquoted attribute value. Probable cause: Using the wrong character as a quote.");
+     }
+ }
+
+ /** Reports a missing attribute value. */
+ @Override
+ protected void errAttributeValueMissing() throws SAXException {
+     err("Attribute value missing.");
+ }
+
+ /**
+  * Reports an unexpected character where an attribute name was expected.
+  * "&lt;" gets its own message, "=" is delegated to
+  * <code>errEqualsSignBeforeAttributeName()</code>, U+FFFD is ignored and
+  * every other character is delegated to
+  * <code>errQuoteBeforeAttributeName(char)</code>.
+  *
+  * @param c
+  *            the unexpected character
+  * @throws SAXException
+  *             propagated from the reporting methods
+  */
+ @Override
+ protected void errBadCharBeforeAttributeNameOrNull(char c)
+         throws SAXException {
+     switch (c) {
+         case '<':
+             err("Saw \u201C<\u201D when expecting an attribute name. Probable cause: Missing \u201C>\u201D immediately before.");
+             break;
+         case '=':
+             errEqualsSignBeforeAttributeName();
+             break;
+         case '\uFFFD':
+             break;
+         default:
+             errQuoteBeforeAttributeName(c);
+             break;
+     }
+ }
+
+ /** Reports an "=" seen where an attribute name was expected. */
+ @Override
+ protected void errEqualsSignBeforeAttributeName() throws SAXException {
+     err("Saw \u201C=\u201D when expecting an attribute name. Probable cause: Attribute name missing.");
+ }
+
+ /**
+  * Reports a bad character following "&lt;".
+  *
+  * @param c
+  *            the character quoted in the message
+  * @throws SAXException
+  *             propagated from <code>err</code>
+  */
+ @Override
+ protected void errBadCharAfterLt(char c) throws SAXException {
+     String message = "Bad character \u201C"
+             + c
+             + "\u201D after \u201C<\u201D. Probable cause: Unescaped \u201C<\u201D. Try escaping it as \u201C&lt;\u201D.";
+     err(message);
+ }
+
+ @Override protected void errLtGt() throws SAXException {
+ err("Saw \u201C<>\u201D. Probable causes: Unescaped \u201C<\u201D (escape as \u201C&lt;\u201D) or mistyped start tag.");
+ }
+
+ /** Reports a "&lt;?" sequence, i.e. a probable XML processing instruction. */
+ @Override
+ protected void errProcessingInstruction() throws SAXException {
+     err("Saw \u201C<?\u201D. Probable cause: Attempt to use an XML processing instruction in HTML. (XML processing instructions are not supported in HTML.)");
+ }
+
+ /**
+  * Reports that the text after "&amp;" was interpreted as a character
+  * reference. The error is sent directly to <code>errorHandler</code> with
+  * <code>ampersandLocation</code> as its location; nothing happens when no
+  * error handler is set.
+  *
+  * @throws SAXException
+  *             propagated from the error handler
+  */
+ @Override
+ protected void errUnescapedAmpersandInterpretedAsCharacterReference()
+         throws SAXException {
+     if (errorHandler != null) {
+         errorHandler.error(new SAXParseException(
+                 "The string following \u201C&\u201D was interpreted as a character reference. (\u201C&\u201D probably should have been escaped as \u201C&amp;\u201D.)",
+                 ampersandLocation));
+     }
+ }
+
+ /** Reports a named character reference that lacks its terminating semicolon. */
+ @Override
+ protected void errNotSemicolonTerminated() throws SAXException {
+     err("Named character reference was not terminated by a semicolon. (Or \u201C&\u201D should have been escaped as \u201C&amp;\u201D.)");
+ }
+
+ /**
+  * Reports an "&amp;" that did not start a character reference. The error
+  * is sent directly to <code>errorHandler</code> with
+  * <code>ampersandLocation</code> as its location; nothing happens when no
+  * error handler is set.
+  *
+  * @throws SAXException
+  *             propagated from the error handler
+  */
+ @Override
+ protected void errNoNamedCharacterMatch() throws SAXException {
+     if (errorHandler != null) {
+         errorHandler.error(new SAXParseException(
+                 "\u201C&\u201D did not start a character reference. (\u201C&\u201D probably should have been escaped as \u201C&amp;\u201D.)",
+                 ampersandLocation));
+     }
+ }
+
+ /**
+  * Reports a character seen where an attribute name was expected.
+  *
+  * @param c
+  *            the character quoted in the message
+  * @throws SAXException
+  *             propagated from <code>err</code>
+  */
+ @Override
+ protected void errQuoteBeforeAttributeName(char c) throws SAXException {
+     String message = "Saw \u201C"
+             + c
+             + "\u201D when expecting an attribute name. Probable cause: \u201C=\u201D missing immediately before.";
+     err(message);
+ }
+
+ /**
+  * Reports a "&lt;" or another unexpected character inside an attribute
+  * name. U+FFFD is ignored.
+  *
+  * @param c
+  *            the offending character
+  * @throws SAXException
+  *             propagated from <code>err</code>
+  */
+ @Override
+ protected void errQuoteOrLtInAttributeNameOrNull(char c)
+         throws SAXException {
+     if (c == '\uFFFD') {
+         return;
+     }
+     if (c == '<') {
+         err("\u201C<\u201D in attribute name. Probable cause: \u201C>\u201D missing immediately before.");
+         return;
+     }
+     err("Quote \u201C"
+             + c
+             + "\u201D in attribute name. Probable cause: Matching quote missing somewhere earlier.");
+ }
+
+ /** Reports a doctype that ended where a public identifier was expected. */
+ @Override
+ protected void errExpectedPublicId() throws SAXException {
+     err("Expected a public identifier but the doctype ended.");
+ }
+
+ /** Reports a bogus doctype. */
+ @Override
+ protected void errBogusDoctype() throws SAXException {
+     err("Bogus doctype.");
+ }
+
+ /**
+  * Calls <code>warnAboutPrivateUseChar()</code> when an error handler is set
+  * and the <code>value</code> field is an astral private use code point.
+  */
+ @Override
+ protected void maybeWarnPrivateUseAstral() throws SAXException {
+     if (errorHandler == null) {
+         return;
+     }
+     if (isAstralPrivateUse(value)) {
+         warnAboutPrivateUseChar();
+     }
+ }
+
+ /**
+  * Calls <code>warnAboutPrivateUseChar()</code> when an error handler is set
+  * and <code>ch</code> is a BMP private use character.
+  *
+  * @param ch
+  *            the UTF-16 code unit to check
+  */
+ @Override
+ protected void maybeWarnPrivateUse(char ch) throws SAXException {
+     if (errorHandler == null) {
+         return;
+     }
+     if (isPrivateUse(ch)) {
+         warnAboutPrivateUseChar();
+     }
+ }
+
+ /**
+  * Reports an error when the attribute holder of an end tag is not empty.
+  *
+  * @param attrs
+  *            the attributes collected for the end tag
+  * @throws SAXException
+  *             propagated from <code>err</code>
+  */
+ @Override
+ protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs)
+         throws SAXException {
+     if (attrs.getLength() == 0) {
+         return;
+     }
+     // An end tag token emitted with attributes is a parse error.
+     err("End tag had attributes.");
+ }
+
+ /**
+  * Reports a stray "/" when <code>selfClosing</code> is true and the
+  * <code>endTag</code> field is set.
+  *
+  * @param selfClosing
+  *            whether the self-closing flag was seen
+  * @throws SAXException
+  *             propagated from <code>err</code>
+  */
+ @Override
+ protected void maybeErrSlashInEndTag(boolean selfClosing)
+         throws SAXException {
+     if (!selfClosing || !endTag) {
+         return;
+     }
+     err("Stray \u201C/\u201D at the end of an end tag.");
+ }
+
+ /**
+  * Reports a character reference that expands to a non-character, following
+  * <code>contentNonXmlCharPolicy</code>: FATAL reports through
+  * <code>fatal</code>, ALLOW reports through <code>err</code>, and
+  * ALTER_INFOSET reports through <code>err</code> and substitutes U+FFFD.
+  * The message shows the <code>value</code> field narrowed to
+  * <code>char</code>.
+  *
+  * @param ch
+  *            the character the reference expanded to
+  * @return U+FFFD under ALTER_INFOSET, otherwise <code>ch</code> unchanged
+  * @throws SAXException
+  *             propagated from <code>fatal</code> or <code>err</code>
+  */
+ @Override
+ protected char errNcrNonCharacter(char ch) throws SAXException {
+     switch (contentNonXmlCharPolicy) {
+         case FATAL:
+             fatal("Character reference expands to a non-character ("
+                     + toUPlusString((char) value) + ").");
+             return ch;
+         case ALTER_INFOSET:
+             err("Character reference expands to a non-character ("
+                     + toUPlusString((char) value) + ").");
+             return '\uFFFD';
+         case ALLOW:
+             err("Character reference expands to a non-character ("
+                     + toUPlusString((char) value) + ").");
+             return ch;
+         default:
+             return ch;
+     }
+ }
+
+ /**
+  * Reports a character reference that expands to an astral non-character.
+  * The message is built from the <code>value</code> field; the
+  * <code>ch</code> argument is not read.
+  *
+  * @param ch
+  *            the astral code point (unused here)
+  * @see nu.validator.htmlparser.impl.Tokenizer#errAstralNonCharacter(int)
+  */
+ @Override
+ protected void errAstralNonCharacter(int ch) throws SAXException {
+     String message = "Character reference expands to an astral non-character ("
+             + toUPlusString(value) + ").";
+     err(message);
+ }
+
+ /** Reports a character reference that expands to a surrogate. */
+ @Override
+ protected void errNcrSurrogate() throws SAXException {
+     err("Character reference expands to a surrogate.");
+ }
+
+ /**
+  * Reports a character reference that expands to a control character,
+  * following <code>contentNonXmlCharPolicy</code>: FATAL reports through
+  * <code>fatal</code>, ALLOW reports through <code>err</code>, and
+  * ALTER_INFOSET reports through <code>err</code> and substitutes U+FFFD.
+  * The message shows the <code>value</code> field narrowed to
+  * <code>char</code>.
+  *
+  * @param ch
+  *            the character the reference expanded to
+  * @return U+FFFD under ALTER_INFOSET, otherwise <code>ch</code> unchanged
+  * @throws SAXException
+  *             propagated from <code>fatal</code> or <code>err</code>
+  */
+ @Override
+ protected char errNcrControlChar(char ch) throws SAXException {
+     switch (contentNonXmlCharPolicy) {
+         case FATAL:
+             fatal("Character reference expands to a control character ("
+                     + toUPlusString((char) value) + ").");
+             return ch;
+         case ALTER_INFOSET:
+             err("Character reference expands to a control character ("
+                     + toUPlusString((char) value) + ").");
+             return '\uFFFD';
+         case ALLOW:
+             err("Character reference expands to a control character ("
+                     + toUPlusString((char) value) + ").");
+             return ch;
+         default:
+             return ch;
+     }
+ }
+
+ /** Reports a numeric character reference that expanded to carriage return. */
+ @Override
+ protected void errNcrCr() throws SAXException {
+     err("A numeric character reference expanded to carriage return.");
+ }
+
+ /** Reports a numeric character reference that expanded into the C1 controls range. */
+ @Override
+ protected void errNcrInC1Range() throws SAXException {
+     err("A numeric character reference expanded to the C1 controls range.");
+ }
+
+ /** Reports end of file inside a public identifier. */
+ @Override
+ protected void errEofInPublicId() throws SAXException {
+     err("End of file inside public identifier.");
+ }
+
+ /** Reports end of file inside a comment. */
+ @Override
+ protected void errEofInComment() throws SAXException {
+     err("End of file inside comment.");
+ }
+
+ /** Reports end of file inside a doctype. */
+ @Override
+ protected void errEofInDoctype() throws SAXException {
+     err("End of file inside doctype.");
+ }
+
+ /** Reports end of file inside an attribute value. */
+ @Override
+ protected void errEofInAttributeValue() throws SAXException {
+     err("End of file reached when inside an attribute value. Ignoring tag.");
+ }
+
+ /** Reports end of file inside an attribute name. */
+ @Override
+ protected void errEofInAttributeName() throws SAXException {
+     err("End of file occurred in an attribute name. Ignoring tag.");
+ }
+
+ /** Reports end of file before the previous tag was closed with "&gt;". */
+ @Override
+ protected void errEofWithoutGt() throws SAXException {
+     err("Saw end of file without the previous tag ending with \u201C>\u201D. Ignoring tag.");
+ }
+
+ /** Reports end of file while looking for a tag name. */
+ @Override
+ protected void errEofInTagName() throws SAXException {
+     err("End of file seen when looking for tag name. Ignoring tag.");
+ }
+
+ /** Reports end of file inside an end tag. */
+ @Override
+ protected void errEofInEndTag() throws SAXException {
+     err("End of file inside end tag. Ignoring tag.");
+ }
+
+ /** Reports end of file directly after "&lt;". */
+ @Override
+ protected void errEofAfterLt() throws SAXException {
+     err("End of file after \u201C<\u201D.");
+ }
+
+ /** Reports a character reference outside the permissible Unicode range. */
+ @Override
+ protected void errNcrOutOfRange() throws SAXException {
+     err("Character reference outside the permissible Unicode range.");
+ }
+
+ /** Reports a character reference that expands to a permanently unassigned code point. */
+ @Override
+ protected void errNcrUnassigned() throws SAXException {
+     err("Character reference expands to a permanently unassigned code point.");
+ }
+
+ /**
+  * Reports a duplicate attribute; the message quotes the HTML-mode local
+  * name of the <code>attributeName</code> field.
+  */
+ @Override
+ protected void errDuplicateAttribute() throws SAXException {
+     String message = "Duplicate attribute \u201C"
+             + attributeName.getLocal(AttributeName.HTML) + "\u201D.";
+     err(message);
+ }
+
+ /** Reports end of file inside a system identifier. */
+ @Override
+ protected void errEofInSystemId() throws SAXException {
+     err("End of file inside system identifier.");
+ }
+
+ /** Reports a doctype that ended where a system identifier was expected. */
+ @Override
+ protected void errExpectedSystemId() throws SAXException {
+     err("Expected a system identifier but the doctype ended.");
+ }
+
+ /** Reports a missing space before the doctype name. */
+ @Override
+ protected void errMissingSpaceBeforeDoctypeName() throws SAXException {
+     err("Missing space before doctype name.");
+ }
+
+ /** Reports "--!" found inside a comment. */
+ @Override
+ protected void errHyphenHyphenBang() throws SAXException {
+     err("\u201C--!\u201D found in comment.");
+ }
+
+ /**
+  * Reports a character reference that expands to a control character; the
+  * message shows the <code>value</code> field narrowed to <code>char</code>.
+  */
+ @Override
+ protected void errNcrControlChar() throws SAXException {
+     String message = "Character reference expands to a control character ("
+             + toUPlusString((char) value) + ").";
+     err(message);
+ }
+
+ /** Reports a character reference that expands to zero. */
+ @Override
+ protected void errNcrZero() throws SAXException {
+     err("Character reference expands to zero.");
+ }
+
+ /** Reports a missing space between the doctype SYSTEM keyword and the quote. */
+ @Override
+ protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote()
+         throws SAXException {
+     err("No space between the doctype \u201CSYSTEM\u201D keyword and the quote.");
+ }
+
+ /** Reports a missing space between the doctype public and system identifiers. */
+ @Override
+ protected void errNoSpaceBetweenPublicAndSystemIds() throws SAXException {
+     err("No space between the doctype public and system identifiers.");
+ }
+
+ /** Reports a missing space between the doctype PUBLIC keyword and the quote. */
+ @Override
+ protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote()
+         throws SAXException {
+     err("No space between the doctype \u201CPUBLIC\u201D keyword and the quote.");
+ }
+
+ /** Emits a note under the "xhtml2" key for an attribute without a value. */
+ @Override
+ protected void noteAttributeWithoutValue() throws SAXException {
+     note("xhtml2", "Attribute without value");
+ }
+
+ /** Emits a note under the "xhtml1" key for an unquoted attribute value. */
+ @Override
+ protected void noteUnquotedAttributeValue() throws SAXException {
+     note("xhtml1", "Unquoted attribute value.");
+ }
+
+ /**
+  * Stores the given handler in the <code>transitionHandler</code> field,
+  * replacing any previous one.
+  *
+  * @param transitionHandler
+  *            the handler to store
+  */
+ public void setTransitionHandler(TransitionHandler transitionHandler) {
+     this.transitionHandler = transitionHandler;
+ }
+
+ /**
+  * Stores the base offset that is to be added to the position reported to
+  * the <code>TransitionHandler</code>.
+  *
+  * @param offset
+  *            the base offset to store
+  */
+ public void setTransitionBaseOffset(int offset) {
+     this.transitionBaseOffset = offset;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt
new file mode 100644
index 000000000..c389a8cac
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ /**
+ * Template fragment: the two <code>default:</code> labels below are not
+ * valid together in plain Java; NOTE(review): presumably a generator
+ * splices the real state cases around the BEGIN/END HOTSPOT WORKAROUND
+ * markers - confirm against the build tooling.
+ *
+ * compressed returnValue:
+ * int returnState = returnValue >> 33
+ * boolean breakOuterState = ((returnValue >> 32) & 0x1) != 0
+ * int pos = returnValue & 0xFFFFFFFF // same as (int)returnValue
+ *
+ * Bit 32 is set only by the return statement after the loop, i.e. when
+ * the loop was left through <code>break stateloop</code>; the return inside
+ * the switch leaves it clear. Both returns store the current state in
+ * <code>stateSave</code> first.
+ */
+ @SuppressWarnings("unused") private long workAroundHotSpotHugeMethodLimit(
+ int state, char c, int pos, @NoLength char[] buf,
+ boolean reconsume, int returnState, int endPos) throws SAXException {
+ stateloop: for (;;) {
+ switch (state) {
+ // BEGIN HOTSPOT WORKAROUND
+ default:
+ long returnStateAndPos = workAroundHotSpotHugeMethodLimit(
+ state, c, pos, buf, reconsume, returnState, endPos);
+ // Unpack the compressed value described in the header comment.
+ pos = (int)returnStateAndPos; // 5.1.3 in the Java spec
+ returnState = (int)(returnStateAndPos >> 33);
+ state = stateSave;
+ // Stop when the buffer end was reached or the callee asked to break.
+ if ( (pos == endPos) || ( (((int)(returnStateAndPos >> 32)) & 0x1) != 0) ) {
+ break stateloop;
+ }
+ continue stateloop;
+ // END HOTSPOT WORKAROUND
+ default:
+ assert !reconsume : "Must not reconsume when returning from HotSpot workaround.";
+ stateSave = state;
+ return (((long)returnState) << 33) | pos;
+ }
+ }
+ assert !reconsume : "Must not reconsume when returning from HotSpot workaround.";
+ stateSave = state;
+ return (((long)returnState) << 33) | (1L << 32) | pos ;
+ }
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HtmlAttributes.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HtmlAttributes.java
new file mode 100644
index 000000000..45c9c6c3e
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HtmlAttributes.java
@@ -0,0 +1,620 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2011 Mozilla Foundation
+ * Copyright (c) 2018-2020 Moonchild Productions
+ * Copyright (c) 2020 Binary Outcast
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.Auto;
+import nu.validator.htmlparser.annotation.IdType;
+import nu.validator.htmlparser.annotation.Local;
+import nu.validator.htmlparser.annotation.NsUri;
+import nu.validator.htmlparser.annotation.Prefix;
+import nu.validator.htmlparser.annotation.QName;
+import nu.validator.htmlparser.common.Interner;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+
+/**
+ * Attribute holder backed by parallel name and value arrays. Be careful with
+ * this class: the QName is the name as it came from HTML tokenization.
+ * Otherwise, please refer to the documentation of the <code>Attributes</code>
+ * interface.
+ *
+ * Sections between the NOCPP markers and lines starting with CPPONLY are
+ * kept exactly as markers; do not reformat them.
+ *
+ * @version $Id: AttributesImpl.java 206 2008-03-20 14:09:29Z hsivonen $
+ * @author hsivonen
+ */
+public final class HtmlAttributes implements Attributes {
+
+    // [NOCPP[
+
+    private static final AttributeName[] EMPTY_ATTRIBUTENAMES = new AttributeName[0];
+
+    private static final String[] EMPTY_STRINGS = new String[0];
+
+    // ]NOCPP]
+
+    public static final HtmlAttributes EMPTY_ATTRIBUTES = new HtmlAttributes(
+            AttributeName.HTML);
+
+    /** Mode passed to the <code>AttributeName</code> getters. */
+    private int mode;
+
+    /** Number of used slots in <code>names</code> and <code>values</code>. */
+    private int length;
+
+    private @Auto AttributeName[] names;
+
+    private @Auto String[] values; // XXX perhaps make this @NoLength?
+
+    // CPPONLY: private @Auto int[] lines; // XXX perhaps make this @NoLength?
+
+    // [NOCPP[
+
+    /** Value of the most recently added <code>AttributeName.ID</code> attribute. */
+    private String idValue;
+
+    /** Number of used slots in <code>xmlnsNames</code> and <code>xmlnsValues</code>. */
+    private int xmlnsLength;
+
+    private AttributeName[] xmlnsNames;
+
+    private String[] xmlnsValues;
+
+    // ]NOCPP]
+
+    /**
+     * Creates an empty attribute holder.
+     *
+     * @param mode
+     *            the mode passed to the <code>AttributeName</code> getters
+     */
+    public HtmlAttributes(int mode) {
+        this.mode = mode;
+        this.length = 0;
+        /*
+         * The length of 5 covers 98.3% of elements according to Hixie, but
+         * let's round to the next power of two for jemalloc.
+         */
+        this.names = new AttributeName[8];
+        this.values = new String[8];
+        // CPPONLY: this.lines = new int[8];
+
+        // [NOCPP[
+
+        this.idValue = null;
+
+        this.xmlnsLength = 0;
+
+        this.xmlnsNames = HtmlAttributes.EMPTY_ATTRIBUTENAMES;
+
+        this.xmlnsValues = HtmlAttributes.EMPTY_STRINGS;
+
+        // ]NOCPP]
+    }
+    /*
+    public HtmlAttributes(HtmlAttributes other) {
+        this.mode = other.mode;
+        this.length = other.length;
+        this.names = new AttributeName[other.length];
+        this.values = new String[other.length];
+        // [NOCPP[
+        this.idValue = other.idValue;
+        this.xmlnsLength = other.xmlnsLength;
+        this.xmlnsNames = new AttributeName[other.xmlnsLength];
+        this.xmlnsValues = new String[other.xmlnsLength];
+        // ]NOCPP]
+    }
+    */
+
+    /** Releases the contents by calling <code>clear(0)</code>. */
+    void destructor() {
+        clear(0);
+    }
+
+    /**
+     * Finds an attribute by name identity (<code>==</code>). Only use with a
+     * static argument.
+     *
+     * @param name
+     *            the attribute name object to look for
+     * @return the index of the attribute, or -1 if it is not present
+     */
+    public int getIndex(AttributeName name) {
+        for (int i = 0; i < length; i++) {
+            if (names[i] == name) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * Returns the value of the attribute with the given name object, or
+     * <code>null</code> if it is not present. Only use with static argument.
+     *
+     * @see org.xml.sax.Attributes#getValue(java.lang.String)
+     */
+    public String getValue(AttributeName name) {
+        int index = getIndex(name);
+        if (index == -1) {
+            return null;
+        } else {
+            return getValueNoBoundsCheck(index);
+        }
+    }
+
+    public int getLength() {
+        return length;
+    }
+
+    /**
+     * Variant of <code>getLocalName(int index)</code> without bounds check.
+     * @param index a valid attribute index
+     * @return the local name at index
+     */
+    public @Local String getLocalNameNoBoundsCheck(int index) {
+        // CPPONLY: assert index < length && index >= 0: "Index out of bounds";
+        return names[index].getLocal(mode);
+    }
+
+    /**
+     * Variant of <code>getURI(int index)</code> without bounds check.
+     * @param index a valid attribute index
+     * @return the namespace URI at index
+     */
+    public @NsUri String getURINoBoundsCheck(int index) {
+        // CPPONLY: assert index < length && index >= 0: "Index out of bounds";
+        return names[index].getUri(mode);
+    }
+
+    /**
+     * Variant of <code>getPrefix(int index)</code> without bounds check.
+     * @param index a valid attribute index
+     * @return the namespace prefix at index
+     */
+    public @Prefix String getPrefixNoBoundsCheck(int index) {
+        // CPPONLY: assert index < length && index >= 0: "Index out of bounds";
+        return names[index].getPrefix(mode);
+    }
+
+    /**
+     * Variant of <code>getValue(int index)</code> without bounds check.
+     * @param index a valid attribute index
+     * @return the attribute value at index
+     */
+    public String getValueNoBoundsCheck(int index) {
+        // CPPONLY: assert index < length && index >= 0: "Index out of bounds";
+        return values[index];
+    }
+
+    /**
+     * Variant of <code>getAttributeName(int index)</code> without bounds check.
+     * @param index a valid attribute index
+     * @return the attribute name at index
+     */
+    public AttributeName getAttributeNameNoBoundsCheck(int index) {
+        // CPPONLY: assert index < length && index >= 0: "Index out of bounds";
+        return names[index];
+    }
+
+    // CPPONLY: /**
+    // CPPONLY: * Obtains a line number without bounds check.
+    // CPPONLY: * @param index a valid attribute index
+    // CPPONLY: * @return the line number at index or -1 if unknown
+    // CPPONLY: */
+    // CPPONLY: public int getLineNoBoundsCheck(int index) {
+    // CPPONLY: assert index < length && index >= 0: "Index out of bounds";
+    // CPPONLY: return lines[index];
+    // CPPONLY: }
+
+    // [NOCPP[
+
+    /**
+     * Variant of <code>getQName(int index)</code> without bounds check.
+     * @param index a valid attribute index
+     * @return the QName at index
+     */
+    public @QName String getQNameNoBoundsCheck(int index) {
+        return names[index].getQName(mode);
+    }
+
+    /**
+     * Variant of <code>getType(int index)</code> without bounds check.
+     * @param index a valid attribute index
+     * @return the attribute type at index
+     */
+    public @IdType String getTypeNoBoundsCheck(int index) {
+        return (names[index] == AttributeName.ID) ? "ID" : "CDATA";
+    }
+
+    /**
+     * Finds an attribute by comparing its QName in the current mode.
+     *
+     * @return the index of the attribute, or -1 if it is not present
+     */
+    public int getIndex(String qName) {
+        for (int i = 0; i < length; i++) {
+            if (names[i].getQName(mode).equals(qName)) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * Finds an attribute by comparing its local name and namespace URI in
+     * the current mode.
+     *
+     * @return the index of the attribute, or -1 if it is not present
+     */
+    public int getIndex(String uri, String localName) {
+        for (int i = 0; i < length; i++) {
+            if (names[i].getLocal(mode).equals(localName)
+                    && names[i].getUri(mode).equals(uri)) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    public @IdType String getType(String qName) {
+        int index = getIndex(qName);
+        if (index == -1) {
+            return null;
+        } else {
+            return getType(index);
+        }
+    }
+
+    public @IdType String getType(String uri, String localName) {
+        int index = getIndex(uri, localName);
+        if (index == -1) {
+            return null;
+        } else {
+            return getType(index);
+        }
+    }
+
+    public String getValue(String qName) {
+        int index = getIndex(qName);
+        if (index == -1) {
+            return null;
+        } else {
+            return getValue(index);
+        }
+    }
+
+    public String getValue(String uri, String localName) {
+        int index = getIndex(uri, localName);
+        if (index == -1) {
+            return null;
+        } else {
+            return getValue(index);
+        }
+    }
+
+    /* The index-based getters below return null for an out-of-range index. */
+
+    public @Local String getLocalName(int index) {
+        if (index < length && index >= 0) {
+            return names[index].getLocal(mode);
+        } else {
+            return null;
+        }
+    }
+
+    public @QName String getQName(int index) {
+        if (index < length && index >= 0) {
+            return names[index].getQName(mode);
+        } else {
+            return null;
+        }
+    }
+
+    public @IdType String getType(int index) {
+        if (index < length && index >= 0) {
+            return (names[index] == AttributeName.ID) ? "ID" : "CDATA";
+        } else {
+            return null;
+        }
+    }
+
+    public AttributeName getAttributeName(int index) {
+        if (index < length && index >= 0) {
+            return names[index];
+        } else {
+            return null;
+        }
+    }
+
+    public @NsUri String getURI(int index) {
+        if (index < length && index >= 0) {
+            return names[index].getUri(mode);
+        } else {
+            return null;
+        }
+    }
+
+    public @Prefix String getPrefix(int index) {
+        if (index < length && index >= 0) {
+            return names[index].getPrefix(mode);
+        } else {
+            return null;
+        }
+    }
+
+    public String getValue(int index) {
+        if (index < length && index >= 0) {
+            return values[index];
+        } else {
+            return null;
+        }
+    }
+
+    public String getId() {
+        return idValue;
+    }
+
+    public int getXmlnsLength() {
+        return xmlnsLength;
+    }
+
+    public @Local String getXmlnsLocalName(int index) {
+        if (index < xmlnsLength && index >= 0) {
+            return xmlnsNames[index].getLocal(mode);
+        } else {
+            return null;
+        }
+    }
+
+    public @NsUri String getXmlnsURI(int index) {
+        if (index < xmlnsLength && index >= 0) {
+            return xmlnsNames[index].getUri(mode);
+        } else {
+            return null;
+        }
+    }
+
+    public String getXmlnsValue(int index) {
+        if (index < xmlnsLength && index >= 0) {
+            return xmlnsValues[index];
+        } else {
+            return null;
+        }
+    }
+
+    /**
+     * Finds an xmlns attribute by name identity (<code>==</code>).
+     *
+     * @return the index in the xmlns arrays, or -1 if it is not present
+     */
+    public int getXmlnsIndex(AttributeName name) {
+        for (int i = 0; i < xmlnsLength; i++) {
+            if (xmlnsNames[i] == name) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    public String getXmlnsValue(AttributeName name) {
+        int index = getXmlnsIndex(name);
+        if (index == -1) {
+            return null;
+        } else {
+            return getXmlnsValue(index);
+        }
+    }
+
+    public AttributeName getXmlnsAttributeName(int index) {
+        if (index < xmlnsLength && index >= 0) {
+            return xmlnsNames[index];
+        } else {
+            return null;
+        }
+    }
+
+    // ]NOCPP]
+
+    /**
+     * Appends an attribute. In the Java-only part, an
+     * <code>AttributeName.ID</code> attribute also updates the id value, and
+     * an xmlns attribute is always recorded in the xmlns arrays; the policy
+     * then decides what happens next: FATAL throws, ALTER_INFOSET returns
+     * without adding the attribute to the regular arrays, and ALLOW adds it
+     * to the regular arrays as well. The arrays double in size when full.
+     *
+     * @throws SAXException
+     *             if an xmlns attribute is added under the FATAL policy
+     */
+    void addAttribute(AttributeName name, String value
+            // [NOCPP[
+            , XmlViolationPolicy xmlnsPolicy
+            // ]NOCPP]
+            // CPPONLY: , int line
+    ) throws SAXException {
+        // [NOCPP[
+        if (name == AttributeName.ID) {
+            idValue = value;
+        }
+
+        if (name.isXmlns()) {
+            if (xmlnsNames.length == xmlnsLength) {
+                int newLen = xmlnsLength == 0 ? 2 : xmlnsLength << 1;
+                AttributeName[] newNames = new AttributeName[newLen];
+                System.arraycopy(xmlnsNames, 0, newNames, 0, xmlnsNames.length);
+                xmlnsNames = newNames;
+                String[] newValues = new String[newLen];
+                System.arraycopy(xmlnsValues, 0, newValues, 0, xmlnsValues.length);
+                xmlnsValues = newValues;
+            }
+            xmlnsNames[xmlnsLength] = name;
+            xmlnsValues[xmlnsLength] = value;
+            xmlnsLength++;
+            switch (xmlnsPolicy) {
+                case FATAL:
+                    // this is ugly
+                    throw new SAXException("Saw an xmlns attribute.");
+                case ALTER_INFOSET:
+                    return;
+                case ALLOW:
+                    // fall through
+            }
+        }
+
+        // ]NOCPP]
+
+        if (names.length == length) {
+            int newLen = length << 1; // The first growth covers virtually
+            // 100% of elements according to
+            // Hixie
+            AttributeName[] newNames = new AttributeName[newLen];
+            System.arraycopy(names, 0, newNames, 0, names.length);
+            names = newNames;
+            String[] newValues = new String[newLen];
+            System.arraycopy(values, 0, newValues, 0, values.length);
+            values = newValues;
+            // CPPONLY: int[] newLines = new int[newLen];
+            // CPPONLY: System.arraycopy(lines, 0, newLines, 0, lines.length);
+            // CPPONLY: lines = newLines;
+        }
+        names[length] = name;
+        values[length] = value;
+        // CPPONLY: lines[length] = line;
+        length++;
+    }
+
+    /**
+     * Releases and drops all attributes and switches to mode <code>m</code>.
+     * In the Java-only part the id value and the xmlns arrays are reset too.
+     */
+    void clear(int m) {
+        for (int i = 0; i < length; i++) {
+            names[i].release();
+            names[i] = null;
+            Portability.releaseString(values[i]);
+            values[i] = null;
+        }
+        length = 0;
+        mode = m;
+        // [NOCPP[
+        idValue = null;
+        for (int i = 0; i < xmlnsLength; i++) {
+            xmlnsNames[i] = null;
+            xmlnsValues[i] = null;
+        }
+        xmlnsLength = 0;
+        // ]NOCPP]
+    }
+
+    /**
+     * This is used in C++ to release special <code>isindex</code>
+     * attribute values whose ownership is not transferred.
+     */
+    void releaseValue(int i) {
+        Portability.releaseString(values[i]);
+    }
+
+    /**
+     * This is only used for <code>AttributeName</code> ownership transfer
+     * in the isindex case to avoid freeing custom names twice in C++.
+     * Only the regular name/value arrays are emptied here.
+     */
+    void clearWithoutReleasingContents() {
+        for (int i = 0; i < length; i++) {
+            names[i] = null;
+            values[i] = null;
+        }
+        length = 0;
+    }
+
+    /**
+     * Tells whether an attribute equal to <code>name</code> (according to
+     * <code>AttributeName.equalsAnother</code>) is held, either as a regular
+     * attribute or, in the Java-only part, as an xmlns attribute.
+     */
+    boolean contains(AttributeName name) {
+        for (int i = 0; i < length; i++) {
+            if (name.equalsAnother(names[i])) {
+                return true;
+            }
+        }
+        // [NOCPP[
+        for (int i = 0; i < xmlnsLength; i++) {
+            if (name.equalsAnother(xmlnsNames[i])) {
+                return true;
+            }
+        }
+        // ]NOCPP]
+        return false;
+    }
+
+    public void adjustForMath() {
+        mode = AttributeName.MATHML;
+    }
+
+    public void adjustForSvg() {
+        mode = AttributeName.SVG;
+    }
+
+    /**
+     * Creates a copy of this holder in mode 0. Regular attributes are copied
+     * through <code>addAttribute</code> with cloned names and values.
+     *
+     * In the Java-only part, xmlns attributes are copied so that the clone
+     * mirrors this holder: an xmlns attribute that is also present in the
+     * regular arrays has already been copied into both arrays of the clone
+     * by the first loop (the ALLOW policy stores it in both), so it is
+     * skipped; an xmlns attribute that is held only in the xmlns arrays is
+     * re-added with ALTER_INFOSET, which stores it in the xmlns arrays only.
+     * Previously both loops used ALLOW, which duplicated xmlns attributes in
+     * the clone or added them to the clone's regular attributes even though
+     * this holder kept them out.
+     *
+     * @param interner
+     *            passed to <code>AttributeName.cloneAttributeName</code>
+     * @return the copy
+     * @throws SAXException
+     *             propagated from <code>addAttribute</code>
+     */
+    public HtmlAttributes cloneAttributes(Interner interner)
+            throws SAXException {
+        assert (length == 0
+                // [NOCPP[
+                && xmlnsLength == 0
+                // ]NOCPP]
+                )
+                || mode == 0 || mode == 3;
+        HtmlAttributes clone = new HtmlAttributes(0);
+        for (int i = 0; i < length; i++) {
+            clone.addAttribute(names[i].cloneAttributeName(interner),
+                    Portability.newStringFromString(values[i])
+                    // [NOCPP[
+                    , XmlViolationPolicy.ALLOW
+                    // ]NOCPP]
+                    // CPPONLY: , lines[i]
+            );
+        }
+        // [NOCPP[
+        for (int i = 0; i < xmlnsLength; i++) {
+            if (getIndex(xmlnsNames[i]) == -1) {
+                clone.addAttribute(xmlnsNames[i], xmlnsValues[i],
+                        XmlViolationPolicy.ALTER_INFOSET);
+            }
+        }
+        // ]NOCPP]
+        return clone;
+    }
+
+    /**
+     * Compares the regular attributes of two holders: the lengths must match
+     * and every attribute of this holder must have a counterpart in
+     * <code>other</code> with the same HTML local name and an equal value.
+     */
+    public boolean equalsAnother(HtmlAttributes other) {
+        assert mode == 0 || mode == 3 : "Trying to compare attributes in foreign content.";
+        int otherLength = other.getLength();
+        if (length != otherLength) {
+            return false;
+        }
+        for (int i = 0; i < length; i++) {
+            // Work around the limitations of C++
+            boolean found = false;
+            // Comparing just the local names is OK, since these attribute
+            // holders are both supposed to belong to HTML formatting elements
+            @Local String ownLocal = names[i].getLocal(AttributeName.HTML);
+            for (int j = 0; j < otherLength; j++) {
+                if (ownLocal == other.names[j].getLocal(AttributeName.HTML)) {
+                    found = true;
+                    if (!Portability.stringEqualsString(values[i], other.values[j])) {
+                        return false;
+                    }
+                }
+            }
+            if (!found) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // [NOCPP[
+
+    /**
+     * Handles attribute names that are not NCNames in the current mode:
+     * ALTER_INFOSET replaces the name with an escaped one and then warns,
+     * ALLOW warns, FATAL reports a fatal error. No warning is issued for
+     * <code>AttributeName.XML_LANG</code>.
+     */
+    void processNonNcNames(TreeBuilder<?> treeBuilder, XmlViolationPolicy namePolicy) throws SAXException {
+        for (int i = 0; i < length; i++) {
+            AttributeName attName = names[i];
+            if (!attName.isNcName(mode)) {
+                String name = attName.getLocal(mode);
+                switch (namePolicy) {
+                    case ALTER_INFOSET:
+                        names[i] = AttributeName.create(NCName.escapeName(name));
+                        // fall through
+                    case ALLOW:
+                        if (attName != AttributeName.XML_LANG) {
+                            treeBuilder.warn("Attribute \u201C" + name + "\u201D is not serializable as XML 1.0.");
+                        }
+                        break;
+                    case FATAL:
+                        treeBuilder.fatal("Attribute \u201C" + name + "\u201D is not serializable as XML 1.0.");
+                        break;
+                }
+            }
+        }
+    }
+
+    /**
+     * Adds every regular attribute of <code>attributes</code> whose name is
+     * not yet contained in this holder.
+     */
+    public void merge(HtmlAttributes attributes) throws SAXException {
+        int len = attributes.getLength();
+        for (int i = 0; i < len; i++) {
+            AttributeName name = attributes.getAttributeNameNoBoundsCheck(i);
+            if (!contains(name)) {
+                addAttribute(name, attributes.getValueNoBoundsCheck(i), XmlViolationPolicy.ALLOW);
+            }
+        }
+    }
+
+
+    // ]NOCPP]
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/LocatorImpl.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/LocatorImpl.java
new file mode 100644
index 000000000..7a559d903
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/LocatorImpl.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2011 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import org.xml.sax.Locator;
+
+ /**
+ * An immutable copy of the identifiers and position reported by another
+ * {@link Locator} at the time of construction.
+ */
+ public class LocatorImpl implements Locator {
+
+ private final int line;
+
+ private final int column;
+
+ private final String publicId;
+
+ private final String systemId;
+
+ /**
+ * Copies the system id, public id, column number and line number out of
+ * <code>locator</code>; later changes to <code>locator</code> are not
+ * reflected in this object.
+ *
+ * @param locator the locator to copy from
+ */
+ public LocatorImpl(Locator locator) {
+ systemId = locator.getSystemId();
+ publicId = locator.getPublicId();
+ column = locator.getColumnNumber();
+ line = locator.getLineNumber();
+ }
+
+ /**
+ * @return the line number copied at construction
+ */
+ public final int getLineNumber() {
+ return line;
+ }
+
+ /**
+ * @return the column number copied at construction
+ */
+ public final int getColumnNumber() {
+ return column;
+ }
+
+ /**
+ * @return the system id copied at construction
+ */
+ public final String getSystemId() {
+ return systemId;
+ }
+
+ /**
+ * @return the public id copied at construction
+ */
+ public final String getPublicId() {
+ return publicId;
+ }
+ }
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/MetaScanner.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/MetaScanner.java
new file mode 100644
index 000000000..9a3dc16b2
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/MetaScanner.java
@@ -0,0 +1,856 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2015 Mozilla Foundation
+ * Copyright (c) 2018-2020 Moonchild Productions
+ * Copyright (c) 2020 Binary Outcast
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import java.io.IOException;
+
+import nu.validator.htmlparser.annotation.Auto;
+import nu.validator.htmlparser.annotation.Inline;
+import nu.validator.htmlparser.common.ByteReadable;
+
+import org.xml.sax.SAXException;
+
+public abstract class MetaScanner {
+
+ /**
+ * The attribute name "charset" without its first letter. The leading 'c'
+ * is consumed by the state that starts an attribute name, which sets
+ * <code>charsetIndex</code> to 0; the following characters are then
+ * matched against this array.
+ */
+ private static final char[] CHARSET = { 'h', 'a', 'r', 's', 'e', 't' };
+
+ /**
+ * The attribute name "content" without its first letter (see
+ * <code>CHARSET</code> for why the leading 'c' is omitted).
+ */
+ private static final char[] CONTENT = { 'o', 'n', 't', 'e', 'n', 't' };
+
+ /**
+ * The attribute name "http-equiv" without its first letter; the leading
+ * 'h' is consumed by the state that starts an attribute name.
+ */
+ private static final char[] HTTP_EQUIV = { 't', 't', 'p', '-', 'e', 'q',
+ 'u', 'i', 'v' };
+
+ /**
+ * The attribute value "content-type", stored in full: matching of a value
+ * starts at index 0 when '=' is seen.
+ */
+ private static final char[] CONTENT_TYPE = { 'c', 'o', 'n', 't', 'e', 'n',
+ 't', '-', 't', 'y', 'p', 'e' };
+
+ // Values of metaState: how much of the tag name "meta" has been matched.
+ // NO means the tag name is not (or no longer) a prefix of "meta";
+ // A means the whole name has been matched so far.
+
+ private static final int NO = 0;
+
+ private static final int M = 1;
+
+ private static final int E = 2;
+
+ private static final int T = 3;
+
+ private static final int A = 4;
+
+ // Scanner states used by stateLoop. The value 12 is not assigned to any
+ // constant in this list.
+
+ private static final int DATA = 0;
+
+ private static final int TAG_OPEN = 1;
+
+ private static final int SCAN_UNTIL_GT = 2;
+
+ private static final int TAG_NAME = 3;
+
+ private static final int BEFORE_ATTRIBUTE_NAME = 4;
+
+ private static final int ATTRIBUTE_NAME = 5;
+
+ private static final int AFTER_ATTRIBUTE_NAME = 6;
+
+ private static final int BEFORE_ATTRIBUTE_VALUE = 7;
+
+ private static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED = 8;
+
+ private static final int ATTRIBUTE_VALUE_SINGLE_QUOTED = 9;
+
+ private static final int ATTRIBUTE_VALUE_UNQUOTED = 10;
+
+ private static final int AFTER_ATTRIBUTE_VALUE_QUOTED = 11;
+
+ private static final int MARKUP_DECLARATION_OPEN = 13;
+
+ private static final int MARKUP_DECLARATION_HYPHEN = 14;
+
+ private static final int COMMENT_START = 15;
+
+ private static final int COMMENT_START_DASH = 16;
+
+ private static final int COMMENT = 17;
+
+ private static final int COMMENT_END_DASH = 18;
+
+ private static final int COMMENT_END = 19;
+
+ private static final int SELF_CLOSING_START_TAG = 20;
+
+ // Values of httpEquivState: whether an http-equiv attribute has been
+ // handled for the current tag and whether its value was "content-type".
+
+ private static final int HTTP_EQUIV_NOT_SEEN = 0;
+
+ private static final int HTTP_EQUIV_CONTENT_TYPE = 1;
+
+ private static final int HTTP_EQUIV_OTHER = 2;
+
+ /**
+ * The data source.
+ */
+ protected ByteReadable readable;
+
+ /**
+ * The state of the state machine that recognizes the tag name "meta".
+ */
+ private int metaState = NO;
+
+ /**
+ * The current position in recognizing the attribute name "content"
+ * (an index into <code>CONTENT</code>). <code>Integer.MAX_VALUE</code>
+ * means the current attribute name cannot be "content".
+ */
+ private int contentIndex = Integer.MAX_VALUE;
+
+ /**
+ * The current position in recognizing the attribute name "charset"
+ * (an index into <code>CHARSET</code>). <code>Integer.MAX_VALUE</code>
+ * means the current attribute name cannot be "charset".
+ */
+ private int charsetIndex = Integer.MAX_VALUE;
+
+ /**
+ * The current position in recognizing the attribute name "http-equiv"
+ * (an index into <code>HTTP_EQUIV</code>). <code>Integer.MAX_VALUE</code>
+ * means the current attribute name cannot be "http-equiv".
+ */
+ private int httpEquivIndex = Integer.MAX_VALUE;
+
+ /**
+ * The current position in recognizing the attribute value "content-type"
+ * (an index into <code>CONTENT_TYPE</code>). <code>Integer.MAX_VALUE</code>
+ * means the current value cannot be "content-type".
+ */
+ private int contentTypeIndex = Integer.MAX_VALUE;
+
+ /**
+ * The tokenizer state. <code>stateLoop</code> stores the state it was in
+ * here when it returns.
+ */
+ protected int stateSave = DATA;
+
+ /**
+ * The currently filled length of strBuf.
+ */
+ private int strBufLen;
+
+ /**
+ * Accumulation buffer for attribute values.
+ */
+ private @Auto char[] strBuf;
+
+ // Value captured for the "content" attribute of the current tag, or null
+ // if none has been captured; cleared after each handled tag.
+ private String content;
+
+ // Value captured for the "charset" attribute of the current tag, or null
+ // if none has been captured; cleared after each handled tag.
+ private String charset;
+
+ // One of the HTTP_EQUIV_* constants; reset to HTTP_EQUIV_NOT_SEEN after
+ // each handled tag.
+ private int httpEquivState;
+
+ // CPPONLY: private TreeBuilder treeBuilder;
+
+ /**
+ * Creates a scanner in the <code>DATA</code> state with no data source,
+ * no partially recognized names or values, no captured attribute values
+ * and an empty 36-character accumulation buffer.
+ */
+ public MetaScanner(
+ // CPPONLY: TreeBuilder tb
+ ) {
+ // The assignments below follow the order of the field declarations.
+ this.readable = null;
+ this.metaState = NO;
+ this.contentIndex = Integer.MAX_VALUE;
+ this.charsetIndex = Integer.MAX_VALUE;
+ this.httpEquivIndex = Integer.MAX_VALUE;
+ this.contentTypeIndex = Integer.MAX_VALUE;
+ this.stateSave = DATA;
+ this.strBufLen = 0;
+ this.strBuf = new char[36];
+ this.content = null;
+ this.charset = null;
+ this.httpEquivState = HTTP_EQUIV_NOT_SEEN;
+ // CPPONLY: this.treeBuilder = tb;
+ }
+
+ /**
+ * Releases the captured <code>charset</code> and <code>content</code>
+ * strings through <code>Portability.releaseString</code>.
+ */
+ @SuppressWarnings("unused") private void destructor() {
+ Portability.releaseString(charset);
+ Portability.releaseString(content);
+ }
+
+ // [NOCPP[
+
+ /**
+ * Fetches the next byte from <code>readable</code>.
+ *
+ * @return the value returned by <code>readable.readByte()</code>;
+ * -1 means the end of the data source
+ * @throws IOException if the data source throws
+ */
+ protected int read() throws IOException {
+ int next = readable.readByte();
+ return next;
+ }
+
+ // ]NOCPP]
+
+ // WARNING When editing this, make sure the bytecode length shown by javap
+ // stays under 8000 bytes!
+ /**
+ * Runs the meta scanning algorithm.
+ *
+ * Bytes are pulled from <code>read()</code> and fed through a state
+ * machine that recognizes start tags named "meta" and, inside them, the
+ * attributes "charset", "content" and "http-equiv". At the end of each
+ * tag <code>handleTag()</code> is consulted. The loop ends when
+ * <code>read()</code> returns -1 or when <code>handleTag()</code> returns
+ * <code>true</code>; the state at that point is stored in
+ * <code>stateSave</code>.
+ *
+ * @param state the state to start in (one of the state constants of
+ * this class)
+ * @throws SAXException if attribute or tag handling throws
+ * @throws IOException if reading from the data source throws
+ */
+ protected final void stateLoop(int state)
+ throws SAXException, IOException {
+ int c = -1;
+ boolean reconsume = false;
+ stateloop: for (;;) {
+ switch (state) {
+ case DATA:
+ dataloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ c = read();
+ }
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '<':
+ state = MetaScanner.TAG_OPEN;
+ break dataloop; // FALL THROUGH continue
+ // stateloop;
+ default:
+ continue;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case TAG_OPEN:
+ tagopenloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case 'm':
+ case 'M':
+ metaState = M;
+ state = MetaScanner.TAG_NAME;
+ break tagopenloop;
+ // continue stateloop;
+ case '!':
+ state = MetaScanner.MARKUP_DECLARATION_OPEN;
+ continue stateloop;
+ case '?':
+ case '/':
+ state = MetaScanner.SCAN_UNTIL_GT;
+ continue stateloop;
+ case '>':
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
+ metaState = NO;
+ state = MetaScanner.TAG_NAME;
+ break tagopenloop;
+ // continue stateloop;
+ }
+ state = MetaScanner.DATA;
+ reconsume = true;
+ continue stateloop;
+ }
+ }
+ // FALL THROUGH DON'T REORDER
+ case TAG_NAME:
+ tagnameloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\u000C':
+ state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
+ break tagnameloop;
+ // continue stateloop;
+ case '/':
+ state = MetaScanner.SELF_CLOSING_START_TAG;
+ continue stateloop;
+ case '>':
+ state = MetaScanner.DATA;
+ continue stateloop;
+ case 'e':
+ case 'E':
+ if (metaState == M) {
+ metaState = E;
+ } else {
+ metaState = NO;
+ }
+ continue;
+ case 't':
+ case 'T':
+ if (metaState == E) {
+ metaState = T;
+ } else {
+ metaState = NO;
+ }
+ continue;
+ case 'a':
+ case 'A':
+ if (metaState == T) {
+ metaState = A;
+ } else {
+ metaState = NO;
+ }
+ continue;
+ default:
+ metaState = NO;
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BEFORE_ATTRIBUTE_NAME:
+ beforeattributenameloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ c = read();
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\u000C':
+ continue;
+ case '/':
+ state = MetaScanner.SELF_CLOSING_START_TAG;
+ continue stateloop;
+ case '>':
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = DATA;
+ continue stateloop;
+ case 'c':
+ case 'C':
+ contentIndex = 0;
+ charsetIndex = 0;
+ httpEquivIndex = Integer.MAX_VALUE;
+ contentTypeIndex = Integer.MAX_VALUE;
+ state = MetaScanner.ATTRIBUTE_NAME;
+ break beforeattributenameloop;
+ case 'h':
+ case 'H':
+ contentIndex = Integer.MAX_VALUE;
+ charsetIndex = Integer.MAX_VALUE;
+ httpEquivIndex = 0;
+ contentTypeIndex = Integer.MAX_VALUE;
+ state = MetaScanner.ATTRIBUTE_NAME;
+ break beforeattributenameloop;
+ default:
+ contentIndex = Integer.MAX_VALUE;
+ charsetIndex = Integer.MAX_VALUE;
+ httpEquivIndex = Integer.MAX_VALUE;
+ contentTypeIndex = Integer.MAX_VALUE;
+ state = MetaScanner.ATTRIBUTE_NAME;
+ break beforeattributenameloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case ATTRIBUTE_NAME:
+ attributenameloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\u000C':
+ state = MetaScanner.AFTER_ATTRIBUTE_NAME;
+ continue stateloop;
+ case '/':
+ state = MetaScanner.SELF_CLOSING_START_TAG;
+ continue stateloop;
+ case '=':
+ // A value follows: start with an empty buffer and
+ // begin matching "content-type" from its first letter.
+ strBufLen = 0;
+ contentTypeIndex = 0;
+ state = MetaScanner.BEFORE_ATTRIBUTE_VALUE;
+ break attributenameloop;
+ // continue stateloop;
+ case '>':
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ if (metaState == A) {
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ }
+ if (contentIndex < CONTENT.length && c == CONTENT[contentIndex]) {
+ ++contentIndex;
+ } else {
+ contentIndex = Integer.MAX_VALUE;
+ }
+ if (charsetIndex < CHARSET.length && c == CHARSET[charsetIndex]) {
+ ++charsetIndex;
+ } else {
+ charsetIndex = Integer.MAX_VALUE;
+ }
+ if (httpEquivIndex < HTTP_EQUIV.length && c == HTTP_EQUIV[httpEquivIndex]) {
+ ++httpEquivIndex;
+ } else {
+ httpEquivIndex = Integer.MAX_VALUE;
+ }
+ }
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BEFORE_ATTRIBUTE_VALUE:
+ beforeattributevalueloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\u000C':
+ continue;
+ case '"':
+ state = MetaScanner.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
+ break beforeattributevalueloop;
+ // continue stateloop;
+ case '\'':
+ state = MetaScanner.ATTRIBUTE_VALUE_SINGLE_QUOTED;
+ continue stateloop;
+ case '>':
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ handleCharInAttributeValue(c);
+ state = MetaScanner.ATTRIBUTE_VALUE_UNQUOTED;
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
+ attributevaluedoublequotedloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ c = read();
+ }
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '"':
+ handleAttributeValue();
+ state = MetaScanner.AFTER_ATTRIBUTE_VALUE_QUOTED;
+ break attributevaluedoublequotedloop;
+ // continue stateloop;
+ default:
+ handleCharInAttributeValue(c);
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case AFTER_ATTRIBUTE_VALUE_QUOTED:
+ afterattributevaluequotedloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\u000C':
+ state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
+ continue stateloop;
+ case '/':
+ state = MetaScanner.SELF_CLOSING_START_TAG;
+ break afterattributevaluequotedloop;
+ // continue stateloop;
+ case '>':
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
+ reconsume = true;
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case SELF_CLOSING_START_TAG:
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '>':
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
+ reconsume = true;
+ continue stateloop;
+ }
+ // XXX reorder point
+ case ATTRIBUTE_VALUE_UNQUOTED:
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ c = read();
+ }
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+
+ case '\u000C':
+ handleAttributeValue();
+ state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
+ continue stateloop;
+ case '>':
+ handleAttributeValue();
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ handleCharInAttributeValue(c);
+ continue;
+ }
+ }
+ // XXX reorder point
+ case AFTER_ATTRIBUTE_NAME:
+ for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\u000C':
+ continue;
+ case '/':
+ // NOTE(review): the preceding attribute had no
+ // value, so strBuf may still hold an earlier
+ // value here - confirm this call is intended.
+ handleAttributeValue();
+ state = MetaScanner.SELF_CLOSING_START_TAG;
+ continue stateloop;
+ case '=':
+ strBufLen = 0;
+ contentTypeIndex = 0;
+ state = MetaScanner.BEFORE_ATTRIBUTE_VALUE;
+ continue stateloop;
+ case '>':
+ // NOTE(review): same stale-buffer question as for
+ // '/' above.
+ handleAttributeValue();
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = MetaScanner.DATA;
+ continue stateloop;
+ // Any other character starts a new attribute name, so
+ // the name matchers are reset exactly as in
+ // BEFORE_ATTRIBUTE_NAME. Without the 'h' case and the
+ // http-equiv resets, "http-equiv" following a
+ // valueless attribute was never recognized and a
+ // previous "http-equiv" match could leak onto a
+ // one-character attribute name.
+ case 'c':
+ case 'C':
+ contentIndex = 0;
+ charsetIndex = 0;
+ httpEquivIndex = Integer.MAX_VALUE;
+ contentTypeIndex = Integer.MAX_VALUE;
+ state = MetaScanner.ATTRIBUTE_NAME;
+ continue stateloop;
+ case 'h':
+ case 'H':
+ contentIndex = Integer.MAX_VALUE;
+ charsetIndex = Integer.MAX_VALUE;
+ httpEquivIndex = 0;
+ contentTypeIndex = Integer.MAX_VALUE;
+ state = MetaScanner.ATTRIBUTE_NAME;
+ continue stateloop;
+ default:
+ contentIndex = Integer.MAX_VALUE;
+ charsetIndex = Integer.MAX_VALUE;
+ httpEquivIndex = Integer.MAX_VALUE;
+ contentTypeIndex = Integer.MAX_VALUE;
+ state = MetaScanner.ATTRIBUTE_NAME;
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case MARKUP_DECLARATION_OPEN:
+ markupdeclarationopenloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '-':
+ state = MetaScanner.MARKUP_DECLARATION_HYPHEN;
+ break markupdeclarationopenloop;
+ // continue stateloop;
+ default:
+ state = MetaScanner.SCAN_UNTIL_GT;
+ reconsume = true;
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case MARKUP_DECLARATION_HYPHEN:
+ markupdeclarationhyphenloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '-':
+ state = MetaScanner.COMMENT_START;
+ break markupdeclarationhyphenloop;
+ // continue stateloop;
+ default:
+ state = MetaScanner.SCAN_UNTIL_GT;
+ reconsume = true;
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT_START:
+ commentstartloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '-':
+ state = MetaScanner.COMMENT_START_DASH;
+ continue stateloop;
+ case '>':
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ state = MetaScanner.COMMENT;
+ break commentstartloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT:
+ commentloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '-':
+ state = MetaScanner.COMMENT_END_DASH;
+ break commentloop;
+ // continue stateloop;
+ default:
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT_END_DASH:
+ commentenddashloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '-':
+ state = MetaScanner.COMMENT_END;
+ break commentenddashloop;
+ // continue stateloop;
+ default:
+ state = MetaScanner.COMMENT;
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT_END:
+ for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '>':
+ state = MetaScanner.DATA;
+ continue stateloop;
+ case '-':
+ continue;
+ default:
+ state = MetaScanner.COMMENT;
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case COMMENT_START_DASH:
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '-':
+ state = MetaScanner.COMMENT_END;
+ continue stateloop;
+ case '>':
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ state = MetaScanner.COMMENT;
+ continue stateloop;
+ }
+ // XXX reorder point
+ case ATTRIBUTE_VALUE_SINGLE_QUOTED:
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ c = read();
+ }
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '\'':
+ handleAttributeValue();
+ state = MetaScanner.AFTER_ATTRIBUTE_VALUE_QUOTED;
+ continue stateloop;
+ default:
+ handleCharInAttributeValue(c);
+ continue;
+ }
+ }
+ // XXX reorder point
+ case SCAN_UNTIL_GT:
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ c = read();
+ }
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '>':
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ continue;
+ }
+ }
+ }
+ }
+ // Remember where scanning stopped.
+ stateSave = state;
+ }
+
+ /**
+ * Processes one character of an attribute value. Nothing happens unless
+ * the current tag name is "meta". For a "content" or "charset" attribute
+ * the character is appended to the accumulation buffer; for an
+ * "http-equiv" attribute it is matched ASCII-case-insensitively against
+ * "content-type".
+ *
+ * @param c the character of the attribute value
+ */
+ private void handleCharInAttributeValue(int c) {
+ if (metaState != A) {
+ return;
+ }
+ if (contentIndex == CONTENT.length || charsetIndex == CHARSET.length) {
+ addToBuffer(c);
+ return;
+ }
+ if (httpEquivIndex != HTTP_EQUIV.length) {
+ return;
+ }
+ if (contentTypeIndex < CONTENT_TYPE.length && toAsciiLowerCase(c) == CONTENT_TYPE[contentTypeIndex]) {
+ ++contentTypeIndex;
+ } else {
+ // Mismatch: this value can no longer be "content-type".
+ contentTypeIndex = Integer.MAX_VALUE;
+ }
+ }
+
+ /**
+ * Maps 'A' through 'Z' to 'a' through 'z' and returns every other value
+ * unchanged.
+ *
+ * @param c the character to convert
+ * @return the lower-cased character
+ */
+ @Inline private int toAsciiLowerCase(int c) {
+ return (c >= 'A' && c <= 'Z') ? c + 0x20 : c;
+ }
+
+ /**
+ * Appends a character to the accumulation buffer, tripling the buffer's
+ * capacity first when it is full.
+ * @param c the character to append
+ */
+ private void addToBuffer(int c) {
+ int capacity = strBuf.length;
+ if (strBufLen == capacity) {
+ // Full: move the contents into a buffer three times as large.
+ char[] grown = new char[capacity + (capacity << 1)];
+ System.arraycopy(strBuf, 0, grown, 0, capacity);
+ strBuf = grown;
+ }
+ strBuf[strBufLen++] = (char)c;
+ }
+
+ /**
+ * Records the attribute value that has just ended, provided the current
+ * tag name is "meta". At most one of the following happens:
+ * the buffer becomes <code>content</code> if the attribute was "content"
+ * and no content has been captured yet; otherwise the buffer becomes
+ * <code>charset</code> if the attribute was "charset" and no charset has
+ * been captured yet; otherwise, if the attribute was "http-equiv" and
+ * none has been handled for this tag, <code>httpEquivState</code> records
+ * whether the value was "content-type".
+ *
+ * @throws SAXException if creating the string throws
+ */
+ private void handleAttributeValue() throws SAXException {
+ if (metaState != A) {
+ return;
+ }
+ if (contentIndex == CONTENT.length && content == null) {
+ content = Portability.newStringFromBuffer(strBuf, 0, strBufLen
+ // CPPONLY: , treeBuilder
+ );
+ } else if (charsetIndex == CHARSET.length && charset == null) {
+ charset = Portability.newStringFromBuffer(strBuf, 0, strBufLen
+ // CPPONLY: , treeBuilder
+ );
+ } else if (httpEquivIndex == HTTP_EQUIV.length
+ && httpEquivState == HTTP_EQUIV_NOT_SEEN) {
+ if (contentTypeIndex == CONTENT_TYPE.length) {
+ httpEquivState = HTTP_EQUIV_CONTENT_TYPE;
+ } else {
+ httpEquivState = HTTP_EQUIV_OTHER;
+ }
+ }
+ }
+
+ /**
+ * Evaluates the attributes captured for the tag that has just ended and
+ * then clears the per-tag state (<code>content</code>,
+ * <code>charset</code> and <code>httpEquivState</code>).
+ *
+ * @return the result of <code>handleTagInner()</code>;
+ * <code>true</code> makes <code>stateLoop</code> stop
+ * @throws SAXException if evaluating the tag throws
+ */
+ private boolean handleTag() throws SAXException {
+ boolean shouldStop = handleTagInner();
+ // Reset everything that was collected for this tag.
+ httpEquivState = HTTP_EQUIV_NOT_SEEN;
+ Portability.releaseString(charset);
+ charset = null;
+ Portability.releaseString(content);
+ content = null;
+ return shouldStop;
+ }
+
+ /**
+ * Tries the encodings declared by the tag that has just ended. A captured
+ * <code>charset</code> value is tried first. If that is absent or
+ * rejected, and the tag had both a "content" value and an "http-equiv"
+ * value of "content-type", the charset extracted from the content value
+ * is tried.
+ *
+ * @return <code>true</code> if <code>tryCharset</code> accepted one of
+ * the candidates
+ * @throws SAXException if <code>tryCharset</code> or the extraction throws
+ */
+ private boolean handleTagInner() throws SAXException {
+ if (charset != null && tryCharset(charset)) {
+ return true;
+ }
+ if (content == null || httpEquivState != HTTP_EQUIV_CONTENT_TYPE) {
+ return false;
+ }
+ String candidate = TreeBuilder.extractCharsetFromContent(content
+ // CPPONLY: , treeBuilder
+ );
+ if (candidate == null) {
+ return false;
+ }
+ boolean accepted = tryCharset(candidate);
+ Portability.releaseString(candidate);
+ return accepted;
+ }
+
+ /**
+ * Tries to switch to an encoding. Called with the value of a "charset"
+ * attribute, or with the charset extracted from a "content" attribute,
+ * when a "meta" tag ends.
+ *
+ * @param encoding the encoding label found in the tag
+ * @return <code>true</code> if successful; scanning stops in that case
+ * @throws SAXException if the implementation reports an error
+ */
+ protected abstract boolean tryCharset(String encoding) throws SAXException;
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NCName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NCName.java
new file mode 100644
index 000000000..940cf2e9c
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NCName.java
@@ -0,0 +1,495 @@
+/*
+ * Copyright (c) 2008-2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+public final class NCName {
+ // [NOCPP[
+
+ // Adding this to (high << 10) + low yields
+ // 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00).
+ // NOTE(review): presumably used to combine a surrogate pair into a code
+ // point; the use site is outside this excerpt.
+ private static final int SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00;
+
+ // Upper-case hexadecimal digits indexed by their value (0 to 15).
+ private static final char[] HEX_TABLE = "0123456789ABCDEF".toCharArray();
+
+ /**
+ * Tests whether <code>c</code> is accepted as the first character of an
+ * NCName: it must be the underscore or fall into one of the ranges or
+ * single code points listed below.
+ *
+ * NOTE(review): the table looks like the Letter production (BaseChar and
+ * Ideographic) of XML 1.0 before the Fifth Edition - confirm against the
+ * specification before changing any range.
+ *
+ * @param c the UTF-16 code unit to test
+ * @return <code>true</code> if <code>c</code> is in the table
+ */
+ public static boolean isNCNameStart(char c) {
+ return ((c >= '\u0041' && c <= '\u005A')
+ || (c >= '\u0061' && c <= '\u007A')
+ || (c >= '\u00C0' && c <= '\u00D6')
+ || (c >= '\u00D8' && c <= '\u00F6')
+ || (c >= '\u00F8' && c <= '\u00FF')
+ || (c >= '\u0100' && c <= '\u0131')
+ || (c >= '\u0134' && c <= '\u013E')
+ || (c >= '\u0141' && c <= '\u0148')
+ || (c >= '\u014A' && c <= '\u017E')
+ || (c >= '\u0180' && c <= '\u01C3')
+ || (c >= '\u01CD' && c <= '\u01F0')
+ || (c >= '\u01F4' && c <= '\u01F5')
+ || (c >= '\u01FA' && c <= '\u0217')
+ || (c >= '\u0250' && c <= '\u02A8')
+ || (c >= '\u02BB' && c <= '\u02C1') || (c == '\u0386')
+ || (c >= '\u0388' && c <= '\u038A') || (c == '\u038C')
+ || (c >= '\u038E' && c <= '\u03A1')
+ || (c >= '\u03A3' && c <= '\u03CE')
+ || (c >= '\u03D0' && c <= '\u03D6') || (c == '\u03DA')
+ || (c == '\u03DC') || (c == '\u03DE') || (c == '\u03E0')
+ || (c >= '\u03E2' && c <= '\u03F3')
+ || (c >= '\u0401' && c <= '\u040C')
+ || (c >= '\u040E' && c <= '\u044F')
+ || (c >= '\u0451' && c <= '\u045C')
+ || (c >= '\u045E' && c <= '\u0481')
+ || (c >= '\u0490' && c <= '\u04C4')
+ || (c >= '\u04C7' && c <= '\u04C8')
+ || (c >= '\u04CB' && c <= '\u04CC')
+ || (c >= '\u04D0' && c <= '\u04EB')
+ || (c >= '\u04EE' && c <= '\u04F5')
+ || (c >= '\u04F8' && c <= '\u04F9')
+ || (c >= '\u0531' && c <= '\u0556') || (c == '\u0559')
+ || (c >= '\u0561' && c <= '\u0586')
+ || (c >= '\u05D0' && c <= '\u05EA')
+ || (c >= '\u05F0' && c <= '\u05F2')
+ || (c >= '\u0621' && c <= '\u063A')
+ || (c >= '\u0641' && c <= '\u064A')
+ || (c >= '\u0671' && c <= '\u06B7')
+ || (c >= '\u06BA' && c <= '\u06BE')
+ || (c >= '\u06C0' && c <= '\u06CE')
+ || (c >= '\u06D0' && c <= '\u06D3') || (c == '\u06D5')
+ || (c >= '\u06E5' && c <= '\u06E6')
+ || (c >= '\u0905' && c <= '\u0939') || (c == '\u093D')
+ || (c >= '\u0958' && c <= '\u0961')
+ || (c >= '\u0985' && c <= '\u098C')
+ || (c >= '\u098F' && c <= '\u0990')
+ || (c >= '\u0993' && c <= '\u09A8')
+ || (c >= '\u09AA' && c <= '\u09B0') || (c == '\u09B2')
+ || (c >= '\u09B6' && c <= '\u09B9')
+ || (c >= '\u09DC' && c <= '\u09DD')
+ || (c >= '\u09DF' && c <= '\u09E1')
+ || (c >= '\u09F0' && c <= '\u09F1')
+ || (c >= '\u0A05' && c <= '\u0A0A')
+ || (c >= '\u0A0F' && c <= '\u0A10')
+ || (c >= '\u0A13' && c <= '\u0A28')
+ || (c >= '\u0A2A' && c <= '\u0A30')
+ || (c >= '\u0A32' && c <= '\u0A33')
+ || (c >= '\u0A35' && c <= '\u0A36')
+ || (c >= '\u0A38' && c <= '\u0A39')
+ || (c >= '\u0A59' && c <= '\u0A5C') || (c == '\u0A5E')
+ || (c >= '\u0A72' && c <= '\u0A74')
+ || (c >= '\u0A85' && c <= '\u0A8B') || (c == '\u0A8D')
+ || (c >= '\u0A8F' && c <= '\u0A91')
+ || (c >= '\u0A93' && c <= '\u0AA8')
+ || (c >= '\u0AAA' && c <= '\u0AB0')
+ || (c >= '\u0AB2' && c <= '\u0AB3')
+ || (c >= '\u0AB5' && c <= '\u0AB9') || (c == '\u0ABD')
+ || (c == '\u0AE0') || (c >= '\u0B05' && c <= '\u0B0C')
+ || (c >= '\u0B0F' && c <= '\u0B10')
+ || (c >= '\u0B13' && c <= '\u0B28')
+ || (c >= '\u0B2A' && c <= '\u0B30')
+ || (c >= '\u0B32' && c <= '\u0B33')
+ || (c >= '\u0B36' && c <= '\u0B39') || (c == '\u0B3D')
+ || (c >= '\u0B5C' && c <= '\u0B5D')
+ || (c >= '\u0B5F' && c <= '\u0B61')
+ || (c >= '\u0B85' && c <= '\u0B8A')
+ || (c >= '\u0B8E' && c <= '\u0B90')
+ || (c >= '\u0B92' && c <= '\u0B95')
+ || (c >= '\u0B99' && c <= '\u0B9A') || (c == '\u0B9C')
+ || (c >= '\u0B9E' && c <= '\u0B9F')
+ || (c >= '\u0BA3' && c <= '\u0BA4')
+ || (c >= '\u0BA8' && c <= '\u0BAA')
+ || (c >= '\u0BAE' && c <= '\u0BB5')
+ || (c >= '\u0BB7' && c <= '\u0BB9')
+ || (c >= '\u0C05' && c <= '\u0C0C')
+ || (c >= '\u0C0E' && c <= '\u0C10')
+ || (c >= '\u0C12' && c <= '\u0C28')
+ || (c >= '\u0C2A' && c <= '\u0C33')
+ || (c >= '\u0C35' && c <= '\u0C39')
+ || (c >= '\u0C60' && c <= '\u0C61')
+ || (c >= '\u0C85' && c <= '\u0C8C')
+ || (c >= '\u0C8E' && c <= '\u0C90')
+ || (c >= '\u0C92' && c <= '\u0CA8')
+ || (c >= '\u0CAA' && c <= '\u0CB3')
+ || (c >= '\u0CB5' && c <= '\u0CB9') || (c == '\u0CDE')
+ || (c >= '\u0CE0' && c <= '\u0CE1')
+ || (c >= '\u0D05' && c <= '\u0D0C')
+ || (c >= '\u0D0E' && c <= '\u0D10')
+ || (c >= '\u0D12' && c <= '\u0D28')
+ || (c >= '\u0D2A' && c <= '\u0D39')
+ || (c >= '\u0D60' && c <= '\u0D61')
+ || (c >= '\u0E01' && c <= '\u0E2E') || (c == '\u0E30')
+ || (c >= '\u0E32' && c <= '\u0E33')
+ || (c >= '\u0E40' && c <= '\u0E45')
+ || (c >= '\u0E81' && c <= '\u0E82') || (c == '\u0E84')
+ || (c >= '\u0E87' && c <= '\u0E88') || (c == '\u0E8A')
+ || (c == '\u0E8D') || (c >= '\u0E94' && c <= '\u0E97')
+ || (c >= '\u0E99' && c <= '\u0E9F')
+ || (c >= '\u0EA1' && c <= '\u0EA3') || (c == '\u0EA5')
+ || (c == '\u0EA7') || (c >= '\u0EAA' && c <= '\u0EAB')
+ || (c >= '\u0EAD' && c <= '\u0EAE') || (c == '\u0EB0')
+ || (c >= '\u0EB2' && c <= '\u0EB3') || (c == '\u0EBD')
+ || (c >= '\u0EC0' && c <= '\u0EC4')
+ || (c >= '\u0F40' && c <= '\u0F47')
+ || (c >= '\u0F49' && c <= '\u0F69')
+ || (c >= '\u10A0' && c <= '\u10C5')
+ || (c >= '\u10D0' && c <= '\u10F6') || (c == '\u1100')
+ || (c >= '\u1102' && c <= '\u1103')
+ || (c >= '\u1105' && c <= '\u1107') || (c == '\u1109')
+ || (c >= '\u110B' && c <= '\u110C')
+ || (c >= '\u110E' && c <= '\u1112') || (c == '\u113C')
+ || (c == '\u113E') || (c == '\u1140') || (c == '\u114C')
+ || (c == '\u114E') || (c == '\u1150')
+ || (c >= '\u1154' && c <= '\u1155') || (c == '\u1159')
+ || (c >= '\u115F' && c <= '\u1161') || (c == '\u1163')
+ || (c == '\u1165') || (c == '\u1167') || (c == '\u1169')
+ || (c >= '\u116D' && c <= '\u116E')
+ || (c >= '\u1172' && c <= '\u1173') || (c == '\u1175')
+ || (c == '\u119E') || (c == '\u11A8') || (c == '\u11AB')
+ || (c >= '\u11AE' && c <= '\u11AF')
+ || (c >= '\u11B7' && c <= '\u11B8') || (c == '\u11BA')
+ || (c >= '\u11BC' && c <= '\u11C2') || (c == '\u11EB')
+ || (c == '\u11F0') || (c == '\u11F9')
+ || (c >= '\u1E00' && c <= '\u1E9B')
+ || (c >= '\u1EA0' && c <= '\u1EF9')
+ || (c >= '\u1F00' && c <= '\u1F15')
+ || (c >= '\u1F18' && c <= '\u1F1D')
+ || (c >= '\u1F20' && c <= '\u1F45')
+ || (c >= '\u1F48' && c <= '\u1F4D')
+ || (c >= '\u1F50' && c <= '\u1F57') || (c == '\u1F59')
+ || (c == '\u1F5B') || (c == '\u1F5D')
+ || (c >= '\u1F5F' && c <= '\u1F7D')
+ || (c >= '\u1F80' && c <= '\u1FB4')
+ || (c >= '\u1FB6' && c <= '\u1FBC') || (c == '\u1FBE')
+ || (c >= '\u1FC2' && c <= '\u1FC4')
+ || (c >= '\u1FC6' && c <= '\u1FCC')
+ || (c >= '\u1FD0' && c <= '\u1FD3')
+ || (c >= '\u1FD6' && c <= '\u1FDB')
+ || (c >= '\u1FE0' && c <= '\u1FEC')
+ || (c >= '\u1FF2' && c <= '\u1FF4')
+ || (c >= '\u1FF6' && c <= '\u1FFC') || (c == '\u2126')
+ || (c >= '\u212A' && c <= '\u212B') || (c == '\u212E')
+ || (c >= '\u2180' && c <= '\u2182')
+ || (c >= '\u3041' && c <= '\u3094')
+ || (c >= '\u30A1' && c <= '\u30FA')
+ || (c >= '\u3105' && c <= '\u312C')
+ || (c >= '\uAC00' && c <= '\uD7A3')
+ || (c >= '\u4E00' && c <= '\u9FA5') || (c == '\u3007')
+ || (c >= '\u3021' && c <= '\u3029') || (c == '_'));
+ }
+
+ public static boolean isNCNameTrail(char c) {
+ return ((c >= '\u0030' && c <= '\u0039')
+ || (c >= '\u0660' && c <= '\u0669')
+ || (c >= '\u06F0' && c <= '\u06F9')
+ || (c >= '\u0966' && c <= '\u096F')
+ || (c >= '\u09E6' && c <= '\u09EF')
+ || (c >= '\u0A66' && c <= '\u0A6F')
+ || (c >= '\u0AE6' && c <= '\u0AEF')
+ || (c >= '\u0B66' && c <= '\u0B6F')
+ || (c >= '\u0BE7' && c <= '\u0BEF')
+ || (c >= '\u0C66' && c <= '\u0C6F')
+ || (c >= '\u0CE6' && c <= '\u0CEF')
+ || (c >= '\u0D66' && c <= '\u0D6F')
+ || (c >= '\u0E50' && c <= '\u0E59')
+ || (c >= '\u0ED0' && c <= '\u0ED9')
+ || (c >= '\u0F20' && c <= '\u0F29')
+ || (c >= '\u0041' && c <= '\u005A')
+ || (c >= '\u0061' && c <= '\u007A')
+ || (c >= '\u00C0' && c <= '\u00D6')
+ || (c >= '\u00D8' && c <= '\u00F6')
+ || (c >= '\u00F8' && c <= '\u00FF')
+ || (c >= '\u0100' && c <= '\u0131')
+ || (c >= '\u0134' && c <= '\u013E')
+ || (c >= '\u0141' && c <= '\u0148')
+ || (c >= '\u014A' && c <= '\u017E')
+ || (c >= '\u0180' && c <= '\u01C3')
+ || (c >= '\u01CD' && c <= '\u01F0')
+ || (c >= '\u01F4' && c <= '\u01F5')
+ || (c >= '\u01FA' && c <= '\u0217')
+ || (c >= '\u0250' && c <= '\u02A8')
+ || (c >= '\u02BB' && c <= '\u02C1') || (c == '\u0386')
+ || (c >= '\u0388' && c <= '\u038A') || (c == '\u038C')
+ || (c >= '\u038E' && c <= '\u03A1')
+ || (c >= '\u03A3' && c <= '\u03CE')
+ || (c >= '\u03D0' && c <= '\u03D6') || (c == '\u03DA')
+ || (c == '\u03DC') || (c == '\u03DE') || (c == '\u03E0')
+ || (c >= '\u03E2' && c <= '\u03F3')
+ || (c >= '\u0401' && c <= '\u040C')
+ || (c >= '\u040E' && c <= '\u044F')
+ || (c >= '\u0451' && c <= '\u045C')
+ || (c >= '\u045E' && c <= '\u0481')
+ || (c >= '\u0490' && c <= '\u04C4')
+ || (c >= '\u04C7' && c <= '\u04C8')
+ || (c >= '\u04CB' && c <= '\u04CC')
+ || (c >= '\u04D0' && c <= '\u04EB')
+ || (c >= '\u04EE' && c <= '\u04F5')
+ || (c >= '\u04F8' && c <= '\u04F9')
+ || (c >= '\u0531' && c <= '\u0556') || (c == '\u0559')
+ || (c >= '\u0561' && c <= '\u0586')
+ || (c >= '\u05D0' && c <= '\u05EA')
+ || (c >= '\u05F0' && c <= '\u05F2')
+ || (c >= '\u0621' && c <= '\u063A')
+ || (c >= '\u0641' && c <= '\u064A')
+ || (c >= '\u0671' && c <= '\u06B7')
+ || (c >= '\u06BA' && c <= '\u06BE')
+ || (c >= '\u06C0' && c <= '\u06CE')
+ || (c >= '\u06D0' && c <= '\u06D3') || (c == '\u06D5')
+ || (c >= '\u06E5' && c <= '\u06E6')
+ || (c >= '\u0905' && c <= '\u0939') || (c == '\u093D')
+ || (c >= '\u0958' && c <= '\u0961')
+ || (c >= '\u0985' && c <= '\u098C')
+ || (c >= '\u098F' && c <= '\u0990')
+ || (c >= '\u0993' && c <= '\u09A8')
+ || (c >= '\u09AA' && c <= '\u09B0') || (c == '\u09B2')
+ || (c >= '\u09B6' && c <= '\u09B9')
+ || (c >= '\u09DC' && c <= '\u09DD')
+ || (c >= '\u09DF' && c <= '\u09E1')
+ || (c >= '\u09F0' && c <= '\u09F1')
+ || (c >= '\u0A05' && c <= '\u0A0A')
+ || (c >= '\u0A0F' && c <= '\u0A10')
+ || (c >= '\u0A13' && c <= '\u0A28')
+ || (c >= '\u0A2A' && c <= '\u0A30')
+ || (c >= '\u0A32' && c <= '\u0A33')
+ || (c >= '\u0A35' && c <= '\u0A36')
+ || (c >= '\u0A38' && c <= '\u0A39')
+ || (c >= '\u0A59' && c <= '\u0A5C') || (c == '\u0A5E')
+ || (c >= '\u0A72' && c <= '\u0A74')
+ || (c >= '\u0A85' && c <= '\u0A8B') || (c == '\u0A8D')
+ || (c >= '\u0A8F' && c <= '\u0A91')
+ || (c >= '\u0A93' && c <= '\u0AA8')
+ || (c >= '\u0AAA' && c <= '\u0AB0')
+ || (c >= '\u0AB2' && c <= '\u0AB3')
+ || (c >= '\u0AB5' && c <= '\u0AB9') || (c == '\u0ABD')
+ || (c == '\u0AE0') || (c >= '\u0B05' && c <= '\u0B0C')
+ || (c >= '\u0B0F' && c <= '\u0B10')
+ || (c >= '\u0B13' && c <= '\u0B28')
+ || (c >= '\u0B2A' && c <= '\u0B30')
+ || (c >= '\u0B32' && c <= '\u0B33')
+ || (c >= '\u0B36' && c <= '\u0B39') || (c == '\u0B3D')
+ || (c >= '\u0B5C' && c <= '\u0B5D')
+ || (c >= '\u0B5F' && c <= '\u0B61')
+ || (c >= '\u0B85' && c <= '\u0B8A')
+ || (c >= '\u0B8E' && c <= '\u0B90')
+ || (c >= '\u0B92' && c <= '\u0B95')
+ || (c >= '\u0B99' && c <= '\u0B9A') || (c == '\u0B9C')
+ || (c >= '\u0B9E' && c <= '\u0B9F')
+ || (c >= '\u0BA3' && c <= '\u0BA4')
+ || (c >= '\u0BA8' && c <= '\u0BAA')
+ || (c >= '\u0BAE' && c <= '\u0BB5')
+ || (c >= '\u0BB7' && c <= '\u0BB9')
+ || (c >= '\u0C05' && c <= '\u0C0C')
+ || (c >= '\u0C0E' && c <= '\u0C10')
+ || (c >= '\u0C12' && c <= '\u0C28')
+ || (c >= '\u0C2A' && c <= '\u0C33')
+ || (c >= '\u0C35' && c <= '\u0C39')
+ || (c >= '\u0C60' && c <= '\u0C61')
+ || (c >= '\u0C85' && c <= '\u0C8C')
+ || (c >= '\u0C8E' && c <= '\u0C90')
+ || (c >= '\u0C92' && c <= '\u0CA8')
+ || (c >= '\u0CAA' && c <= '\u0CB3')
+ || (c >= '\u0CB5' && c <= '\u0CB9') || (c == '\u0CDE')
+ || (c >= '\u0CE0' && c <= '\u0CE1')
+ || (c >= '\u0D05' && c <= '\u0D0C')
+ || (c >= '\u0D0E' && c <= '\u0D10')
+ || (c >= '\u0D12' && c <= '\u0D28')
+ || (c >= '\u0D2A' && c <= '\u0D39')
+ || (c >= '\u0D60' && c <= '\u0D61')
+ || (c >= '\u0E01' && c <= '\u0E2E') || (c == '\u0E30')
+ || (c >= '\u0E32' && c <= '\u0E33')
+ || (c >= '\u0E40' && c <= '\u0E45')
+ || (c >= '\u0E81' && c <= '\u0E82') || (c == '\u0E84')
+ || (c >= '\u0E87' && c <= '\u0E88') || (c == '\u0E8A')
+ || (c == '\u0E8D') || (c >= '\u0E94' && c <= '\u0E97')
+ || (c >= '\u0E99' && c <= '\u0E9F')
+ || (c >= '\u0EA1' && c <= '\u0EA3') || (c == '\u0EA5')
+ || (c == '\u0EA7') || (c >= '\u0EAA' && c <= '\u0EAB')
+ || (c >= '\u0EAD' && c <= '\u0EAE') || (c == '\u0EB0')
+ || (c >= '\u0EB2' && c <= '\u0EB3') || (c == '\u0EBD')
+ || (c >= '\u0EC0' && c <= '\u0EC4')
+ || (c >= '\u0F40' && c <= '\u0F47')
+ || (c >= '\u0F49' && c <= '\u0F69')
+ || (c >= '\u10A0' && c <= '\u10C5')
+ || (c >= '\u10D0' && c <= '\u10F6') || (c == '\u1100')
+ || (c >= '\u1102' && c <= '\u1103')
+ || (c >= '\u1105' && c <= '\u1107') || (c == '\u1109')
+ || (c >= '\u110B' && c <= '\u110C')
+ || (c >= '\u110E' && c <= '\u1112') || (c == '\u113C')
+ || (c == '\u113E') || (c == '\u1140') || (c == '\u114C')
+ || (c == '\u114E') || (c == '\u1150')
+ || (c >= '\u1154' && c <= '\u1155') || (c == '\u1159')
+ || (c >= '\u115F' && c <= '\u1161') || (c == '\u1163')
+ || (c == '\u1165') || (c == '\u1167') || (c == '\u1169')
+ || (c >= '\u116D' && c <= '\u116E')
+ || (c >= '\u1172' && c <= '\u1173') || (c == '\u1175')
+ || (c == '\u119E') || (c == '\u11A8') || (c == '\u11AB')
+ || (c >= '\u11AE' && c <= '\u11AF')
+ || (c >= '\u11B7' && c <= '\u11B8') || (c == '\u11BA')
+ || (c >= '\u11BC' && c <= '\u11C2') || (c == '\u11EB')
+ || (c == '\u11F0') || (c == '\u11F9')
+ || (c >= '\u1E00' && c <= '\u1E9B')
+ || (c >= '\u1EA0' && c <= '\u1EF9')
+ || (c >= '\u1F00' && c <= '\u1F15')
+ || (c >= '\u1F18' && c <= '\u1F1D')
+ || (c >= '\u1F20' && c <= '\u1F45')
+ || (c >= '\u1F48' && c <= '\u1F4D')
+ || (c >= '\u1F50' && c <= '\u1F57') || (c == '\u1F59')
+ || (c == '\u1F5B') || (c == '\u1F5D')
+ || (c >= '\u1F5F' && c <= '\u1F7D')
+ || (c >= '\u1F80' && c <= '\u1FB4')
+ || (c >= '\u1FB6' && c <= '\u1FBC') || (c == '\u1FBE')
+ || (c >= '\u1FC2' && c <= '\u1FC4')
+ || (c >= '\u1FC6' && c <= '\u1FCC')
+ || (c >= '\u1FD0' && c <= '\u1FD3')
+ || (c >= '\u1FD6' && c <= '\u1FDB')
+ || (c >= '\u1FE0' && c <= '\u1FEC')
+ || (c >= '\u1FF2' && c <= '\u1FF4')
+ || (c >= '\u1FF6' && c <= '\u1FFC') || (c == '\u2126')
+ || (c >= '\u212A' && c <= '\u212B') || (c == '\u212E')
+ || (c >= '\u2180' && c <= '\u2182')
+ || (c >= '\u3041' && c <= '\u3094')
+ || (c >= '\u30A1' && c <= '\u30FA')
+ || (c >= '\u3105' && c <= '\u312C')
+ || (c >= '\uAC00' && c <= '\uD7A3')
+ || (c >= '\u4E00' && c <= '\u9FA5') || (c == '\u3007')
+ || (c >= '\u3021' && c <= '\u3029') || (c == '_') || (c == '.')
+ || (c == '-') || (c >= '\u0300' && c <= '\u0345')
+ || (c >= '\u0360' && c <= '\u0361')
+ || (c >= '\u0483' && c <= '\u0486')
+ || (c >= '\u0591' && c <= '\u05A1')
+ || (c >= '\u05A3' && c <= '\u05B9')
+ || (c >= '\u05BB' && c <= '\u05BD') || (c == '\u05BF')
+ || (c >= '\u05C1' && c <= '\u05C2') || (c == '\u05C4')
+ || (c >= '\u064B' && c <= '\u0652') || (c == '\u0670')
+ || (c >= '\u06D6' && c <= '\u06DC')
+ || (c >= '\u06DD' && c <= '\u06DF')
+ || (c >= '\u06E0' && c <= '\u06E4')
+ || (c >= '\u06E7' && c <= '\u06E8')
+ || (c >= '\u06EA' && c <= '\u06ED')
+ || (c >= '\u0901' && c <= '\u0903') || (c == '\u093C')
+ || (c >= '\u093E' && c <= '\u094C') || (c == '\u094D')
+ || (c >= '\u0951' && c <= '\u0954')
+ || (c >= '\u0962' && c <= '\u0963')
+ || (c >= '\u0981' && c <= '\u0983') || (c == '\u09BC')
+ || (c == '\u09BE') || (c == '\u09BF')
+ || (c >= '\u09C0' && c <= '\u09C4')
+ || (c >= '\u09C7' && c <= '\u09C8')
+ || (c >= '\u09CB' && c <= '\u09CD') || (c == '\u09D7')
+ || (c >= '\u09E2' && c <= '\u09E3') || (c == '\u0A02')
+ || (c == '\u0A3C') || (c == '\u0A3E') || (c == '\u0A3F')
+ || (c >= '\u0A40' && c <= '\u0A42')
+ || (c >= '\u0A47' && c <= '\u0A48')
+ || (c >= '\u0A4B' && c <= '\u0A4D')
+ || (c >= '\u0A70' && c <= '\u0A71')
+ || (c >= '\u0A81' && c <= '\u0A83') || (c == '\u0ABC')
+ || (c >= '\u0ABE' && c <= '\u0AC5')
+ || (c >= '\u0AC7' && c <= '\u0AC9')
+ || (c >= '\u0ACB' && c <= '\u0ACD')
+ || (c >= '\u0B01' && c <= '\u0B03') || (c == '\u0B3C')
+ || (c >= '\u0B3E' && c <= '\u0B43')
+ || (c >= '\u0B47' && c <= '\u0B48')
+ || (c >= '\u0B4B' && c <= '\u0B4D')
+ || (c >= '\u0B56' && c <= '\u0B57')
+ || (c >= '\u0B82' && c <= '\u0B83')
+ || (c >= '\u0BBE' && c <= '\u0BC2')
+ || (c >= '\u0BC6' && c <= '\u0BC8')
+ || (c >= '\u0BCA' && c <= '\u0BCD') || (c == '\u0BD7')
+ || (c >= '\u0C01' && c <= '\u0C03')
+ || (c >= '\u0C3E' && c <= '\u0C44')
+ || (c >= '\u0C46' && c <= '\u0C48')
+ || (c >= '\u0C4A' && c <= '\u0C4D')
+ || (c >= '\u0C55' && c <= '\u0C56')
+ || (c >= '\u0C82' && c <= '\u0C83')
+ || (c >= '\u0CBE' && c <= '\u0CC4')
+ || (c >= '\u0CC6' && c <= '\u0CC8')
+ || (c >= '\u0CCA' && c <= '\u0CCD')
+ || (c >= '\u0CD5' && c <= '\u0CD6')
+ || (c >= '\u0D02' && c <= '\u0D03')
+ || (c >= '\u0D3E' && c <= '\u0D43')
+ || (c >= '\u0D46' && c <= '\u0D48')
+ || (c >= '\u0D4A' && c <= '\u0D4D') || (c == '\u0D57')
+ || (c == '\u0E31') || (c >= '\u0E34' && c <= '\u0E3A')
+ || (c >= '\u0E47' && c <= '\u0E4E') || (c == '\u0EB1')
+ || (c >= '\u0EB4' && c <= '\u0EB9')
+ || (c >= '\u0EBB' && c <= '\u0EBC')
+ || (c >= '\u0EC8' && c <= '\u0ECD')
+ || (c >= '\u0F18' && c <= '\u0F19') || (c == '\u0F35')
+ || (c == '\u0F37') || (c == '\u0F39') || (c == '\u0F3E')
+ || (c == '\u0F3F') || (c >= '\u0F71' && c <= '\u0F84')
+ || (c >= '\u0F86' && c <= '\u0F8B')
+ || (c >= '\u0F90' && c <= '\u0F95') || (c == '\u0F97')
+ || (c >= '\u0F99' && c <= '\u0FAD')
+ || (c >= '\u0FB1' && c <= '\u0FB7') || (c == '\u0FB9')
+ || (c >= '\u20D0' && c <= '\u20DC') || (c == '\u20E1')
+ || (c >= '\u302A' && c <= '\u302F') || (c == '\u3099')
+ || (c == '\u309A') || (c == '\u00B7') || (c == '\u02D0')
+ || (c == '\u02D1') || (c == '\u0387') || (c == '\u0640')
+ || (c == '\u0E46') || (c == '\u0EC6') || (c == '\u3005')
+ || (c >= '\u3031' && c <= '\u3035')
+ || (c >= '\u309D' && c <= '\u309E') || (c >= '\u30FC' && c <= '\u30FE'));
+ }
+
+ /**
+  * Tells whether the argument is a non-empty string whose first UTF-16 code
+  * unit passes {@code isNCNameStart} and whose remaining code units all
+  * pass {@code isNCNameTrail}.
+  *
+  * @param str the candidate name; may be {@code null}
+  * @return {@code true} if the checks above succeed; {@code false} for
+  *         {@code null}, the empty string, or any failing code unit
+  */
+ public static boolean isNCName(String str) {
+     if (str == null || str.length() == 0) {
+         return false;
+     }
+     if (!NCName.isNCNameStart(str.charAt(0))) {
+         return false;
+     }
+     for (int pos = 1, end = str.length(); pos < end; pos++) {
+         if (!NCName.isNCNameTrail(str.charAt(pos))) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ /**
+  * Appends {@code 'U'} followed by six entries of {@code HEX_TABLE}, one per
+  * 4-bit group of the low 24 bits of {@code c}, most significant group first.
+  *
+  * @param sb the builder to append to
+  * @param c the value to write; bits above the low 24 are ignored
+  */
+ private static void appendUHexTo(StringBuilder sb, int c) {
+     sb.append('U');
+     for (int shift = 20; shift >= 0; shift -= 4) {
+         sb.append(HEX_TABLE[(c >> shift) & 0xF]);
+     }
+ }
+
+ /**
+  * Returns an interned copy of {@code str} in which every surrogate pair and
+  * every code unit that fails the NCName check for its position
+  * ({@code isNCNameStart} at index 0, {@code isNCNameTrail} elsewhere) is
+  * replaced by the escape written by {@code appendUHexTo}.
+  *
+  * A high surrogate that is not immediately followed by a low surrogate
+  * (including one at the very end of the string) is escaped as a single code
+  * unit, so the method neither reads past the end of the string nor swallows
+  * the character that follows it.
+  *
+  * @param str the name to escape; must not be {@code null} (it is
+  *        dereferenced without a check)
+  * @return the escaped, interned name
+  */
+ public static String escapeName(String str) {
+     StringBuilder sb = new StringBuilder();
+     for (int i = 0, len = str.length(); i < len; i++) {
+         char c = str.charAt(i);
+         if ((c & 0xFC00) == 0xD800) {
+             if (i + 1 < len && (str.charAt(i + 1) & 0xFC00) == 0xDC00) {
+                 // Well-formed pair: consume both units and escape the
+                 // value combined via SURROGATE_OFFSET.
+                 char next = str.charAt(++i);
+                 appendUHexTo(sb, (c << 10) + next + SURROGATE_OFFSET);
+             } else {
+                 // Unpaired high surrogate: escape just this code unit and
+                 // leave the following character (if any) for the next turn.
+                 appendUHexTo(sb, c);
+             }
+         } else if (i == 0 && !isNCNameStart(c)) {
+             appendUHexTo(sb, c);
+         } else if (i != 0 && !isNCNameTrail(c)) {
+             appendUHexTo(sb, c);
+         } else {
+             sb.append(c);
+         }
+     }
+     return sb.toString().intern();
+ }
+ // ]NOCPP]
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharacters.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharacters.java
new file mode 100644
index 000000000..266a5a28e
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharacters.java
@@ -0,0 +1,944 @@
+/*
+ * Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera
+ * Software ASA.
+ *
+ * You are granted a license to use, reproduce and create derivative works of
+ * this document.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.CharacterName;
+import nu.validator.htmlparser.annotation.NoLength;
+
+/**
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class NamedCharacters {
+
+ static final @NoLength @CharacterName String[] NAMES = { "lig", "lig;",
+ "P", "P;", "cute", "cute;", "reve;", "irc", "irc;", "y;", "r;",
+ "rave", "rave;", "pha;", "acr;", "d;", "gon;", "pf;",
+ "plyFunction;", "ing", "ing;", "cr;", "sign;", "ilde", "ilde;",
+ "ml", "ml;", "ckslash;", "rv;", "rwed;", "y;", "cause;",
+ "rnoullis;", "ta;", "r;", "pf;", "eve;", "cr;", "mpeq;", "cy;",
+ "PY", "PY;", "cute;", "p;", "pitalDifferentialD;", "yleys;",
+ "aron;", "edil", "edil;", "irc;", "onint;", "ot;", "dilla;",
+ "nterDot;", "r;", "i;", "rcleDot;", "rcleMinus;", "rclePlus;",
+ "rcleTimes;", "ockwiseContourIntegral;", "oseCurlyDoubleQuote;",
+ "oseCurlyQuote;", "lon;", "lone;", "ngruent;", "nint;",
+ "ntourIntegral;", "pf;", "product;",
+ "unterClockwiseContourIntegral;", "oss;", "cr;", "p;", "pCap;",
+ ";", "otrahd;", "cy;", "cy;", "cy;", "gger;", "rr;", "shv;",
+ "aron;", "y;", "l;", "lta;", "r;", "acriticalAcute;",
+ "acriticalDot;", "acriticalDoubleAcute;", "acriticalGrave;",
+ "acriticalTilde;", "amond;", "fferentialD;", "pf;", "t;", "tDot;",
+ "tEqual;", "ubleContourIntegral;", "ubleDot;", "ubleDownArrow;",
+ "ubleLeftArrow;", "ubleLeftRightArrow;", "ubleLeftTee;",
+ "ubleLongLeftArrow;", "ubleLongLeftRightArrow;",
+ "ubleLongRightArrow;", "ubleRightArrow;", "ubleRightTee;",
+ "ubleUpArrow;", "ubleUpDownArrow;", "ubleVerticalBar;", "wnArrow;",
+ "wnArrowBar;", "wnArrowUpArrow;", "wnBreve;", "wnLeftRightVector;",
+ "wnLeftTeeVector;", "wnLeftVector;", "wnLeftVectorBar;",
+ "wnRightTeeVector;", "wnRightVector;", "wnRightVectorBar;",
+ "wnTee;", "wnTeeArrow;", "wnarrow;", "cr;", "trok;", "G;", "H",
+ "H;", "cute", "cute;", "aron;", "irc", "irc;", "y;", "ot;", "r;",
+ "rave", "rave;", "ement;", "acr;", "ptySmallSquare;",
+ "ptyVerySmallSquare;", "gon;", "pf;", "silon;", "ual;",
+ "ualTilde;", "uilibrium;", "cr;", "im;", "a;", "ml", "ml;",
+ "ists;", "ponentialE;", "y;", "r;", "lledSmallSquare;",
+ "lledVerySmallSquare;", "pf;", "rAll;", "uriertrf;", "cr;", "cy;",
+ "", ";", "mma;", "mmad;", "reve;", "edil;", "irc;", "y;", "ot;",
+ "r;", ";", "pf;", "eaterEqual;", "eaterEqualLess;",
+ "eaterFullEqual;", "eaterGreater;", "eaterLess;",
+ "eaterSlantEqual;", "eaterTilde;", "cr;", ";", "RDcy;", "cek;",
+ "t;", "irc;", "r;", "lbertSpace;", "pf;", "rizontalLine;", "cr;",
+ "trok;", "mpDownHump;", "mpEqual;", "cy;", "lig;", "cy;", "cute",
+ "cute;", "irc", "irc;", "y;", "ot;", "r;", "rave", "rave;", ";",
+ "acr;", "aginaryI;", "plies;", "t;", "tegral;", "tersection;",
+ "visibleComma;", "visibleTimes;", "gon;", "pf;", "ta;", "cr;",
+ "ilde;", "kcy;", "ml", "ml;", "irc;", "y;", "r;", "pf;", "cr;",
+ "ercy;", "kcy;", "cy;", "cy;", "ppa;", "edil;", "y;", "r;", "pf;",
+ "cr;", "cy;", "", ";", "cute;", "mbda;", "ng;", "placetrf;", "rr;",
+ "aron;", "edil;", "y;", "ftAngleBracket;", "ftArrow;",
+ "ftArrowBar;", "ftArrowRightArrow;", "ftCeiling;",
+ "ftDoubleBracket;", "ftDownTeeVector;", "ftDownVector;",
+ "ftDownVectorBar;", "ftFloor;", "ftRightArrow;", "ftRightVector;",
+ "ftTee;", "ftTeeArrow;", "ftTeeVector;", "ftTriangle;",
+ "ftTriangleBar;", "ftTriangleEqual;", "ftUpDownVector;",
+ "ftUpTeeVector;", "ftUpVector;", "ftUpVectorBar;", "ftVector;",
+ "ftVectorBar;", "ftarrow;", "ftrightarrow;", "ssEqualGreater;",
+ "ssFullEqual;", "ssGreater;", "ssLess;", "ssSlantEqual;",
+ "ssTilde;", "r;", ";", "eftarrow;", "idot;", "ngLeftArrow;",
+ "ngLeftRightArrow;", "ngRightArrow;", "ngleftarrow;",
+ "ngleftrightarrow;", "ngrightarrow;", "pf;", "werLeftArrow;",
+ "werRightArrow;", "cr;", "h;", "trok;", ";", "p;", "y;",
+ "diumSpace;", "llintrf;", "r;", "nusPlus;", "pf;", "cr;", ";",
+ "cy;", "cute;", "aron;", "edil;", "y;", "gativeMediumSpace;",
+ "gativeThickSpace;", "gativeThinSpace;", "gativeVeryThinSpace;",
+ "stedGreaterGreater;", "stedLessLess;", "wLine;", "r;", "Break;",
+ "nBreakingSpace;", "pf;", "t;", "tCongruent;", "tCupCap;",
+ "tDoubleVerticalBar;", "tElement;", "tEqual;", "tEqualTilde;",
+ "tExists;", "tGreater;", "tGreaterEqual;", "tGreaterFullEqual;",
+ "tGreaterGreater;", "tGreaterLess;", "tGreaterSlantEqual;",
+ "tGreaterTilde;", "tHumpDownHump;", "tHumpEqual;",
+ "tLeftTriangle;", "tLeftTriangleBar;", "tLeftTriangleEqual;",
+ "tLess;", "tLessEqual;", "tLessGreater;", "tLessLess;",
+ "tLessSlantEqual;", "tLessTilde;", "tNestedGreaterGreater;",
+ "tNestedLessLess;", "tPrecedes;", "tPrecedesEqual;",
+ "tPrecedesSlantEqual;", "tReverseElement;", "tRightTriangle;",
+ "tRightTriangleBar;", "tRightTriangleEqual;", "tSquareSubset;",
+ "tSquareSubsetEqual;", "tSquareSuperset;", "tSquareSupersetEqual;",
+ "tSubset;", "tSubsetEqual;", "tSucceeds;", "tSucceedsEqual;",
+ "tSucceedsSlantEqual;", "tSucceedsTilde;", "tSuperset;",
+ "tSupersetEqual;", "tTilde;", "tTildeEqual;", "tTildeFullEqual;",
+ "tTildeTilde;", "tVerticalBar;", "cr;", "ilde", "ilde;", ";",
+ "lig;", "cute", "cute;", "irc", "irc;", "y;", "blac;", "r;",
+ "rave", "rave;", "acr;", "ega;", "icron;", "pf;",
+ "enCurlyDoubleQuote;", "enCurlyQuote;", ";", "cr;", "lash",
+ "lash;", "ilde", "ilde;", "imes;", "ml", "ml;", "erBar;",
+ "erBrace;", "erBracket;", "erParenthesis;", "rtialD;", "y;", "r;",
+ "i;", ";", "usMinus;", "incareplane;", "pf;", ";", "ecedes;",
+ "ecedesEqual;", "ecedesSlantEqual;", "ecedesTilde;", "ime;",
+ "oduct;", "oportion;", "oportional;", "cr;", "i;", "OT", "OT;",
+ "r;", "pf;", "cr;", "arr;", "G", "G;", "cute;", "ng;", "rr;",
+ "rrtl;", "aron;", "edil;", "y;", ";", "verseElement;",
+ "verseEquilibrium;", "verseUpEquilibrium;", "r;", "o;",
+ "ghtAngleBracket;", "ghtArrow;", "ghtArrowBar;",
+ "ghtArrowLeftArrow;", "ghtCeiling;", "ghtDoubleBracket;",
+ "ghtDownTeeVector;", "ghtDownVector;", "ghtDownVectorBar;",
+ "ghtFloor;", "ghtTee;", "ghtTeeArrow;", "ghtTeeVector;",
+ "ghtTriangle;", "ghtTriangleBar;", "ghtTriangleEqual;",
+ "ghtUpDownVector;", "ghtUpTeeVector;", "ghtUpVector;",
+ "ghtUpVectorBar;", "ghtVector;", "ghtVectorBar;", "ghtarrow;",
+ "pf;", "undImplies;", "ightarrow;", "cr;", "h;", "leDelayed;",
+ "CHcy;", "cy;", "FTcy;", "cute;", ";", "aron;", "edil;", "irc;",
+ "y;", "r;", "ortDownArrow;", "ortLeftArrow;", "ortRightArrow;",
+ "ortUpArrow;", "gma;", "allCircle;", "pf;", "rt;", "uare;",
+ "uareIntersection;", "uareSubset;", "uareSubsetEqual;",
+ "uareSuperset;", "uareSupersetEqual;", "uareUnion;", "cr;", "ar;",
+ "b;", "bset;", "bsetEqual;", "cceeds;", "cceedsEqual;",
+ "cceedsSlantEqual;", "cceedsTilde;", "chThat;", "m;", "p;",
+ "perset;", "persetEqual;", "pset;", "ORN", "ORN;", "ADE;", "Hcy;",
+ "cy;", "b;", "u;", "aron;", "edil;", "y;", "r;", "erefore;",
+ "eta;", "ickSpace;", "inSpace;", "lde;", "ldeEqual;",
+ "ldeFullEqual;", "ldeTilde;", "pf;", "ipleDot;", "cr;", "trok;",
+ "cute", "cute;", "rr;", "rrocir;", "rcy;", "reve;", "irc", "irc;",
+ "y;", "blac;", "r;", "rave", "rave;", "acr;", "derBar;",
+ "derBrace;", "derBracket;", "derParenthesis;", "ion;", "ionPlus;",
+ "gon;", "pf;", "Arrow;", "ArrowBar;", "ArrowDownArrow;",
+ "DownArrow;", "Equilibrium;", "Tee;", "TeeArrow;", "arrow;",
+ "downarrow;", "perLeftArrow;", "perRightArrow;", "si;", "silon;",
+ "ing;", "cr;", "ilde;", "ml", "ml;", "ash;", "ar;", "y;", "ash;",
+ "ashl;", "e;", "rbar;", "rt;", "rticalBar;", "rticalLine;",
+ "rticalSeparator;", "rticalTilde;", "ryThinSpace;", "r;", "pf;",
+ "cr;", "dash;", "irc;", "dge;", "r;", "pf;", "cr;", "r;", ";",
+ "pf;", "cr;", "cy;", "cy;", "cy;", "cute", "cute;", "irc;", "y;",
+ "r;", "pf;", "cr;", "ml;", "cy;", "cute;", "aron;", "y;", "ot;",
+ "roWidthSpace;", "ta;", "r;", "pf;", "cr;", "cute", "cute;",
+ "reve;", ";", "E;", "d;", "irc", "irc;", "ute", "ute;", "y;",
+ "lig", "lig;", ";", "r;", "rave", "rave;", "efsym;", "eph;",
+ "pha;", "acr;", "alg;", "p", "p;", "d;", "dand;", "dd;", "dslope;",
+ "dv;", "g;", "ge;", "gle;", "gmsd;", "gmsdaa;", "gmsdab;",
+ "gmsdac;", "gmsdad;", "gmsdae;", "gmsdaf;", "gmsdag;", "gmsdah;",
+ "grt;", "grtvb;", "grtvbd;", "gsph;", "gst;", "gzarr;", "gon;",
+ "pf;", ";", "E;", "acir;", "e;", "id;", "os;", "prox;", "proxeq;",
+ "ing", "ing;", "cr;", "t;", "ymp;", "ympeq;", "ilde", "ilde;",
+ "ml", "ml;", "conint;", "int;", "ot;", "ckcong;", "ckepsilon;",
+ "ckprime;", "cksim;", "cksimeq;", "rvee;", "rwed;", "rwedge;",
+ "rk;", "rktbrk;", "ong;", "y;", "quo;", "caus;", "cause;",
+ "mptyv;", "psi;", "rnou;", "ta;", "th;", "tween;", "r;", "gcap;",
+ "gcirc;", "gcup;", "godot;", "goplus;", "gotimes;", "gsqcup;",
+ "gstar;", "gtriangledown;", "gtriangleup;", "guplus;", "gvee;",
+ "gwedge;", "arow;", "acklozenge;", "acksquare;", "acktriangle;",
+ "acktriangledown;", "acktriangleleft;", "acktriangleright;",
+ "ank;", "k12;", "k14;", "k34;", "ock;", "e;", "equiv;", "ot;",
+ "pf;", "t;", "ttom;", "wtie;", "xDL;", "xDR;", "xDl;", "xDr;",
+ "xH;", "xHD;", "xHU;", "xHd;", "xHu;", "xUL;", "xUR;", "xUl;",
+ "xUr;", "xV;", "xVH;", "xVL;", "xVR;", "xVh;", "xVl;", "xVr;",
+ "xbox;", "xdL;", "xdR;", "xdl;", "xdr;", "xh;", "xhD;", "xhU;",
+ "xhd;", "xhu;", "xminus;", "xplus;", "xtimes;", "xuL;", "xuR;",
+ "xul;", "xur;", "xv;", "xvH;", "xvL;", "xvR;", "xvh;", "xvl;",
+ "xvr;", "rime;", "eve;", "vbar", "vbar;", "cr;", "emi;", "im;",
+ "ime;", "ol;", "olb;", "olhsub;", "ll;", "llet;", "mp;", "mpE;",
+ "mpe;", "mpeq;", "cute;", "p;", "pand;", "pbrcup;", "pcap;",
+ "pcup;", "pdot;", "ps;", "ret;", "ron;", "aps;", "aron;", "edil",
+ "edil;", "irc;", "ups;", "upssm;", "ot;", "dil", "dil;", "mptyv;",
+ "nt", "nt;", "nterdot;", "r;", "cy;", "eck;", "eckmark;", "i;",
+ "r;", "rE;", "rc;", "rceq;", "rclearrowleft;", "rclearrowright;",
+ "rcledR;", "rcledS;", "rcledast;", "rcledcirc;", "rcleddash;",
+ "re;", "rfnint;", "rmid;", "rscir;", "ubs;", "ubsuit;", "lon;",
+ "lone;", "loneq;", "mma;", "mmat;", "mp;", "mpfn;", "mplement;",
+ "mplexes;", "ng;", "ngdot;", "nint;", "pf;", "prod;", "py", "py;",
+ "pysr;", "arr;", "oss;", "cr;", "ub;", "ube;", "up;", "upe;",
+ "dot;", "darrl;", "darrr;", "epr;", "esc;", "larr;", "larrp;",
+ "p;", "pbrcap;", "pcap;", "pcup;", "pdot;", "por;", "ps;", "rarr;",
+ "rarrm;", "rlyeqprec;", "rlyeqsucc;", "rlyvee;", "rlywedge;",
+ "rren", "rren;", "rvearrowleft;", "rvearrowright;", "vee;", "wed;",
+ "conint;", "int;", "lcty;", "rr;", "ar;", "gger;", "leth;", "rr;",
+ "sh;", "shv;", "karow;", "lac;", "aron;", "y;", ";", "agger;",
+ "arr;", "otseq;", "g", "g;", "lta;", "mptyv;", "isht;", "r;",
+ "arl;", "arr;", "am;", "amond;", "amondsuit;", "ams;", "e;",
+ "gamma;", "sin;", "v;", "vide", "vide;", "videontimes;", "vonx;",
+ "cy;", "corn;", "crop;", "llar;", "pf;", "t;", "teq;", "teqdot;",
+ "tminus;", "tplus;", "tsquare;", "ublebarwedge;", "wnarrow;",
+ "wndownarrows;", "wnharpoonleft;", "wnharpoonright;", "bkarow;",
+ "corn;", "crop;", "cr;", "cy;", "ol;", "trok;", "dot;", "ri;",
+ "rif;", "arr;", "har;", "angle;", "cy;", "igrarr;", "Dot;", "ot;",
+ "cute", "cute;", "ster;", "aron;", "ir;", "irc", "irc;", "olon;",
+ "y;", "ot;", ";", "Dot;", "r;", ";", "rave", "rave;", "s;",
+ "sdot;", ";", "inters;", "l;", "s;", "sdot;", "acr;", "pty;",
+ "ptyset;", "ptyv;", "sp13;", "sp14;", "sp;", "g;", "sp;", "gon;",
+ "pf;", "ar;", "arsl;", "lus;", "si;", "silon;", "siv;", "circ;",
+ "colon;", "sim;", "slantgtr;", "slantless;", "uals;", "uest;",
+ "uiv;", "uivDD;", "vparsl;", "Dot;", "arr;", "cr;", "dot;", "im;",
+ "a;", "h", "h;", "ml", "ml;", "ro;", "cl;", "ist;", "pectation;",
+ "ponentiale;", "llingdotseq;", "y;", "male;", "ilig;", "lig;",
+ "llig;", "r;", "lig;", "lig;", "at;", "lig;", "tns;", "of;", "pf;",
+ "rall;", "rk;", "rkv;", "artint;", "ac12", "ac12;", "ac13;",
+ "ac14", "ac14;", "ac15;", "ac16;", "ac18;", "ac23;", "ac25;",
+ "ac34", "ac34;", "ac35;", "ac38;", "ac45;", "ac56;", "ac58;",
+ "ac78;", "asl;", "own;", "cr;", ";", "l;", "cute;", "mma;",
+ "mmad;", "p;", "reve;", "irc;", "y;", "ot;", ";", "l;", "q;",
+ "qq;", "qslant;", "s;", "scc;", "sdot;", "sdoto;", "sdotol;",
+ "sl;", "sles;", "r;", ";", "g;", "mel;", "cy;", ";", "E;", "a;",
+ "j;", "E;", "ap;", "approx;", "e;", "eq;", "eqq;", "sim;", "pf;",
+ "ave;", "cr;", "im;", "ime;", "iml;", "", ";", "cc;", "cir;",
+ "dot;", "lPar;", "quest;", "rapprox;", "rarr;", "rdot;",
+ "reqless;", "reqqless;", "rless;", "rsim;", "ertneqq;", "nE;",
+ "rr;", "irsp;", "lf;", "milt;", "rdcy;", "rr;", "rrcir;", "rrw;",
+ "ar;", "irc;", "arts;", "artsuit;", "llip;", "rcon;", "r;",
+ "searow;", "swarow;", "arr;", "mtht;", "okleftarrow;",
+ "okrightarrow;", "pf;", "rbar;", "cr;", "lash;", "trok;", "bull;",
+ "phen;", "cute", "cute;", ";", "irc", "irc;", "y;", "cy;", "xcl",
+ "xcl;", "f;", "r;", "rave", "rave;", ";", "iint;", "int;", "nfin;",
+ "ota;", "lig;", "acr;", "age;", "agline;", "agpart;", "ath;",
+ "of;", "ped;", ";", "care;", "fin;", "fintie;", "odot;", "t;",
+ "tcal;", "tegers;", "tercal;", "tlarhk;", "tprod;", "cy;", "gon;",
+ "pf;", "ta;", "rod;", "uest", "uest;", "cr;", "in;", "inE;",
+ "indot;", "ins;", "insv;", "inv;", ";", "ilde;", "kcy;", "ml",
+ "ml;", "irc;", "y;", "r;", "ath;", "pf;", "cr;", "ercy;", "kcy;",
+ "ppa;", "ppav;", "edil;", "y;", "r;", "reen;", "cy;", "cy;", "pf;",
+ "cr;", "arr;", "rr;", "tail;", "arr;", ";", "g;", "ar;", "cute;",
+ "emptyv;", "gran;", "mbda;", "ng;", "ngd;", "ngle;", "p;", "quo",
+ "quo;", "rr;", "rrb;", "rrbfs;", "rrfs;", "rrhk;", "rrlp;",
+ "rrpl;", "rrsim;", "rrtl;", "t;", "tail;", "te;", "tes;", "arr;",
+ "brk;", "race;", "rack;", "rke;", "rksld;", "rkslu;", "aron;",
+ "edil;", "eil;", "ub;", "y;", "ca;", "quo;", "quor;", "rdhar;",
+ "rushar;", "sh;", ";", "ftarrow;", "ftarrowtail;",
+ "ftharpoondown;", "ftharpoonup;", "ftleftarrows;", "ftrightarrow;",
+ "ftrightarrows;", "ftrightharpoons;", "ftrightsquigarrow;",
+ "ftthreetimes;", "g;", "q;", "qq;", "qslant;", "s;", "scc;",
+ "sdot;", "sdoto;", "sdotor;", "sg;", "sges;", "ssapprox;",
+ "ssdot;", "sseqgtr;", "sseqqgtr;", "ssgtr;", "sssim;", "isht;",
+ "loor;", "r;", ";", "E;", "ard;", "aru;", "arul;", "blk;", "cy;",
+ ";", "arr;", "corner;", "hard;", "tri;", "idot;", "oust;",
+ "oustache;", "E;", "ap;", "approx;", "e;", "eq;", "eqq;", "sim;",
+ "ang;", "arr;", "brk;", "ngleftarrow;", "ngleftrightarrow;",
+ "ngmapsto;", "ngrightarrow;", "oparrowleft;", "oparrowright;",
+ "par;", "pf;", "plus;", "times;", "wast;", "wbar;", "z;", "zenge;",
+ "zf;", "ar;", "arlt;", "arr;", "corner;", "har;", "hard;", "m;",
+ "tri;", "aquo;", "cr;", "h;", "im;", "ime;", "img;", "qb;", "quo;",
+ "quor;", "trok;", "", ";", "cc;", "cir;", "dot;", "hree;", "imes;",
+ "larr;", "quest;", "rPar;", "ri;", "rie;", "rif;", "rdshar;",
+ "ruhar;", "ertneqq;", "nE;", "Dot;", "cr", "cr;", "le;", "lt;",
+ "ltese;", "p;", "psto;", "pstodown;", "pstoleft;", "pstoup;",
+ "rker;", "omma;", "y;", "ash;", "asuredangle;", "r;", "o;", "cro",
+ "cro;", "d;", "dast;", "dcir;", "ddot", "ddot;", "nus;", "nusb;",
+ "nusd;", "nusdu;", "cp;", "dr;", "plus;", "dels;", "pf;", ";",
+ "cr;", "tpos;", ";", "ltimap;", "map;", "g;", "t;", "tv;",
+ "eftarrow;", "eftrightarrow;", "l;", "t;", "tv;", "ightarrow;",
+ "Dash;", "dash;", "bla;", "cute;", "ng;", "p;", "pE;", "pid;",
+ "pos;", "pprox;", "tur;", "tural;", "turals;", "sp", "sp;", "ump;",
+ "umpe;", "ap;", "aron;", "edil;", "ong;", "ongdot;", "up;", "y;",
+ "ash;", ";", "Arr;", "arhk;", "arr;", "arrow;", "dot;", "quiv;",
+ "sear;", "sim;", "xist;", "xists;", "r;", "E;", "e;", "eq;",
+ "eqq;", "eqslant;", "es;", "sim;", "t;", "tr;", "Arr;", "arr;",
+ "par;", ";", "s;", "sd;", "v;", "cy;", "Arr;", "E;", "arr;", "dr;",
+ "e;", "eftarrow;", "eftrightarrow;", "eq;", "eqq;", "eqslant;",
+ "es;", "ess;", "sim;", "t;", "tri;", "trie;", "id;", "pf;", "t",
+ "t;", "tin;", "tinE;", "tindot;", "tinva;", "tinvb;", "tinvc;",
+ "tni;", "tniva;", "tnivb;", "tnivc;", "ar;", "arallel;", "arsl;",
+ "art;", "olint;", "r;", "rcue;", "re;", "rec;", "receq;", "Arr;",
+ "arr;", "arrc;", "arrw;", "ightarrow;", "tri;", "trie;", "c;",
+ "ccue;", "ce;", "cr;", "hortmid;", "hortparallel;", "im;", "ime;",
+ "imeq;", "mid;", "par;", "qsube;", "qsupe;", "ub;", "ubE;", "ube;",
+ "ubset;", "ubseteq;", "ubseteqq;", "ucc;", "ucceq;", "up;", "upE;",
+ "upe;", "upset;", "upseteq;", "upseteqq;", "gl;", "ilde", "ilde;",
+ "lg;", "riangleleft;", "rianglelefteq;", "riangleright;",
+ "rianglerighteq;", ";", "m;", "mero;", "msp;", "Dash;", "Harr;",
+ "ap;", "dash;", "ge;", "gt;", "infin;", "lArr;", "le;", "lt;",
+ "ltrie;", "rArr;", "rtrie;", "sim;", "Arr;", "arhk;", "arr;",
+ "arrow;", "near;", ";", "cute", "cute;", "st;", "ir;", "irc",
+ "irc;", "y;", "ash;", "blac;", "iv;", "ot;", "sold;", "lig;",
+ "cir;", "r;", "on;", "rave", "rave;", "t;", "bar;", "m;", "nt;",
+ "arr;", "cir;", "cross;", "ine;", "t;", "acr;", "ega;", "icron;",
+ "id;", "inus;", "pf;", "ar;", "erp;", "lus;", ";", "arr;", "d;",
+ "der;", "derof;", "df", "df;", "dm", "dm;", "igof;", "or;",
+ "slope;", "v;", "cr;", "lash", "lash;", "ol;", "ilde", "ilde;",
+ "imes;", "imesas;", "ml", "ml;", "bar;", "r;", "ra", "ra;",
+ "rallel;", "rsim;", "rsl;", "rt;", "y;", "rcnt;", "riod;", "rmil;",
+ "rp;", "rtenk;", "r;", "i;", "iv;", "mmat;", "one;", ";",
+ "tchfork;", "v;", "anck;", "anckh;", "ankv;", "us;", "usacir;",
+ "usb;", "uscir;", "usdo;", "usdu;", "use;", "usmn", "usmn;",
+ "ussim;", "ustwo;", ";", "intint;", "pf;", "und", "und;", ";",
+ "E;", "ap;", "cue;", "e;", "ec;", "ecapprox;", "eccurlyeq;",
+ "eceq;", "ecnapprox;", "ecneqq;", "ecnsim;", "ecsim;", "ime;",
+ "imes;", "nE;", "nap;", "nsim;", "od;", "ofalar;", "ofline;",
+ "ofsurf;", "op;", "opto;", "sim;", "urel;", "cr;", "i;", "ncsp;",
+ "r;", "nt;", "pf;", "rime;", "cr;", "aternions;", "atint;", "est;",
+ "esteq;", "ot", "ot;", "arr;", "rr;", "tail;", "arr;", "ar;",
+ "ce;", "cute;", "dic;", "emptyv;", "ng;", "ngd;", "nge;", "ngle;",
+ "quo", "quo;", "rr;", "rrap;", "rrb;", "rrbfs;", "rrc;", "rrfs;",
+ "rrhk;", "rrlp;", "rrpl;", "rrsim;", "rrtl;", "rrw;", "tail;",
+ "tio;", "tionals;", "arr;", "brk;", "race;", "rack;", "rke;",
+ "rksld;", "rkslu;", "aron;", "edil;", "eil;", "ub;", "y;", "ca;",
+ "ldhar;", "quo;", "quor;", "sh;", "al;", "aline;", "alpart;",
+ "als;", "ct;", "g", "g;", "isht;", "loor;", "r;", "ard;", "aru;",
+ "arul;", "o;", "ov;", "ghtarrow;", "ghtarrowtail;",
+ "ghtharpoondown;", "ghtharpoonup;", "ghtleftarrows;",
+ "ghtleftharpoons;", "ghtrightarrows;", "ghtsquigarrow;",
+ "ghtthreetimes;", "ng;", "singdotseq;", "arr;", "har;", "m;",
+ "oust;", "oustache;", "mid;", "ang;", "arr;", "brk;", "par;",
+ "pf;", "plus;", "times;", "ar;", "argt;", "polint;", "arr;",
+ "aquo;", "cr;", "h;", "qb;", "quo;", "quor;", "hree;", "imes;",
+ "ri;", "rie;", "rif;", "riltri;", "luhar;", ";", "cute;", "quo;",
+ ";", "E;", "ap;", "aron;", "cue;", "e;", "edil;", "irc;", "nE;",
+ "nap;", "nsim;", "polint;", "sim;", "y;", "ot;", "otb;", "ote;",
+ "Arr;", "arhk;", "arr;", "arrow;", "ct", "ct;", "mi;", "swar;",
+ "tminus;", "tmn;", "xt;", "r;", "rown;", "arp;", "chcy;", "cy;",
+ "ortmid;", "ortparallel;", "y", "y;", "gma;", "gmaf;", "gmav;",
+ "m;", "mdot;", "me;", "meq;", "mg;", "mgE;", "ml;", "mlE;", "mne;",
+ "mplus;", "mrarr;", "arr;", "allsetminus;", "ashp;", "eparsl;",
+ "id;", "ile;", "t;", "te;", "tes;", "ftcy;", "l;", "lb;", "lbar;",
+ "pf;", "ades;", "adesuit;", "ar;", "cap;", "caps;", "cup;",
+ "cups;", "sub;", "sube;", "subset;", "subseteq;", "sup;", "supe;",
+ "supset;", "supseteq;", "u;", "uare;", "uarf;", "uf;", "arr;",
+ "cr;", "etmn;", "mile;", "tarf;", "ar;", "arf;", "raightepsilon;",
+ "raightphi;", "rns;", "b;", "bE;", "bdot;", "be;", "bedot;",
+ "bmult;", "bnE;", "bne;", "bplus;", "brarr;", "bset;", "bseteq;",
+ "bseteqq;", "bsetneq;", "bsetneqq;", "bsim;", "bsub;", "bsup;",
+ "cc;", "ccapprox;", "cccurlyeq;", "cceq;", "ccnapprox;", "ccneqq;",
+ "ccnsim;", "ccsim;", "m;", "ng;", "p1", "p1;", "p2", "p2;", "p3",
+ "p3;", "p;", "pE;", "pdot;", "pdsub;", "pe;", "pedot;", "phsol;",
+ "phsub;", "plarr;", "pmult;", "pnE;", "pne;", "pplus;", "pset;",
+ "pseteq;", "pseteqq;", "psetneq;", "psetneqq;", "psim;", "psub;",
+ "psup;", "Arr;", "arhk;", "arr;", "arrow;", "nwar;", "lig", "lig;",
+ "rget;", "u;", "rk;", "aron;", "edil;", "y;", "ot;", "lrec;", "r;",
+ "ere4;", "erefore;", "eta;", "etasym;", "etav;", "ickapprox;",
+ "icksim;", "insp;", "kap;", "ksim;", "orn", "orn;", "lde;", "mes",
+ "mes;", "mesb;", "mesbar;", "mesd;", "nt;", "ea;", "p;", "pbot;",
+ "pcir;", "pf;", "pfork;", "sa;", "rime;", "ade;", "iangle;",
+ "iangledown;", "iangleleft;", "ianglelefteq;", "iangleq;",
+ "iangleright;", "ianglerighteq;", "idot;", "ie;", "iminus;",
+ "iplus;", "isb;", "itime;", "pezium;", "cr;", "cy;", "hcy;",
+ "trok;", "ixt;", "oheadleftarrow;", "oheadrightarrow;", "rr;",
+ "ar;", "cute", "cute;", "rr;", "rcy;", "reve;", "irc", "irc;",
+ "y;", "arr;", "blac;", "har;", "isht;", "r;", "rave", "rave;",
+ "arl;", "arr;", "blk;", "corn;", "corner;", "crop;", "tri;",
+ "acr;", "l", "l;", "gon;", "pf;", "arrow;", "downarrow;",
+ "harpoonleft;", "harpoonright;", "lus;", "si;", "sih;", "silon;",
+ "uparrows;", "corn;", "corner;", "crop;", "ing;", "tri;", "cr;",
+ "dot;", "ilde;", "ri;", "rif;", "arr;", "ml", "ml;", "angle;",
+ "rr;", "ar;", "arv;", "ash;", "ngrt;", "repsilon;", "rkappa;",
+ "rnothing;", "rphi;", "rpi;", "rpropto;", "rr;", "rrho;",
+ "rsigma;", "rsubsetneq;", "rsubsetneqq;", "rsupsetneq;",
+ "rsupsetneqq;", "rtheta;", "rtriangleleft;", "rtriangleright;",
+ "y;", "ash;", "e;", "ebar;", "eeq;", "llip;", "rbar;", "rt;", "r;",
+ "tri;", "sub;", "sup;", "pf;", "rop;", "tri;", "cr;", "ubnE;",
+ "ubne;", "upnE;", "upne;", "igzag;", "irc;", "dbar;", "dge;",
+ "dgeq;", "ierp;", "r;", "pf;", ";", ";", "eath;", "cr;", "ap;",
+ "irc;", "up;", "tri;", "r;", "Arr;", "arr;", ";", "Arr;", "arr;",
+ "ap;", "is;", "dot;", "pf;", "plus;", "time;", "Arr;", "arr;",
+ "cr;", "qcup;", "plus;", "tri;", "ee;", "edge;", "cute", "cute;",
+ "cy;", "irc;", "y;", "n", "n;", "r;", "cy;", "pf;", "cr;", "cy;",
+ "ml", "ml;", "cute;", "aron;", "y;", "ot;", "etrf;", "ta;", "r;",
+ "cy;", "grarr;", "pf;", "cr;", "j;", "nj;", };
+
+ static final @NoLength char[][] VALUES = { { '\u00c6' }, { '\u00c6' },
+ { '\u0026' }, { '\u0026' }, { '\u00c1' }, { '\u00c1' },
+ { '\u0102' }, { '\u00c2' }, { '\u00c2' }, { '\u0410' },
+ { '\ud835', '\udd04' }, { '\u00c0' }, { '\u00c0' }, { '\u0391' },
+ { '\u0100' }, { '\u2a53' }, { '\u0104' }, { '\ud835', '\udd38' },
+ { '\u2061' }, { '\u00c5' }, { '\u00c5' }, { '\ud835', '\udc9c' },
+ { '\u2254' }, { '\u00c3' }, { '\u00c3' }, { '\u00c4' },
+ { '\u00c4' }, { '\u2216' }, { '\u2ae7' }, { '\u2306' },
+ { '\u0411' }, { '\u2235' }, { '\u212c' }, { '\u0392' },
+ { '\ud835', '\udd05' }, { '\ud835', '\udd39' }, { '\u02d8' },
+ { '\u212c' }, { '\u224e' }, { '\u0427' }, { '\u00a9' },
+ { '\u00a9' }, { '\u0106' }, { '\u22d2' }, { '\u2145' },
+ { '\u212d' }, { '\u010c' }, { '\u00c7' }, { '\u00c7' },
+ { '\u0108' }, { '\u2230' }, { '\u010a' }, { '\u00b8' },
+ { '\u00b7' }, { '\u212d' }, { '\u03a7' }, { '\u2299' },
+ { '\u2296' }, { '\u2295' }, { '\u2297' }, { '\u2232' },
+ { '\u201d' }, { '\u2019' }, { '\u2237' }, { '\u2a74' },
+ { '\u2261' }, { '\u222f' }, { '\u222e' }, { '\u2102' },
+ { '\u2210' }, { '\u2233' }, { '\u2a2f' }, { '\ud835', '\udc9e' },
+ { '\u22d3' }, { '\u224d' }, { '\u2145' }, { '\u2911' },
+ { '\u0402' }, { '\u0405' }, { '\u040f' }, { '\u2021' },
+ { '\u21a1' }, { '\u2ae4' }, { '\u010e' }, { '\u0414' },
+ { '\u2207' }, { '\u0394' }, { '\ud835', '\udd07' }, { '\u00b4' },
+ { '\u02d9' }, { '\u02dd' }, { '\u0060' }, { '\u02dc' },
+ { '\u22c4' }, { '\u2146' }, { '\ud835', '\udd3b' }, { '\u00a8' },
+ { '\u20dc' }, { '\u2250' }, { '\u222f' }, { '\u00a8' },
+ { '\u21d3' }, { '\u21d0' }, { '\u21d4' }, { '\u2ae4' },
+ { '\u27f8' }, { '\u27fa' }, { '\u27f9' }, { '\u21d2' },
+ { '\u22a8' }, { '\u21d1' }, { '\u21d5' }, { '\u2225' },
+ { '\u2193' }, { '\u2913' }, { '\u21f5' }, { '\u0311' },
+ { '\u2950' }, { '\u295e' }, { '\u21bd' }, { '\u2956' },
+ { '\u295f' }, { '\u21c1' }, { '\u2957' }, { '\u22a4' },
+ { '\u21a7' }, { '\u21d3' }, { '\ud835', '\udc9f' }, { '\u0110' },
+ { '\u014a' }, { '\u00d0' }, { '\u00d0' }, { '\u00c9' },
+ { '\u00c9' }, { '\u011a' }, { '\u00ca' }, { '\u00ca' },
+ { '\u042d' }, { '\u0116' }, { '\ud835', '\udd08' }, { '\u00c8' },
+ { '\u00c8' }, { '\u2208' }, { '\u0112' }, { '\u25fb' },
+ { '\u25ab' }, { '\u0118' }, { '\ud835', '\udd3c' }, { '\u0395' },
+ { '\u2a75' }, { '\u2242' }, { '\u21cc' }, { '\u2130' },
+ { '\u2a73' }, { '\u0397' }, { '\u00cb' }, { '\u00cb' },
+ { '\u2203' }, { '\u2147' }, { '\u0424' }, { '\ud835', '\udd09' },
+ { '\u25fc' }, { '\u25aa' }, { '\ud835', '\udd3d' }, { '\u2200' },
+ { '\u2131' }, { '\u2131' }, { '\u0403' }, { '\u003e' },
+ { '\u003e' }, { '\u0393' }, { '\u03dc' }, { '\u011e' },
+ { '\u0122' }, { '\u011c' }, { '\u0413' }, { '\u0120' },
+ { '\ud835', '\udd0a' }, { '\u22d9' }, { '\ud835', '\udd3e' },
+ { '\u2265' }, { '\u22db' }, { '\u2267' }, { '\u2aa2' },
+ { '\u2277' }, { '\u2a7e' }, { '\u2273' }, { '\ud835', '\udca2' },
+ { '\u226b' }, { '\u042a' }, { '\u02c7' }, { '\u005e' },
+ { '\u0124' }, { '\u210c' }, { '\u210b' }, { '\u210d' },
+ { '\u2500' }, { '\u210b' }, { '\u0126' }, { '\u224e' },
+ { '\u224f' }, { '\u0415' }, { '\u0132' }, { '\u0401' },
+ { '\u00cd' }, { '\u00cd' }, { '\u00ce' }, { '\u00ce' },
+ { '\u0418' }, { '\u0130' }, { '\u2111' }, { '\u00cc' },
+ { '\u00cc' }, { '\u2111' }, { '\u012a' }, { '\u2148' },
+ { '\u21d2' }, { '\u222c' }, { '\u222b' }, { '\u22c2' },
+ { '\u2063' }, { '\u2062' }, { '\u012e' }, { '\ud835', '\udd40' },
+ { '\u0399' }, { '\u2110' }, { '\u0128' }, { '\u0406' },
+ { '\u00cf' }, { '\u00cf' }, { '\u0134' }, { '\u0419' },
+ { '\ud835', '\udd0d' }, { '\ud835', '\udd41' },
+ { '\ud835', '\udca5' }, { '\u0408' }, { '\u0404' }, { '\u0425' },
+ { '\u040c' }, { '\u039a' }, { '\u0136' }, { '\u041a' },
+ { '\ud835', '\udd0e' }, { '\ud835', '\udd42' },
+ { '\ud835', '\udca6' }, { '\u0409' }, { '\u003c' }, { '\u003c' },
+ { '\u0139' }, { '\u039b' }, { '\u27ea' }, { '\u2112' },
+ { '\u219e' }, { '\u013d' }, { '\u013b' }, { '\u041b' },
+ { '\u27e8' }, { '\u2190' }, { '\u21e4' }, { '\u21c6' },
+ { '\u2308' }, { '\u27e6' }, { '\u2961' }, { '\u21c3' },
+ { '\u2959' }, { '\u230a' }, { '\u2194' }, { '\u294e' },
+ { '\u22a3' }, { '\u21a4' }, { '\u295a' }, { '\u22b2' },
+ { '\u29cf' }, { '\u22b4' }, { '\u2951' }, { '\u2960' },
+ { '\u21bf' }, { '\u2958' }, { '\u21bc' }, { '\u2952' },
+ { '\u21d0' }, { '\u21d4' }, { '\u22da' }, { '\u2266' },
+ { '\u2276' }, { '\u2aa1' }, { '\u2a7d' }, { '\u2272' },
+ { '\ud835', '\udd0f' }, { '\u22d8' }, { '\u21da' }, { '\u013f' },
+ { '\u27f5' }, { '\u27f7' }, { '\u27f6' }, { '\u27f8' },
+ { '\u27fa' }, { '\u27f9' }, { '\ud835', '\udd43' }, { '\u2199' },
+ { '\u2198' }, { '\u2112' }, { '\u21b0' }, { '\u0141' },
+ { '\u226a' }, { '\u2905' }, { '\u041c' }, { '\u205f' },
+ { '\u2133' }, { '\ud835', '\udd10' }, { '\u2213' },
+ { '\ud835', '\udd44' }, { '\u2133' }, { '\u039c' }, { '\u040a' },
+ { '\u0143' }, { '\u0147' }, { '\u0145' }, { '\u041d' },
+ { '\u200b' }, { '\u200b' }, { '\u200b' }, { '\u200b' },
+ { '\u226b' }, { '\u226a' }, { '\n' }, { '\ud835', '\udd11' },
+ { '\u2060' }, { '\u00a0' }, { '\u2115' }, { '\u2aec' },
+ { '\u2262' }, { '\u226d' }, { '\u2226' }, { '\u2209' },
+ { '\u2260' }, { '\u2242', '\u0338' }, { '\u2204' }, { '\u226f' },
+ { '\u2271' }, { '\u2267', '\u0338' }, { '\u226b', '\u0338' },
+ { '\u2279' }, { '\u2a7e', '\u0338' }, { '\u2275' },
+ { '\u224e', '\u0338' }, { '\u224f', '\u0338' }, { '\u22ea' },
+ { '\u29cf', '\u0338' }, { '\u22ec' }, { '\u226e' }, { '\u2270' },
+ { '\u2278' }, { '\u226a', '\u0338' }, { '\u2a7d', '\u0338' },
+ { '\u2274' }, { '\u2aa2', '\u0338' }, { '\u2aa1', '\u0338' },
+ { '\u2280' }, { '\u2aaf', '\u0338' }, { '\u22e0' }, { '\u220c' },
+ { '\u22eb' }, { '\u29d0', '\u0338' }, { '\u22ed' },
+ { '\u228f', '\u0338' }, { '\u22e2' }, { '\u2290', '\u0338' },
+ { '\u22e3' }, { '\u2282', '\u20d2' }, { '\u2288' }, { '\u2281' },
+ { '\u2ab0', '\u0338' }, { '\u22e1' }, { '\u227f', '\u0338' },
+ { '\u2283', '\u20d2' }, { '\u2289' }, { '\u2241' }, { '\u2244' },
+ { '\u2247' }, { '\u2249' }, { '\u2224' }, { '\ud835', '\udca9' },
+ { '\u00d1' }, { '\u00d1' }, { '\u039d' }, { '\u0152' },
+ { '\u00d3' }, { '\u00d3' }, { '\u00d4' }, { '\u00d4' },
+ { '\u041e' }, { '\u0150' }, { '\ud835', '\udd12' }, { '\u00d2' },
+ { '\u00d2' }, { '\u014c' }, { '\u03a9' }, { '\u039f' },
+ { '\ud835', '\udd46' }, { '\u201c' }, { '\u2018' }, { '\u2a54' },
+ { '\ud835', '\udcaa' }, { '\u00d8' }, { '\u00d8' }, { '\u00d5' },
+ { '\u00d5' }, { '\u2a37' }, { '\u00d6' }, { '\u00d6' },
+ { '\u203e' }, { '\u23de' }, { '\u23b4' }, { '\u23dc' },
+ { '\u2202' }, { '\u041f' }, { '\ud835', '\udd13' }, { '\u03a6' },
+ { '\u03a0' }, { '\u00b1' }, { '\u210c' }, { '\u2119' },
+ { '\u2abb' }, { '\u227a' }, { '\u2aaf' }, { '\u227c' },
+ { '\u227e' }, { '\u2033' }, { '\u220f' }, { '\u2237' },
+ { '\u221d' }, { '\ud835', '\udcab' }, { '\u03a8' }, { '\u0022' },
+ { '\u0022' }, { '\ud835', '\udd14' }, { '\u211a' },
+ { '\ud835', '\udcac' }, { '\u2910' }, { '\u00ae' }, { '\u00ae' },
+ { '\u0154' }, { '\u27eb' }, { '\u21a0' }, { '\u2916' },
+ { '\u0158' }, { '\u0156' }, { '\u0420' }, { '\u211c' },
+ { '\u220b' }, { '\u21cb' }, { '\u296f' }, { '\u211c' },
+ { '\u03a1' }, { '\u27e9' }, { '\u2192' }, { '\u21e5' },
+ { '\u21c4' }, { '\u2309' }, { '\u27e7' }, { '\u295d' },
+ { '\u21c2' }, { '\u2955' }, { '\u230b' }, { '\u22a2' },
+ { '\u21a6' }, { '\u295b' }, { '\u22b3' }, { '\u29d0' },
+ { '\u22b5' }, { '\u294f' }, { '\u295c' }, { '\u21be' },
+ { '\u2954' }, { '\u21c0' }, { '\u2953' }, { '\u21d2' },
+ { '\u211d' }, { '\u2970' }, { '\u21db' }, { '\u211b' },
+ { '\u21b1' }, { '\u29f4' }, { '\u0429' }, { '\u0428' },
+ { '\u042c' }, { '\u015a' }, { '\u2abc' }, { '\u0160' },
+ { '\u015e' }, { '\u015c' }, { '\u0421' }, { '\ud835', '\udd16' },
+ { '\u2193' }, { '\u2190' }, { '\u2192' }, { '\u2191' },
+ { '\u03a3' }, { '\u2218' }, { '\ud835', '\udd4a' }, { '\u221a' },
+ { '\u25a1' }, { '\u2293' }, { '\u228f' }, { '\u2291' },
+ { '\u2290' }, { '\u2292' }, { '\u2294' }, { '\ud835', '\udcae' },
+ { '\u22c6' }, { '\u22d0' }, { '\u22d0' }, { '\u2286' },
+ { '\u227b' }, { '\u2ab0' }, { '\u227d' }, { '\u227f' },
+ { '\u220b' }, { '\u2211' }, { '\u22d1' }, { '\u2283' },
+ { '\u2287' }, { '\u22d1' }, { '\u00de' }, { '\u00de' },
+ { '\u2122' }, { '\u040b' }, { '\u0426' }, { '\u0009' },
+ { '\u03a4' }, { '\u0164' }, { '\u0162' }, { '\u0422' },
+ { '\ud835', '\udd17' }, { '\u2234' }, { '\u0398' },
+ { '\u205f', '\u200a' }, { '\u2009' }, { '\u223c' }, { '\u2243' },
+ { '\u2245' }, { '\u2248' }, { '\ud835', '\udd4b' }, { '\u20db' },
+ { '\ud835', '\udcaf' }, { '\u0166' }, { '\u00da' }, { '\u00da' },
+ { '\u219f' }, { '\u2949' }, { '\u040e' }, { '\u016c' },
+ { '\u00db' }, { '\u00db' }, { '\u0423' }, { '\u0170' },
+ { '\ud835', '\udd18' }, { '\u00d9' }, { '\u00d9' }, { '\u016a' },
+ { '\u005f' }, { '\u23df' }, { '\u23b5' }, { '\u23dd' },
+ { '\u22c3' }, { '\u228e' }, { '\u0172' }, { '\ud835', '\udd4c' },
+ { '\u2191' }, { '\u2912' }, { '\u21c5' }, { '\u2195' },
+ { '\u296e' }, { '\u22a5' }, { '\u21a5' }, { '\u21d1' },
+ { '\u21d5' }, { '\u2196' }, { '\u2197' }, { '\u03d2' },
+ { '\u03a5' }, { '\u016e' }, { '\ud835', '\udcb0' }, { '\u0168' },
+ { '\u00dc' }, { '\u00dc' }, { '\u22ab' }, { '\u2aeb' },
+ { '\u0412' }, { '\u22a9' }, { '\u2ae6' }, { '\u22c1' },
+ { '\u2016' }, { '\u2016' }, { '\u2223' }, { '\u007c' },
+ { '\u2758' }, { '\u2240' }, { '\u200a' }, { '\ud835', '\udd19' },
+ { '\ud835', '\udd4d' }, { '\ud835', '\udcb1' }, { '\u22aa' },
+ { '\u0174' }, { '\u22c0' }, { '\ud835', '\udd1a' },
+ { '\ud835', '\udd4e' }, { '\ud835', '\udcb2' },
+ { '\ud835', '\udd1b' }, { '\u039e' }, { '\ud835', '\udd4f' },
+ { '\ud835', '\udcb3' }, { '\u042f' }, { '\u0407' }, { '\u042e' },
+ { '\u00dd' }, { '\u00dd' }, { '\u0176' }, { '\u042b' },
+ { '\ud835', '\udd1c' }, { '\ud835', '\udd50' },
+ { '\ud835', '\udcb4' }, { '\u0178' }, { '\u0416' }, { '\u0179' },
+ { '\u017d' }, { '\u0417' }, { '\u017b' }, { '\u200b' },
+ { '\u0396' }, { '\u2128' }, { '\u2124' }, { '\ud835', '\udcb5' },
+ { '\u00e1' }, { '\u00e1' }, { '\u0103' }, { '\u223e' },
+ { '\u223e', '\u0333' }, { '\u223f' }, { '\u00e2' }, { '\u00e2' },
+ { '\u00b4' }, { '\u00b4' }, { '\u0430' }, { '\u00e6' },
+ { '\u00e6' }, { '\u2061' }, { '\ud835', '\udd1e' }, { '\u00e0' },
+ { '\u00e0' }, { '\u2135' }, { '\u2135' }, { '\u03b1' },
+ { '\u0101' }, { '\u2a3f' }, { '\u0026' }, { '\u0026' },
+ { '\u2227' }, { '\u2a55' }, { '\u2a5c' }, { '\u2a58' },
+ { '\u2a5a' }, { '\u2220' }, { '\u29a4' }, { '\u2220' },
+ { '\u2221' }, { '\u29a8' }, { '\u29a9' }, { '\u29aa' },
+ { '\u29ab' }, { '\u29ac' }, { '\u29ad' }, { '\u29ae' },
+ { '\u29af' }, { '\u221f' }, { '\u22be' }, { '\u299d' },
+ { '\u2222' }, { '\u00c5' }, { '\u237c' }, { '\u0105' },
+ { '\ud835', '\udd52' }, { '\u2248' }, { '\u2a70' }, { '\u2a6f' },
+ { '\u224a' }, { '\u224b' }, { '\'' }, { '\u2248' }, { '\u224a' },
+ { '\u00e5' }, { '\u00e5' }, { '\ud835', '\udcb6' }, { '\u002a' },
+ { '\u2248' }, { '\u224d' }, { '\u00e3' }, { '\u00e3' },
+ { '\u00e4' }, { '\u00e4' }, { '\u2233' }, { '\u2a11' },
+ { '\u2aed' }, { '\u224c' }, { '\u03f6' }, { '\u2035' },
+ { '\u223d' }, { '\u22cd' }, { '\u22bd' }, { '\u2305' },
+ { '\u2305' }, { '\u23b5' }, { '\u23b6' }, { '\u224c' },
+ { '\u0431' }, { '\u201e' }, { '\u2235' }, { '\u2235' },
+ { '\u29b0' }, { '\u03f6' }, { '\u212c' }, { '\u03b2' },
+ { '\u2136' }, { '\u226c' }, { '\ud835', '\udd1f' }, { '\u22c2' },
+ { '\u25ef' }, { '\u22c3' }, { '\u2a00' }, { '\u2a01' },
+ { '\u2a02' }, { '\u2a06' }, { '\u2605' }, { '\u25bd' },
+ { '\u25b3' }, { '\u2a04' }, { '\u22c1' }, { '\u22c0' },
+ { '\u290d' }, { '\u29eb' }, { '\u25aa' }, { '\u25b4' },
+ { '\u25be' }, { '\u25c2' }, { '\u25b8' }, { '\u2423' },
+ { '\u2592' }, { '\u2591' }, { '\u2593' }, { '\u2588' },
+ { '\u003d', '\u20e5' }, { '\u2261', '\u20e5' }, { '\u2310' },
+ { '\ud835', '\udd53' }, { '\u22a5' }, { '\u22a5' }, { '\u22c8' },
+ { '\u2557' }, { '\u2554' }, { '\u2556' }, { '\u2553' },
+ { '\u2550' }, { '\u2566' }, { '\u2569' }, { '\u2564' },
+ { '\u2567' }, { '\u255d' }, { '\u255a' }, { '\u255c' },
+ { '\u2559' }, { '\u2551' }, { '\u256c' }, { '\u2563' },
+ { '\u2560' }, { '\u256b' }, { '\u2562' }, { '\u255f' },
+ { '\u29c9' }, { '\u2555' }, { '\u2552' }, { '\u2510' },
+ { '\u250c' }, { '\u2500' }, { '\u2565' }, { '\u2568' },
+ { '\u252c' }, { '\u2534' }, { '\u229f' }, { '\u229e' },
+ { '\u22a0' }, { '\u255b' }, { '\u2558' }, { '\u2518' },
+ { '\u2514' }, { '\u2502' }, { '\u256a' }, { '\u2561' },
+ { '\u255e' }, { '\u253c' }, { '\u2524' }, { '\u251c' },
+ { '\u2035' }, { '\u02d8' }, { '\u00a6' }, { '\u00a6' },
+ { '\ud835', '\udcb7' }, { '\u204f' }, { '\u223d' }, { '\u22cd' },
+ { '\\' }, { '\u29c5' }, { '\u27c8' }, { '\u2022' }, { '\u2022' },
+ { '\u224e' }, { '\u2aae' }, { '\u224f' }, { '\u224f' },
+ { '\u0107' }, { '\u2229' }, { '\u2a44' }, { '\u2a49' },
+ { '\u2a4b' }, { '\u2a47' }, { '\u2a40' }, { '\u2229', '\ufe00' },
+ { '\u2041' }, { '\u02c7' }, { '\u2a4d' }, { '\u010d' },
+ { '\u00e7' }, { '\u00e7' }, { '\u0109' }, { '\u2a4c' },
+ { '\u2a50' }, { '\u010b' }, { '\u00b8' }, { '\u00b8' },
+ { '\u29b2' }, { '\u00a2' }, { '\u00a2' }, { '\u00b7' },
+ { '\ud835', '\udd20' }, { '\u0447' }, { '\u2713' }, { '\u2713' },
+ { '\u03c7' }, { '\u25cb' }, { '\u29c3' }, { '\u02c6' },
+ { '\u2257' }, { '\u21ba' }, { '\u21bb' }, { '\u00ae' },
+ { '\u24c8' }, { '\u229b' }, { '\u229a' }, { '\u229d' },
+ { '\u2257' }, { '\u2a10' }, { '\u2aef' }, { '\u29c2' },
+ { '\u2663' }, { '\u2663' }, { '\u003a' }, { '\u2254' },
+ { '\u2254' }, { '\u002c' }, { '\u0040' }, { '\u2201' },
+ { '\u2218' }, { '\u2201' }, { '\u2102' }, { '\u2245' },
+ { '\u2a6d' }, { '\u222e' }, { '\ud835', '\udd54' }, { '\u2210' },
+ { '\u00a9' }, { '\u00a9' }, { '\u2117' }, { '\u21b5' },
+ { '\u2717' }, { '\ud835', '\udcb8' }, { '\u2acf' }, { '\u2ad1' },
+ { '\u2ad0' }, { '\u2ad2' }, { '\u22ef' }, { '\u2938' },
+ { '\u2935' }, { '\u22de' }, { '\u22df' }, { '\u21b6' },
+ { '\u293d' }, { '\u222a' }, { '\u2a48' }, { '\u2a46' },
+ { '\u2a4a' }, { '\u228d' }, { '\u2a45' }, { '\u222a', '\ufe00' },
+ { '\u21b7' }, { '\u293c' }, { '\u22de' }, { '\u22df' },
+ { '\u22ce' }, { '\u22cf' }, { '\u00a4' }, { '\u00a4' },
+ { '\u21b6' }, { '\u21b7' }, { '\u22ce' }, { '\u22cf' },
+ { '\u2232' }, { '\u2231' }, { '\u232d' }, { '\u21d3' },
+ { '\u2965' }, { '\u2020' }, { '\u2138' }, { '\u2193' },
+ { '\u2010' }, { '\u22a3' }, { '\u290f' }, { '\u02dd' },
+ { '\u010f' }, { '\u0434' }, { '\u2146' }, { '\u2021' },
+ { '\u21ca' }, { '\u2a77' }, { '\u00b0' }, { '\u00b0' },
+ { '\u03b4' }, { '\u29b1' }, { '\u297f' }, { '\ud835', '\udd21' },
+ { '\u21c3' }, { '\u21c2' }, { '\u22c4' }, { '\u22c4' },
+ { '\u2666' }, { '\u2666' }, { '\u00a8' }, { '\u03dd' },
+ { '\u22f2' }, { '\u00f7' }, { '\u00f7' }, { '\u00f7' },
+ { '\u22c7' }, { '\u22c7' }, { '\u0452' }, { '\u231e' },
+ { '\u230d' }, { '\u0024' }, { '\ud835', '\udd55' }, { '\u02d9' },
+ { '\u2250' }, { '\u2251' }, { '\u2238' }, { '\u2214' },
+ { '\u22a1' }, { '\u2306' }, { '\u2193' }, { '\u21ca' },
+ { '\u21c3' }, { '\u21c2' }, { '\u2910' }, { '\u231f' },
+ { '\u230c' }, { '\ud835', '\udcb9' }, { '\u0455' }, { '\u29f6' },
+ { '\u0111' }, { '\u22f1' }, { '\u25bf' }, { '\u25be' },
+ { '\u21f5' }, { '\u296f' }, { '\u29a6' }, { '\u045f' },
+ { '\u27ff' }, { '\u2a77' }, { '\u2251' }, { '\u00e9' },
+ { '\u00e9' }, { '\u2a6e' }, { '\u011b' }, { '\u2256' },
+ { '\u00ea' }, { '\u00ea' }, { '\u2255' }, { '\u044d' },
+ { '\u0117' }, { '\u2147' }, { '\u2252' }, { '\ud835', '\udd22' },
+ { '\u2a9a' }, { '\u00e8' }, { '\u00e8' }, { '\u2a96' },
+ { '\u2a98' }, { '\u2a99' }, { '\u23e7' }, { '\u2113' },
+ { '\u2a95' }, { '\u2a97' }, { '\u0113' }, { '\u2205' },
+ { '\u2205' }, { '\u2205' }, { '\u2004' }, { '\u2005' },
+ { '\u2003' }, { '\u014b' }, { '\u2002' }, { '\u0119' },
+ { '\ud835', '\udd56' }, { '\u22d5' }, { '\u29e3' }, { '\u2a71' },
+ { '\u03b5' }, { '\u03b5' }, { '\u03f5' }, { '\u2256' },
+ { '\u2255' }, { '\u2242' }, { '\u2a96' }, { '\u2a95' },
+ { '\u003d' }, { '\u225f' }, { '\u2261' }, { '\u2a78' },
+ { '\u29e5' }, { '\u2253' }, { '\u2971' }, { '\u212f' },
+ { '\u2250' }, { '\u2242' }, { '\u03b7' }, { '\u00f0' },
+ { '\u00f0' }, { '\u00eb' }, { '\u00eb' }, { '\u20ac' },
+ { '\u0021' }, { '\u2203' }, { '\u2130' }, { '\u2147' },
+ { '\u2252' }, { '\u0444' }, { '\u2640' }, { '\ufb03' },
+ { '\ufb00' }, { '\ufb04' }, { '\ud835', '\udd23' }, { '\ufb01' },
+ { '\u0066', '\u006a' }, { '\u266d' }, { '\ufb02' }, { '\u25b1' },
+ { '\u0192' }, { '\ud835', '\udd57' }, { '\u2200' }, { '\u22d4' },
+ { '\u2ad9' }, { '\u2a0d' }, { '\u00bd' }, { '\u00bd' },
+ { '\u2153' }, { '\u00bc' }, { '\u00bc' }, { '\u2155' },
+ { '\u2159' }, { '\u215b' }, { '\u2154' }, { '\u2156' },
+ { '\u00be' }, { '\u00be' }, { '\u2157' }, { '\u215c' },
+ { '\u2158' }, { '\u215a' }, { '\u215d' }, { '\u215e' },
+ { '\u2044' }, { '\u2322' }, { '\ud835', '\udcbb' }, { '\u2267' },
+ { '\u2a8c' }, { '\u01f5' }, { '\u03b3' }, { '\u03dd' },
+ { '\u2a86' }, { '\u011f' }, { '\u011d' }, { '\u0433' },
+ { '\u0121' }, { '\u2265' }, { '\u22db' }, { '\u2265' },
+ { '\u2267' }, { '\u2a7e' }, { '\u2a7e' }, { '\u2aa9' },
+ { '\u2a80' }, { '\u2a82' }, { '\u2a84' }, { '\u22db', '\ufe00' },
+ { '\u2a94' }, { '\ud835', '\udd24' }, { '\u226b' }, { '\u22d9' },
+ { '\u2137' }, { '\u0453' }, { '\u2277' }, { '\u2a92' },
+ { '\u2aa5' }, { '\u2aa4' }, { '\u2269' }, { '\u2a8a' },
+ { '\u2a8a' }, { '\u2a88' }, { '\u2a88' }, { '\u2269' },
+ { '\u22e7' }, { '\ud835', '\udd58' }, { '\u0060' }, { '\u210a' },
+ { '\u2273' }, { '\u2a8e' }, { '\u2a90' }, { '\u003e' },
+ { '\u003e' }, { '\u2aa7' }, { '\u2a7a' }, { '\u22d7' },
+ { '\u2995' }, { '\u2a7c' }, { '\u2a86' }, { '\u2978' },
+ { '\u22d7' }, { '\u22db' }, { '\u2a8c' }, { '\u2277' },
+ { '\u2273' }, { '\u2269', '\ufe00' }, { '\u2269', '\ufe00' },
+ { '\u21d4' }, { '\u200a' }, { '\u00bd' }, { '\u210b' },
+ { '\u044a' }, { '\u2194' }, { '\u2948' }, { '\u21ad' },
+ { '\u210f' }, { '\u0125' }, { '\u2665' }, { '\u2665' },
+ { '\u2026' }, { '\u22b9' }, { '\ud835', '\udd25' }, { '\u2925' },
+ { '\u2926' }, { '\u21ff' }, { '\u223b' }, { '\u21a9' },
+ { '\u21aa' }, { '\ud835', '\udd59' }, { '\u2015' },
+ { '\ud835', '\udcbd' }, { '\u210f' }, { '\u0127' }, { '\u2043' },
+ { '\u2010' }, { '\u00ed' }, { '\u00ed' }, { '\u2063' },
+ { '\u00ee' }, { '\u00ee' }, { '\u0438' }, { '\u0435' },
+ { '\u00a1' }, { '\u00a1' }, { '\u21d4' }, { '\ud835', '\udd26' },
+ { '\u00ec' }, { '\u00ec' }, { '\u2148' }, { '\u2a0c' },
+ { '\u222d' }, { '\u29dc' }, { '\u2129' }, { '\u0133' },
+ { '\u012b' }, { '\u2111' }, { '\u2110' }, { '\u2111' },
+ { '\u0131' }, { '\u22b7' }, { '\u01b5' }, { '\u2208' },
+ { '\u2105' }, { '\u221e' }, { '\u29dd' }, { '\u0131' },
+ { '\u222b' }, { '\u22ba' }, { '\u2124' }, { '\u22ba' },
+ { '\u2a17' }, { '\u2a3c' }, { '\u0451' }, { '\u012f' },
+ { '\ud835', '\udd5a' }, { '\u03b9' }, { '\u2a3c' }, { '\u00bf' },
+ { '\u00bf' }, { '\ud835', '\udcbe' }, { '\u2208' }, { '\u22f9' },
+ { '\u22f5' }, { '\u22f4' }, { '\u22f3' }, { '\u2208' },
+ { '\u2062' }, { '\u0129' }, { '\u0456' }, { '\u00ef' },
+ { '\u00ef' }, { '\u0135' }, { '\u0439' }, { '\ud835', '\udd27' },
+ { '\u0237' }, { '\ud835', '\udd5b' }, { '\ud835', '\udcbf' },
+ { '\u0458' }, { '\u0454' }, { '\u03ba' }, { '\u03f0' },
+ { '\u0137' }, { '\u043a' }, { '\ud835', '\udd28' }, { '\u0138' },
+ { '\u0445' }, { '\u045c' }, { '\ud835', '\udd5c' },
+ { '\ud835', '\udcc0' }, { '\u21da' }, { '\u21d0' }, { '\u291b' },
+ { '\u290e' }, { '\u2266' }, { '\u2a8b' }, { '\u2962' },
+ { '\u013a' }, { '\u29b4' }, { '\u2112' }, { '\u03bb' },
+ { '\u27e8' }, { '\u2991' }, { '\u27e8' }, { '\u2a85' },
+ { '\u00ab' }, { '\u00ab' }, { '\u2190' }, { '\u21e4' },
+ { '\u291f' }, { '\u291d' }, { '\u21a9' }, { '\u21ab' },
+ { '\u2939' }, { '\u2973' }, { '\u21a2' }, { '\u2aab' },
+ { '\u2919' }, { '\u2aad' }, { '\u2aad', '\ufe00' }, { '\u290c' },
+ { '\u2772' }, { '\u007b' }, { '\u005b' }, { '\u298b' },
+ { '\u298f' }, { '\u298d' }, { '\u013e' }, { '\u013c' },
+ { '\u2308' }, { '\u007b' }, { '\u043b' }, { '\u2936' },
+ { '\u201c' }, { '\u201e' }, { '\u2967' }, { '\u294b' },
+ { '\u21b2' }, { '\u2264' }, { '\u2190' }, { '\u21a2' },
+ { '\u21bd' }, { '\u21bc' }, { '\u21c7' }, { '\u2194' },
+ { '\u21c6' }, { '\u21cb' }, { '\u21ad' }, { '\u22cb' },
+ { '\u22da' }, { '\u2264' }, { '\u2266' }, { '\u2a7d' },
+ { '\u2a7d' }, { '\u2aa8' }, { '\u2a7f' }, { '\u2a81' },
+ { '\u2a83' }, { '\u22da', '\ufe00' }, { '\u2a93' }, { '\u2a85' },
+ { '\u22d6' }, { '\u22da' }, { '\u2a8b' }, { '\u2276' },
+ { '\u2272' }, { '\u297c' }, { '\u230a' }, { '\ud835', '\udd29' },
+ { '\u2276' }, { '\u2a91' }, { '\u21bd' }, { '\u21bc' },
+ { '\u296a' }, { '\u2584' }, { '\u0459' }, { '\u226a' },
+ { '\u21c7' }, { '\u231e' }, { '\u296b' }, { '\u25fa' },
+ { '\u0140' }, { '\u23b0' }, { '\u23b0' }, { '\u2268' },
+ { '\u2a89' }, { '\u2a89' }, { '\u2a87' }, { '\u2a87' },
+ { '\u2268' }, { '\u22e6' }, { '\u27ec' }, { '\u21fd' },
+ { '\u27e6' }, { '\u27f5' }, { '\u27f7' }, { '\u27fc' },
+ { '\u27f6' }, { '\u21ab' }, { '\u21ac' }, { '\u2985' },
+ { '\ud835', '\udd5d' }, { '\u2a2d' }, { '\u2a34' }, { '\u2217' },
+ { '\u005f' }, { '\u25ca' }, { '\u25ca' }, { '\u29eb' },
+ { '\u0028' }, { '\u2993' }, { '\u21c6' }, { '\u231f' },
+ { '\u21cb' }, { '\u296d' }, { '\u200e' }, { '\u22bf' },
+ { '\u2039' }, { '\ud835', '\udcc1' }, { '\u21b0' }, { '\u2272' },
+ { '\u2a8d' }, { '\u2a8f' }, { '\u005b' }, { '\u2018' },
+ { '\u201a' }, { '\u0142' }, { '\u003c' }, { '\u003c' },
+ { '\u2aa6' }, { '\u2a79' }, { '\u22d6' }, { '\u22cb' },
+ { '\u22c9' }, { '\u2976' }, { '\u2a7b' }, { '\u2996' },
+ { '\u25c3' }, { '\u22b4' }, { '\u25c2' }, { '\u294a' },
+ { '\u2966' }, { '\u2268', '\ufe00' }, { '\u2268', '\ufe00' },
+ { '\u223a' }, { '\u00af' }, { '\u00af' }, { '\u2642' },
+ { '\u2720' }, { '\u2720' }, { '\u21a6' }, { '\u21a6' },
+ { '\u21a7' }, { '\u21a4' }, { '\u21a5' }, { '\u25ae' },
+ { '\u2a29' }, { '\u043c' }, { '\u2014' }, { '\u2221' },
+ { '\ud835', '\udd2a' }, { '\u2127' }, { '\u00b5' }, { '\u00b5' },
+ { '\u2223' }, { '\u002a' }, { '\u2af0' }, { '\u00b7' },
+ { '\u00b7' }, { '\u2212' }, { '\u229f' }, { '\u2238' },
+ { '\u2a2a' }, { '\u2adb' }, { '\u2026' }, { '\u2213' },
+ { '\u22a7' }, { '\ud835', '\udd5e' }, { '\u2213' },
+ { '\ud835', '\udcc2' }, { '\u223e' }, { '\u03bc' }, { '\u22b8' },
+ { '\u22b8' }, { '\u22d9', '\u0338' }, { '\u226b', '\u20d2' },
+ { '\u226b', '\u0338' }, { '\u21cd' }, { '\u21ce' },
+ { '\u22d8', '\u0338' }, { '\u226a', '\u20d2' },
+ { '\u226a', '\u0338' }, { '\u21cf' }, { '\u22af' }, { '\u22ae' },
+ { '\u2207' }, { '\u0144' }, { '\u2220', '\u20d2' }, { '\u2249' },
+ { '\u2a70', '\u0338' }, { '\u224b', '\u0338' }, { '\u0149' },
+ { '\u2249' }, { '\u266e' }, { '\u266e' }, { '\u2115' },
+ { '\u00a0' }, { '\u00a0' }, { '\u224e', '\u0338' },
+ { '\u224f', '\u0338' }, { '\u2a43' }, { '\u0148' }, { '\u0146' },
+ { '\u2247' }, { '\u2a6d', '\u0338' }, { '\u2a42' }, { '\u043d' },
+ { '\u2013' }, { '\u2260' }, { '\u21d7' }, { '\u2924' },
+ { '\u2197' }, { '\u2197' }, { '\u2250', '\u0338' }, { '\u2262' },
+ { '\u2928' }, { '\u2242', '\u0338' }, { '\u2204' }, { '\u2204' },
+ { '\ud835', '\udd2b' }, { '\u2267', '\u0338' }, { '\u2271' },
+ { '\u2271' }, { '\u2267', '\u0338' }, { '\u2a7e', '\u0338' },
+ { '\u2a7e', '\u0338' }, { '\u2275' }, { '\u226f' }, { '\u226f' },
+ { '\u21ce' }, { '\u21ae' }, { '\u2af2' }, { '\u220b' },
+ { '\u22fc' }, { '\u22fa' }, { '\u220b' }, { '\u045a' },
+ { '\u21cd' }, { '\u2266', '\u0338' }, { '\u219a' }, { '\u2025' },
+ { '\u2270' }, { '\u219a' }, { '\u21ae' }, { '\u2270' },
+ { '\u2266', '\u0338' }, { '\u2a7d', '\u0338' },
+ { '\u2a7d', '\u0338' }, { '\u226e' }, { '\u2274' }, { '\u226e' },
+ { '\u22ea' }, { '\u22ec' }, { '\u2224' }, { '\ud835', '\udd5f' },
+ { '\u00ac' }, { '\u00ac' }, { '\u2209' }, { '\u22f9', '\u0338' },
+ { '\u22f5', '\u0338' }, { '\u2209' }, { '\u22f7' }, { '\u22f6' },
+ { '\u220c' }, { '\u220c' }, { '\u22fe' }, { '\u22fd' },
+ { '\u2226' }, { '\u2226' }, { '\u2afd', '\u20e5' },
+ { '\u2202', '\u0338' }, { '\u2a14' }, { '\u2280' }, { '\u22e0' },
+ { '\u2aaf', '\u0338' }, { '\u2280' }, { '\u2aaf', '\u0338' },
+ { '\u21cf' }, { '\u219b' }, { '\u2933', '\u0338' },
+ { '\u219d', '\u0338' }, { '\u219b' }, { '\u22eb' }, { '\u22ed' },
+ { '\u2281' }, { '\u22e1' }, { '\u2ab0', '\u0338' },
+ { '\ud835', '\udcc3' }, { '\u2224' }, { '\u2226' }, { '\u2241' },
+ { '\u2244' }, { '\u2244' }, { '\u2224' }, { '\u2226' },
+ { '\u22e2' }, { '\u22e3' }, { '\u2284' }, { '\u2ac5', '\u0338' },
+ { '\u2288' }, { '\u2282', '\u20d2' }, { '\u2288' },
+ { '\u2ac5', '\u0338' }, { '\u2281' }, { '\u2ab0', '\u0338' },
+ { '\u2285' }, { '\u2ac6', '\u0338' }, { '\u2289' },
+ { '\u2283', '\u20d2' }, { '\u2289' }, { '\u2ac6', '\u0338' },
+ { '\u2279' }, { '\u00f1' }, { '\u00f1' }, { '\u2278' },
+ { '\u22ea' }, { '\u22ec' }, { '\u22eb' }, { '\u22ed' },
+ { '\u03bd' }, { '\u0023' }, { '\u2116' }, { '\u2007' },
+ { '\u22ad' }, { '\u2904' }, { '\u224d', '\u20d2' }, { '\u22ac' },
+ { '\u2265', '\u20d2' }, { '\u003e', '\u20d2' }, { '\u29de' },
+ { '\u2902' }, { '\u2264', '\u20d2' }, { '\u003c', '\u20d2' },
+ { '\u22b4', '\u20d2' }, { '\u2903' }, { '\u22b5', '\u20d2' },
+ { '\u223c', '\u20d2' }, { '\u21d6' }, { '\u2923' }, { '\u2196' },
+ { '\u2196' }, { '\u2927' }, { '\u24c8' }, { '\u00f3' },
+ { '\u00f3' }, { '\u229b' }, { '\u229a' }, { '\u00f4' },
+ { '\u00f4' }, { '\u043e' }, { '\u229d' }, { '\u0151' },
+ { '\u2a38' }, { '\u2299' }, { '\u29bc' }, { '\u0153' },
+ { '\u29bf' }, { '\ud835', '\udd2c' }, { '\u02db' }, { '\u00f2' },
+ { '\u00f2' }, { '\u29c1' }, { '\u29b5' }, { '\u03a9' },
+ { '\u222e' }, { '\u21ba' }, { '\u29be' }, { '\u29bb' },
+ { '\u203e' }, { '\u29c0' }, { '\u014d' }, { '\u03c9' },
+ { '\u03bf' }, { '\u29b6' }, { '\u2296' }, { '\ud835', '\udd60' },
+ { '\u29b7' }, { '\u29b9' }, { '\u2295' }, { '\u2228' },
+ { '\u21bb' }, { '\u2a5d' }, { '\u2134' }, { '\u2134' },
+ { '\u00aa' }, { '\u00aa' }, { '\u00ba' }, { '\u00ba' },
+ { '\u22b6' }, { '\u2a56' }, { '\u2a57' }, { '\u2a5b' },
+ { '\u2134' }, { '\u00f8' }, { '\u00f8' }, { '\u2298' },
+ { '\u00f5' }, { '\u00f5' }, { '\u2297' }, { '\u2a36' },
+ { '\u00f6' }, { '\u00f6' }, { '\u233d' }, { '\u2225' },
+ { '\u00b6' }, { '\u00b6' }, { '\u2225' }, { '\u2af3' },
+ { '\u2afd' }, { '\u2202' }, { '\u043f' }, { '\u0025' },
+ { '\u002e' }, { '\u2030' }, { '\u22a5' }, { '\u2031' },
+ { '\ud835', '\udd2d' }, { '\u03c6' }, { '\u03d5' }, { '\u2133' },
+ { '\u260e' }, { '\u03c0' }, { '\u22d4' }, { '\u03d6' },
+ { '\u210f' }, { '\u210e' }, { '\u210f' }, { '\u002b' },
+ { '\u2a23' }, { '\u229e' }, { '\u2a22' }, { '\u2214' },
+ { '\u2a25' }, { '\u2a72' }, { '\u00b1' }, { '\u00b1' },
+ { '\u2a26' }, { '\u2a27' }, { '\u00b1' }, { '\u2a15' },
+ { '\ud835', '\udd61' }, { '\u00a3' }, { '\u00a3' }, { '\u227a' },
+ { '\u2ab3' }, { '\u2ab7' }, { '\u227c' }, { '\u2aaf' },
+ { '\u227a' }, { '\u2ab7' }, { '\u227c' }, { '\u2aaf' },
+ { '\u2ab9' }, { '\u2ab5' }, { '\u22e8' }, { '\u227e' },
+ { '\u2032' }, { '\u2119' }, { '\u2ab5' }, { '\u2ab9' },
+ { '\u22e8' }, { '\u220f' }, { '\u232e' }, { '\u2312' },
+ { '\u2313' }, { '\u221d' }, { '\u221d' }, { '\u227e' },
+ { '\u22b0' }, { '\ud835', '\udcc5' }, { '\u03c8' }, { '\u2008' },
+ { '\ud835', '\udd2e' }, { '\u2a0c' }, { '\ud835', '\udd62' },
+ { '\u2057' }, { '\ud835', '\udcc6' }, { '\u210d' }, { '\u2a16' },
+ { '\u003f' }, { '\u225f' }, { '\u0022' }, { '\u0022' },
+ { '\u21db' }, { '\u21d2' }, { '\u291c' }, { '\u290f' },
+ { '\u2964' }, { '\u223d', '\u0331' }, { '\u0155' }, { '\u221a' },
+ { '\u29b3' }, { '\u27e9' }, { '\u2992' }, { '\u29a5' },
+ { '\u27e9' }, { '\u00bb' }, { '\u00bb' }, { '\u2192' },
+ { '\u2975' }, { '\u21e5' }, { '\u2920' }, { '\u2933' },
+ { '\u291e' }, { '\u21aa' }, { '\u21ac' }, { '\u2945' },
+ { '\u2974' }, { '\u21a3' }, { '\u219d' }, { '\u291a' },
+ { '\u2236' }, { '\u211a' }, { '\u290d' }, { '\u2773' },
+ { '\u007d' }, { '\u005d' }, { '\u298c' }, { '\u298e' },
+ { '\u2990' }, { '\u0159' }, { '\u0157' }, { '\u2309' },
+ { '\u007d' }, { '\u0440' }, { '\u2937' }, { '\u2969' },
+ { '\u201d' }, { '\u201d' }, { '\u21b3' }, { '\u211c' },
+ { '\u211b' }, { '\u211c' }, { '\u211d' }, { '\u25ad' },
+ { '\u00ae' }, { '\u00ae' }, { '\u297d' }, { '\u230b' },
+ { '\ud835', '\udd2f' }, { '\u21c1' }, { '\u21c0' }, { '\u296c' },
+ { '\u03c1' }, { '\u03f1' }, { '\u2192' }, { '\u21a3' },
+ { '\u21c1' }, { '\u21c0' }, { '\u21c4' }, { '\u21cc' },
+ { '\u21c9' }, { '\u219d' }, { '\u22cc' }, { '\u02da' },
+ { '\u2253' }, { '\u21c4' }, { '\u21cc' }, { '\u200f' },
+ { '\u23b1' }, { '\u23b1' }, { '\u2aee' }, { '\u27ed' },
+ { '\u21fe' }, { '\u27e7' }, { '\u2986' }, { '\ud835', '\udd63' },
+ { '\u2a2e' }, { '\u2a35' }, { '\u0029' }, { '\u2994' },
+ { '\u2a12' }, { '\u21c9' }, { '\u203a' }, { '\ud835', '\udcc7' },
+ { '\u21b1' }, { '\u005d' }, { '\u2019' }, { '\u2019' },
+ { '\u22cc' }, { '\u22ca' }, { '\u25b9' }, { '\u22b5' },
+ { '\u25b8' }, { '\u29ce' }, { '\u2968' }, { '\u211e' },
+ { '\u015b' }, { '\u201a' }, { '\u227b' }, { '\u2ab4' },
+ { '\u2ab8' }, { '\u0161' }, { '\u227d' }, { '\u2ab0' },
+ { '\u015f' }, { '\u015d' }, { '\u2ab6' }, { '\u2aba' },
+ { '\u22e9' }, { '\u2a13' }, { '\u227f' }, { '\u0441' },
+ { '\u22c5' }, { '\u22a1' }, { '\u2a66' }, { '\u21d8' },
+ { '\u2925' }, { '\u2198' }, { '\u2198' }, { '\u00a7' },
+ { '\u00a7' }, { '\u003b' }, { '\u2929' }, { '\u2216' },
+ { '\u2216' }, { '\u2736' }, { '\ud835', '\udd30' }, { '\u2322' },
+ { '\u266f' }, { '\u0449' }, { '\u0448' }, { '\u2223' },
+ { '\u2225' }, { '\u00ad' }, { '\u00ad' }, { '\u03c3' },
+ { '\u03c2' }, { '\u03c2' }, { '\u223c' }, { '\u2a6a' },
+ { '\u2243' }, { '\u2243' }, { '\u2a9e' }, { '\u2aa0' },
+ { '\u2a9d' }, { '\u2a9f' }, { '\u2246' }, { '\u2a24' },
+ { '\u2972' }, { '\u2190' }, { '\u2216' }, { '\u2a33' },
+ { '\u29e4' }, { '\u2223' }, { '\u2323' }, { '\u2aaa' },
+ { '\u2aac' }, { '\u2aac', '\ufe00' }, { '\u044c' }, { '\u002f' },
+ { '\u29c4' }, { '\u233f' }, { '\ud835', '\udd64' }, { '\u2660' },
+ { '\u2660' }, { '\u2225' }, { '\u2293' }, { '\u2293', '\ufe00' },
+ { '\u2294' }, { '\u2294', '\ufe00' }, { '\u228f' }, { '\u2291' },
+ { '\u228f' }, { '\u2291' }, { '\u2290' }, { '\u2292' },
+ { '\u2290' }, { '\u2292' }, { '\u25a1' }, { '\u25a1' },
+ { '\u25aa' }, { '\u25aa' }, { '\u2192' }, { '\ud835', '\udcc8' },
+ { '\u2216' }, { '\u2323' }, { '\u22c6' }, { '\u2606' },
+ { '\u2605' }, { '\u03f5' }, { '\u03d5' }, { '\u00af' },
+ { '\u2282' }, { '\u2ac5' }, { '\u2abd' }, { '\u2286' },
+ { '\u2ac3' }, { '\u2ac1' }, { '\u2acb' }, { '\u228a' },
+ { '\u2abf' }, { '\u2979' }, { '\u2282' }, { '\u2286' },
+ { '\u2ac5' }, { '\u228a' }, { '\u2acb' }, { '\u2ac7' },
+ { '\u2ad5' }, { '\u2ad3' }, { '\u227b' }, { '\u2ab8' },
+ { '\u227d' }, { '\u2ab0' }, { '\u2aba' }, { '\u2ab6' },
+ { '\u22e9' }, { '\u227f' }, { '\u2211' }, { '\u266a' },
+ { '\u00b9' }, { '\u00b9' }, { '\u00b2' }, { '\u00b2' },
+ { '\u00b3' }, { '\u00b3' }, { '\u2283' }, { '\u2ac6' },
+ { '\u2abe' }, { '\u2ad8' }, { '\u2287' }, { '\u2ac4' },
+ { '\u27c9' }, { '\u2ad7' }, { '\u297b' }, { '\u2ac2' },
+ { '\u2acc' }, { '\u228b' }, { '\u2ac0' }, { '\u2283' },
+ { '\u2287' }, { '\u2ac6' }, { '\u228b' }, { '\u2acc' },
+ { '\u2ac8' }, { '\u2ad4' }, { '\u2ad6' }, { '\u21d9' },
+ { '\u2926' }, { '\u2199' }, { '\u2199' }, { '\u292a' },
+ { '\u00df' }, { '\u00df' }, { '\u2316' }, { '\u03c4' },
+ { '\u23b4' }, { '\u0165' }, { '\u0163' }, { '\u0442' },
+ { '\u20db' }, { '\u2315' }, { '\ud835', '\udd31' }, { '\u2234' },
+ { '\u2234' }, { '\u03b8' }, { '\u03d1' }, { '\u03d1' },
+ { '\u2248' }, { '\u223c' }, { '\u2009' }, { '\u2248' },
+ { '\u223c' }, { '\u00fe' }, { '\u00fe' }, { '\u02dc' },
+ { '\u00d7' }, { '\u00d7' }, { '\u22a0' }, { '\u2a31' },
+ { '\u2a30' }, { '\u222d' }, { '\u2928' }, { '\u22a4' },
+ { '\u2336' }, { '\u2af1' }, { '\ud835', '\udd65' }, { '\u2ada' },
+ { '\u2929' }, { '\u2034' }, { '\u2122' }, { '\u25b5' },
+ { '\u25bf' }, { '\u25c3' }, { '\u22b4' }, { '\u225c' },
+ { '\u25b9' }, { '\u22b5' }, { '\u25ec' }, { '\u225c' },
+ { '\u2a3a' }, { '\u2a39' }, { '\u29cd' }, { '\u2a3b' },
+ { '\u23e2' }, { '\ud835', '\udcc9' }, { '\u0446' }, { '\u045b' },
+ { '\u0167' }, { '\u226c' }, { '\u219e' }, { '\u21a0' },
+ { '\u21d1' }, { '\u2963' }, { '\u00fa' }, { '\u00fa' },
+ { '\u2191' }, { '\u045e' }, { '\u016d' }, { '\u00fb' },
+ { '\u00fb' }, { '\u0443' }, { '\u21c5' }, { '\u0171' },
+ { '\u296e' }, { '\u297e' }, { '\ud835', '\udd32' }, { '\u00f9' },
+ { '\u00f9' }, { '\u21bf' }, { '\u21be' }, { '\u2580' },
+ { '\u231c' }, { '\u231c' }, { '\u230f' }, { '\u25f8' },
+ { '\u016b' }, { '\u00a8' }, { '\u00a8' }, { '\u0173' },
+ { '\ud835', '\udd66' }, { '\u2191' }, { '\u2195' }, { '\u21bf' },
+ { '\u21be' }, { '\u228e' }, { '\u03c5' }, { '\u03d2' },
+ { '\u03c5' }, { '\u21c8' }, { '\u231d' }, { '\u231d' },
+ { '\u230e' }, { '\u016f' }, { '\u25f9' }, { '\ud835', '\udcca' },
+ { '\u22f0' }, { '\u0169' }, { '\u25b5' }, { '\u25b4' },
+ { '\u21c8' }, { '\u00fc' }, { '\u00fc' }, { '\u29a7' },
+ { '\u21d5' }, { '\u2ae8' }, { '\u2ae9' }, { '\u22a8' },
+ { '\u299c' }, { '\u03f5' }, { '\u03f0' }, { '\u2205' },
+ { '\u03d5' }, { '\u03d6' }, { '\u221d' }, { '\u2195' },
+ { '\u03f1' }, { '\u03c2' }, { '\u228a', '\ufe00' },
+ { '\u2acb', '\ufe00' }, { '\u228b', '\ufe00' },
+ { '\u2acc', '\ufe00' }, { '\u03d1' }, { '\u22b2' }, { '\u22b3' },
+ { '\u0432' }, { '\u22a2' }, { '\u2228' }, { '\u22bb' },
+ { '\u225a' }, { '\u22ee' }, { '\u007c' }, { '\u007c' },
+ { '\ud835', '\udd33' }, { '\u22b2' }, { '\u2282', '\u20d2' },
+ { '\u2283', '\u20d2' }, { '\ud835', '\udd67' }, { '\u221d' },
+ { '\u22b3' }, { '\ud835', '\udccb' }, { '\u2acb', '\ufe00' },
+ { '\u228a', '\ufe00' }, { '\u2acc', '\ufe00' },
+ { '\u228b', '\ufe00' }, { '\u299a' }, { '\u0175' }, { '\u2a5f' },
+ { '\u2227' }, { '\u2259' }, { '\u2118' }, { '\ud835', '\udd34' },
+ { '\ud835', '\udd68' }, { '\u2118' }, { '\u2240' }, { '\u2240' },
+ { '\ud835', '\udccc' }, { '\u22c2' }, { '\u25ef' }, { '\u22c3' },
+ { '\u25bd' }, { '\ud835', '\udd35' }, { '\u27fa' }, { '\u27f7' },
+ { '\u03be' }, { '\u27f8' }, { '\u27f5' }, { '\u27fc' },
+ { '\u22fb' }, { '\u2a00' }, { '\ud835', '\udd69' }, { '\u2a01' },
+ { '\u2a02' }, { '\u27f9' }, { '\u27f6' }, { '\ud835', '\udccd' },
+ { '\u2a06' }, { '\u2a04' }, { '\u25b3' }, { '\u22c1' },
+ { '\u22c0' }, { '\u00fd' }, { '\u00fd' }, { '\u044f' },
+ { '\u0177' }, { '\u044b' }, { '\u00a5' }, { '\u00a5' },
+ { '\ud835', '\udd36' }, { '\u0457' }, { '\ud835', '\udd6a' },
+ { '\ud835', '\udcce' }, { '\u044e' }, { '\u00ff' }, { '\u00ff' },
+ { '\u017a' }, { '\u017e' }, { '\u0437' }, { '\u017c' },
+ { '\u2128' }, { '\u03b6' }, { '\ud835', '\udd37' }, { '\u0436' },
+ { '\u21dd' }, { '\ud835', '\udd6b' }, { '\ud835', '\udccf' },
+ { '\u200d' }, { '\u200c' }, };
+
+ final static char[][] WINDOWS_1252 = { { '\u20AC' }, { '\u0081' },
+ { '\u201A' }, { '\u0192' }, { '\u201E' }, { '\u2026' },
+ { '\u2020' }, { '\u2021' }, { '\u02C6' }, { '\u2030' },
+ { '\u0160' }, { '\u2039' }, { '\u0152' }, { '\u008D' },
+ { '\u017D' }, { '\u008F' }, { '\u0090' }, { '\u2018' },
+ { '\u2019' }, { '\u201C' }, { '\u201D' }, { '\u2022' },
+ { '\u2013' }, { '\u2014' }, { '\u02DC' }, { '\u2122' },
+ { '\u0161' }, { '\u203A' }, { '\u0153' }, { '\u009D' },
+ { '\u017E' }, { '\u0178' } };
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharactersAccel.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharactersAccel.java
new file mode 100644
index 000000000..311f8f77f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharactersAccel.java
@@ -0,0 +1,311 @@
+/*
+ * Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera
+ * Software ASA.
+ *
+ * You are granted a license to use, reproduce and create derivative works of
+ * this document.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.NoLength;
+
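+/**
+ * Holder for {@link #HILO_ACCEL}, a precomputed two-level <code>int</code>
+ * lookup table. The first 65 outer entries are <code>null</code>, so the
+ * first populated row sits at index 65 (the code of 'A'); outer entries that
+ * have no data are <code>null</code> as well.
+ * <p>
+ * NOTE(review): the outer index looks like the code of an ASCII letter and
+ * each non-zero cell looks like two 16-bit indices packed as
+ * (hi &lt;&lt; 16) | lo, presumably bounding a search range in the named
+ * character reference tables; confirm against the tokenizer that reads it.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */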
+public final class NamedCharactersAccel {
+
+ static final @NoLength int[][] HILO_ACCEL = {
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ { 0, 0, 0, 0, 0, 0, 0, 12386493, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 40174181, 0, 0, 0, 0, 60162966, 0, 0, 0,
+ 75367550, 0, 0, 0, 82183396, 0, 0, 0, 0, 0, 115148507, 0,
+ 0, 135989275, 139397199, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28770743, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 82248935, 0, 0, 0, 0, 0, 115214046, 0, 0, 0, 139528272, 0,
+ 0, 0, 0, },
+ null,
+ { 0, 0, 0, 4980811, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 38470219, 0, 0, 0, 0, 0, 0, 0, 0, 64553944, 0, 0, 0, 0,
+ 0, 0, 0, 92145022, 0, 0, 0, 0, 0, 0, 0, 0, 139593810, 0, 0,
+ 0, 0, },
+ { 65536, 0, 0, 0, 0, 0, 0, 0, 13172937, 0, 0, 0, 0, 0, 25297282, 0,
+ 0, 28901816, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 71500866, 0, 0, 0, 0, 82380008, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, },
+ null,
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 94897574, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 2555943, 0, 0, 0, 0, 0, 0, 0, 15532269, 0, 0, 0, 0, 0, 0,
+ 0, 31785444, 34406924, 0, 0, 0, 0, 0, 40895088, 0, 0, 0,
+ 60228503, 0, 0, 0, 0, 0, 0, 0, 82445546, 0, 0, 0, 0, 0,
+ 115279583, 0, 0, 136054812, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 40239718, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 5046349, 0, 0, 10944679, 0, 13238474, 0, 15597806,
+ 16056565, 0, 20578618, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, },
+ null,
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 95225257, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 196610, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, 8454273, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 46072511, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 2687016, 0, 0, 0, 0, 0, 13304011, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 31850982, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ null,
+ null,
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 34472462, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 95290798, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 5111886, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 34603535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 105776718, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, 8585346, 0, 11075752, 0, 0, 0, 0, 16187638, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28508594, 0, 0,
+ 0, 0, 0, 0, 0, 40305255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 95421871, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ null,
+ null,
+ null,
+ { 0, 0, 0, 5177423, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ { 327684, 1900571, 2949162, 5374032, 8716420, 0, 11206826,
+ 12517566, 13435084, 0, 15663343, 16515320, 19988785,
+ 20644155, 25428355, 27197855, 0, 29163962, 31916519,
+ 34734609, 36045347, 0, 0, 0, 40436328, 40960625, 41615994,
+ 46596800, 54264627, 60556184, 64750554, 68879387, 71763012,
+ 75826303, 77268122, 0, 81462490, 83952875, 92865919,
+ 96142769, 105973327, 110167691, 0, 116917984, 121833283,
+ 132253665, 136251421, 140707923, 0, 0, 144574620,
+ 145361066, },
+ { 393222, 0, 0, 0, 0, 0, 11272364, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 36176423, 38535756, 0, 0, 0, 0, 41681532, 46727880,
+ 0, 60687261, 0, 0, 71828552, 75891846, 0, 0, 0, 84411650,
+ 0, 96404924, 0, 0, 0, 117376761, 121898820, 132319203,
+ 136382496, 0, 0, 0, 0, 0, },
+ { 589831, 1966110, 3276846, 5505107, 8978566, 10420383, 11468973,
+ 12583104, 13631694, 15139046, 15794416, 16711933, 20054322,
+ 20840764, 25624965, 27263392, 0, 29360574, 32244200,
+ 34931219, 36373033, 38601293, 39584348, 0, 40567402,
+ 41091698, 42205821, 46858954, 54723389, 60818335, 65143773,
+ 68944924, 71959625, 75957383, 77530268, 80938194, 81593564,
+ 84739337, 92997002, 96863680, 106235474, 110233234, 0,
+ 117704448, 122816325, 132515812, 136579106, 140773476,
+ 142149753, 143001732, 144705695, 145492139, },
+ { 0, 0, 3342387, 0, 9044106, 0, 11534512, 0, 13697233, 0, 0, 0, 0,
+ 0, 25690504, 0, 0, 0, 0, 0, 36438572, 38732366, 0, 0, 0,
+ 41157236, 0, 46924492, 54788932, 61080481, 65209315, 0,
+ 72025163, 0, 0, 0, 0, 85132558, 93062540, 96929223,
+ 106563158, 0, 0, 118032133, 123012947, 132581351,
+ 136775717, 140839013, 0, 143067271, 0, 145557677, },
+ { 0, 2162719, 3473460, 5636181, 0, 0, 0, 0, 0, 0, 0, 18809088,
+ 20185395, 21299519, 0, 0, 0, 29622721, 0, 0, 0, 39256656,
+ 39649885, 0, 0, 41288309, 42336901, 47448781, 55182149,
+ 61342629, 65274852, 69010461, 72811596, 76219528, 77726880,
+ 0, 0, 86967572, 93128077, 97650120, 106628699, 110560915,
+ 0, 118490890, 123733846, 132646888, 0, 141232230,
+ 142411898, 0, 144836769, 145688750, },
+ { 655370, 2228258, 3538998, 5701719, 9109643, 10485920, 11600049,
+ 12648641, 13762770, 15204584, 15859954, 18874656, 20250933,
+ 21365062, 25756041, 27328929, 28574132, 29688261, 32309741,
+ 34996758, 36504109, 39322200, 39715422, 39912033, 40632940,
+ 41353847, 42467975, 47514325, 55247691, 61473705, 65405925,
+ 69272606, 72877144, 76285068, 77857955, 81003732, 81659102,
+ 87164208, 93193614, 97715667, 106759772, 110626456,
+ 114296528, 118687505, 123864929, 132712425, 136906792,
+ 141297772, 142477438, 143132808, 144902307, 145754288, },
+ { 786443, 0, 0, 0, 9240716, 0, 11665586, 0, 13893843, 0, 0, 0, 0,
+ 0, 25887114, 0, 0, 0, 0, 0, 36635182, 0, 0, 0, 0, 0,
+ 42599049, 0, 0, 0, 65733607, 0, 73008217, 0, 77989029, 0,
+ 81724639, 87295283, 0, 98305492, 107021918, 0, 0, 0, 0, 0,
+ 137037866, 0, 0, 0, 0, 0, },
+ { 0, 0, 3604535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27394466, 0,
+ 29753798, 32571886, 35258903, 0, 0, 0, 0, 0, 0, 0, 0,
+ 55509836, 61604779, 0, 0, 0, 0, 0, 0, 81790176, 87557429,
+ 93259151, 98502109, 107152994, 110888601, 0, 119015188,
+ 124323683, 133498858, 137234476, 0, 0, 143263881, 0,
+ 145819825, },
+ { 0, 0, 3866680, 6160472, 0, 10616993, 0, 12714178, 0, 0, 0, 0,
+ 20316470, 0, 0, 27460003, 0, 31261127, 32637426, 35521051,
+ 0, 0, 0, 39977570, 0, 0, 0, 48366294, 56492880, 62391213,
+ 0, 69338146, 73073755, 0, 78316711, 0, 0, 0, 93980048,
+ 98764256, 107218532, 111085213, 114362065, 119736089,
+ 125241194, 133957622, 0, 0, 0, 143329419, 144967844,
+ 145885362, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 62456761, 0, 69403683, 73139292, 0,
+ 78382252, 0, 81855713, 87622969, 0, 98829796, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 48431843, 0, 0, 0, 0, 0, 76416141, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 851981, 0, 4063292, 0, 9306254, 0, 0, 0, 0, 0, 0, 19005729, 0, 0,
+ 0, 27525540, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42795659,
+ 49152740, 56623967, 62587834, 66061292, 69600292, 73401437,
+ 0, 0, 0, 0, 87950650, 94111131, 99878373, 107546213,
+ 112002720, 0, 119932708, 125306744, 0, 137496623,
+ 141363309, 0, 143460492, 0, 0, },
+ { 917518, 0, 0, 0, 9502863, 0, 0, 0, 14155989, 0, 0, 19071267, 0,
+ 0, 26083724, 0, 0, 0, 32702963, 0, 36700720, 0, 0, 0, 0, 0,
+ 43057806, 0, 0, 0, 66520049, 0, 0, 0, 78841005, 81069269,
+ 0, 88147263, 0, 99943925, 107873898, 112068270, 0,
+ 120063783, 125831033, 0, 137693235, 0, 0, 143526030, 0, 0, },
+ { 983055, 0, 0, 0, 0, 0, 0, 0, 14483673, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 37093937, 0, 0, 0, 0, 0, 44565138, 49349359, 0, 0,
+ 66651128, 69665831, 73860193, 0, 79561908, 0, 0, 88606018,
+ 94176669, 0, 0, 0, 0, 120129321, 0, 0, 0, 141494382, 0,
+ 143591567, 0, 0, },
+ { 1114128, 2293795, 4587583, 8257631, 9633938, 10813603, 11731123,
+ 12845251, 14680286, 15270121, 15925491, 19661092, 20382007,
+ 24969543, 26149263, 27656613, 28639669, 31392222, 32768500,
+ 35586591, 37225015, 39387737, 39780959, 40043107, 40698477,
+ 41419384, 44696233, 52495090, 57738081, 63439804, 66782202,
+ 69927976, 73925736, 76809359, 79824063, 81134806, 81921250,
+ 89785673, 94307742, 100795894, 107939439, 112330415,
+ 114427602, 120588074, 126158721, 134416381, 137824310,
+ 141559920, 142542975, 143853712, 145033381, 145950899, },
+ { 1179666, 0, 0, 0, 9699476, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26280336,
+ 0, 0, 0, 0, 0, 38076985, 0, 0, 0, 0, 0, 45220523, 52560674,
+ 0, 0, 67175420, 69993516, 0, 0, 79889603, 0, 0, 89916763,
+ 94373280, 101451267, 108136048, 0, 114493139, 120784689,
+ 126355334, 134481924, 138414136, 141625457, 142608512, 0,
+ 0, 0, },
+ { 0, 0, 0, 0, 9896085, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 33292789, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67830786, 0, 0,
+ 0, 80020676, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127403913, 0, 0, 0,
+ 0, 0, 0, 0, },
+ { 1310739, 2359332, 4653127, 0, 0, 0, 12189876, 0, 0, 0, 0, 0, 0,
+ 0, 26345874, 28246439, 0, 31457760, 0, 35652128, 38142534,
+ 0, 0, 0, 0, 0, 45351603, 52757283, 57869170, 63636425,
+ 67961868, 71304237, 73991273, 0, 0, 0, 0, 90309981, 0,
+ 101910029, 108988019, 114034355, 0, 120850228, 127469465,
+ 135464965, 138741825, 141690994, 142739585, 143984788, 0,
+ 0, },
+ { 1441813, 2424869, 4718664, 8388735, 10027160, 10879142, 12255419,
+ 12976325, 14745825, 15401194, 15991028, 19857709, 20447544,
+ 25035134, 26542483, 28377520, 28705206, 31588833, 33358333,
+ 35783201, 38208071, 39453274, 39846496, 40108644, 40764014,
+ 41484921, 45613749, 53216038, 58196852, 63898572, 68158478,
+ 71369793, 74253418, 77005973, 80479430, 81265879, 81986787,
+ 90965347, 94504353, 103679508, 109250176, 114165453,
+ 114558676, 121243445, 127731610, 135727124, 138807366,
+ 142018675, 142805123, 144115862, 145098918, 146016436, },
+ { 1572887, 0, 0, 0, 10092698, 0, 12320956, 0, 14811362, 0, 0,
+ 19923248, 0, 25166207, 26739094, 0, 0, 0, 33423870, 0,
+ 38273608, 0, 0, 0, 0, 0, 45744825, 0, 58262393, 64095184,
+ 68355089, 0, 75170926, 0, 80610509, 0, 0, 91817325, 0,
+ 104203823, 109512324, 0, 0, 121636667, 128059294, 0,
+ 139069511, 0, 0, 0, 0, 0, },
+ { 1703961, 2490406, 4849737, 0, 10223771, 0, 0, 13107399, 15007971,
+ 15466732, 0, 0, 20513081, 25231745, 26870169, 0, 0,
+ 31654371, 34275839, 0, 38404681, 0, 0, 0, 40829551, 0,
+ 45875899, 53609261, 59900794, 64226259, 68551700, 0, 0, 0,
+ 80807119, 81331417, 0, 91948410, 94700963, 104465975,
+ 109643400, 114230991, 114951893, 121702209, 131663779, 0,
+ 139266123, 0, 0, 144246936, 145295527, 0, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27132315, 0, 0, 0, 0,
+ 0, 0, 39518811, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75302012, 0,
+ 0, 0, 0, 92079484, 0, 105383483, 109708938, 0, 0, 0, 0, 0,
+ 0, 0, 0, 144312474, 0, 0, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 46006973, 0, 60031891, 64291797, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 105711177, 0, 0, 0, 0, 131991514, 135923736,
+ 139331662, 0, 0, 144378011, 0, 146147509, },
+ { 0, 0, 0, 0, 10354845, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68813847, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 121767746, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 60097429, 0, 0, 0, 0, 77137048, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 64422870, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 132122591, 0, 0, 142084216, 0, 0, 0, 0, }, };
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Portability.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Portability.java
new file mode 100644
index 000000000..2b3f96625
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Portability.java
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2008-2015 Mozilla Foundation
+ * Copyright (c) 2018-2020 Moonchild Productions
+ * Copyright (c) 2020 Binary Outcast
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.Literal;
+import nu.validator.htmlparser.annotation.Local;
+import nu.validator.htmlparser.annotation.NoLength;
+import nu.validator.htmlparser.common.Interner;
+
+public final class Portability {
+ /*
+ * Static helpers for creating, releasing and comparing strings, local names
+ * and char buffers. Comments in this class refer to a C++ counterpart; the
+ * Java bodies below are thin wrappers over java.lang.String.
+ */
+ // Allocating methods
+
+ /**
+ * Allocates a new local name object. In C++, the refcount must be set up in such a way that
+ * calling <code>releaseLocal</code> on the return value balances the refcount set by this method.
+ * <p>
+ * The Java body builds a <code>String</code> from <code>length</code> chars of
+ * <code>buf</code> starting at <code>offset</code> and returns its interned
+ * instance; the <code>interner</code> argument is not used here.
+ */
+ public static @Local String newLocalNameFromBuffer(@NoLength char[] buf, int offset, int length, Interner interner) {
+ return new String(buf, offset, length).intern();
+ }
+ /**
+ * Creates a new <code>String</code> from <code>length</code> chars of
+ * <code>buf</code> starting at <code>offset</code>.
+ */
+ public static String newStringFromBuffer(@NoLength char[] buf, int offset, int length
+ // CPPONLY: , TreeBuilder treeBuilder
+ ) {
+ return new String(buf, offset, length);
+ }
+ /** Returns the empty string literal. */
+ public static String newEmptyString() {
+ return "";
+ }
+ /** Returns <code>literal</code> itself; no copy is made in Java. */
+ public static String newStringFromLiteral(@Literal String literal) {
+ return literal;
+ }
+ /** Returns <code>string</code> itself; no copy is made in Java. */
+ public static String newStringFromString(String string) {
+ return string;
+ }
+ /** Returns a new char array holding the characters of <code>local</code>. */
+ // XXX get rid of this
+ public static char[] newCharArrayFromLocal(@Local String local) {
+ return local.toCharArray();
+ }
+ /** Returns a new char array holding the characters of <code>string</code>. */
+ public static char[] newCharArrayFromString(String string) {
+ return string.toCharArray();
+ }
+ /**
+ * Returns <code>local</code> unchanged; the <code>interner</code> argument
+ * is not used by the Java body.
+ */
+ public static @Local String newLocalFromLocal(@Local String local, Interner interner) {
+ return local;
+ }
+
+ // Deallocation methods
+
+ public static void releaseString(String str) {
+ // No-op in Java
+ }
+
+ // Comparison methods
+
+ /**
+ * Tests whether <code>local</code> has exactly <code>length</code> chars and
+ * each of them equals the corresponding char of <code>buf</code> starting at
+ * <code>offset</code>.
+ */
+ public static boolean localEqualsBuffer(@Local String local, @NoLength char[] buf, int offset, int length) {
+ if (local.length() != length) {
+ return false;
+ }
+ for (int i = 0; i < length; i++) {
+ if (local.charAt(i) != buf[offset + i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+ /**
+ * Tests whether <code>string</code> starts with <code>lowerCaseLiteral</code>
+ * when ASCII upper-case letters in <code>string</code> are folded to lower
+ * case. The literal is compared as is, so it must already be in lower case.
+ * Returns <code>false</code> if <code>string</code> is <code>null</code> or
+ * shorter than the literal.
+ */
+ public static boolean lowerCaseLiteralIsPrefixOfIgnoreAsciiCaseString(@Literal String lowerCaseLiteral,
+ String string) {
+ if (string == null) {
+ return false;
+ }
+ if (lowerCaseLiteral.length() > string.length()) {
+ return false;
+ }
+ for (int i = 0; i < lowerCaseLiteral.length(); i++) {
+ char c0 = lowerCaseLiteral.charAt(i);
+ char c1 = string.charAt(i);
+ if (c1 >= 'A' && c1 <= 'Z') {
+ c1 += 0x20; // fold ASCII 'A'-'Z' to 'a'-'z'
+ }
+ if (c0 != c1) {
+ return false;
+ }
+ }
+ return true;
+ }
+ /**
+ * Tests whether <code>string</code> equals <code>lowerCaseLiteral</code>
+ * when ASCII upper-case letters in <code>string</code> are folded to lower
+ * case. The literal is compared as is, so it must already be in lower case.
+ * Returns <code>false</code> if <code>string</code> is <code>null</code> or
+ * the lengths differ.
+ */
+ public static boolean lowerCaseLiteralEqualsIgnoreAsciiCaseString(@Literal String lowerCaseLiteral,
+ String string) {
+ if (string == null) {
+ return false;
+ }
+ if (lowerCaseLiteral.length() != string.length()) {
+ return false;
+ }
+ for (int i = 0; i < lowerCaseLiteral.length(); i++) {
+ char c0 = lowerCaseLiteral.charAt(i);
+ char c1 = string.charAt(i);
+ if (c1 >= 'A' && c1 <= 'Z') {
+ c1 += 0x20; // fold ASCII 'A'-'Z' to 'a'-'z'
+ }
+ if (c0 != c1) {
+ return false;
+ }
+ }
+ return true;
+ }
+ /**
+ * Exact, case-sensitive comparison via <code>literal.equals(string)</code>;
+ * a <code>null</code> <code>string</code> therefore yields <code>false</code>.
+ */
+ public static boolean literalEqualsString(@Literal String literal, String string) {
+ return literal.equals(string);
+ }
+ /**
+ * Exact, case-sensitive comparison via <code>one.equals(other)</code>;
+ * <code>one</code> must not be <code>null</code>.
+ */
+ public static boolean stringEqualsString(String one, String other) {
+ return one.equals(other);
+ }
+
+ public static void delete(Object o) {
+ // Empty body: does nothing in Java
+ }
+
+ public static void deleteArray(Object o) {
+ // Empty body: does nothing in Java
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/PushedLocation.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/PushedLocation.java
new file mode 100644
index 000000000..fad5f43db
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/PushedLocation.java
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+public class PushedLocation {
+    // Line/column values and the new-line flag; all fixed at construction.
+    private final int line;
+    private final int linePrev;
+    private final int col;
+    private final int colPrev;
+    private final boolean nextCharOnNewLine;
+
+    // Identifiers, stored exactly as passed in (no null check is made).
+    private final String publicId;
+    private final String systemId;
+
+    // Link to another PushedLocation, or null; lets instances form a chain.
+    private final PushedLocation next;
+
+    /**
+     * Creates an immutable location record. Every argument is stored as given
+     * and is returned unchanged by the matching accessor.
+     *
+     * @param line value returned by {@link #getLine()}
+     * @param linePrev value returned by {@link #getLinePrev()}
+     * @param col value returned by {@link #getCol()}
+     * @param colPrev value returned by {@link #getColPrev()}
+     * @param nextCharOnNewLine value returned by {@link #isNextCharOnNewLine()}
+     * @param publicId value returned by {@link #getPublicId()}
+     * @param systemId value returned by {@link #getSystemId()}
+     * @param next value returned by {@link #getNext()}
+     */
+    public PushedLocation(int line, int linePrev, int col, int colPrev,
+            boolean nextCharOnNewLine, String publicId, String systemId,
+            PushedLocation next) {
+        this.next = next;
+        this.systemId = systemId;
+        this.publicId = publicId;
+        this.nextCharOnNewLine = nextCharOnNewLine;
+        this.colPrev = colPrev;
+        this.col = col;
+        this.linePrev = linePrev;
+        this.line = line;
+    }
+
+    /**
+     * @return the <code>line</code> value given to the constructor
+     */
+    public int getLine() {
+        return this.line;
+    }
+
+    /**
+     * @return the <code>linePrev</code> value given to the constructor
+     */
+    public int getLinePrev() {
+        return this.linePrev;
+    }
+
+    /**
+     * @return the <code>col</code> value given to the constructor
+     */
+    public int getCol() {
+        return this.col;
+    }
+
+    /**
+     * @return the <code>colPrev</code> value given to the constructor
+     */
+    public int getColPrev() {
+        return this.colPrev;
+    }
+
+    /**
+     * @return the <code>nextCharOnNewLine</code> flag given to the constructor
+     */
+    public boolean isNextCharOnNewLine() {
+        return this.nextCharOnNewLine;
+    }
+
+    /**
+     * @return the <code>publicId</code> given to the constructor
+     */
+    public String getPublicId() {
+        return this.publicId;
+    }
+
+    /**
+     * @return the <code>systemId</code> given to the constructor
+     */
+    public String getSystemId() {
+        return this.systemId;
+    }
+
+    /**
+     * @return the <code>next</code> link given to the constructor
+     */
+    public PushedLocation getNext() {
+        return this.next;
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java
new file mode 100644
index 000000000..b671bc903
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007-2011 Mozilla Foundation
+ * Copyright (c) 2018-2020 Moonchild Productions
+ * Copyright (c) 2020 Binary Outcast
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.Inline;
+import nu.validator.htmlparser.annotation.Local;
+import nu.validator.htmlparser.annotation.NsUri;
+
+final class StackNode<T> {
+ final int flags; // ElementName flag bits; see getGroup() and the is*() tests below
+
+ final @Local String name;
+
+ final @Local String popName;
+
+ final @NsUri String ns; // namespace URI; the ElementName-based constructors use XHTML, SVG or MathML
+
+ final T node;
+
+ // Only used on the list of formatting elements
+ HtmlAttributes attributes;
+
+ private int refcount = 1; // adjusted by retain() and release()
+
+ // [NOCPP[
+
+ private final TaintableLocatorImpl locator;
+
+ public TaintableLocatorImpl getLocator() {
+ return locator;
+ }
+
+ // ]NOCPP]
+
+ @Inline public int getFlags() {
+ return flags;
+ }
+
+ public int getGroup() {
+ return flags & ElementName.GROUP_MASK; // keep only the bits selected by GROUP_MASK
+ }
+
+ public boolean isScoping() {
+ return (flags & ElementName.SCOPING) != 0;
+ }
+
+ public boolean isSpecial() {
+ return (flags & ElementName.SPECIAL) != 0;
+ }
+
+ public boolean isFosterParenting() {
+ return (flags & ElementName.FOSTER_PARENTING) != 0;
+ }
+
+ public boolean isHtmlIntegrationPoint() {
+ return (flags & ElementName.HTML_INTEGRATION_POINT) != 0;
+ }
+
+ // [NOCPP[
+
+ public boolean isOptionalEndTag() {
+ return (flags & ElementName.OPTIONAL_END_TAG) != 0;
+ }
+
+ // ]NOCPP]
+
+ /**
+ * Constructor for copying. This doesn't take another <code>StackNode</code>
+ * because in C++ the caller is responsible for reobtaining the local names
+ * from another interner.
+ * <p>
+ * Every argument is stored as given (the flags are not adjusted) and the
+ * refcount starts at 1.
+ *
+ * @param flags
+ * @param ns
+ * @param name
+ * @param node
+ * @param popName
+ * @param attributes
+ */
+ StackNode(int flags, @NsUri String ns, @Local String name, T node,
+ @Local String popName, HtmlAttributes attributes
+ // [NOCPP[
+ , TaintableLocatorImpl locator
+ // ]NOCPP]
+ ) {
+ this.flags = flags;
+ this.name = name;
+ this.popName = popName;
+ this.ns = ns;
+ this.node = node;
+ this.attributes = attributes;
+ this.refcount = 1;
+ // [NOCPP[
+ this.locator = locator;
+ // ]NOCPP]
+ }
+
+ /**
+ * Short hand for well-known HTML elements.
+ * <p>
+ * Flags come from <code>elementName.getFlags()</code>; both the name and
+ * the pop name are <code>elementName.name</code>; the namespace is XHTML
+ * and the attributes are <code>null</code>. Asserts that
+ * <code>elementName</code> is not custom.
+ *
+ * @param elementName
+ * @param node
+ */
+ StackNode(ElementName elementName, T node
+ // [NOCPP[
+ , TaintableLocatorImpl locator
+ // ]NOCPP]
+ ) {
+ this.flags = elementName.getFlags();
+ this.name = elementName.name;
+ this.popName = elementName.name;
+ this.ns = "http://www.w3.org/1999/xhtml";
+ this.node = node;
+ this.attributes = null;
+ this.refcount = 1;
+ assert !elementName.isCustom() : "Don't use this constructor for custom elements.";
+ // [NOCPP[
+ this.locator = locator;
+ // ]NOCPP]
+ }
+
+ /**
+ * Constructor for HTML formatting elements.
+ * <p>
+ * Same as the short-hand constructor for well-known HTML elements, except
+ * that the given <code>attributes</code> are stored. Asserts that
+ * <code>elementName</code> is not custom.
+ *
+ * @param elementName
+ * @param node
+ * @param attributes
+ */
+ StackNode(ElementName elementName, T node, HtmlAttributes attributes
+ // [NOCPP[
+ , TaintableLocatorImpl locator
+ // ]NOCPP]
+ ) {
+ this.flags = elementName.getFlags();
+ this.name = elementName.name;
+ this.popName = elementName.name;
+ this.ns = "http://www.w3.org/1999/xhtml";
+ this.node = node;
+ this.attributes = attributes;
+ this.refcount = 1;
+ assert !elementName.isCustom() : "Don't use this constructor for custom elements.";
+ // [NOCPP[
+ this.locator = locator;
+ // ]NOCPP]
+ }
+
+ /**
+ * The common-case HTML constructor.
+ * <p>
+ * Flags and name come from <code>elementName</code>, the pop name is passed
+ * separately, the namespace is XHTML and the attributes are
+ * <code>null</code>. Unlike the two constructors above, this one does not
+ * assert that <code>elementName</code> is not custom.
+ *
+ * @param elementName
+ * @param node
+ * @param popName
+ */
+ StackNode(ElementName elementName, T node, @Local String popName
+ // [NOCPP[
+ , TaintableLocatorImpl locator
+ // ]NOCPP]
+ ) {
+ this.flags = elementName.getFlags();
+ this.name = elementName.name;
+ this.popName = popName;
+ this.ns = "http://www.w3.org/1999/xhtml";
+ this.node = node;
+ this.attributes = null;
+ this.refcount = 1;
+ // [NOCPP[
+ this.locator = locator;
+ // ]NOCPP]
+ }
+
+ /**
+ * Constructor for SVG elements. Note that the order of the arguments is
+ * what distinguishes this from the HTML constructor. This is ugly, but
+ * AFAICT the least disruptive way to make this work with Java's generics
+ * and without unnecessary branches. :-(
+ * <p>
+ * The element's flags are passed through <code>prepareSvgFlags</code>
+ * before being stored; the namespace is SVG and the attributes are
+ * <code>null</code>.
+ *
+ * @param elementName
+ * @param popName
+ * @param node
+ */
+ StackNode(ElementName elementName, @Local String popName, T node
+ // [NOCPP[
+ , TaintableLocatorImpl locator
+ // ]NOCPP]
+ ) {
+ this.flags = prepareSvgFlags(elementName.getFlags());
+ this.name = elementName.name;
+ this.popName = popName;
+ this.ns = "http://www.w3.org/2000/svg";
+ this.node = node;
+ this.attributes = null;
+ this.refcount = 1;
+ // [NOCPP[
+ this.locator = locator;
+ // ]NOCPP]
+ }
+
+ /**
+ * Constructor for MathML.
+ * <p>
+ * The element's flags are passed through <code>prepareMathFlags</code>
+ * together with <code>markAsIntegrationPoint</code> before being stored;
+ * the namespace is MathML and the attributes are <code>null</code>.
+ *
+ * @param elementName
+ * @param node
+ * @param popName
+ * @param markAsIntegrationPoint
+ */
+ StackNode(ElementName elementName, T node, @Local String popName,
+ boolean markAsIntegrationPoint
+ // [NOCPP[
+ , TaintableLocatorImpl locator
+ // ]NOCPP]
+ ) {
+ this.flags = prepareMathFlags(elementName.getFlags(),
+ markAsIntegrationPoint);
+ this.name = elementName.name;
+ this.popName = popName;
+ this.ns = "http://www.w3.org/1998/Math/MathML";
+ this.node = node;
+ this.attributes = null;
+ this.refcount = 1;
+ // [NOCPP[
+ this.locator = locator;
+ // ]NOCPP]
+ }
+ /**
+ * Clears the FOSTER_PARENTING, SCOPING, SPECIAL and OPTIONAL_END_TAG bits
+ * and, if SCOPING_AS_SVG is set, turns on SCOPING, SPECIAL and
+ * HTML_INTEGRATION_POINT. All other bits are returned unchanged.
+ */
+ private static int prepareSvgFlags(int flags) {
+ flags &= ~(ElementName.FOSTER_PARENTING | ElementName.SCOPING
+ | ElementName.SPECIAL | ElementName.OPTIONAL_END_TAG);
+ if ((flags & ElementName.SCOPING_AS_SVG) != 0) {
+ flags |= (ElementName.SCOPING | ElementName.SPECIAL | ElementName.HTML_INTEGRATION_POINT);
+ }
+ return flags;
+ }
+ /**
+ * Clears the FOSTER_PARENTING, SCOPING, SPECIAL and OPTIONAL_END_TAG bits,
+ * turns on SCOPING and SPECIAL if SCOPING_AS_MATHML is set, and turns on
+ * HTML_INTEGRATION_POINT if <code>markAsIntegrationPoint</code> is true.
+ */
+ private static int prepareMathFlags(int flags,
+ boolean markAsIntegrationPoint) {
+ flags &= ~(ElementName.FOSTER_PARENTING | ElementName.SCOPING
+ | ElementName.SPECIAL | ElementName.OPTIONAL_END_TAG);
+ if ((flags & ElementName.SCOPING_AS_MATHML) != 0) {
+ flags |= (ElementName.SCOPING | ElementName.SPECIAL);
+ }
+ if (markAsIntegrationPoint) {
+ flags |= ElementName.HTML_INTEGRATION_POINT;
+ }
+ return flags;
+ }
+ /**
+ * Hands <code>attributes</code> to <code>Portability.delete</code>.
+ * NOTE(review): marked unused, so presumably only meaningful for the C++
+ * version of this class; confirm.
+ */
+ @SuppressWarnings("unused") private void destructor() {
+ Portability.delete(attributes);
+ }
+ /** Sets <code>attributes</code> to <code>null</code>. */
+ public void dropAttributes() {
+ attributes = null;
+ }
+
+ // [NOCPP[
+ /**
+ * Returns <code>name</code>.
+ *
+ * @see java.lang.Object#toString()
+ */
+ @Override public @Local String toString() {
+ return name;
+ }
+
+ // ]NOCPP]
+ /** Increments the reference count. */
+ public void retain() {
+ refcount++;
+ }
+ /**
+ * Decrements the reference count and passes this node to
+ * <code>Portability.delete</code> once the count reaches zero.
+ */
+ public void release() {
+ refcount--;
+ if (refcount == 0) {
+ Portability.delete(this);
+ }
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StateSnapshot.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StateSnapshot.java
new file mode 100644
index 000000000..d79641bcb
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StateSnapshot.java
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2009-2010 Mozilla Foundation
+ * Copyright (c) 2018-2020 Moonchild Productions
+ * Copyright (c) 2020 Binary Outcast
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.Auto;
+
+
+public class StateSnapshot<T> implements TreeBuilderState<T> {
+
+ private final @Auto StackNode<T>[] stack;
+
+ private final @Auto StackNode<T>[] listOfActiveFormattingElements;
+
+ private final @Auto int[] templateModeStack;
+
+ private final T formPointer;
+
+ private final T headPointer;
+
+ private final T deepTreeSurrogateParent;
+
+ private final int mode;
+
+ private final int originalMode;
+
+ private final boolean framesetOk;
+
+ private final boolean needToDropLF;
+
+ private final boolean quirks;
+
+ /**
+ * @param stack
+ * @param listOfActiveFormattingElements
+ * @param templateModeStack
+ * @param formPointer
+ * @param headPointer
+ * @param deepTreeSurrogateParent
+ * @param mode
+ * @param originalMode
+ * @param framesetOk
+ * @param needToDropLF
+ * @param quirks
+ */
+ StateSnapshot(StackNode<T>[] stack,
+ StackNode<T>[] listOfActiveFormattingElements, int[] templateModeStack, T formPointer,
+ T headPointer, T deepTreeSurrogateParent, int mode, int originalMode,
+ boolean framesetOk, boolean needToDropLF, boolean quirks) {
+ this.stack = stack;
+ this.listOfActiveFormattingElements = listOfActiveFormattingElements;
+ this.templateModeStack = templateModeStack;
+ this.formPointer = formPointer;
+ this.headPointer = headPointer;
+ this.deepTreeSurrogateParent = deepTreeSurrogateParent;
+ this.mode = mode;
+ this.originalMode = originalMode;
+ this.framesetOk = framesetOk;
+ this.needToDropLF = needToDropLF;
+ this.quirks = quirks;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getStack()
+ */
+ public StackNode<T>[] getStack() {
+ return stack;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getTemplateModeStack()
+ */
+ public int[] getTemplateModeStack() {
+ return templateModeStack;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getListOfActiveFormattingElements()
+ */
+ public StackNode<T>[] getListOfActiveFormattingElements() {
+ return listOfActiveFormattingElements;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getFormPointer()
+ */
+ public T getFormPointer() {
+ return formPointer;
+ }
+
+ /**
+ * Returns the headPointer.
+ *
+ * @return the headPointer
+ */
+ public T getHeadPointer() {
+ return headPointer;
+ }
+
+ /**
+ * Returns the deepTreeSurrogateParent.
+ *
+ * @return the deepTreeSurrogateParent
+ */
+ public T getDeepTreeSurrogateParent() {
+ return deepTreeSurrogateParent;
+ }
+
+ /**
+ * Returns the mode.
+ *
+ * @return the mode
+ */
+ public int getMode() {
+ return mode;
+ }
+
+ /**
+ * Returns the originalMode.
+ *
+ * @return the originalMode
+ */
+ public int getOriginalMode() {
+ return originalMode;
+ }
+
+ /**
+ * Returns the framesetOk.
+ *
+ * @return the framesetOk
+ */
+ public boolean isFramesetOk() {
+ return framesetOk;
+ }
+
+ /**
+ * Returns the needToDropLF.
+ *
+ * @return the needToDropLF
+ */
+ public boolean isNeedToDropLF() {
+ return needToDropLF;
+ }
+
+ /**
+ * Returns the quirks.
+ *
+ * @return the quirks
+ */
+ public boolean isQuirks() {
+ return quirks;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getListOfActiveFormattingElementsLength()
+ */
+ public int getListOfActiveFormattingElementsLength() {
+ return listOfActiveFormattingElements.length;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getStackLength()
+ */
+ public int getStackLength() {
+ return stack.length;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getTemplateModeStackLength()
+ */
+ public int getTemplateModeStackLength() {
+ return templateModeStack.length;
+ }
+
+ @SuppressWarnings("unused") private void destructor() {
+ for (int i = 0; i < stack.length; i++) {
+ stack[i].release();
+ }
+ for (int i = 0; i < listOfActiveFormattingElements.length; i++) {
+ if (listOfActiveFormattingElements[i] != null) {
+ listOfActiveFormattingElements[i].release();
+ }
+ }
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java
new file mode 100644
index 000000000..37cdb75d3
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2011 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import org.xml.sax.Locator;
+
+/**
+ * A <code>LocatorImpl</code> that additionally carries a one-way "tainted"
+ * mark: it starts out untainted, can be marked tainted, and offers no way
+ * to clear the mark again.
+ */
+public class TaintableLocatorImpl extends LocatorImpl {
+
+ /** Whether <code>markTainted()</code> has been called on this object. */
+ private boolean tainted;
+
+ /**
+ * Creates an untainted locator from the given locator.
+ *
+ * @param locator
+ * the locator passed on to the superclass constructor
+ */
+ public TaintableLocatorImpl(Locator locator) {
+ super(locator);
+ tainted = false;
+ }
+
+ /**
+ * Marks this locator as tainted.
+ */
+ public void markTainted() {
+ this.tainted = true;
+ }
+
+ /**
+ * Reports whether this locator has been marked as tainted.
+ *
+ * @return <code>true</code> once <code>markTainted()</code> has been
+ * called
+ */
+ public boolean isTainted() {
+ return this.tainted;
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java
new file mode 100644
index 000000000..125ef3266
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java
@@ -0,0 +1,7080 @@
+/*
+ * Copyright (c) 2005-2007 Henri Sivonen
+ * Copyright (c) 2007-2015 Mozilla Foundation
+ * Copyright (c) 2018-2020 Moonchild Productions
+ * Copyright (c) 2020 Binary Outcast
+ * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
+ * Foundation, and Opera Software ASA.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * The comments following this one that use the same comment syntax as this
+ * comment are quotes from the WHATWG HTML 5 spec as of 2 June 2007
+ * amended as of June 18 2008 and May 31 2010.
+ * That document came with this statement:
+ * "© Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and
+ * Opera Software ASA. You are granted a license to use, reproduce and
+ * create derivative works of this document."
+ */
+
+package nu.validator.htmlparser.impl;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+import nu.validator.htmlparser.annotation.Auto;
+import nu.validator.htmlparser.annotation.CharacterName;
+import nu.validator.htmlparser.annotation.Const;
+import nu.validator.htmlparser.annotation.Inline;
+import nu.validator.htmlparser.annotation.Local;
+import nu.validator.htmlparser.annotation.NoLength;
+import nu.validator.htmlparser.common.EncodingDeclarationHandler;
+import nu.validator.htmlparser.common.Interner;
+import nu.validator.htmlparser.common.TokenHandler;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+
+/**
+ * An implementation of
+ * https://html.spec.whatwg.org/multipage/syntax.html#tokenization
+ *
+ * This class implements the <code>Locator</code> interface. This is not an
+ * incidental implementation detail: Users of this class are encouraged to make
+ * use of the <code>Locator</code> nature.
+ *
+ * By default, the tokenizer may report data that XML 1.0 bans. The tokenizer
+ * can be configured to treat these conditions as fatal or to coerce the infoset
+ * to something that XML 1.0 allows.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class Tokenizer implements Locator {
+
+ private static final int DATA_AND_RCDATA_MASK = ~1;
+
+ public static final int DATA = 0;
+
+ public static final int RCDATA = 1;
+
+ public static final int SCRIPT_DATA = 2;
+
+ public static final int RAWTEXT = 3;
+
+ public static final int SCRIPT_DATA_ESCAPED = 4;
+
+ public static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED = 5;
+
+ public static final int ATTRIBUTE_VALUE_SINGLE_QUOTED = 6;
+
+ public static final int ATTRIBUTE_VALUE_UNQUOTED = 7;
+
+ public static final int PLAINTEXT = 8;
+
+ public static final int TAG_OPEN = 9;
+
+ public static final int CLOSE_TAG_OPEN = 10;
+
+ public static final int TAG_NAME = 11;
+
+ public static final int BEFORE_ATTRIBUTE_NAME = 12;
+
+ public static final int ATTRIBUTE_NAME = 13;
+
+ public static final int AFTER_ATTRIBUTE_NAME = 14;
+
+ public static final int BEFORE_ATTRIBUTE_VALUE = 15;
+
+ public static final int AFTER_ATTRIBUTE_VALUE_QUOTED = 16;
+
+ public static final int BOGUS_COMMENT = 17;
+
+ public static final int MARKUP_DECLARATION_OPEN = 18;
+
+ public static final int DOCTYPE = 19;
+
+ public static final int BEFORE_DOCTYPE_NAME = 20;
+
+ public static final int DOCTYPE_NAME = 21;
+
+ public static final int AFTER_DOCTYPE_NAME = 22;
+
+ public static final int BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 23;
+
+ public static final int DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 24;
+
+ public static final int DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 25;
+
+ public static final int AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 26;
+
+ public static final int BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 27;
+
+ public static final int DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 28;
+
+ public static final int DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 29;
+
+ public static final int AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 30;
+
+ public static final int BOGUS_DOCTYPE = 31;
+
+ public static final int COMMENT_START = 32;
+
+ public static final int COMMENT_START_DASH = 33;
+
+ public static final int COMMENT = 34;
+
+ public static final int COMMENT_END_DASH = 35;
+
+ public static final int COMMENT_END = 36;
+
+ public static final int COMMENT_END_BANG = 37;
+
+ public static final int NON_DATA_END_TAG_NAME = 38;
+
+ public static final int MARKUP_DECLARATION_HYPHEN = 39;
+
+ public static final int MARKUP_DECLARATION_OCTYPE = 40;
+
+ public static final int DOCTYPE_UBLIC = 41;
+
+ public static final int DOCTYPE_YSTEM = 42;
+
+ public static final int AFTER_DOCTYPE_PUBLIC_KEYWORD = 43;
+
+ public static final int BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 44;
+
+ public static final int AFTER_DOCTYPE_SYSTEM_KEYWORD = 45;
+
+ public static final int CONSUME_CHARACTER_REFERENCE = 46;
+
+ public static final int CONSUME_NCR = 47;
+
+ public static final int CHARACTER_REFERENCE_TAIL = 48;
+
+ public static final int HEX_NCR_LOOP = 49;
+
+ public static final int DECIMAL_NRC_LOOP = 50;
+
+ public static final int HANDLE_NCR_VALUE = 51;
+
+ public static final int HANDLE_NCR_VALUE_RECONSUME = 52;
+
+ public static final int CHARACTER_REFERENCE_HILO_LOOKUP = 53;
+
+ public static final int SELF_CLOSING_START_TAG = 54;
+
+ public static final int CDATA_START = 55;
+
+ public static final int CDATA_SECTION = 56;
+
+ public static final int CDATA_RSQB = 57;
+
+ public static final int CDATA_RSQB_RSQB = 58;
+
+ public static final int SCRIPT_DATA_LESS_THAN_SIGN = 59;
+
+ public static final int SCRIPT_DATA_ESCAPE_START = 60;
+
+ public static final int SCRIPT_DATA_ESCAPE_START_DASH = 61;
+
+ public static final int SCRIPT_DATA_ESCAPED_DASH = 62;
+
+ public static final int SCRIPT_DATA_ESCAPED_DASH_DASH = 63;
+
+ public static final int BOGUS_COMMENT_HYPHEN = 64;
+
+ public static final int RAWTEXT_RCDATA_LESS_THAN_SIGN = 65;
+
+ public static final int SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 66;
+
+ public static final int SCRIPT_DATA_DOUBLE_ESCAPE_START = 67;
+
+ public static final int SCRIPT_DATA_DOUBLE_ESCAPED = 68;
+
+ public static final int SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 69;
+
+ public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 70;
+
+ public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 71;
+
+ public static final int SCRIPT_DATA_DOUBLE_ESCAPE_END = 72;
+
+ public static final int PROCESSING_INSTRUCTION = 73;
+
+ public static final int PROCESSING_INSTRUCTION_QUESTION_MARK = 74;
+
+ /**
+ * Magic value for UTF-16 operations.
+ */
+ private static final int LEAD_OFFSET = (0xD800 - (0x10000 >> 10));
+
+ /**
+ * UTF-16 code unit array containing less than and greater than for emitting
+ * those characters on certain parse errors.
+ */
+ private static final @NoLength char[] LT_GT = { '<', '>' };
+
+ /**
+ * UTF-16 code unit array containing less than and solidus for emitting
+ * those characters on certain parse errors.
+ */
+ private static final @NoLength char[] LT_SOLIDUS = { '<', '/' };
+
+ /**
+ * UTF-16 code unit array containing ]] for emitting those characters on
+ * state transitions.
+ */
+ private static final @NoLength char[] RSQB_RSQB = { ']', ']' };
+
+ /**
+ * Array version of U+FFFD.
+ */
+ private static final @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
+
+ // [NOCPP[
+
+ /**
+ * Array version of space.
+ */
+ private static final @NoLength char[] SPACE = { ' ' };
+
+ // ]NOCPP]
+
+ /**
+ * Array version of line feed.
+ */
+ private static final @NoLength char[] LF = { '\n' };
+
+ /**
+ * "CDATA[" as <code>char[]</code>
+ */
+ private static final @NoLength char[] CDATA_LSQB = { 'C', 'D', 'A', 'T',
+ 'A', '[' };
+
+ /**
+ * "octype" as <code>char[]</code>
+ */
+ private static final @NoLength char[] OCTYPE = { 'o', 'c', 't', 'y', 'p',
+ 'e' };
+
+ /**
+ * "ublic" as <code>char[]</code>
+ */
+ private static final @NoLength char[] UBLIC = { 'u', 'b', 'l', 'i', 'c' };
+
+ /**
+ * "ystem" as <code>char[]</code>
+ */
+ private static final @NoLength char[] YSTEM = { 'y', 's', 't', 'e', 'm' };
+
+ private static final char[] TITLE_ARR = { 't', 'i', 't', 'l', 'e' };
+
+ private static final char[] SCRIPT_ARR = { 's', 'c', 'r', 'i', 'p', 't' };
+
+ private static final char[] STYLE_ARR = { 's', 't', 'y', 'l', 'e' };
+
+ private static final char[] PLAINTEXT_ARR = { 'p', 'l', 'a', 'i', 'n', 't',
+ 'e', 'x', 't' };
+
+ private static final char[] XMP_ARR = { 'x', 'm', 'p' };
+
+ private static final char[] TEXTAREA_ARR = { 't', 'e', 'x', 't', 'a', 'r',
+ 'e', 'a' };
+
+ private static final char[] IFRAME_ARR = { 'i', 'f', 'r', 'a', 'm', 'e' };
+
+ private static final char[] NOEMBED_ARR = { 'n', 'o', 'e', 'm', 'b', 'e',
+ 'd' };
+
+ private static final char[] NOSCRIPT_ARR = { 'n', 'o', 's', 'c', 'r', 'i',
+ 'p', 't' };
+
+ private static final char[] NOFRAMES_ARR = { 'n', 'o', 'f', 'r', 'a', 'm',
+ 'e', 's' };
+
+ /**
+ * The token handler.
+ */
+ protected final TokenHandler tokenHandler;
+
+ protected EncodingDeclarationHandler encodingDeclarationHandler;
+
+ // [NOCPP[
+
+ /**
+ * The error handler.
+ */
+ protected ErrorHandler errorHandler;
+
+ // ]NOCPP]
+
+ /**
+ * Whether the previous char read was CR.
+ */
+ protected boolean lastCR;
+
+ protected int stateSave;
+
+ private int returnStateSave;
+
+ protected int index;
+
+ private boolean forceQuirks;
+
+ private char additional;
+
+ private int entCol;
+
+ private int firstCharKey;
+
+ private int lo;
+
+ private int hi;
+
+ private int candidate;
+
+ private int charRefBufMark;
+
+ protected int value;
+
+ private boolean seenDigits;
+
+ protected int cstart;
+
+ /**
+ * The SAX public id for the resource being tokenized. (Only passed back
+ * as part of locator data.)
+ */
+ private String publicId;
+
+ /**
+ * The SAX system id for the resource being tokenized. (Only passed back
+ * as part of locator data.)
+ */
+ private String systemId;
+
+ /**
+ * Buffer for bufferable things other than those that fit the description
+ * of <code>charRefBuf</code>.
+ */
+ private @Auto char[] strBuf;
+
+ /**
+ * Number of significant <code>char</code>s in <code>strBuf</code>.
+ */
+ private int strBufLen;
+
+ /**
+ * Buffer for characters that might form a character reference but may
+ * end up not forming one.
+ */
+ private final @Auto char[] charRefBuf;
+
+ /**
+ * Number of significant <code>char</code>s in <code>charRefBuf</code>.
+ */
+ private int charRefBufLen;
+
+ /**
+ * Buffer for expanding NCRs falling into the Basic Multilingual Plane.
+ */
+ private final @Auto char[] bmpChar;
+
+ /**
+ * Buffer for expanding astral NCRs.
+ */
+ private final @Auto char[] astralChar;
+
+ /**
+ * The element whose end tag closes the current CDATA or RCDATA element.
+ */
+ protected ElementName endTagExpectation = null;
+
+ private char[] endTagExpectationAsArray; // not @Auto!
+
+ /**
+ * <code>true</code> if tokenizing an end tag
+ */
+ protected boolean endTag;
+
+ /**
+ * The current tag token name.
+ */
+ private ElementName tagName = null;
+
+ /**
+ * The current attribute name.
+ */
+ protected AttributeName attributeName = null;
+
+ // [NOCPP[
+
+ /**
+ * Whether comment tokens are emitted.
+ */
+ private boolean wantsComments = false;
+
+ /**
+ * <code>true</code> when HTML4-specific additional errors are requested.
+ */
+ protected boolean html4;
+
+ /**
+ * Whether the stream is past the first 1024 bytes.
+ */
+ private boolean metaBoundaryPassed;
+
+ // ]NOCPP]
+
+ /**
+ * The name of the current doctype token.
+ */
+ private @Local String doctypeName;
+
+ /**
+ * The public id of the current doctype token.
+ */
+ private String publicIdentifier;
+
+ /**
+ * The system id of the current doctype token.
+ */
+ private String systemIdentifier;
+
+ /**
+ * The attribute holder.
+ */
+ private HtmlAttributes attributes;
+
+ // [NOCPP[
+
+ /**
+ * The policy for vertical tab and form feed.
+ */
+ private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.ALTER_INFOSET;
+
+ /**
+ * The policy for comments.
+ */
+ private XmlViolationPolicy commentPolicy = XmlViolationPolicy.ALTER_INFOSET;
+
+ private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.ALTER_INFOSET;
+
+ private XmlViolationPolicy namePolicy = XmlViolationPolicy.ALTER_INFOSET;
+
+ private boolean html4ModeCompatibleWithXhtml1Schemata;
+
+ private int mappingLangToXmlLang;
+
+ // ]NOCPP]
+
+ private final boolean newAttributesEachTime;
+
+ private boolean shouldSuspend;
+
+ protected boolean confident;
+
+ private int line;
+
+ /*
+ * The line number of the current attribute. First set to the line of the
+ * attribute name and if there is a value, set to the line the value
+ * started on.
+ */
+ // CPPONLY: private int attributeLine;
+
+ private Interner interner;
+
+ // CPPONLY: private boolean viewingXmlSource;
+
+ // [NOCPP[
+
+ protected LocatorImpl ampersandLocation;
+
+ /**
+ * Creates a tokenizer with an explicit attribute-holder policy.
+ *
+ * @param tokenHandler
+ * the handler for receiving tokens
+ * @param newAttributesEachTime
+ * if <code>true</code>, <code>emptyAttributes()</code> constructs
+ * a new <code>HtmlAttributes</code> on each call instead of
+ * returning the shared empty instance
+ */
+ public Tokenizer(TokenHandler tokenHandler, boolean newAttributesEachTime) {
+ // Collaborators and configuration.
+ this.tokenHandler = tokenHandler;
+ this.newAttributesEachTime = newAttributesEachTime;
+ this.encodingDeclarationHandler = null;
+ // Fixed-size scratch buffers.
+ // &CounterClockwiseContourIntegral; is the longest valid char ref and
+ // the semicolon never gets appended to the buffer.
+ this.charRefBuf = new char[32];
+ this.bmpChar = new char[1];
+ this.astralChar = new char[2];
+ // No token is in progress yet.
+ this.tagName = null;
+ this.attributeName = null;
+ this.attributes = null;
+ this.doctypeName = null;
+ this.publicIdentifier = null;
+ this.systemIdentifier = null;
+ }
+
+ // ]NOCPP]
+
+ /**
+ * Creates a tokenizer that delivers tokens to the given handler. In the
+ * Java code path <code>newAttributesEachTime</code> is fixed to
+ * <code>false</code> and no attribute holder is allocated up front.
+ *
+ * The <code>CPPONLY</code> comment lines are kept verbatim; they
+ * presumably feed the C++ translation and must not be reworded.
+ *
+ * @param tokenHandler
+ * the handler for receiving tokens
+ */
+ public Tokenizer(TokenHandler tokenHandler
+ // CPPONLY: , boolean viewingXmlSource
+ ) {
+ this.tokenHandler = tokenHandler;
+ this.encodingDeclarationHandler = null;
+ // [NOCPP[
+ this.newAttributesEachTime = false;
+ // ]NOCPP]
+ // &CounterClockwiseContourIntegral; is the longest valid char ref and
+ // the semicolon never gets appended to the buffer.
+ this.charRefBuf = new char[32];
+ this.bmpChar = new char[1];
+ this.astralChar = new char[2];
+ this.tagName = null;
+ this.attributeName = null;
+ this.doctypeName = null;
+ this.publicIdentifier = null;
+ this.systemIdentifier = null;
+ // [NOCPP[
+ this.attributes = null;
+ // ]NOCPP]
+ // CPPONLY: this.attributes = tokenHandler.HasBuilder() ? new HtmlAttributes(mappingLangToXmlLang) : null;
+ // CPPONLY: this.newAttributesEachTime = !tokenHandler.HasBuilder();
+ // CPPONLY: this.viewingXmlSource = viewingXmlSource;
+ }
+
+ /**
+ * Sets the interner that this tokenizer passes along when it creates
+ * element names and local names from its buffers.
+ *
+ * @param interner
+ * the interner to use
+ */
+ public void setInterner(Interner interner) {
+ this.interner = interner;
+ }
+
+ /**
+ * Records the identifiers that this tokenizer reports through
+ * <code>getPublicId()</code> and <code>getSystemId()</code>.
+ *
+ * @param newPublicId
+ * the public id to report
+ * @param newSystemId
+ * the system id to report
+ */
+ public void initLocation(String newPublicId, String newSystemId) {
+ publicId = newPublicId;
+ systemId = newSystemId;
+ }
+
+ // CPPONLY: boolean isViewingXmlSource() {
+ // CPPONLY: return viewingXmlSource;
+ // CPPONLY: }
+
+ // [NOCPP[
+
+ /**
+ * Returns the mappingLangToXmlLang.
+ *
+ * @return the mappingLangToXmlLang
+ */
+ public boolean isMappingLangToXmlLang() {
+ return mappingLangToXmlLang == AttributeName.HTML_LANG;
+ }
+
+ /**
+ * Enables or disables lang-to-xml:lang mapping. The boolean is stored as
+ * an <code>AttributeName</code> mode constant.
+ *
+ * @param mappingLangToXmlLang
+ * <code>true</code> to store <code>AttributeName.HTML_LANG</code>,
+ * <code>false</code> to store <code>AttributeName.HTML</code>
+ */
+ public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
+ if (mappingLangToXmlLang) {
+ this.mappingLangToXmlLang = AttributeName.HTML_LANG;
+ } else {
+ this.mappingLangToXmlLang = AttributeName.HTML;
+ }
+ }
+
+ /**
+ * Installs the error handler used by this tokenizer.
+ *
+ * @param eh
+ * the error handler; stored as given
+ * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
+ */
+ public void setErrorHandler(ErrorHandler eh) {
+ errorHandler = eh;
+ }
+
+ /**
+ * Returns the error handler most recently installed with
+ * <code>setErrorHandler</code>.
+ *
+ * @return the current error handler
+ */
+ public ErrorHandler getErrorHandler() {
+ return errorHandler;
+ }
+
+ /**
+ * Sets the policy applied to comment content that XML 1.0 does not allow
+ * (consecutive or trailing hyphens, as handled by the comment helpers in
+ * this class).
+ *
+ * @param commentPolicy
+ * the commentPolicy to set
+ */
+ public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
+ this.commentPolicy = commentPolicy;
+ }
+
+ /**
+ * Accepts only <code>XmlViolationPolicy.ALLOW</code>. This class stores
+ * nothing for the policy, so a valid call has no effect.
+ *
+ * @param contentNonXmlCharPolicy
+ * the requested policy; must be <code>ALLOW</code>
+ * @throws IllegalArgumentException
+ * if the policy is anything other than <code>ALLOW</code>
+ */
+ public void setContentNonXmlCharPolicy(
+ XmlViolationPolicy contentNonXmlCharPolicy) {
+ if (contentNonXmlCharPolicy == XmlViolationPolicy.ALLOW) {
+ return;
+ }
+ throw new IllegalArgumentException(
+ "Must use ErrorReportingTokenizer to set contentNonXmlCharPolicy to non-ALLOW.");
+ }
+
+ /**
+ * Sets the contentSpacePolicy, which the field documentation describes
+ * as the policy for vertical tab and form feed.
+ *
+ * @param contentSpacePolicy
+ * the contentSpacePolicy to set
+ */
+ public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
+ this.contentSpacePolicy = contentSpacePolicy;
+ }
+
+ /**
+ * Sets the xmlnsPolicy.
+ *
+ * @param xmlnsPolicy
+ * the xmlnsPolicy to set
+ */
+ public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
+ if (xmlnsPolicy == XmlViolationPolicy.FATAL) {
+ throw new IllegalArgumentException("Can't use FATAL here.");
+ }
+ this.xmlnsPolicy = xmlnsPolicy;
+ }
+
+ /**
+ * Sets the namePolicy.
+ *
+ * @param namePolicy
+ * the namePolicy to set
+ */
+ public void setNamePolicy(XmlViolationPolicy namePolicy) {
+ this.namePolicy = namePolicy;
+ }
+
+ /**
+ * Sets the html4ModeCompatibleWithXhtml1Schemata flag. The flag is only
+ * stored here.
+ *
+ * @param html4ModeCompatibleWithXhtml1Schemata
+ * the html4ModeCompatibleWithXhtml1Schemata to set
+ */
+ public void setHtml4ModeCompatibleWithXhtml1Schemata(
+ boolean html4ModeCompatibleWithXhtml1Schemata) {
+ this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
+ }
+
+ // ]NOCPP]
+
+ // For the token handler to call
+ /**
+ * Sets the tokenizer state and clears any end tag expectation, both the
+ * element name and its array form. Use one of the
+ * <code>setStateAndEndTagExpectation</code> overloads to enter a state
+ * that needs a special end tag expectation.
+ *
+ * @param specialTokenizerState
+ * the tokenizer state to set
+ */
+ public void setState(int specialTokenizerState) {
+ this.endTagExpectationAsArray = null;
+ this.endTagExpectation = null;
+ this.stateSave = specialTokenizerState;
+ }
+
+ // [NOCPP[
+
+ /**
+ * Sets the tokenizer state and, unless that state is
+ * <code>Tokenizer.DATA</code>, the associated end tag expectation given
+ * as an element name string. For the <code>DATA</code> state only the
+ * state is stored and any earlier expectation is left as it was. This
+ * should only ever be used to put the tokenizer into one of the states
+ * that have a special end tag expectation. For use from the tokenizer
+ * test harness.
+ *
+ * @param specialTokenizerState
+ * the tokenizer state to set
+ * @param endTagExpectation
+ * the expected end tag for transitioning back to normal
+ */
+ public void setStateAndEndTagExpectation(int specialTokenizerState,
+ @Local String endTagExpectation) {
+ this.stateSave = specialTokenizerState;
+ if (specialTokenizerState != Tokenizer.DATA) {
+ @Auto char[] nameChars = Portability.newCharArrayFromLocal(endTagExpectation);
+ this.endTagExpectation = ElementName.elementNameByBuffer(nameChars,
+ 0, nameChars.length, interner);
+ endTagExpectationToArray();
+ }
+ }
+
+ // ]NOCPP]
+
+ /**
+ * Sets the tokenizer state together with the element whose end tag is
+ * expected, and refreshes the array form of that expectation. This
+ * should only ever be used to put the tokenizer into one of the states
+ * that have a special end tag expectation.
+ *
+ * @param specialTokenizerState
+ * the tokenizer state to set
+ * @param endTagExpectation
+ * the expected end tag for transitioning back to normal
+ */
+ public void setStateAndEndTagExpectation(int specialTokenizerState,
+ ElementName endTagExpectation) {
+ this.endTagExpectation = endTagExpectation;
+ this.stateSave = specialTokenizerState;
+ // Must run after the field assignment: it reads this.endTagExpectation.
+ endTagExpectationToArray();
+ }
+
+ /**
+ * Points <code>endTagExpectationAsArray</code> at the constant character
+ * array that spells the name of the current
+ * <code>endTagExpectation</code>, chosen by the element's group. For an
+ * unexpected group an assertion fires and the array is left unchanged.
+ */
+ private void endTagExpectationToArray() {
+ switch (endTagExpectation.getGroup()) {
+ case TreeBuilder.TITLE:
+ endTagExpectationAsArray = TITLE_ARR;
+ break;
+ case TreeBuilder.SCRIPT:
+ endTagExpectationAsArray = SCRIPT_ARR;
+ break;
+ case TreeBuilder.STYLE:
+ endTagExpectationAsArray = STYLE_ARR;
+ break;
+ case TreeBuilder.PLAINTEXT:
+ endTagExpectationAsArray = PLAINTEXT_ARR;
+ break;
+ case TreeBuilder.XMP:
+ endTagExpectationAsArray = XMP_ARR;
+ break;
+ case TreeBuilder.TEXTAREA:
+ endTagExpectationAsArray = TEXTAREA_ARR;
+ break;
+ case TreeBuilder.IFRAME:
+ endTagExpectationAsArray = IFRAME_ARR;
+ break;
+ case TreeBuilder.NOEMBED:
+ endTagExpectationAsArray = NOEMBED_ARR;
+ break;
+ case TreeBuilder.NOSCRIPT:
+ endTagExpectationAsArray = NOSCRIPT_ARR;
+ break;
+ case TreeBuilder.NOFRAMES:
+ endTagExpectationAsArray = NOFRAMES_ARR;
+ break;
+ default:
+ assert false: "Bad end tag expectation.";
+ break;
+ }
+ }
+
+ /**
+ * Overrides the current line number reported by
+ * <code>getLineNumber()</code>. For C++ use only.
+ *
+ * @param line
+ * the line number to store
+ */
+ public void setLineNumber(int line) {
+ // CPPONLY: this.attributeLine = line; // XXX is this needed?
+ this.line = line;
+ }
+
+ // start Locator impl
+
+ /**
+ * Returns the tokenizer's current line number.
+ *
+ * @return the value of the <code>line</code> field
+ * @see org.xml.sax.Locator#getLineNumber()
+ */
+ @Inline public int getLineNumber() {
+ return this.line;
+ }
+
+ // [NOCPP[
+
+ /**
+ * Always returns -1; this class does not report a column position.
+ *
+ * @return -1
+ * @see org.xml.sax.Locator#getColumnNumber()
+ */
+ @Inline public int getColumnNumber() {
+ return -1;
+ }
+
+ /**
+ * Returns the public id stored by <code>initLocation</code>.
+ *
+ * @return the public id, as last set
+ * @see org.xml.sax.Locator#getPublicId()
+ */
+ public String getPublicId() {
+ return this.publicId;
+ }
+
+ /**
+ * Returns the system id stored by <code>initLocation</code>.
+ *
+ * @return the system id, as last set
+ * @see org.xml.sax.Locator#getSystemId()
+ */
+ public String getSystemId() {
+ return this.systemId;
+ }
+
+ // end Locator impl
+
+ // end public API
+
+ /**
+ * Records that the meta boundary has been passed. The field
+ * documentation describes this as the stream being past the first 1024
+ * bytes.
+ */
+ public void notifyAboutMetaBoundary() {
+ this.metaBoundaryPassed = true;
+ }
+
+ /**
+ * Requests the HTML4-specific additional errors by setting the
+ * <code>html4</code> flag. There is no corresponding way to turn them
+ * off again.
+ */
+ void turnOnAdditionalHtml4Errors() {
+ this.html4 = true;
+ }
+
+ // ]NOCPP]
+
+ /**
+ * Supplies an attribute holder without going through attribute parsing:
+ * a newly constructed <code>HtmlAttributes</code> when
+ * <code>newAttributesEachTime</code> is set, otherwise the shared
+ * <code>HtmlAttributes.EMPTY_ATTRIBUTES</code> instance.
+ *
+ * The NOCPP markers are interleaved with the if/else on purpose
+ * (presumably so that only the shared-instance return survives in the
+ * C++ translation); keep their placement intact.
+ *
+ * @return the attribute holder
+ */
+ HtmlAttributes emptyAttributes() {
+ // [NOCPP[
+ if (newAttributesEachTime) {
+ return new HtmlAttributes(mappingLangToXmlLang);
+ } else {
+ // ]NOCPP]
+ return HtmlAttributes.EMPTY_ATTRIBUTES;
+ // [NOCPP[
+ }
+ // ]NOCPP]
+ }
+
+ /**
+ * Appends one code unit to <code>charRefBuf</code> and advances
+ * <code>charRefBufLen</code>. No capacity check is performed in the Java
+ * code path; the buffer has a fixed size set in the constructor.
+ *
+ * @param c
+ * the UTF-16 code unit to append
+ */
+ @Inline private void appendCharRefBuf(char c) {
+ // CPPONLY: assert charRefBufLen < charRefBuf.length:
+ // CPPONLY: "RELEASE: Attempted to overrun charRefBuf!";
+ charRefBuf[charRefBufLen++] = c;
+ }
+
+ /**
+ * Disposes of the pending contents of <code>charRefBuf</code> according
+ * to the return state. <code>DATA_AND_RCDATA_MASK</code> ignores the
+ * lowest bit, so only the <code>DATA</code> (0) and <code>RCDATA</code>
+ * (1) states emit the buffer to the token handler as characters and
+ * reset it. Every other return state appends the buffer to
+ * <code>strBuf</code> instead.
+ *
+ * @param returnState
+ * the state the tokenizer returns to after the character
+ * reference
+ * @throws SAXException
+ * if the token handler threw
+ */
+ private void emitOrAppendCharRefBuf(int returnState) throws SAXException {
+ boolean returnsToDataOrRcdata = (returnState & DATA_AND_RCDATA_MASK) == 0;
+ if (!returnsToDataOrRcdata) {
+ appendCharRefBufToStrBuf();
+ return;
+ }
+ if (charRefBufLen <= 0) {
+ return;
+ }
+ tokenHandler.characters(charRefBuf, 0, charRefBufLen);
+ charRefBufLen = 0;
+ }
+
+ /**
+ * Marks <code>strBuf</code> as empty once its contents have been
+ * consumed.
+ */
+ @Inline private void clearStrBufAfterUse() {
+ this.strBufLen = 0;
+ }
+
+ /**
+ * Prepares <code>strBuf</code> for new content. The buffer is expected
+ * to be empty already, which is asserted. The length is reset anyway.
+ */
+ @Inline private void clearStrBufBeforeUse() {
+ assert this.strBufLen == 0: "strBufLen not reset after previous use!";
+ // Defensive reset; redundant whenever the assertion above holds.
+ this.strBufLen = 0;
+ }
+
+ /**
+ * Empties <code>strBuf</code>, which is expected to hold exactly one
+ * hyphen at this point. Both expectations are asserted.
+ */
+ @Inline private void clearStrBufAfterOneHyphen() {
+ assert this.strBufLen == 1: "strBufLen length not one!";
+ assert this.strBuf[0] == '-': "strBuf does not start with a hyphen!";
+ this.strBufLen = 0;
+ }
+
+ /**
+ * Appends one code unit to <code>strBuf</code> and advances
+ * <code>strBufLen</code>.
+ *
+ * The Java code path performs no capacity check here, so the buffer must
+ * already be large enough. The <code>CPPONLY</code> lines carry a
+ * capacity check, presumably for the C++ translation; keep them
+ * verbatim.
+ *
+ * @param c
+ * the UTF-16 code unit to append
+ */
+ @Inline private void appendStrBuf(char c) {
+ // CPPONLY: assert strBufLen < strBuf.length: "Previous buffer length insufficient.";
+ // CPPONLY: if (strBufLen == strBuf.length) {
+ // CPPONLY: if (!EnsureBufferSpace(1)) {
+ // CPPONLY: assert false: "RELEASE: Unable to recover from buffer reallocation failure";
+ // CPPONLY: } // TODO: Add telemetry when outer if fires but inner does not
+ // CPPONLY: }
+ strBuf[strBufLen++] = c;
+ }
+
+ /**
+ * Returns the first <code>strBufLen</code> code units of
+ * <code>strBuf</code> as a String and then empties the buffer. Currently
+ * only used for error reporting.
+ *
+ * <p>
+ * C++ memory note: The return value must be released.
+ *
+ * @return the buffer as a string
+ */
+ protected String strBufToString() {
+ String str = Portability.newStringFromBuffer(strBuf, 0, strBufLen
+ // CPPONLY: , tokenHandler
+ );
+ // The conversion consumes the buffer: reset its length.
+ clearStrBufAfterUse();
+ return str;
+ }
+
+ /**
+ * Returns the buffer as a local name. The return value is released in
+ * emitDoctypeToken().
+ *
+ * @return the buffer as local name
+ */
+ private void strBufToDoctypeName() {
+ doctypeName = Portability.newLocalNameFromBuffer(strBuf, 0, strBufLen,
+ interner);
+ clearStrBufAfterUse();
+ }
+
+ /**
+ * Flushes the buffer to the token handler as character tokens and
+ * empties it. Does nothing when the buffer is empty.
+ *
+ * @throws SAXException
+ * if the token handler threw
+ */
+ private void emitStrBuf() throws SAXException {
+ if (strBufLen <= 0) {
+ return;
+ }
+ tokenHandler.characters(strBuf, 0, strBufLen);
+ clearStrBufAfterUse();
+ }
+
+ /**
+ * Appends the second of two consecutive hyphens to a bogus comment.
+ *
+ * <p>
+ * In Java the behavior depends on <code>commentPolicy</code>:
+ * ALTER_INFOSET first appends a space and then behaves like ALLOW, ALLOW
+ * warns and appends the hyphen, and FATAL reports a fatal error without
+ * appending anything. The only statement outside the NOCPP regions is
+ * the append of the hyphen.
+ *
+ * @throws SAXException
+ * if reporting the warning or the fatal error threw
+ */
+ @Inline private void appendSecondHyphenToBogusComment() throws SAXException {
+ // [NOCPP[
+ switch (commentPolicy) {
+ case ALTER_INFOSET:
+ appendStrBuf(' ');
+ // FALLTHROUGH
+ case ALLOW:
+ warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
+ // ]NOCPP]
+ appendStrBuf('-');
+ // [NOCPP[
+ break;
+ case FATAL:
+ fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
+ break;
+ }
+ // ]NOCPP]
+ }
+
+ // [NOCPP[
+ /**
+  * Handles a trailing hyphen in a comment according to
+  * <code>commentPolicy</code>: ALTER_INFOSET appends a space to the buffer
+  * and warns, ALLOW only warns, and FATAL reports a fatal error.
+  *
+  * @throws SAXException
+  *             if reporting the warning or the fatal error threw
+  */
+ private void maybeAppendSpaceToBogusComment() throws SAXException {
+     switch (commentPolicy) {
+         case FATAL:
+             fatal("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment.");
+             return;
+         case ALTER_INFOSET:
+             appendStrBuf(' ');
+             warn("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment.");
+             return;
+         case ALLOW:
+             warn("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment.");
+             return;
+         default:
+             // Any other policy value is left untouched.
+             return;
+     }
+ }
+
+ // ]NOCPP]
+
+ /**
+ * Reports consecutive hyphens in a comment and appends <code>c</code> to
+ * the buffer.
+ *
+ * <p>
+ * In Java the behavior depends on <code>commentPolicy</code>:
+ * ALTER_INFOSET drops the last buffered code unit and appends a space and
+ * a hyphen before continuing as ALLOW, ALLOW warns and appends
+ * <code>c</code>, and FATAL reports a fatal error without appending
+ * <code>c</code>. The only statements outside the NOCPP regions are the
+ * call to <code>errConsecutiveHyphens()</code> and the append of
+ * <code>c</code>.
+ *
+ * @param c
+ * the code unit to append after the adjustment
+ * @throws SAXException
+ * if the error reporting threw
+ */
+ @Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr(char c)
+ throws SAXException {
+ errConsecutiveHyphens();
+ // [NOCPP[
+ switch (commentPolicy) {
+ case ALTER_INFOSET:
+ strBufLen--;
+ // WARNING!!! This expands the worst case of the buffer length
+ // given the length of input!
+ appendStrBuf(' ');
+ appendStrBuf('-');
+ // FALLTHROUGH
+ case ALLOW:
+ warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
+ // ]NOCPP]
+ appendStrBuf(c);
+ // [NOCPP[
+ break;
+ case FATAL:
+ fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
+ break;
+ }
+ // ]NOCPP]
+ }
+
+ /**
+ * Appends a range of code units to the string buffer and advances
+ * <code>strBufLen</code> by <code>length</code>.
+ *
+ * <p>
+ * The Java statements below perform no capacity check of their own; the
+ * only capacity handling in this method is in the CPPONLY lines.
+ *
+ * @param buffer
+ * the array to copy from
+ * @param offset
+ * the index in <code>buffer</code> of the first code unit to
+ * copy
+ * @param length
+ * the number of code units to copy
+ */
+ private void appendStrBuf(@NoLength char[] buffer, int offset, int length) {
+ int newLen = strBufLen + length;
+ // CPPONLY: assert newLen <= strBuf.length: "Previous buffer length insufficient.";
+ // CPPONLY: if (strBuf.length < newLen) {
+ // CPPONLY: if (!EnsureBufferSpace(length)) {
+ // CPPONLY: assert false: "RELEASE: Unable to recover from buffer reallocation failure";
+ // CPPONLY: } // TODO: Add telemetry when outer if fires but inner does not
+ // CPPONLY: }
+ System.arraycopy(buffer, offset, strBuf, strBufLen, length);
+ strBufLen = newLen;
+ }
+
+ /**
+ * Append the contents of the char reference buffer to the main one and
+ * empty the char reference buffer.
+ */
+ @Inline private void appendCharRefBufToStrBuf() {
+ appendStrBuf(charRefBuf, 0, charRefBufLen);
+ charRefBufLen = 0;
+ }
+
+ /**
+ * Emits the current comment token and empties the buffer. In Java the
+ * token is passed to the token handler only when
+ * <code>wantsComments</code> is set.
+ *
+ * @param provisionalHyphens
+ * the number of code units at the end of the buffer that are
+ * left out of the emitted comment
+ * @param pos
+ * the current position; <code>cstart</code> is set to the
+ * position right after it
+ *
+ * @throws SAXException
+ * if the token handler threw
+ */
+ private void emitComment(int provisionalHyphens, int pos)
+ throws SAXException {
+ // [NOCPP[
+ if (wantsComments) {
+ // ]NOCPP]
+ tokenHandler.comment(strBuf, 0, strBufLen
+ - provisionalHyphens);
+ // [NOCPP[
+ }
+ // ]NOCPP]
+ clearStrBufAfterUse();
+ cstart = pos + 1;
+ }
+
+ /**
+  * Flushes coalesced character tokens: the pending run from
+  * <code>cstart</code> up to (but not including) <code>pos</code> is sent
+  * to the token handler, and <code>cstart</code> is then reset to
+  * <code>Integer.MAX_VALUE</code>.
+  *
+  * @param buf
+  *            the array holding the pending run
+  * @param pos
+  *            the index just past the last code unit of the run
+  *
+  * @throws SAXException
+  *             if the token handler threw
+  */
+ protected void flushChars(@NoLength char[] buf, int pos)
+         throws SAXException {
+     if (cstart < pos) {
+         int runLength = pos - cstart;
+         tokenHandler.characters(buf, cstart, runLength);
+     }
+     // Always reset cstart, even when nothing was pending.
+     cstart = Integer.MAX_VALUE;
+ }
+
+ /**
+  * Reports a condition that would make the infoset incompatible with XML
+  * 1.0 as fatal. The exception is passed to the error handler, if one is
+  * set, and is then always thrown.
+  *
+  * @param message
+  *            the message
+  * @throws SAXException
+  *             always; the thrown object is a SAXParseException unless the
+  *             error handler threw first
+  */
+ public void fatal(String message) throws SAXException {
+     final SAXParseException fatalProblem = new SAXParseException(message, this);
+     if (null != errorHandler) {
+         errorHandler.fatalError(fatalProblem);
+     }
+     throw fatalProblem;
+ }
+
+ /**
+ * Reports a Parse Error.
+ *
+ * @param message
+ * the message
+ * @throws SAXException
+ */
+ public void err(String message) throws SAXException {
+ if (errorHandler == null) {
+ return;
+ }
+ SAXParseException spe = new SAXParseException(message, this);
+ errorHandler.error(spe);
+ }
+
+ public void errTreeBuilder(String message) throws SAXException {
+ ErrorHandler eh = null;
+ if (tokenHandler instanceof TreeBuilder<?>) {
+ TreeBuilder<?> treeBuilder = (TreeBuilder<?>) tokenHandler;
+ eh = treeBuilder.getErrorHandler();
+ }
+ if (eh == null) {
+ eh = errorHandler;
+ }
+ if (eh == null) {
+ return;
+ }
+ SAXParseException spe = new SAXParseException(message, this);
+ eh.error(spe);
+ }
+
+ /**
+  * Reports a warning to the error handler. Does nothing when no error
+  * handler is set.
+  *
+  * @param message
+  *            the message
+  * @throws SAXException
+  *             if the error handler threw
+  */
+ public void warn(String message) throws SAXException {
+     if (errorHandler != null) {
+         errorHandler.warning(new SAXParseException(message, this));
+     }
+ }
+
+ /**
+ * Looks up the element name for the buffer contents, stores it in
+ * <code>tagName</code> and empties the buffer.
+ */
+ private void strBufToElementNameString() {
+ tagName = ElementName.elementNameByBuffer(strBuf, 0, strBufLen,
+ interner);
+ clearStrBufAfterUse();
+ }
+
+ /**
+ * Emits the current tag token to the token handler and resets the
+ * per-tag state (<code>tagName</code> and <code>attributes</code>).
+ *
+ * @param selfClosing
+ * passed on to <code>maybeErrSlashInEndTag</code> and, for
+ * start tags, to the token handler
+ * @param pos
+ * the current position; <code>cstart</code> is set to the
+ * position right after it
+ * @return the value of <code>stateSave</code> after the token handler
+ * has been called
+ * @throws SAXException
+ * if the token handler or the error reporting threw
+ */
+ private int emitCurrentTagToken(boolean selfClosing, int pos)
+ throws SAXException {
+ cstart = pos + 1;
+ maybeErrSlashInEndTag(selfClosing);
+ // Default next state; the token handler may override it (see the
+ // comment before the return statement).
+ stateSave = Tokenizer.DATA;
+ // Use the shared empty attribute holder when the tag has no attributes.
+ HtmlAttributes attrs = (attributes == null ? HtmlAttributes.EMPTY_ATTRIBUTES
+ : attributes);
+ if (endTag) {
+ /*
+ * When an end tag token is emitted, the content model flag must be
+ * switched to the PCDATA state.
+ */
+ maybeErrAttributesOnEndTag(attrs);
+ // CPPONLY: if (!viewingXmlSource) {
+ tokenHandler.endTag(tagName);
+ // CPPONLY: }
+ // CPPONLY: if (newAttributesEachTime) {
+ // CPPONLY: Portability.delete(attributes);
+ // CPPONLY: attributes = null;
+ // CPPONLY: }
+ } else {
+ // CPPONLY: if (viewingXmlSource) {
+ // CPPONLY: assert newAttributesEachTime;
+ // CPPONLY: Portability.delete(attributes);
+ // CPPONLY: attributes = null;
+ // CPPONLY: } else {
+ tokenHandler.startTag(tagName, attrs, selfClosing);
+ // CPPONLY: }
+ }
+ tagName.release();
+ tagName = null;
+ // Either drop the attribute holder or clear it for reuse by the next tag.
+ if (newAttributesEachTime) {
+ attributes = null;
+ } else {
+ attributes.clear(mappingLangToXmlLang);
+ }
+ /*
+ * The token handler may have called setStateAndEndTagExpectation
+ * and changed stateSave since the start of this method.
+ */
+ return stateSave;
+ }
+
+ /**
+ * Turns the buffer contents into <code>attributeName</code>, empties the
+ * buffer and creates the <code>attributes</code> holder if there is
+ * none yet. If the current tag already has an attribute with this name,
+ * the duplicate is reported and <code>attributeName</code> is set to
+ * <code>null</code>.
+ *
+ * @throws SAXException
+ * if the error reporting threw
+ */
+ private void attributeNameComplete() throws SAXException {
+ attributeName = AttributeName.nameByBuffer(strBuf, 0, strBufLen
+ // [NOCPP[
+ , namePolicy != XmlViolationPolicy.ALLOW
+ // ]NOCPP]
+ , interner);
+ clearStrBufAfterUse();
+
+ if (attributes == null) {
+ attributes = new HtmlAttributes(mappingLangToXmlLang);
+ }
+
+ /*
+ * When the user agent leaves the attribute name state (and before
+ * emitting the tag token, if appropriate), the complete attribute's
+ * name must be compared to the other attributes on the same token; if
+ * there is already an attribute on the token with the exact same name,
+ * then this is a parse error and the new attribute must be dropped,
+ * along with the value that gets associated with it (if any).
+ */
+ if (attributes.contains(attributeName)) {
+ errDuplicateAttribute();
+ attributeName.release();
+ attributeName = null;
+ }
+ }
+
+ /**
+ * Adds the pending attribute to the current tag when no value was given.
+ *
+ * <p>
+ * When <code>attributeName</code> is <code>null</code> (as left by
+ * <code>attributeNameComplete()</code> for a duplicate), only the buffer
+ * is emptied. Otherwise the attribute is added with an empty value,
+ * except in the Java-only HTML4 mode: there a boolean attribute gets
+ * either its own local name (XHTML 1 schema compatibility) or the empty
+ * string as its value, and a non-boolean attribute is reported as an
+ * error and added with the empty string, unless it is
+ * <code>border</code>, which is neither reported nor added.
+ *
+ * @throws SAXException
+ * if the error reporting threw
+ */
+ private void addAttributeWithoutValue() throws SAXException {
+ noteAttributeWithoutValue();
+
+ // [NOCPP[
+ if (metaBoundaryPassed && AttributeName.CHARSET == attributeName
+ && ElementName.META == tagName) {
+ err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes.");
+ }
+ // ]NOCPP]
+ if (attributeName != null) {
+ // [NOCPP[
+ if (html4) {
+ if (attributeName.isBoolean()) {
+ if (html4ModeCompatibleWithXhtml1Schemata) {
+ attributes.addAttribute(attributeName,
+ attributeName.getLocal(AttributeName.HTML),
+ xmlnsPolicy);
+ } else {
+ attributes.addAttribute(attributeName, "", xmlnsPolicy);
+ }
+ } else {
+ if (AttributeName.BORDER != attributeName) {
+ err("Attribute value omitted for a non-boolean attribute. (HTML4-only error.)");
+ attributes.addAttribute(attributeName, "", xmlnsPolicy);
+ }
+ }
+ } else {
+ if (AttributeName.SRC == attributeName
+ || AttributeName.HREF == attributeName) {
+ warn("Attribute \u201C"
+ + attributeName.getLocal(AttributeName.HTML)
+ + "\u201D without an explicit value seen. The attribute may be dropped by IE7.");
+ }
+ // ]NOCPP]
+ attributes.addAttribute(attributeName,
+ Portability.newEmptyString()
+ // [NOCPP[
+ , xmlnsPolicy
+ // ]NOCPP]
+ // CPPONLY: , attributeLine
+ );
+ // [NOCPP[
+ }
+ // ]NOCPP]
+ attributeName = null; // attributeName has been adopted by the
+ // |attributes| object
+ } else {
+ // No pending attribute name; just discard the buffer contents.
+ clearStrBufAfterUse();
+ }
+ }
+
+ /**
+ * Adds the pending attribute to the current tag, using the buffer
+ * contents as its value.
+ *
+ * <p>
+ * When <code>attributeName</code> is <code>null</code> the buffered value
+ * is discarded. In the Java-only HTML4 mode with XHTML 1 schema
+ * compatibility, the value of a case-folded attribute on a start tag is
+ * ASCII-lowercased before it is added.
+ *
+ * @throws SAXException
+ * if the error reporting threw
+ */
+ private void addAttributeWithValue() throws SAXException {
+ // [NOCPP[
+ if (metaBoundaryPassed && ElementName.META == tagName
+ && AttributeName.CHARSET == attributeName) {
+ err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes.");
+ }
+ // ]NOCPP]
+ if (attributeName != null) {
+ String val = strBufToString(); // Ownership transferred to
+ // HtmlAttributes
+ // CPPONLY: if (mViewSource) {
+ // CPPONLY: mViewSource.MaybeLinkifyAttributeValue(attributeName, val);
+ // CPPONLY: }
+ // [NOCPP[
+ if (!endTag && html4 && html4ModeCompatibleWithXhtml1Schemata
+ && attributeName.isCaseFolded()) {
+ val = newAsciiLowerCaseStringFromString(val);
+ }
+ // ]NOCPP]
+ attributes.addAttribute(attributeName, val
+ // [NOCPP[
+ , xmlnsPolicy
+ // ]NOCPP]
+ // CPPONLY: , attributeLine
+ );
+ attributeName = null; // attributeName has been adopted by the
+ // |attributes| object
+ } else {
+ // We have a duplicate attribute. Explicitly discard its value.
+ clearStrBufAfterUse();
+ }
+ }
+
+ // [NOCPP[
+
+ /**
+  * Returns a new string in which the ASCII letters A through Z are
+  * replaced with their lowercase counterparts; all other code units are
+  * copied unchanged.
+  *
+  * @param str
+  *            the string to convert, may be <code>null</code>
+  * @return the converted string, or <code>null</code> if
+  *         <code>str</code> was <code>null</code>
+  */
+ private static String newAsciiLowerCaseStringFromString(String str) {
+     if (str == null) {
+         return null;
+     }
+     char[] lowered = str.toCharArray();
+     for (int i = 0; i < lowered.length; i++) {
+         char unit = lowered[i];
+         if ('A' <= unit && unit <= 'Z') {
+             lowered[i] = (char) (unit + 0x20);
+         }
+     }
+     return new String(lowered);
+ }
+
+ /**
+ * Hook called by <code>start()</code> after the token handler has been
+ * told that tokenization starts. This implementation does nothing.
+ *
+ * @throws SAXException
+ * declared for overriding implementations; never thrown here
+ */
+ protected void startErrorReporting() throws SAXException {
+
+ }
+
+ // ]NOCPP]
+
+ /**
+ * Starts tokenization: initializes the tokenizer, notifies the token
+ * handler and, in Java only, calls <code>startErrorReporting()</code>.
+ *
+ * @throws SAXException
+ * if the token handler or <code>startErrorReporting()</code>
+ * threw
+ */
+ public void start() throws SAXException {
+ initializeWithoutStarting();
+ tokenHandler.startTokenization(this);
+ // [NOCPP[
+ startErrorReporting();
+ // ]NOCPP]
+ }
+
+ /**
+ * Tokenizes the chars of <code>buffer</code> between its start and its
+ * end, resuming from the saved state, and advances the start of the
+ * buffer past what the state loop consumed.
+ *
+ * @param buffer
+ * the input; its start is updated before returning
+ * @return the value of <code>lastCR</code> after the state loop returned
+ * @throws SAXException
+ * if the token handler or the error reporting threw
+ */
+ public boolean tokenizeBuffer(UTF16Buffer buffer) throws SAXException {
+ int state = stateSave;
+ int returnState = returnStateSave;
+ char c = '\u0000';
+ shouldSuspend = false;
+ lastCR = false;
+
+ int start = buffer.getStart();
+ int end = buffer.getEnd();
+
+ // In C++, the caller of tokenizeBuffer needs to do this explicitly.
+ // [NOCPP[
+ ensureBufferSpace(end - start);
+ // ]NOCPP]
+
+ /**
+ * The index of the last <code>char</code> read from <code>buf</code>.
+ * It starts one before the first char because the state loop
+ * increments it before each read.
+ */
+ int pos = start - 1;
+
+ /**
+ * <code>cstart</code> is the index of the first <code>char</code> in
+ * <code>buf</code> that is part of a coalesced run of character tokens
+ * or <code>Integer.MAX_VALUE</code> if there is not a current run being
+ * coalesced. For the states listed below it is set to the start of
+ * this buffer; for all other states there is no current run.
+ */
+ switch (state) {
+ case DATA:
+ case RCDATA:
+ case SCRIPT_DATA:
+ case PLAINTEXT:
+ case RAWTEXT:
+ case CDATA_SECTION:
+ case SCRIPT_DATA_ESCAPED:
+ case SCRIPT_DATA_ESCAPE_START:
+ case SCRIPT_DATA_ESCAPE_START_DASH:
+ case SCRIPT_DATA_ESCAPED_DASH:
+ case SCRIPT_DATA_ESCAPED_DASH_DASH:
+ case SCRIPT_DATA_DOUBLE_ESCAPE_START:
+ case SCRIPT_DATA_DOUBLE_ESCAPED:
+ case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
+ case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
+ case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
+ case SCRIPT_DATA_DOUBLE_ESCAPE_END:
+ cstart = start;
+ break;
+ default:
+ cstart = Integer.MAX_VALUE;
+ break;
+ }
+
+ /**
+ * Run the state loop. The end of the buffer is passed as the position
+ * at which the loop stops reading; chars at or beyond it are not
+ * examined.
+ */
+ // CPPONLY: if (mViewSource) {
+ // CPPONLY: mViewSource.SetBuffer(buffer);
+ // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
+ // CPPONLY: mViewSource.DropBuffer((pos == buffer.getEnd()) ? pos : pos + 1);
+ // CPPONLY: } else {
+ // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
+ // CPPONLY: }
+ // [NOCPP[
+ pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState,
+ end);
+ // ]NOCPP]
+ if (pos == end) {
+ // exiting due to end of buffer
+ buffer.setStart(pos);
+ } else {
+ // exiting early: the char at pos has been consumed already
+ buffer.setStart(pos + 1);
+ }
+ return lastCR;
+ }
+
+ // [NOCPP[
+ /**
+  * Makes sure that the token handler and <code>strBuf</code> have room
+  * for the worst case that <code>inputLength</code> more input chars can
+  * produce, allocating or growing <code>strBuf</code> when needed.
+  *
+  * @param inputLength
+  *            the number of input chars about to be tokenized
+  * @throws SAXException
+  *             if the token handler threw
+  */
+ private void ensureBufferSpace(int inputLength) throws SAXException {
+     // Add 2 to account for emissions of LT_GT, LT_SOLIDUS and RSQB_RSQB.
+     // This goes into the general worst case rather than only the
+     // TreeBuilder-exposed one, so that unifying the tokenizer and tree
+     // builder buffers later does not re-introduce a bug.
+     int needed = strBufLen + inputLength + charRefBufLen + 2;
+     tokenHandler.ensureBufferSpace(needed);
+     if (XmlViolationPolicy.ALTER_INFOSET == commentPolicy) {
+         // When altering the infoset, a comment made of consecutive hyphens
+         // gets a space per hyphen. Those contents are never emitted as
+         // characters() to the token handler, which is why the doubling
+         // happens only after the call above.
+         needed *= 2;
+     }
+     if (strBuf == null) {
+         // A small arbitrary surplus avoids an immediate reallocation once
+         // there are a few characters in the buffer.
+         strBuf = new char[needed + 128];
+         return;
+     }
+     if (strBuf.length >= needed) {
+         return;
+     }
+     // HotSpot reportedly allocates memory with 8-byte accuracy, so no
+     // rounding is attempted here; adding a small constant would need
+     // profiling first. In C++ with jemalloc, the corresponding method
+     // should round up to avoid slop.
+     char[] grown = new char[needed];
+     System.arraycopy(strBuf, 0, grown, 0, strBufLen);
+     strBuf = grown;
+ }
+ // ]NOCPP]
+
+ @SuppressWarnings("unused") private int stateLoop(int state, char c,
+ int pos, @NoLength char[] buf, boolean reconsume, int returnState,
+ int endPos) throws SAXException {
+ /*
+ * Idioms used in this code:
+ *
+ *
+ * Consuming the next input character
+ *
+ * To consume the next input character, the code does this: if (++pos ==
+ * endPos) { break stateloop; } c = checkChar(buf, pos);
+ *
+ *
+ * Staying in a state
+ *
+ * When there's a state that the tokenizer may stay in over multiple
+ * input characters, the state has a wrapper |for(;;)| loop and staying
+ * in the state continues the loop.
+ *
+ *
+ * Switching to another state
+ *
+ * To switch to another state, the code sets the state variable to the
+ * magic number of the new state. Then it either continues stateloop or
+ * breaks out of the state's own wrapper loop if the target state is
+ * right after the current state in source order. (This is a partial
+ * workaround for Java's lack of goto.)
+ *
+ *
+ * Reconsume support
+ *
+ * The spec sometimes says that an input character is reconsumed in
+ * another state. If a state can ever be entered so that an input
+ * character can be reconsumed in it, the state's code starts with an
+ * |if (reconsume)| that sets reconsume to false and skips over the
+ * normal code for consuming a new character.
+ *
+ * To reconsume the current character in another state, the code sets
+ * |reconsume| to true and then switches to the other state.
+ *
+ *
+ * Emitting character tokens
+ *
+ * This method emits character tokens lazily. Whenever a new range of
+ * character tokens starts, the field cstart must be set to the start
+ * index of the range. The flushChars() method must be called at the end
+ * of a range to flush it.
+ *
+ *
+ * U+0000 handling
+ *
+ * The various states have to handle the replacement of U+0000 with
+ * U+FFFD. However, if U+0000 would be reconsumed in another state, the
+ * replacement doesn't need to happen, because it's handled by the
+ * reconsuming state.
+ *
+ *
+ * LF handling
+ *
+ * Every state needs to increment the line number upon LF unless the LF
+ * gets reconsumed by another state which increments the line number.
+ *
+ *
+ * CR handling
+ *
+ * Every state needs to handle CR unless the CR gets reconsumed and is
+ * handled by the reconsuming state. The CR needs to be handled as if it
+ * were an LF, the lastCR field must be set to true and then this
+ * method must return. The IO driver will then swallow the next
+ * character if it is an LF to coalesce CRLF.
+ */
+ stateloop: for (;;) {
+ switch (state) {
+ case DATA:
+ dataloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ switch (c) {
+ case '&':
+ /*
+ * U+0026 AMPERSAND (&) Switch to the character
+ * reference in data state.
+ */
+ flushChars(buf, pos);
+ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\u0000');
+ returnState = state;
+ state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
+ continue stateloop;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the tag
+ * open state.
+ */
+ flushChars(buf, pos);
+
+ state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
+ break dataloop; // FALL THROUGH continue
+ // stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ continue;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Anything else Emit the input character as a
+ * character token.
+ *
+ * Stay in the data state.
+ */
+ continue;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case TAG_OPEN:
+ tagopenloop: for (;;) {
+ /*
+ * The behavior of this state depends on the content
+ * model flag.
+ */
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * If the content model flag is set to the PCDATA state
+ * Consume the next input character:
+ */
+ if (c >= 'A' && c <= 'Z') {
+ /*
+ * U+0041 LATIN CAPITAL LETTER A through to U+005A
+ * LATIN CAPITAL LETTER Z Create a new start tag
+ * token,
+ */
+ endTag = false;
+ /*
+ * set its tag name to the lowercase version of the
+ * input character (add 0x0020 to the character's
+ * code point),
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf((char) (c + 0x20));
+ /* then switch to the tag name state. */
+ state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
+ /*
+ * (Don't emit the token yet; further details will
+ * be filled in before it is emitted.)
+ */
+ break tagopenloop;
+ // continue stateloop;
+ } else if (c >= 'a' && c <= 'z') {
+ /*
+ * U+0061 LATIN SMALL LETTER A through to U+007A
+ * LATIN SMALL LETTER Z Create a new start tag
+ * token,
+ */
+ endTag = false;
+ /*
+ * set its tag name to the input character,
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ /* then switch to the tag name state. */
+ state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
+ /*
+ * (Don't emit the token yet; further details will
+ * be filled in before it is emitted.)
+ */
+ break tagopenloop;
+ // continue stateloop;
+ }
+ switch (c) {
+ case '!':
+ /*
+ * U+0021 EXCLAMATION MARK (!) Switch to the
+ * markup declaration open state.
+ */
+ state = transition(state, Tokenizer.MARKUP_DECLARATION_OPEN, reconsume, pos);
+ continue stateloop;
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Switch to the close tag
+ * open state.
+ */
+ state = transition(state, Tokenizer.CLOSE_TAG_OPEN, reconsume, pos);
+ continue stateloop;
+ case '?':
+ // CPPONLY: if (viewingXmlSource) {
+ // CPPONLY: state = transition(state,
+ // CPPONLY: Tokenizer.PROCESSING_INSTRUCTION,
+ // CPPONLY: reconsume,
+ // CPPONLY: pos);
+ // CPPONLY: continue stateloop;
+ // CPPONLY: }
+ /*
+ * U+003F QUESTION MARK (?) Parse error.
+ */
+ errProcessingInstruction();
+ /*
+ * Switch to the bogus comment state.
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Parse error.
+ */
+ errLtGt();
+ /*
+ * Emit a U+003C LESS-THAN SIGN character token
+ * and a U+003E GREATER-THAN SIGN character
+ * token.
+ */
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 2);
+ /* Switch to the data state. */
+ cstart = pos + 1;
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ default:
+ /*
+ * Anything else Parse error.
+ */
+ errBadCharAfterLt(c);
+ /*
+ * Emit a U+003C LESS-THAN SIGN character token
+ */
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
+ /*
+ * and reconsume the current input character in
+ * the data state.
+ */
+ cstart = pos;
+ reconsume = true;
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALL THROUGH DON'T REORDER
+ case TAG_NAME:
+ tagnameloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ strBufToElementNameString();
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * Switch to the before attribute name state.
+ */
+ strBufToElementNameString();
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ break tagnameloop;
+ // continue stateloop;
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Switch to the self-closing
+ * start tag state.
+ */
+ strBufToElementNameString();
+ state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * tag token.
+ */
+ strBufToElementNameString();
+ state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
+ if (shouldSuspend) {
+ break stateloop;
+ }
+ /*
+ * Switch to the data state.
+ */
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ if (c >= 'A' && c <= 'Z') {
+ /*
+ * U+0041 LATIN CAPITAL LETTER A through to
+ * U+005A LATIN CAPITAL LETTER Z Append the
+ * lowercase version of the current input
+ * character (add 0x0020 to the character's
+ * code point) to the current tag token's
+ * tag name.
+ */
+ c += 0x20;
+ }
+ /*
+ * Anything else Append the current input
+ * character to the current tag token's tag
+ * name.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the tag name state.
+ */
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BEFORE_ATTRIBUTE_NAME:
+ beforeattributenameloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
+ * in the before attribute name state.
+ */
+ continue;
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Switch to the self-closing
+ * start tag state.
+ */
+ state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * tag token.
+ */
+ state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
+ if (shouldSuspend) {
+ break stateloop;
+ }
+ /*
+ * Switch to the data state.
+ */
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ case '\"':
+ case '\'':
+ case '<':
+ case '=':
+ /*
+ * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
+ * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
+ * SIGN (=) Parse error.
+ */
+ errBadCharBeforeAttributeNameOrNull(c);
+ /*
+ * Treat it as per the "anything else" entry
+ * below.
+ */
+ default:
+ /*
+ * Anything else Start a new attribute in the
+ * current tag token.
+ */
+ if (c >= 'A' && c <= 'Z') {
+ /*
+ * U+0041 LATIN CAPITAL LETTER A through to
+ * U+005A LATIN CAPITAL LETTER Z Set that
+ * attribute's name to the lowercase version
+ * of the current input character (add
+ * 0x0020 to the character's code point)
+ */
+ c += 0x20;
+ }
+ // CPPONLY: attributeLine = line;
+ /*
+ * Set that attribute's name to the current
+ * input character,
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ /*
+ * and its value to the empty string.
+ */
+ // Will do later.
+ /*
+ * Switch to the attribute name state.
+ */
+ state = transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
+ break beforeattributenameloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case ATTRIBUTE_NAME:
+ attributenameloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ attributeNameComplete();
+ state = transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * Switch to the after attribute name state.
+ */
+ attributeNameComplete();
+ state = transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
+ continue stateloop;
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Switch to the self-closing
+ * start tag state.
+ */
+ attributeNameComplete();
+ addAttributeWithoutValue();
+ state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
+ continue stateloop;
+ case '=':
+ /*
+ * U+003D EQUALS SIGN (=) Switch to the before
+ * attribute value state.
+ */
+ attributeNameComplete();
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
+ break attributenameloop;
+ // continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * tag token.
+ */
+ attributeNameComplete();
+ addAttributeWithoutValue();
+ state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
+ if (shouldSuspend) {
+ break stateloop;
+ }
+ /*
+ * Switch to the data state.
+ */
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ case '\"':
+ case '\'':
+ case '<':
+ /*
+ * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
+ * (') U+003C LESS-THAN SIGN (<) Parse error.
+ */
+ errQuoteOrLtInAttributeNameOrNull(c);
+ /*
+ * Treat it as per the "anything else" entry
+ * below.
+ */
+ default:
+ if (c >= 'A' && c <= 'Z') {
+ /*
+ * U+0041 LATIN CAPITAL LETTER A through to
+ * U+005A LATIN CAPITAL LETTER Z Append the
+ * lowercase version of the current input
+ * character (add 0x0020 to the character's
+ * code point) to the current attribute's
+ * name.
+ */
+ c += 0x20;
+ }
+ /*
+ * Anything else Append the current input
+ * character to the current attribute's name.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the attribute name state.
+ */
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BEFORE_ATTRIBUTE_VALUE:
+ beforeattributevalueloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
+ * in the before attribute value state.
+ */
+ continue;
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Switch to the
+ * attribute value (double-quoted) state.
+ */
+ // CPPONLY: attributeLine = line;
+ clearStrBufBeforeUse();
+ state = transition(state, Tokenizer.ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, pos);
+ break beforeattributevalueloop;
+ // continue stateloop;
+ case '&':
+ /*
+ * U+0026 AMPERSAND (&) Switch to the attribute
+ * value (unquoted) state and reconsume this
+ * input character.
+ */
+ // CPPONLY: attributeLine = line;
+ clearStrBufBeforeUse();
+ reconsume = true;
+ state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
+ noteUnquotedAttributeValue();
+ continue stateloop;
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Switch to the attribute
+ * value (single-quoted) state.
+ */
+ // CPPONLY: attributeLine = line;
+ clearStrBufBeforeUse();
+ state = transition(state, Tokenizer.ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Parse error.
+ */
+ errAttributeValueMissing();
+ /*
+ * Emit the current tag token.
+ */
+ addAttributeWithoutValue();
+ state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
+ if (shouldSuspend) {
+ break stateloop;
+ }
+ /*
+ * Switch to the data state.
+ */
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ case '<':
+ case '=':
+ case '`':
+ /*
+ * U+003C LESS-THAN SIGN (<) U+003D EQUALS SIGN
+ * (=) U+0060 GRAVE ACCENT (`)
+ */
+ errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c);
+ /*
+ * Treat it as per the "anything else" entry
+ * below.
+ */
+ default:
+ // [NOCPP[
+ errHtml4NonNameInUnquotedAttribute(c);
+ // ]NOCPP]
+ /*
+ * Anything else Append the current input
+ * character to the current attribute's value.
+ */
+ // CPPONLY: attributeLine = line;
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ /*
+ * Switch to the attribute value (unquoted)
+ * state.
+ */
+
+ state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
+ noteUnquotedAttributeValue();
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
+ attributevaluedoublequotedloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Switch to the after
+ * attribute value (quoted) state.
+ */
+ addAttributeWithValue();
+
+ state = transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
+ break attributevaluedoublequotedloop;
+ // continue stateloop;
+ case '&':
+ /*
+ * U+0026 AMPERSAND (&) Switch to the character
+ * reference in attribute value state, with the
+ * additional allowed character being U+0022
+ * QUOTATION MARK (").
+ */
+ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\"');
+ returnState = state;
+ state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ continue;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append the current input
+ * character to the current attribute's value.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the attribute value (double-quoted)
+ * state.
+ */
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case AFTER_ATTRIBUTE_VALUE_QUOTED:
+ afterattributevaluequotedloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * Switch to the before attribute name state.
+ */
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ continue stateloop;
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Switch to the self-closing
+ * start tag state.
+ */
+ state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
+ break afterattributevaluequotedloop;
+ // continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * tag token.
+ */
+ state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
+ if (shouldSuspend) {
+ break stateloop;
+ }
+ /*
+ * Switch to the data state.
+ */
+ continue stateloop;
+ default:
+ /*
+ * Anything else Parse error.
+ */
+ errNoSpaceBetweenAttributes();
+ /*
+ * Reconsume the character in the before
+ * attribute name state.
+ */
+ reconsume = true;
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case SELF_CLOSING_START_TAG:
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Set the self-closing
+ * flag of the current tag token. Emit the current
+ * tag token.
+ */
+ // [NOCPP[
+ errHtml4XmlVoidSyntax();
+ // ]NOCPP]
+ state = transition(state, emitCurrentTagToken(true, pos), reconsume, pos);
+ if (shouldSuspend) {
+ break stateloop;
+ }
+ /*
+ * Switch to the data state.
+ */
+ continue stateloop;
+ default:
+ /* Anything else Parse error. */
+ errSlashNotFollowedByGt();
+ /*
+ * Reconsume the character in the before attribute
+ * name state.
+ */
+ reconsume = true;
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ continue stateloop;
+ }
+ // XXX reorder point
+ case ATTRIBUTE_VALUE_UNQUOTED:
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ addAttributeWithValue();
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * Switch to the before attribute name state.
+ */
+ addAttributeWithValue();
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ continue stateloop;
+ case '&':
+ /*
+ * U+0026 AMPERSAND (&) Switch to the character
+ * reference in attribute value state, with the
+ * additional allowed character being U+003E
+ * GREATER-THAN SIGN (>)
+ */
+ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('>');
+ returnState = state;
+ state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * tag token.
+ */
+ addAttributeWithValue();
+ state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
+ if (shouldSuspend) {
+ break stateloop;
+ }
+ /*
+ * Switch to the data state.
+ */
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ case '<':
+ case '\"':
+ case '\'':
+ case '=':
+ case '`':
+ /*
+ * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
+ * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
+ * SIGN (=) U+0060 GRAVE ACCENT (`) Parse error.
+ */
+ errUnquotedAttributeValOrNull(c);
+ /*
+ * Treat it as per the "anything else" entry
+ * below.
+ */
+ // fall through
+ default:
+ // [NOCPP]
+ errHtml4NonNameInUnquotedAttribute(c);
+ // ]NOCPP]
+ /*
+ * Anything else Append the current input
+ * character to the current attribute's value.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the attribute value (unquoted) state.
+ */
+ continue;
+ }
+ }
+ // XXX reorder point
+ case AFTER_ATTRIBUTE_NAME:
+ for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
+ * in the after attribute name state.
+ */
+ continue;
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Switch to the self-closing
+ * start tag state.
+ */
+ addAttributeWithoutValue();
+ state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
+ continue stateloop;
+ case '=':
+ /*
+ * U+003D EQUALS SIGN (=) Switch to the before
+ * attribute value state.
+ */
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * tag token.
+ */
+ addAttributeWithoutValue();
+ state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
+ if (shouldSuspend) {
+ break stateloop;
+ }
+ /*
+ * Switch to the data state.
+ */
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ case '\"':
+ case '\'':
+ case '<':
+ errQuoteOrLtInAttributeNameOrNull(c);
+ /*
+ * Treat it as per the "anything else" entry
+ * below.
+ */
+ default:
+ addAttributeWithoutValue();
+ /*
+ * Anything else Start a new attribute in the
+ * current tag token.
+ */
+ if (c >= 'A' && c <= 'Z') {
+ /*
+ * U+0041 LATIN CAPITAL LETTER A through to
+ * U+005A LATIN CAPITAL LETTER Z Set that
+ * attribute's name to the lowercase version
+ * of the current input character (add
+ * 0x0020 to the character's code point)
+ */
+ c += 0x20;
+ }
+ /*
+ * Set that attribute's name to the current
+ * input character,
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ /*
+ * and its value to the empty string.
+ */
+ // Will do later.
+ /*
+ * Switch to the attribute name state.
+ */
+ state = transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case MARKUP_DECLARATION_OPEN:
+ markupdeclarationopenloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * If the next two characters are both U+002D
+ * HYPHEN-MINUS characters (-), consume those two
+ * characters, create a comment token whose data is the
+ * empty string, and switch to the comment start state.
+ *
+ * Otherwise, if the next seven characters are an ASCII
+ * case-insensitive match for the word "DOCTYPE", then
+ * consume those characters and switch to the DOCTYPE
+ * state.
+ *
+ * Otherwise, if the insertion mode is
+ * "in foreign content" and the current node is not an
+ * element in the HTML namespace and the next seven
+ * characters are a case-sensitive match for the string
+ * "[CDATA[" (the five uppercase letters "CDATA" with a
+ * U+005B LEFT SQUARE BRACKET character before and
+ * after), then consume those characters and switch to
+ * the CDATA section state.
+ *
+ * Otherwise, it is a parse error. Switch to the bogus
+ * comment state. The next character that is consumed,
+ * if any, is the first character that will be in the
+ * comment.
+ */
+ switch (c) {
+ case '-':
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.MARKUP_DECLARATION_HYPHEN, reconsume, pos);
+ break markupdeclarationopenloop;
+ // continue stateloop;
+ case 'd':
+ case 'D':
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ index = 0;
+ state = transition(state, Tokenizer.MARKUP_DECLARATION_OCTYPE, reconsume, pos);
+ continue stateloop;
+ case '[':
+ if (tokenHandler.cdataSectionAllowed()) {
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ index = 0;
+ state = transition(state, Tokenizer.CDATA_START, reconsume, pos);
+ continue stateloop;
+ }
+ // else fall through
+ default:
+ errBogusComment();
+ clearStrBufBeforeUse();
+ reconsume = true;
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case MARKUP_DECLARATION_HYPHEN:
+ markupdeclarationhyphenloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ switch (c) {
+ case '-':
+ clearStrBufAfterOneHyphen();
+ state = transition(state, Tokenizer.COMMENT_START, reconsume, pos);
+ break markupdeclarationhyphenloop;
+ // continue stateloop;
+ default:
+ errBogusComment();
+ reconsume = true;
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT_START:
+ commentstartloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Comment start state
+ *
+ *
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Switch to the comment
+ * start dash state.
+ */
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.COMMENT_START_DASH, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Parse error.
+ */
+ errPrematureEndOfComment();
+ /* Emit the comment token. */
+ emitComment(0, pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ break commentstartloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append the input character to
+ * the comment token's data.
+ */
+ appendStrBuf(c);
+ /*
+ * Switch to the comment state.
+ */
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ break commentstartloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT:
+ commentloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Comment state Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Switch to the comment
+ * end dash state
+ */
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
+ break commentloop;
+ // continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ continue;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append the input character to
+ * the comment token's data.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the comment state.
+ */
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT_END_DASH:
+ commentenddashloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Comment end dash state Consume the next input
+ * character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Switch to the comment
+ * end state
+ */
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
+ break commentenddashloop;
+ // continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append a U+002D HYPHEN-MINUS
+ * (-) character and the input character to the
+ * comment token's data.
+ */
+ appendStrBuf(c);
+ /*
+ * Switch to the comment state.
+ */
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT_END:
+ commentendloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Comment end state Consume the next input
+ * character:
+ */
+ switch (c) {
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the comment
+ * token.
+ */
+ emitComment(2, pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '-':
+ /* U+002D HYPHEN-MINUS (-) Parse error. */
+ /*
+ * Append a U+002D HYPHEN-MINUS (-) character to
+ * the comment token's data.
+ */
+ adjustDoubleHyphenAndAppendToStrBufAndErr(c);
+ /*
+ * Stay in the comment end state.
+ */
+ continue;
+ case '\r':
+ adjustDoubleHyphenAndAppendToStrBufCarriageReturn();
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ break stateloop;
+ case '\n':
+ adjustDoubleHyphenAndAppendToStrBufLineFeed();
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ continue stateloop;
+ case '!':
+ errHyphenHyphenBang();
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Append two U+002D HYPHEN-MINUS (-) characters
+ * and the input character to the comment
+ * token's data.
+ */
+ adjustDoubleHyphenAndAppendToStrBufAndErr(c);
+ /*
+ * Switch to the comment state.
+ */
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case COMMENT_END_BANG:
+ for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Comment end bang state
+ *
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the comment
+ * token.
+ */
+ emitComment(3, pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '-':
+ /*
+ * Append two U+002D HYPHEN-MINUS (-) characters
+ * and a U+0021 EXCLAMATION MARK (!) character
+ * to the comment token's data.
+ */
+ appendStrBuf(c);
+ /*
+ * Switch to the comment end dash state.
+ */
+ state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ continue;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append two U+002D HYPHEN-MINUS
+ * (-) characters, a U+0021 EXCLAMATION MARK (!)
+ * character, and the input character to the
+ * comment token's data. Switch to the comment
+ * state.
+ */
+ appendStrBuf(c);
+ /*
+ * Switch to the comment state.
+ */
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case COMMENT_START_DASH:
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Comment start dash state
+ *
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Switch to the comment end
+ * state
+ */
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
+ continue stateloop;
+ case '>':
+ errPrematureEndOfComment();
+ /* Emit the comment token. */
+ emitComment(1, pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Append a U+002D HYPHEN-MINUS character (-) and
+ * the current input character to the comment
+ * token's data.
+ */
+ appendStrBuf(c);
+ /*
+ * Switch to the comment state.
+ */
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ // XXX reorder point
+ case CDATA_START:
+ for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ if (index < 6) { // CDATA_LSQB.length
+ if (c == Tokenizer.CDATA_LSQB[index]) {
+ appendStrBuf(c);
+ } else {
+ errBogusComment();
+ reconsume = true;
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ index++;
+ continue;
+ } else {
+ clearStrBufAfterUse();
+ cstart = pos; // start coalescing
+ reconsume = true;
+ state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
+ break; // FALL THROUGH continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case CDATA_SECTION:
+ cdatasectionloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ switch (c) {
+ case ']':
+ flushChars(buf, pos);
+ state = transition(state, Tokenizer.CDATA_RSQB, reconsume, pos);
+ break cdatasectionloop; // FALL THROUGH
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ continue;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ default:
+ continue;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case CDATA_RSQB:
+ cdatarsqb: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ switch (c) {
+ case ']':
+ state = transition(state, Tokenizer.CDATA_RSQB_RSQB, reconsume, pos);
+ break cdatarsqb;
+ default:
+ tokenHandler.characters(Tokenizer.RSQB_RSQB, 0,
+ 1);
+ cstart = pos;
+ reconsume = true;
+ state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case CDATA_RSQB_RSQB:
+ cdatarsqbrsqb: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ switch (c) {
+ case ']':
+ // Saw a third ]. Emit one ] (logically the
+ // first one) and stay in this state to
+ // remember that the last two characters seen
+ // have been ]].
+ tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1);
+ continue;
+ case '>':
+ cstart = pos + 1;
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ default:
+ tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2);
+ cstart = pos;
+ reconsume = true;
+ state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case ATTRIBUTE_VALUE_SINGLE_QUOTED:
+ attributevaluesinglequotedloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Switch to the after
+ * attribute value (quoted) state.
+ */
+ addAttributeWithValue();
+
+ state = transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
+ continue stateloop;
+ case '&':
+ /*
+ * U+0026 AMPERSAND (&) Switch to the character
+ * reference in attribute value state, with the
+ * additional allowed character being U+0027
+ * APOSTROPHE (').
+ */
+ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\'');
+ returnState = state;
+ state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
+ break attributevaluesinglequotedloop;
+ // continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ continue;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append the current input
+ * character to the current attribute's value.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the attribute value (single-quoted)
+ * state.
+ */
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case CONSUME_CHARACTER_REFERENCE:
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Unlike the definition in the spec, this state does not
+ * return a value and never requires the caller to
+ * backtrack. This state takes care of emitting characters
+ * or appending to the current attribute value. It also
+ * takes care of that in the case when consuming the
+ * character reference fails.
+ */
+ /*
+ * This section defines how to consume a character
+ * reference. This definition is used when parsing character
+ * references in text and in attributes.
+ *
+ * The behavior depends on the identity of the next
+ * character (the one immediately after the U+0026 AMPERSAND
+ * character):
+ */
+ switch (c) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r': // we'll reconsume!
+ case '\u000C':
+ case '<':
+ case '&':
+ case '\u0000':
+ emitOrAppendCharRefBuf(returnState);
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ case '#':
+ /*
+ * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER
+ * SIGN.
+ */
+ appendCharRefBuf('#');
+ state = transition(state, Tokenizer.CONSUME_NCR, reconsume, pos);
+ continue stateloop;
+ default:
+ if (c == additional) {
+ emitOrAppendCharRefBuf(returnState);
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ }
+ if (c >= 'a' && c <= 'z') {
+ firstCharKey = c - 'a' + 26;
+ } else if (c >= 'A' && c <= 'Z') {
+ firstCharKey = c - 'A';
+ } else {
+ // No match
+ /*
+ * If no match can be made, then this is a parse
+ * error.
+ */
+ errNoNamedCharacterMatch();
+ emitOrAppendCharRefBuf(returnState);
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ }
+ // Didn't fail yet
+ appendCharRefBuf(c);
+ state = transition(state, Tokenizer.CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, pos);
+ // FALL THROUGH continue stateloop;
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case CHARACTER_REFERENCE_HILO_LOOKUP:
+ {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * The data structure is as follows:
+ *
+ * HILO_ACCEL is a two-dimensional int array whose major
+ * index corresponds to the second character of the
+ * character reference (code point as index) and the
+ * minor index corresponds to the first character of the
+ * character reference (packed so that A-Z runs from 0
+ * to 25 and a-z runs from 26 to 51). This layout makes
+ * it easier to use the sparseness of the data structure
+ * to omit parts of it: The second dimension of the
+ * table is null when no character reference starts with
+ * the character corresponding to that row.
+ *
+ * The int value HILO_ACCEL (by these indices) is zero
+ * if there exists no character reference starting with
+ * that two-letter prefix. Otherwise, the value is an
+ * int that packs two shorts so that the higher short is
+ * the index of the highest character reference name
+ * with that prefix in NAMES and the lower short
+ * corresponds to the index of the lowest character
+ * reference name with that prefix. (It happens that the
+ * first two character reference names share their
+ * prefix so the packed int cannot be 0 by packing the
+ * two shorts.)
+ *
+ * NAMES is an array of byte arrays where each byte
+ * array encodes the name of a character reference as
+ * ASCII. The names omit the first two letters of the
+ * name. (Since storing the first two letters would be
+ * redundant with the data contained in HILO_ACCEL.) The
+ * entries are lexically sorted.
+ *
+ * For a given index in NAMES, the same index in VALUES
+ * contains the corresponding expansion as an array of
+ * two UTF-16 code units (either the character and
+ * U+0000 or a surrogate pair).
+ */
+ int hilo = 0;
+ if (c <= 'z') {
+ @Const @NoLength int[] row = NamedCharactersAccel.HILO_ACCEL[c];
+ if (row != null) {
+ hilo = row[firstCharKey];
+ }
+ }
+ if (hilo == 0) {
+ /*
+ * If no match can be made, then this is a parse
+ * error.
+ */
+ errNoNamedCharacterMatch();
+ emitOrAppendCharRefBuf(returnState);
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ }
+ // Didn't fail yet
+ appendCharRefBuf(c);
+ lo = hilo & 0xFFFF;
+ hi = hilo >> 16;
+ entCol = -1;
+ candidate = -1;
+ charRefBufMark = 0;
+ state = transition(state, Tokenizer.CHARACTER_REFERENCE_TAIL, reconsume, pos);
+ // FALL THROUGH continue stateloop;
+ }
+ case CHARACTER_REFERENCE_TAIL:
+ outer: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ entCol++;
+ /*
+ * Consume the maximum number of characters possible,
+ * with the consumed characters matching one of the
+ * identifiers in the first column of the named
+ * character references table (in a case-sensitive
+ * manner).
+ */
+ loloop: for (;;) {
+ if (hi < lo) {
+ break outer;
+ }
+ if (entCol == NamedCharacters.NAMES[lo].length()) {
+ candidate = lo;
+ charRefBufMark = charRefBufLen;
+ lo++;
+ } else if (entCol > NamedCharacters.NAMES[lo].length()) {
+ break outer;
+ } else if (c > NamedCharacters.NAMES[lo].charAt(entCol)) {
+ lo++;
+ } else {
+ break loloop;
+ }
+ }
+
+ hiloop: for (;;) {
+ if (hi < lo) {
+ break outer;
+ }
+ if (entCol == NamedCharacters.NAMES[hi].length()) {
+ break hiloop;
+ }
+ if (entCol > NamedCharacters.NAMES[hi].length()) {
+ break outer;
+ } else if (c < NamedCharacters.NAMES[hi].charAt(entCol)) {
+ hi--;
+ } else {
+ break hiloop;
+ }
+ }
+
+ if (c == ';') {
+ // If we see a semicolon, there cannot be a
+ // longer match. Break the loop. However, before
+ // breaking, take the longest match so far as the
+ // candidate, if we are just about to complete a
+ // match.
+ if (entCol + 1 == NamedCharacters.NAMES[lo].length()) {
+ candidate = lo;
+ charRefBufMark = charRefBufLen;
+ }
+ break outer;
+ }
+
+ if (hi < lo) {
+ break outer;
+ }
+ appendCharRefBuf(c);
+ continue;
+ }
+
+ if (candidate == -1) {
+ // reconsume deals with CR, LF or nul
+ /*
+ * If no match can be made, then this is a parse error.
+ */
+ errNoNamedCharacterMatch();
+ emitOrAppendCharRefBuf(returnState);
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ } else {
+ // c can't be CR, LF or nul if we got here
+ @Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate];
+ if (candidateName.length() == 0
+ || candidateName.charAt(candidateName.length() - 1) != ';') {
+ /*
+ * If the last character matched is not a U+003B
+ * SEMICOLON (;), there is a parse error.
+ */
+ if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
+ /*
+ * If the entity is being consumed as part of an
+ * attribute, and the last character matched is
+ * not a U+003B SEMICOLON (;),
+ */
+ char ch;
+ if (charRefBufMark == charRefBufLen) {
+ ch = c;
+ } else {
+ ch = charRefBuf[charRefBufMark];
+ }
+ if (ch == '=' || (ch >= '0' && ch <= '9')
+ || (ch >= 'A' && ch <= 'Z')
+ || (ch >= 'a' && ch <= 'z')) {
+ /*
+ * and the next character is either a U+003D
+ * EQUALS SIGN character (=) or in the range
+ * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
+ * U+0041 LATIN CAPITAL LETTER A to U+005A
+ * LATIN CAPITAL LETTER Z, or U+0061 LATIN
+ * SMALL LETTER A to U+007A LATIN SMALL
+ * LETTER Z, then, for historical reasons,
+ * all the characters that were matched
+ * after the U+0026 AMPERSAND (&) must be
+ * unconsumed, and nothing is returned.
+ */
+ errNoNamedCharacterMatch();
+ appendCharRefBufToStrBuf();
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
+ errUnescapedAmpersandInterpretedAsCharacterReference();
+ } else {
+ errNotSemicolonTerminated();
+ }
+ }
+
+ /*
+ * Otherwise, return a character token for the character
+ * corresponding to the entity name (as given by the
+ * second column of the named character references
+ * table).
+ */
+ // CPPONLY: completedNamedCharacterReference();
+ @Const @NoLength char[] val = NamedCharacters.VALUES[candidate];
+ if (
+ // [NOCPP[
+ val.length == 1
+ // ]NOCPP]
+ // CPPONLY: val[1] == 0
+ ) {
+ emitOrAppendOne(val, returnState);
+ } else {
+ emitOrAppendTwo(val, returnState);
+ }
+ // this is so complicated!
+ if (charRefBufMark < charRefBufLen) {
+ if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
+ appendStrBuf(charRefBuf, charRefBufMark,
+ charRefBufLen - charRefBufMark);
+ } else {
+ tokenHandler.characters(charRefBuf, charRefBufMark,
+ charRefBufLen - charRefBufMark);
+ }
+ }
+ // charRefBufLen will be zeroed below!
+
+ // Check if we broke out early with c being the last
+ // character that matched as opposed to being the
+ // first one that didn't match. In the case of an
+ // early break, the next run on text should start
+ // *after* the current character and the current
+ // character shouldn't be reconsumed.
+ boolean earlyBreak = (c == ';' && charRefBufMark == charRefBufLen);
+ charRefBufLen = 0;
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = earlyBreak ? pos + 1 : pos;
+ }
+ reconsume = !earlyBreak;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ /*
+ * If the markup contains I'm &notit; I tell you, the
+ * entity is parsed as "not", as in, I'm ¬it; I tell
+ * you. But if the markup was I'm &notin; I tell you,
+ * the entity would be parsed as "notin;", resulting in
+ * I'm ∉ I tell you.
+ */
+ }
+ // XXX reorder point
+ case CONSUME_NCR:
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ value = 0;
+ seenDigits = false;
+ /*
+ * The behavior further depends on the character after the
+ * U+0023 NUMBER SIGN:
+ */
+ switch (c) {
+ case 'x':
+ case 'X':
+
+ /*
+ * U+0078 LATIN SMALL LETTER X U+0058 LATIN CAPITAL
+ * LETTER X Consume the X.
+ *
+ * Follow the steps below, but using the range of
+ * characters U+0030 DIGIT ZERO through to U+0039
+ * DIGIT NINE, U+0061 LATIN SMALL LETTER A through
+ * to U+0066 LATIN SMALL LETTER F, and U+0041 LATIN
+ * CAPITAL LETTER A, through to U+0046 LATIN CAPITAL
+ * LETTER F (in other words, 0-9, A-F, a-f).
+ *
+ * When it comes to interpreting the number,
+ * interpret it as a hexadecimal number.
+ */
+ appendCharRefBuf(c);
+ state = transition(state, Tokenizer.HEX_NCR_LOOP, reconsume, pos);
+ continue stateloop;
+ default:
+ /*
+ * Anything else Follow the steps below, but using
+ * the range of characters U+0030 DIGIT ZERO through
+ * to U+0039 DIGIT NINE (i.e. just 0-9).
+ *
+ * When it comes to interpreting the number,
+ * interpret it as a decimal number.
+ */
+ reconsume = true;
+ state = transition(state, Tokenizer.DECIMAL_NRC_LOOP, reconsume, pos);
+ // FALL THROUGH continue stateloop;
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case DECIMAL_NRC_LOOP:
+ decimalloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume as many characters as match the range of
+ * characters given above.
+ */
+ assert value >= 0: "value must not become negative.";
+ if (c >= '0' && c <= '9') {
+ seenDigits = true;
+ // Avoid overflow
+ if (value <= 0x10FFFF) {
+ value *= 10;
+ value += c - '0';
+ }
+ continue;
+ } else if (c == ';') {
+ if (seenDigits) {
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos + 1;
+ }
+ state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
+ // FALL THROUGH continue stateloop;
+ break decimalloop;
+ } else {
+ errNoDigitsInNCR();
+ appendCharRefBuf(';');
+ emitOrAppendCharRefBuf(returnState);
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos + 1;
+ }
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ }
+ } else {
+ /*
+ * If no characters match the range, then don't
+ * consume any characters (and unconsume the U+0023
+ * NUMBER SIGN character and, if appropriate, the X
+ * character). This is a parse error; nothing is
+ * returned.
+ *
+ * Otherwise, if the next character is a U+003B
+ * SEMICOLON, consume that too. If it isn't, there
+ * is a parse error.
+ */
+ if (!seenDigits) {
+ errNoDigitsInNCR();
+ emitOrAppendCharRefBuf(returnState);
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ } else {
+ errCharRefLacksSemicolon();
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
+ // FALL THROUGH continue stateloop;
+ break decimalloop;
+ }
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case HANDLE_NCR_VALUE:
+ // WARNING previous state sets reconsume
+ // We are not going to emit the contents of charRefBuf.
+ charRefBufLen = 0;
+ // XXX inline this case if the method size can take it
+ handleNcrValue(returnState);
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ // XXX reorder point
+ case HEX_NCR_LOOP:
+ for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume as many characters as match the range of
+ * characters given above.
+ */
+ assert value >= 0: "value must not become negative.";
+ if (c >= '0' && c <= '9') {
+ seenDigits = true;
+ // Avoid overflow
+ if (value <= 0x10FFFF) {
+ value *= 16;
+ value += c - '0';
+ }
+ continue;
+ } else if (c >= 'A' && c <= 'F') {
+ seenDigits = true;
+ // Avoid overflow
+ if (value <= 0x10FFFF) {
+ value *= 16;
+ value += c - 'A' + 10;
+ }
+ continue;
+ } else if (c >= 'a' && c <= 'f') {
+ seenDigits = true;
+ // Avoid overflow
+ if (value <= 0x10FFFF) {
+ value *= 16;
+ value += c - 'a' + 10;
+ }
+ continue;
+ } else if (c == ';') {
+ if (seenDigits) {
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos + 1;
+ }
+ state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
+ continue stateloop;
+ } else {
+ errNoDigitsInNCR();
+ appendCharRefBuf(';');
+ emitOrAppendCharRefBuf(returnState);
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos + 1;
+ }
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ }
+ } else {
+ /*
+ * If no characters match the range, then don't
+ * consume any characters (and unconsume the U+0023
+ * NUMBER SIGN character and, if appropriate, the X
+ * character). This is a parse error; nothing is
+ * returned.
+ *
+ * Otherwise, if the next character is a U+003B
+ * SEMICOLON, consume that too. If it isn't, there
+ * is a parse error.
+ */
+ if (!seenDigits) {
+ errNoDigitsInNCR();
+ emitOrAppendCharRefBuf(returnState);
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ } else {
+ errCharRefLacksSemicolon();
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ }
+ // XXX reorder point
+ case PLAINTEXT:
+ plaintextloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ switch (c) {
+ case '\u0000':
+ emitPlaintextReplacementCharacter(buf, pos);
+ continue;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Anything else Emit the current input
+ * character as a character token. Stay in the
+ * PLAINTEXT state.
+ */
+ continue;
+ }
+ }
+ // XXX reorder point
+ case CLOSE_TAG_OPEN:
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Otherwise, if the content model flag is set to the PCDATA
+ * state, or if the next few characters do match that tag
+ * name, consume the next input character:
+ */
+ switch (c) {
+ case '>':
+ /* U+003E GREATER-THAN SIGN (>) Parse error. */
+ errLtSlashGt();
+ /*
+ * Switch to the data state.
+ */
+ cstart = pos + 1;
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ silentCarriageReturn();
+ /* Anything else Parse error. */
+ errGarbageAfterLtSlash();
+ /*
+ * Switch to the bogus comment state.
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf('\n');
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ /* Anything else Parse error. */
+ errGarbageAfterLtSlash();
+ /*
+ * Switch to the bogus comment state.
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ }
+ if (c >= 'a' && c <= 'z') {
+ /*
+ * U+0061 LATIN SMALL LETTER A through to U+007A
+ * LATIN SMALL LETTER Z Create a new end tag
+ * token,
+ */
+ endTag = true;
+ /*
+ * set its tag name to the input character,
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ /*
+ * then switch to the tag name state. (Don't
+ * emit the token yet; further details will be
+ * filled in before it is emitted.)
+ */
+ state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
+ continue stateloop;
+ } else {
+ /* Anything else Parse error. */
+ errGarbageAfterLtSlash();
+ /*
+ * Switch to the bogus comment state.
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case RCDATA:
+ rcdataloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ switch (c) {
+ case '&':
+ /*
+ * U+0026 AMPERSAND (&) Switch to the character
+ * reference in RCDATA state.
+ */
+ flushChars(buf, pos);
+ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\u0000');
+ returnState = state;
+ state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
+ continue stateloop;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the
+ * RCDATA less-than sign state.
+ */
+ flushChars(buf, pos);
+
+ returnState = state;
+ state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ continue;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Emit the current input character as a
+ * character token. Stay in the RCDATA state.
+ */
+ continue;
+ }
+ }
+ // XXX reorder point
+ case RAWTEXT:
+ rawtextloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ switch (c) {
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the
+ * RAWTEXT less-than sign state.
+ */
+ flushChars(buf, pos);
+
+ returnState = state;
+ state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
+ break rawtextloop;
+ // FALL THRU continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ continue;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Emit the current input character as a
+ * character token. Stay in the RAWTEXT state.
+ */
+ continue;
+ }
+ }
+ // XXX fallthru don't reorder
+ case RAWTEXT_RCDATA_LESS_THAN_SIGN:
+ rawtextrcdatalessthansignloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ switch (c) {
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Set the temporary buffer
+ * to the empty string. Switch to the RCDATA or
+ * RAWTEXT end tag open state (implemented by
+ * NON_DATA_END_TAG_NAME).
+ */
+ index = 0;
+ clearStrBufBeforeUse();
+ state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
+ break rawtextrcdatalessthansignloop;
+ // FALL THRU continue stateloop;
+ default:
+ /*
+ * Otherwise, emit a U+003C LESS-THAN SIGN
+ * character token
+ */
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
+ /*
+ * and reconsume the current input character in
+ * the return state (returnState).
+ */
+ cstart = pos;
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // XXX fall thru. don't reorder.
+ case NON_DATA_END_TAG_NAME:
+ for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * ASSERT! when entering this state, set index to 0 and
+ * call clearStrBufBeforeUse(); Let's implement the above
+ * without lookahead. strBuf is the 'temporary buffer'.
+ */
+ if (endTagExpectationAsArray == null) {
+ tokenHandler.characters(Tokenizer.LT_SOLIDUS,
+ 0, 2);
+ cstart = pos;
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ } else if (index < endTagExpectationAsArray.length) {
+ char e = endTagExpectationAsArray[index];
+ char folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded != e) {
+ // [NOCPP[
+ errHtml4LtSlashInRcdata(folded);
+ // ]NOCPP]
+ tokenHandler.characters(Tokenizer.LT_SOLIDUS,
+ 0, 2);
+ emitStrBuf();
+ cstart = pos;
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ }
+ appendStrBuf(c);
+ index++;
+ continue;
+ } else {
+ endTag = true;
+ // XXX replace contentModelElement with different
+ // type
+ tagName = endTagExpectation;
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ clearStrBufAfterUse(); // strBuf not used
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE
+ * FEED (LF) U+000C FORM FEED (FF) U+0020
+ * SPACE If the current end tag token is an
+ * appropriate end tag token, then switch to
+ * the before attribute name state.
+ */
+ clearStrBufAfterUse(); // strBuf not used
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ continue stateloop;
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) If the current end tag
+ * token is an appropriate end tag token,
+ * then switch to the self-closing start tag
+ * state.
+ */
+ clearStrBufAfterUse(); // strBuf not used
+ state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) If the
+ * current end tag token is an appropriate
+ * end tag token, then emit the current tag
+ * token and switch to the data state.
+ */
+ clearStrBufAfterUse(); // strBuf not used
+ state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
+ if (shouldSuspend) {
+ break stateloop;
+ }
+ continue stateloop;
+ default:
+ /*
+ * Emit a U+003C LESS-THAN SIGN character
+ * token, a U+002F SOLIDUS character token,
+ * a character token for each of the
+ * characters in the temporary buffer (in
+ * the order they were added to the buffer),
+ * and reconsume the current input character
+ * in the return state (returnState).
+ */
+ // [NOCPP[
+ errWarnLtSlashInRcdata();
+ // ]NOCPP]
+ tokenHandler.characters(
+ Tokenizer.LT_SOLIDUS, 0, 2);
+ emitStrBuf();
+ cstart = pos; // don't drop the
+ // character
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ }
+ // XXX reorder point
+ // BEGIN HOTSPOT WORKAROUND
+ case BOGUS_COMMENT:
+ boguscommentloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume every character up to and including the first
+ * U+003E GREATER-THAN SIGN character (>) or the end of
+ * the file (EOF), whichever comes first. Emit a comment
+ * token whose data is the concatenation of all the
+ * characters starting from and including the character
+ * that caused the state machine to switch into the
+ * bogus comment state, up to and including the
+ * character immediately before the last consumed
+ * character (i.e. up to the character just before the
+ * U+003E or EOF character). (If the comment was started
+ * by the end of the file (EOF), the token is empty.)
+ *
+ * Switch to the data state.
+ *
+ * If the end of the file was reached, reconsume the EOF
+ * character.
+ */
+ switch (c) {
+ case '>':
+ emitComment(0, pos);
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '-':
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.BOGUS_COMMENT_HYPHEN, reconsume, pos);
+ break boguscommentloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ continue;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BOGUS_COMMENT_HYPHEN:
+ boguscommenthyphenloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ switch (c) {
+ case '>':
+ // [NOCPP[
+ maybeAppendSpaceToBogusComment();
+ // ]NOCPP]
+ emitComment(0, pos);
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '-':
+ appendSecondHyphenToBogusComment();
+ continue boguscommenthyphenloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case SCRIPT_DATA:
+ scriptdataloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ switch (c) {
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the
+ * script data less-than sign state.
+ */
+ flushChars(buf, pos);
+ returnState = state;
+ state = transition(state, Tokenizer.SCRIPT_DATA_LESS_THAN_SIGN, reconsume, pos);
+ break scriptdataloop; // FALL THRU continue
+ // stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ continue;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Anything else Emit the current input
+ * character as a character token. Stay in the
+ * script data state.
+ */
+ continue;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_LESS_THAN_SIGN:
+ scriptdatalessthansignloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ switch (c) {
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Set the temporary buffer
+ * to the empty string. Switch to the script
+ * data end tag open state.
+ */
+ index = 0;
+ clearStrBufBeforeUse();
+ state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
+ continue stateloop;
+ case '!':
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
+ cstart = pos;
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START, reconsume, pos);
+ break scriptdatalessthansignloop; // FALL THRU
+ // continue
+ // stateloop;
+ default:
+ /*
+ * Otherwise, emit a U+003C LESS-THAN SIGN
+ * character token
+ */
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
+ /*
+ * and reconsume the current input character in
+ * the script data state.
+ */
+ cstart = pos;
+ reconsume = true;
+ state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_ESCAPE_START:
+ scriptdataescapestartloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Emit a U+002D
+ * HYPHEN-MINUS character token. Switch to the
+ * script data escape start dash state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START_DASH, reconsume, pos);
+ break scriptdataescapestartloop; // FALL THRU
+ // continue
+ // stateloop;
+ default:
+ /*
+ * Anything else Reconsume the current input
+ * character in the script data state.
+ */
+ reconsume = true;
+ state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_ESCAPE_START_DASH:
+ scriptdataescapestartdashloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Emit a U+002D
+ * HYPHEN-MINUS character token. Switch to the
+ * script data escaped dash dash state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
+ break scriptdataescapestartdashloop;
+ // continue stateloop;
+ default:
+ /*
+ * Anything else Reconsume the current input
+ * character in the script data state.
+ */
+ reconsume = true;
+ state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_ESCAPED_DASH_DASH:
+ scriptdataescapeddashdashloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Emit a U+002D
+ * HYPHEN-MINUS character token. Stay in the
+ * script data escaped dash dash state.
+ */
+ continue;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the
+ * script data escaped less-than sign state.
+ */
+ flushChars(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit a U+003E
+ * GREATER-THAN SIGN character token. Switch to
+ * the script data state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ break scriptdataescapeddashdashloop;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Anything else Emit the current input
+ * character as a character token. Switch to the
+ * script data escaped state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ break scriptdataescapeddashdashloop;
+ // continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_ESCAPED:
+ scriptdataescapedloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Emit a U+002D
+ * HYPHEN-MINUS character token. Switch to the
+ * script data escaped dash state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH, reconsume, pos);
+ break scriptdataescapedloop; // FALL THRU
+ // continue
+ // stateloop;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the
+ * script data escaped less-than sign state.
+ */
+ flushChars(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ continue;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Anything else Emit the current input
+ * character as a character token. Stay in the
+ * script data escaped state.
+ */
+ continue;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_ESCAPED_DASH:
+ scriptdataescapeddashloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Emit a U+002D
+ * HYPHEN-MINUS character token. Switch to the
+ * script data escaped dash dash state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
+ continue stateloop;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the
+ * script data escaped less-than sign state.
+ */
+ flushChars(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
+ break scriptdataescapeddashloop;
+ // continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Anything else Emit the current input
+ * character as a character token. Switch to the
+ * script data escaped state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
+ scriptdataescapedlessthanloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Set the temporary buffer
+ * to the empty string. Switch to the script
+ * data escaped end tag open state.
+ */
+ index = 0;
+ clearStrBufBeforeUse();
+ returnState = Tokenizer.SCRIPT_DATA_ESCAPED;
+ state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
+ continue stateloop;
+ case 'S':
+ case 's':
+ /*
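+ * U+0053 LATIN CAPITAL LETTER S or U+0073 LATIN
+ * SMALL LETTER S (the spec text covers U+0041
+ * LATIN CAPITAL LETTER A through to U+005A LATIN
+ * CAPITAL LETTER Z and the lowercase letters; only
+ * 'S'/'s' is matched here, and the next state
+ * compares the following characters against
+ * Tokenizer.SCRIPT_ARR starting at index 1) Emit
+ * a U+003C LESS-THAN SIGN character token and the
+ * current input character as a character token.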
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
+ cstart = pos;
+ index = 1;
+ /*
+ * Set the temporary buffer to the empty string.
+ * Append the lowercase version of the current
+ * input character (add 0x0020 to the
+ * character's code point) to the temporary
+ * buffer. Switch to the script data double
+ * escape start state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_START, reconsume, pos);
+ break scriptdataescapedlessthanloop;
+ // continue stateloop;
+ default:
+ /*
+ * Anything else Emit a U+003C LESS-THAN SIGN
+ * character token and reconsume the current
+ * input character in the script data escaped
+ * state.
+ */
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
+ cstart = pos;
+ reconsume = true;
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_DOUBLE_ESCAPE_START:
+ scriptdatadoubleescapestartloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ assert index > 0;
+ if (index < 6) { // SCRIPT_ARR.length
+ char folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded != Tokenizer.SCRIPT_ARR[index]) {
+ reconsume = true;
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ continue stateloop;
+ }
+ index++;
+ continue;
+ }
+ switch (c) {
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ case ' ':
+ case '\t':
+ case '\u000C':
+ case '/':
+ case '>':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
+ * (>) Emit the current input character as a
+ * character token. If the temporary buffer is
+ * the string "script", then switch to the
+ * script data double escaped state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ break scriptdatadoubleescapestartloop;
+ // continue stateloop;
+ default:
+ /*
+ * Anything else Reconsume the current input
+ * character in the script data escaped state.
+ */
+ reconsume = true;
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_DOUBLE_ESCAPED:
+ scriptdatadoubleescapedloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Emit a U+002D
+ * HYPHEN-MINUS character token. Switch to the
+ * script data double escaped dash state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH, reconsume, pos);
+ break scriptdatadoubleescapedloop; // FALL THRU
+ // continue
+ // stateloop;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Emit a U+003C
+ * LESS-THAN SIGN character token. Switch to the
+ * script data double escaped less-than sign
+ * state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ continue;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Anything else Emit the current input
+ * character as a character token. Stay in the
+ * script data double escaped state.
+ */
+ continue;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
+ scriptdatadoubleescapeddashloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Emit a U+002D
+ * HYPHEN-MINUS character token. Switch to the
+ * script data double escaped dash dash state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, reconsume, pos);
+ break scriptdatadoubleescapeddashloop;
+ // continue stateloop;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Emit a U+003C
+ * LESS-THAN SIGN character token. Switch to the
+ * script data double escaped less-than sign
+ * state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Anything else Emit the current input
+ * character as a character token. Switch to the
+ * script data double escaped state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
+ scriptdatadoubleescapeddashdashloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Emit a U+002D
+ * HYPHEN-MINUS character token. Stay in the
+ * script data double escaped dash dash state.
+ */
+ continue;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Emit a U+003C
+ * LESS-THAN SIGN character token. Switch to the
+ * script data double escaped less-than sign
+ * state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
+ break scriptdatadoubleescapeddashdashloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit a U+003E
+ * GREATER-THAN SIGN character token. Switch to
+ * the script data state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Anything else Emit the current input
+ * character as a character token. Switch to the
+ * script data double escaped state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
+ scriptdatadoubleescapedlessthanloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Emit a U+002F SOLIDUS
+ * character token. Set the temporary buffer to
+ * the empty string. Switch to the script data
+ * double escape end state.
+ */
+ index = 0;
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_END, reconsume, pos);
+ break scriptdatadoubleescapedlessthanloop;
+ default:
+ /*
+ * Anything else Reconsume the current input
+ * character in the script data double escaped
+ * state.
+ */
+ reconsume = true;
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_DOUBLE_ESCAPE_END:
+ scriptdatadoubleescapeendloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ if (index < 6) { // SCRIPT_ARR.length
+ char folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded != Tokenizer.SCRIPT_ARR[index]) {
+ reconsume = true;
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ continue stateloop;
+ }
+ index++;
+ continue;
+ }
+ switch (c) {
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ case ' ':
+ case '\t':
+ case '\u000C':
+ case '/':
+ case '>':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
+ * (>) Emit the current input character as a
+ * character token. If the temporary buffer is
+ * the string "script", then switch to the
+ * script data escaped state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ continue stateloop;
+ default:
+ /*
+ * Reconsume the current input character in the
+ * script data double escaped state.
+ */
+ reconsume = true;
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case MARKUP_DECLARATION_OCTYPE:
+ markupdeclarationdoctypeloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ if (index < 6) { // OCTYPE.length
+ char folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded == Tokenizer.OCTYPE[index]) {
+ appendStrBuf(c);
+ } else {
+ errBogusComment();
+ reconsume = true;
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ index++;
+ continue;
+ } else {
+ reconsume = true;
+ state = transition(state, Tokenizer.DOCTYPE, reconsume, pos);
+ break markupdeclarationdoctypeloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case DOCTYPE:
+ doctypeloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ initDoctypeFields();
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * Switch to the before DOCTYPE name state.
+ */
+ state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
+ break doctypeloop;
+ // continue stateloop;
+ default:
+ /*
+ * Anything else Parse error.
+ */
+ errMissingSpaceBeforeDoctypeName();
+ /*
+ * Reconsume the current character in the before
+ * DOCTYPE name state.
+ */
+ reconsume = true;
+ state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
+ break doctypeloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BEFORE_DOCTYPE_NAME:
+ beforedoctypenameloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
+ * in the before DOCTYPE name state.
+ */
+ continue;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Parse error.
+ */
+ errNamelessDoctype();
+ /*
+ * Create a new DOCTYPE token. Set its
+ * force-quirks flag to on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit the token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ if (c >= 'A' && c <= 'Z') {
+ /*
+ * U+0041 LATIN CAPITAL LETTER A through to
+ * U+005A LATIN CAPITAL LETTER Z Create a
+ * new DOCTYPE token. Set the token's name
+ * to the lowercase version of the input
+ * character (add 0x0020 to the character's
+ * code point).
+ */
+ c += 0x20;
+ }
+ /* Anything else Create a new DOCTYPE token. */
+ /*
+ * Set the token's name to the current
+ * input character.
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ /*
+ * Switch to the DOCTYPE name state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_NAME, reconsume, pos);
+ break beforedoctypenameloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case DOCTYPE_NAME:
+ doctypenameloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ strBufToDoctypeName();
+ state = transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * Switch to the after DOCTYPE name state.
+ */
+ strBufToDoctypeName();
+ state = transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
+ break doctypenameloop;
+ // continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * DOCTYPE token.
+ */
+ strBufToDoctypeName();
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * U+0041 LATIN CAPITAL LETTER A through to
+ * U+005A LATIN CAPITAL LETTER Z Append the
+ * lowercase version of the input character (add
+ * 0x0020 to the character's code point) to the
+ * current DOCTYPE token's name.
+ */
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x0020;
+ }
+ /*
+ * Anything else Append the current input
+ * character to the current DOCTYPE token's
+ * name.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the DOCTYPE name state.
+ */
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case AFTER_DOCTYPE_NAME:
+ afterdoctypenameloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
+ * in the after DOCTYPE name state.
+ */
+ continue;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * DOCTYPE token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case 'p':
+ case 'P':
+ index = 0;
+ state = transition(state, Tokenizer.DOCTYPE_UBLIC, reconsume, pos);
+ break afterdoctypenameloop;
+ // continue stateloop;
+ case 's':
+ case 'S':
+ index = 0;
+ state = transition(state, Tokenizer.DOCTYPE_YSTEM, reconsume, pos);
+ continue stateloop;
+ default:
+ /*
+ * Otherwise, this is the parse error.
+ */
+ bogusDoctype();
+
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ // done by bogusDoctype();
+ /*
+ * Switch to the bogus DOCTYPE state.
+ */
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case DOCTYPE_UBLIC:
+ doctypeublicloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * If the six characters starting from the current input
+ * character are an ASCII case-insensitive match for the
+ * word "PUBLIC", then consume those characters and
+ * switch to the before DOCTYPE public identifier state.
+ */
+ if (index < 5) { // UBLIC.length
+ char folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded != Tokenizer.UBLIC[index]) {
+ bogusDoctype();
+ // forceQuirks = true;
+ reconsume = true;
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ continue stateloop;
+ }
+ index++;
+ continue;
+ } else {
+ reconsume = true;
+ state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos);
+ break doctypeublicloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case AFTER_DOCTYPE_PUBLIC_KEYWORD:
+ afterdoctypepublickeywordloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ state = transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * Switch to the before DOCTYPE public
+ * identifier state.
+ */
+ state = transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
+ break afterdoctypepublickeywordloop;
+ // FALL THROUGH continue stateloop
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Parse Error.
+ */
+ errNoSpaceBetweenDoctypePublicKeywordAndQuote();
+ /*
+ * Set the DOCTYPE token's public identifier to
+ * the empty string (not missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE public identifier
+ * (double-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Parse Error.
+ */
+ errNoSpaceBetweenDoctypePublicKeywordAndQuote();
+ /*
+ * Set the DOCTYPE token's public identifier to
+ * the empty string (not missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE public identifier
+ * (single-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /* U+003E GREATER-THAN SIGN (>) Parse error. */
+ errExpectedPublicId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ default:
+ bogusDoctype();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ // done by bogusDoctype();
+ /*
+ * Switch to the bogus DOCTYPE state.
+ */
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
+ beforedoctypepublicidentifierloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
+ * in the before DOCTYPE public identifier
+ * state.
+ */
+ continue;
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Set the DOCTYPE
+ * token's public identifier to the empty string
+ * (not missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE public identifier
+ * (double-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
+ break beforedoctypepublicidentifierloop;
+ // continue stateloop;
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Set the DOCTYPE token's
+ * public identifier to the empty string (not
+ * missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE public identifier
+ * (single-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /* U+003E GREATER-THAN SIGN (>) Parse error. */
+ errExpectedPublicId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ default:
+ bogusDoctype();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ // done by bogusDoctype();
+ /*
+ * Switch to the bogus DOCTYPE state.
+ */
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
+ doctypepublicidentifierdoublequotedloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Switch to the after
+ * DOCTYPE public identifier state.
+ */
+ publicIdentifier = strBufToString();
+ state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
+ break doctypepublicidentifierdoublequotedloop;
+ // continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Parse error.
+ */
+ errGtInPublicId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ publicIdentifier = strBufToString();
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ continue;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append the current input
+ * character to the current DOCTYPE token's
+ * public identifier.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the DOCTYPE public identifier
+ * (double-quoted) state.
+ */
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
+ afterdoctypepublicidentifierloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ state = transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * Switch to the between DOCTYPE public and
+ * system identifiers state.
+ */
+ state = transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
+ break afterdoctypepublicidentifierloop;
+ // continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * DOCTYPE token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Parse error.
+ */
+ errNoSpaceBetweenPublicAndSystemIds();
+ /*
+ * Set the DOCTYPE token's system identifier to
+ * the empty string (not missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE system identifier
+ * (double-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Parse error.
+ */
+ errNoSpaceBetweenPublicAndSystemIds();
+ /*
+ * Set the DOCTYPE token's system identifier to
+ * the empty string (not missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE system identifier
+ * (single-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ default:
+ bogusDoctype();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ // done by bogusDoctype();
+ /*
+ * Switch to the bogus DOCTYPE state.
+ */
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
+ betweendoctypepublicandsystemidentifiersloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
+ * in the between DOCTYPE public and system
+ * identifiers state.
+ */
+ continue;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * DOCTYPE token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Set the DOCTYPE
+ * token's system identifier to the empty string
+ * (not missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE system identifier
+ * (double-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
+ break betweendoctypepublicandsystemidentifiersloop;
+ // continue stateloop;
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Set the DOCTYPE token's
+ * system identifier to the empty string (not
+ * missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE system identifier
+ * (single-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ default:
+ bogusDoctype();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ // done by bogusDoctype();
+ /*
+ * Switch to the bogus DOCTYPE state.
+ */
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
+ doctypesystemidentifierdoublequotedloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Switch to the after
+ * DOCTYPE system identifier state.
+ */
+ systemIdentifier = strBufToString();
+ state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Parse error.
+ */
+ errGtInSystemId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ systemIdentifier = strBufToString();
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ continue;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append the current input
+ * character to the current DOCTYPE token's
+ * system identifier.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the DOCTYPE system identifier
+ * (double-quoted) state.
+ */
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
+ afterdoctypesystemidentifierloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
+ * in the after DOCTYPE system identifier state.
+ */
+ continue;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * DOCTYPE token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ default:
+ /*
+ * Switch to the bogus DOCTYPE state. (This does
+ * not set the DOCTYPE token's force-quirks flag
+ * to on.)
+ */
+ bogusDoctypeWithoutQuirks();
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ break afterdoctypesystemidentifierloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BOGUS_DOCTYPE:
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit that
+ * DOCTYPE token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ default:
+ /*
+ * Anything else Stay in the bogus DOCTYPE
+ * state.
+ */
+ continue;
+ }
+ }
+ // XXX reorder point
+ case DOCTYPE_YSTEM:
+ doctypeystemloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Otherwise, if the six characters starting from the
+ * current input character are an ASCII case-insensitive
+ * match for the word "SYSTEM", then consume those
+ * characters and switch to the before DOCTYPE system
+ * identifier state.
+ */
+ if (index < 5) { // YSTEM.length
+ char folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded != Tokenizer.YSTEM[index]) {
+ bogusDoctype();
+ reconsume = true;
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ continue stateloop;
+ }
+ index++;
+ continue stateloop;
+ } else {
+ reconsume = true;
+ state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos);
+ break doctypeystemloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case AFTER_DOCTYPE_SYSTEM_KEYWORD:
+ afterdoctypesystemkeywordloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ state = transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * Switch to the before DOCTYPE system
+ * identifier state.
+ */
+ state = transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
+ break afterdoctypesystemkeywordloop;
+ // FALL THROUGH continue stateloop
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Parse Error.
+ */
+ errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
+ /*
+ * Set the DOCTYPE token's system identifier to
+ * the empty string (not missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE system identifier
+ * (double-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Parse Error.
+ */
+ errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
+ /*
+ * Set the DOCTYPE token's system identifier to
+ * the empty string (not missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE system identifier
+ * (single-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /* U+003E GREATER-THAN SIGN (>) Parse error. */
+ errExpectedPublicId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ default:
+ bogusDoctype();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ // done by bogusDoctype();
+ /*
+ * Switch to the bogus DOCTYPE state.
+ */
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
+ beforedoctypesystemidentifierloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
+ * in the before DOCTYPE system identifier
+ * state.
+ */
+ continue;
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Set the DOCTYPE
+ * token's system identifier to the empty string
+ * (not missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE system identifier
+ * (double-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Set the DOCTYPE token's
+ * system identifier to the empty string (not
+ * missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE system identifier
+ * (single-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
+ break beforedoctypesystemidentifierloop;
+ // continue stateloop;
+ case '>':
+ /* U+003E GREATER-THAN SIGN (>) Parse error. */
+ errExpectedSystemId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ default:
+ bogusDoctype();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ // done by bogusDoctype();
+ /*
+ * Switch to the bogus DOCTYPE state.
+ */
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
+ for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Switch to the after
+ * DOCTYPE system identifier state.
+ */
+ systemIdentifier = strBufToString();
+ state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
+ continue stateloop;
+ case '>':
+ errGtInSystemId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ systemIdentifier = strBufToString();
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ continue;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append the current input
+ * character to the current DOCTYPE token's
+ * system identifier.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the DOCTYPE system identifier
+ * (single-quoted) state.
+ */
+ continue;
+ }
+ }
+ // XXX reorder point
+ case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
+ for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Switch to the after
+ * DOCTYPE public identifier state.
+ */
+ publicIdentifier = strBufToString();
+ state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
+ continue stateloop;
+ case '>':
+ errGtInPublicId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ publicIdentifier = strBufToString();
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ continue;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append the current input
+ * character to the current DOCTYPE token's
+ * public identifier.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the DOCTYPE public identifier
+ * (single-quoted) state.
+ */
+ continue;
+ }
+ }
+ // XXX reorder point
+ case PROCESSING_INSTRUCTION:
+ processinginstructionloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ switch (c) {
+ case '?':
+ state = transition(
+ state,
+ Tokenizer.PROCESSING_INSTRUCTION_QUESTION_MARK,
+ reconsume, pos);
+ break processinginstructionloop;
+ // continue stateloop;
+ default:
+ continue;
+ }
+ }
+ case PROCESSING_INSTRUCTION_QUESTION_MARK:
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ switch (c) {
+ case '>':
+ state = transition(state, Tokenizer.DATA,
+ reconsume, pos);
+ continue stateloop;
+ default:
+ state = transition(state,
+ Tokenizer.PROCESSING_INSTRUCTION,
+ reconsume, pos);
+ continue stateloop;
+ }
+ // END HOTSPOT WORKAROUND
+ }
+ }
+ flushChars(buf, pos);
+ /*
+ * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
+ */
+ // Save locals
+ stateSave = state;
+ returnStateSave = returnState;
+ return pos;
+ }
+
+ // HOTSPOT WORKAROUND INSERTION POINT
+
+ // [NOCPP[
+
+ /**
+ * Hook called by the state loop every time it assigns a new tokenizer
+ * state; callers store the returned value as the state to continue in.
+ * This implementation ignores <code>from</code>, <code>reconsume</code> and
+ * <code>pos</code> and simply hands back <code>to</code>.
+ *
+ * NOTE(review): being protected, this is presumably meant to be overridden
+ * (e.g. for instrumentation) -- confirm against subclasses.
+ *
+ * @param from the state being left
+ * @param to the state being entered
+ * @param reconsume the caller's current reconsume flag
+ * @param pos the caller's current buffer position
+ * @return the state to continue in; here always <code>to</code>
+ * @throws SAXException declared for overriders; not thrown here
+ */
+ protected int transition(int from, int to, boolean reconsume, int pos) throws SAXException {
+ return to;
+ }
+
+ // ]NOCPP]
+
+ /**
+  * Puts the doctype-related tokenizer fields back into their initial state
+  * before a new DOCTYPE token is tokenized: the string buffer is cleared,
+  * the name becomes the empty string, force-quirks is switched off and any
+  * previously held public/system identifier is released and set to
+  * <code>null</code>.
+  */
+ private void initDoctypeFields() {
+     // strBuf still holds the characters "DOCTYPE", which were collected in
+     // case the markup declaration turned out to be a bogus comment; throw
+     // them away.
+     clearStrBufAfterUse();
+
+     doctypeName = "";
+     forceQuirks = false;
+
+     // Release identifiers left over from an earlier doctype, if any.
+     if (publicIdentifier != null) {
+         Portability.releaseString(publicIdentifier);
+         publicIdentifier = null;
+     }
+     if (systemIdentifier != null) {
+         Portability.releaseString(systemIdentifier);
+         systemIdentifier = null;
+     }
+ }
+
+ /**
+ * Handles a carriage return for callers that append through
+ * <code>adjustDoubleHyphenAndAppendToStrBufAndErr</code>: the CR is
+ * accounted for with <code>silentCarriageReturn()</code> and a line feed
+ * (<code>'\n'</code>), not the CR itself, is passed on.
+ *
+ * @throws SAXException if <code>adjustDoubleHyphenAndAppendToStrBufAndErr</code> throws
+ */
+ @Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
+ throws SAXException {
+ silentCarriageReturn();
+ // The CR is replaced by an LF in what gets appended.
+ adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
+ }
+
+ /**
+ * Handles a line feed for callers that append through
+ * <code>adjustDoubleHyphenAndAppendToStrBufAndErr</code>: the LF is
+ * accounted for with <code>silentLineFeed()</code> and then passed on as
+ * <code>'\n'</code>.
+ *
+ * @throws SAXException if <code>adjustDoubleHyphenAndAppendToStrBufAndErr</code> throws
+ */
+ @Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed()
+ throws SAXException {
+ silentLineFeed();
+ adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
+ }
+
+ /**
+ * Accounts for a line feed via <code>silentLineFeed()</code> and appends
+ * <code>'\n'</code> to the string buffer.
+ */
+ @Inline private void appendStrBufLineFeed() {
+ silentLineFeed();
+ appendStrBuf('\n');
+ }
+
+ /**
+ * Accounts for a carriage return via <code>silentCarriageReturn()</code>
+ * and appends a line feed (<code>'\n'</code>), not the CR itself, to the
+ * string buffer.
+ */
+ @Inline private void appendStrBufCarriageReturn() {
+ silentCarriageReturn();
+ // The CR is stored as an LF.
+ appendStrBuf('\n');
+ }
+
+ /**
+ * Records a carriage return without emitting anything: increments
+ * <code>line</code> and sets <code>lastCR</code>.
+ *
+ * NOTE(review): <code>lastCR</code> presumably lets the tokenizer treat a
+ * directly following LF as part of the same line break -- confirm where the
+ * flag is consumed.
+ */
+ @Inline protected void silentCarriageReturn() {
+ ++line;
+ lastCR = true;
+ }
+
+ /**
+ * Records a line feed without emitting anything: increments
+ * <code>line</code>.
+ */
+ @Inline protected void silentLineFeed() {
+ ++line;
+ }
+
+ /**
+ * Handles a carriage return found in character data: records it, flushes
+ * the characters pending before <code>pos</code> and reports one character
+ * taken from <code>Tokenizer.LF</code> to the token handler instead of the
+ * CR.
+ *
+ * @param buf the buffer currently being tokenized
+ * @param pos the position of the carriage return in <code>buf</code>
+ * @throws SAXException if flushing or the token handler throws
+ */
+ private void emitCarriageReturn(@NoLength char[] buf, int pos)
+ throws SAXException {
+ silentCarriageReturn();
+ // Flush first: the pending run must go out before cstart is reset below.
+ flushChars(buf, pos);
+ // NOTE(review): Tokenizer.LF is presumably a one-element array holding
+ // '\n' -- confirm at its declaration.
+ tokenHandler.characters(Tokenizer.LF, 0, 1);
+ // NOTE(review): Integer.MAX_VALUE looks like a "no pending run" sentinel
+ // for cstart -- confirm against flushChars.
+ cstart = Integer.MAX_VALUE;
+ }
+
+ /**
+ * Flushes the characters pending before <code>pos</code>, asks the token
+ * handler to emit a zero-originating replacement character and restarts
+ * the pending run at the character after <code>pos</code>.
+ *
+ * @param buf the buffer currently being tokenized
+ * @param pos the position of the character being replaced
+ * @throws SAXException if flushing or the token handler throws
+ */
+ private void emitReplacementCharacter(@NoLength char[] buf, int pos)
+ throws SAXException {
+ flushChars(buf, pos);
+ tokenHandler.zeroOriginatingReplacementCharacter();
+ // Skip the replaced character itself.
+ cstart = pos + 1;
+ }
+
+ /**
+ * Flushes the characters pending before <code>pos</code>, reports one
+ * character from <code>REPLACEMENT_CHARACTER</code> to the token handler
+ * as ordinary character data and restarts the pending run at the character
+ * after <code>pos</code>. Unlike <code>emitReplacementCharacter</code>,
+ * this goes through <code>characters()</code> rather than
+ * <code>zeroOriginatingReplacementCharacter()</code>.
+ *
+ * @param buf the buffer currently being tokenized
+ * @param pos the position of the character being replaced
+ * @throws SAXException if flushing or the token handler throws
+ */
+ private void emitPlaintextReplacementCharacter(@NoLength char[] buf, int pos)
+ throws SAXException {
+ flushChars(buf, pos);
+ tokenHandler.characters(REPLACEMENT_CHARACTER, 0, 1);
+ // Skip the replaced character itself.
+ cstart = pos + 1;
+ }
+
+ /**
+ * Stores <code>add</code> in <code>additional</code> and, in the Java-only
+ * (NOCPP) part, records the current location in
+ * <code>ampersandLocation</code> as a new <code>LocatorImpl</code> built
+ * from this tokenizer.
+ *
+ * @param add the value to store in <code>additional</code>
+ */
+ private void setAdditionalAndRememberAmpersandLocation(char add) {
+ additional = add;
+ // [NOCPP[
+ ampersandLocation = new LocatorImpl(this);
+ // ]NOCPP]
+ }
+
+ /**
+ * Reports a bogus doctype via <code>errBogusDoctype()</code> and turns the
+ * force-quirks flag on.
+ *
+ * @throws SAXException if <code>errBogusDoctype()</code> throws; in that
+ * case <code>forceQuirks</code> is left untouched
+ */
+ private void bogusDoctype() throws SAXException {
+ errBogusDoctype();
+ forceQuirks = true;
+ }
+
+ /**
+ * Reports a bogus doctype via <code>errBogusDoctype()</code> and turns the
+ * force-quirks flag off (the counterpart of <code>bogusDoctype()</code>,
+ * which turns it on).
+ *
+ * @throws SAXException if <code>errBogusDoctype()</code> throws; in that
+ * case <code>forceQuirks</code> is left untouched
+ */
+ private void bogusDoctypeWithoutQuirks() throws SAXException {
+ errBogusDoctype();
+ forceQuirks = false;
+ }
+
+ /**
+  * Emits or appends the character(s) for a numeric character reference
+  * whose number has been accumulated in the <code>value</code> field.
+  * The cases are tried in this order:
+  * <ul>
+  * <li>above 0x10FFFF: error, one <code>REPLACEMENT_CHARACTER</code>;</li>
+  * <li>0x10000..0x10FFFF: two chars written into <code>astralChar</code>;</li>
+  * <li>0x80..0x9F: error, replaced through
+  * <code>NamedCharacters.WINDOWS_1252</code>;</li>
+  * <li>form feed with a non-ALLOW <code>contentSpacePolicy</code>
+  * (Java-only);</li>
+  * <li>zero and 0xD800..0xDFFF: error, one
+  * <code>REPLACEMENT_CHARACTER</code>;</li>
+  * <li>anything else: the character itself, after the Java-only
+  * diagnostics.</li>
+  * </ul>
+  *
+  * @param returnState passed through to <code>emitOrAppendOne</code> /
+  *        <code>emitOrAppendTwo</code>
+  * @throws SAXException if an error reporter or an emit helper throws
+  */
+ private void handleNcrValue(int returnState) throws SAXException {
+     if (value > 0x10FFFF) {
+         // Larger than any code point.
+         errNcrOutOfRange();
+         emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
+         return;
+     }
+
+     if (value > 0xFFFF) {
+         // Outside the BMP: needs two chars.
+         // [NOCPP[
+         maybeWarnPrivateUseAstral();
+         if ((value & 0xFFFE) == 0xFFFE) {
+             errAstralNonCharacter(value);
+         }
+         // ]NOCPP]
+         astralChar[0] = (char) (Tokenizer.LEAD_OFFSET + (value >> 10));
+         astralChar[1] = (char) (0xDC00 + (value & 0x3FF));
+         emitOrAppendTwo(astralChar, returnState);
+         return;
+     }
+
+     // From here on value <= 0xFFFF.
+     if (value >= 0x80 && value <= 0x9f) {
+         // Parse error; the character comes from the WINDOWS_1252 table
+         // row for this number instead of the number itself.
+         errNcrInC1Range();
+         @NoLength char[] mapped = NamedCharacters.WINDOWS_1252[value - 0x80];
+         emitOrAppendOne(mapped, returnState);
+         return;
+     }
+
+     // [NOCPP[
+     if (value == 0xC && contentSpacePolicy != XmlViolationPolicy.ALLOW) {
+         // Form feed under a restrictive policy: replaced by a space, or
+         // fatal; any other policy value emits nothing at all.
+         if (contentSpacePolicy == XmlViolationPolicy.ALTER_INFOSET) {
+             emitOrAppendOne(Tokenizer.SPACE, returnState);
+         } else if (contentSpacePolicy == XmlViolationPolicy.FATAL) {
+             fatal("A character reference expanded to a form feed which is not legal XML 1.0 white space.");
+         }
+         return;
+     }
+     // ]NOCPP]
+
+     if (value == 0x0) {
+         errNcrZero();
+         emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
+         return;
+     }
+
+     if ((value & 0xF800) == 0xD800) {
+         // 0xD800..0xDFFF
+         errNcrSurrogate();
+         emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
+         return;
+     }
+
+     // Otherwise the character whose code point is that number, possibly
+     // altered by the diagnostics below.
+     char ch = (char) value;
+     // [NOCPP[
+     if (value == 0x0D) {
+         errNcrCr();
+     } else if (value <= 0x0008 || value == 0x000B
+             || (value >= 0x000E && value <= 0x001F)) {
+         ch = errNcrControlChar(ch);
+     } else if (value >= 0xFDD0 && value <= 0xFDEF) {
+         errNcrUnassigned();
+     } else if ((value & 0xFFFE) == 0xFFFE) {
+         ch = errNcrNonCharacter(ch);
+     } else if (value >= 0x007F && value <= 0x009F) {
+         errNcrControlChar();
+     } else {
+         maybeWarnPrivateUse(ch);
+     }
+     // ]NOCPP]
+     bmpChar[0] = ch;
+     emitOrAppendOne(bmpChar, returnState);
+ }
+
+ public void eof() throws SAXException { // end of input: finishes whatever the saved state left pending, then calls tokenHandler.eof()
+ int state = stateSave;
+ int returnState = returnStateSave;
+
+ eofloop: for (;;) { // iterates again only through the continue statements in the character-reference cases
+ switch (state) {
+ case SCRIPT_DATA_LESS_THAN_SIGN:
+ case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
+ /*
+ * Otherwise, emit a U+003C LESS-THAN SIGN character token
+ */
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 1); // length 1: only the first element of LT_GT is passed on
+ /*
+ * and reconsume the current input character in the data
+ * state.
+ */
+ break eofloop;
+ case TAG_OPEN:
+ /*
+ * The behavior of this state depends on the content model
+ * flag.
+ */
+ /*
+ * Anything else Parse error.
+ */
+ errEofAfterLt();
+ /*
+ * Emit a U+003C LESS-THAN SIGN character token
+ */
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
+ /*
+ * and reconsume the current input character in the data
+ * state.
+ */
+ break eofloop;
+ case RAWTEXT_RCDATA_LESS_THAN_SIGN:
+ /*
+ * Emit a U+003C LESS-THAN SIGN character token
+ */
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
+ /*
+ * and reconsume the current input character in the RCDATA
+ * state.
+ */
+ break eofloop;
+ case NON_DATA_END_TAG_NAME:
+ /*
+ * Emit a U+003C LESS-THAN SIGN character token, a U+002F
+ * SOLIDUS character token,
+ */
+ tokenHandler.characters(Tokenizer.LT_SOLIDUS, 0, 2);
+ /*
+ * a character token for each of the characters in the
+ * temporary buffer (in the order they were added to the
+ * buffer),
+ */
+ emitStrBuf();
+ /*
+ * and reconsume the current input character in the RCDATA
+ * state.
+ */
+ break eofloop;
+ case CLOSE_TAG_OPEN:
+ /* EOF Parse error. */
+ errEofAfterLt();
+ /*
+ * Emit a U+003C LESS-THAN SIGN character token and a U+002F
+ * SOLIDUS character token.
+ */
+ tokenHandler.characters(Tokenizer.LT_SOLIDUS, 0, 2);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case TAG_NAME:
+ /*
+ * EOF Parse error.
+ */
+ errEofInTagName();
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case BEFORE_ATTRIBUTE_NAME:
+ case AFTER_ATTRIBUTE_VALUE_QUOTED:
+ case SELF_CLOSING_START_TAG:
+ /* EOF Parse error. */
+ errEofWithoutGt();
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case ATTRIBUTE_NAME:
+ /*
+ * EOF Parse error.
+ */
+ errEofInAttributeName();
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case AFTER_ATTRIBUTE_NAME:
+ case BEFORE_ATTRIBUTE_VALUE:
+ /* EOF Parse error. */
+ errEofWithoutGt();
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
+ case ATTRIBUTE_VALUE_SINGLE_QUOTED:
+ case ATTRIBUTE_VALUE_UNQUOTED:
+ /* EOF Parse error. */
+ errEofInAttributeValue();
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case BOGUS_COMMENT:
+ emitComment(0, 0);
+ break eofloop;
+ case BOGUS_COMMENT_HYPHEN:
+ // [NOCPP[
+ maybeAppendSpaceToBogusComment();
+ // ]NOCPP]
+ emitComment(0, 0);
+ break eofloop;
+ case MARKUP_DECLARATION_OPEN:
+ errBogusComment();
+ emitComment(0, 0);
+ break eofloop;
+ case MARKUP_DECLARATION_HYPHEN:
+ errBogusComment();
+ emitComment(0, 0);
+ break eofloop;
+ case MARKUP_DECLARATION_OCTYPE:
+ if (index < 6) {
+ errBogusComment();
+ emitComment(0, 0);
+ } else {
+ /* EOF Parse error. */
+ errEofInDoctype();
+ /*
+ * Create a new DOCTYPE token. Set its force-quirks flag
+ * to on.
+ */
+ doctypeName = "";
+ if (systemIdentifier != null) {
+ Portability.releaseString(systemIdentifier);
+ systemIdentifier = null;
+ }
+ if (publicIdentifier != null) {
+ Portability.releaseString(publicIdentifier);
+ publicIdentifier = null;
+ }
+ forceQuirks = true;
+ /*
+ * Emit the token.
+ */
+ emitDoctypeToken(0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ }
+ break eofloop;
+ case COMMENT_START:
+ case COMMENT:
+ /*
+ * EOF Parse error.
+ */
+ errEofInComment();
+ /* Emit the comment token. */
+ emitComment(0, 0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case COMMENT_END:
+ errEofInComment();
+ /* Emit the comment token. */
+ emitComment(2, 0); // first argument differs per comment state (0, 1, 2 or 3); presumably the count of buffered trailing characters to drop -- confirm against emitComment
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case COMMENT_END_DASH:
+ case COMMENT_START_DASH:
+ errEofInComment();
+ /* Emit the comment token. */
+ emitComment(1, 0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case COMMENT_END_BANG:
+ errEofInComment();
+ /* Emit the comment token. */
+ emitComment(3, 0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case DOCTYPE:
+ case BEFORE_DOCTYPE_NAME:
+ errEofInDoctype();
+ /*
+ * Create a new DOCTYPE token. Set its force-quirks flag to
+ * on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit the token.
+ */
+ emitDoctypeToken(0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case DOCTYPE_NAME:
+ errEofInDoctype();
+ strBufToDoctypeName();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ emitDoctypeToken(0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case DOCTYPE_UBLIC:
+ case DOCTYPE_YSTEM:
+ case AFTER_DOCTYPE_NAME:
+ case AFTER_DOCTYPE_PUBLIC_KEYWORD:
+ case AFTER_DOCTYPE_SYSTEM_KEYWORD:
+ case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
+ errEofInDoctype();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ emitDoctypeToken(0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
+ case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
+ /* EOF Parse error. */
+ errEofInPublicId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ publicIdentifier = strBufToString(); // the partially read identifier in strBuf is kept as the public identifier
+ emitDoctypeToken(0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
+ case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
+ case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
+ errEofInDoctype();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ emitDoctypeToken(0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
+ case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
+ /* EOF Parse error. */
+ errEofInSystemId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ systemIdentifier = strBufToString(); // the partially read identifier in strBuf is kept as the system identifier
+ emitDoctypeToken(0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
+ errEofInDoctype();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ emitDoctypeToken(0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case BOGUS_DOCTYPE:
+ /*
+ * Emit that DOCTYPE token.
+ */
+ emitDoctypeToken(0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case CONSUME_CHARACTER_REFERENCE:
+ /*
+ * Unlike the definition in the spec, this state does not
+ * return a value and never requires the caller to
+ * backtrack. This state takes care of emitting characters
+ * or appending to the current attribute value. It also
+ * takes care of that in the case when consuming the entity
+ * fails.
+ */
+ /*
+ * This section defines how to consume an entity. This
+ * definition is used when parsing entities in text and in
+ * attributes.
+ *
+ * The behavior depends on the identity of the next
+ * character (the one immediately after the U+0026 AMPERSAND
+ * character):
+ */
+
+ emitOrAppendCharRefBuf(returnState);
+ state = returnState;
+ continue; // back to the top of eofloop, now dispatching on the return state
+ case CHARACTER_REFERENCE_HILO_LOOKUP:
+ errNoNamedCharacterMatch();
+ emitOrAppendCharRefBuf(returnState);
+ state = returnState;
+ continue;
+ case CHARACTER_REFERENCE_TAIL:
+ outer: for (;;) {
+ char c = '\u0000'; // no input character exists at EOF; the comparisons below run against U+0000
+ entCol++;
+ /*
+ * Consume the maximum number of characters possible,
+ * with the consumed characters matching one of the
+ * identifiers in the first column of the named
+ * character references table (in a case-sensitive
+ * manner).
+ */
+ hiloop: for (;;) {
+ if (hi == -1) {
+ break hiloop;
+ }
+ if (entCol == NamedCharacters.NAMES[hi].length()) {
+ break hiloop;
+ }
+ if (entCol > NamedCharacters.NAMES[hi].length()) {
+ break outer;
+ } else if (c < NamedCharacters.NAMES[hi].charAt(entCol)) {
+ hi--;
+ } else {
+ break hiloop;
+ }
+ }
+
+ loloop: for (;;) {
+ if (hi < lo) {
+ break outer;
+ }
+ if (entCol == NamedCharacters.NAMES[lo].length()) {
+ candidate = lo; // the name at lo ends exactly here: remember it as the best match so far
+ charRefBufMark = charRefBufLen;
+ lo++;
+ } else if (entCol > NamedCharacters.NAMES[lo].length()) {
+ break outer;
+ } else if (c > NamedCharacters.NAMES[lo].charAt(entCol)) {
+ lo++;
+ } else {
+ break loloop;
+ }
+ }
+ if (hi < lo) {
+ break outer;
+ }
+ continue; // next iteration of outer
+ }
+
+ if (candidate == -1) {
+ /*
+ * If no match can be made, then this is a parse error.
+ */
+ errNoNamedCharacterMatch();
+ emitOrAppendCharRefBuf(returnState);
+ state = returnState;
+ continue eofloop;
+ } else {
+ @Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate];
+ if (candidateName.length() == 0
+ || candidateName.charAt(candidateName.length() - 1) != ';') {
+ /*
+ * If the last character matched is not a U+003B
+ * SEMICOLON (;), there is a parse error.
+ */
+ if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
+ /*
+ * If the entity is being consumed as part of an
+ * attribute, and the last character matched is
+ * not a U+003B SEMICOLON (;),
+ */
+ char ch;
+ if (charRefBufMark == charRefBufLen) {
+ ch = '\u0000';
+ } else {
+ ch = charRefBuf[charRefBufMark]; // first buffered character after the matched name
+ }
+ if ((ch >= '0' && ch <= '9')
+ || (ch >= 'A' && ch <= 'Z')
+ || (ch >= 'a' && ch <= 'z')) {
+ /*
+ * and the next character is in the range
+ * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
+ * U+0041 LATIN CAPITAL LETTER A to U+005A
+ * LATIN CAPITAL LETTER Z, or U+0061 LATIN
+ * SMALL LETTER A to U+007A LATIN SMALL
+ * LETTER Z, then, for historical reasons,
+ * all the characters that were matched
+ * after the U+0026 AMPERSAND (&) must be
+ * unconsumed, and nothing is returned.
+ */
+ errNoNamedCharacterMatch();
+ appendCharRefBufToStrBuf();
+ state = returnState;
+ continue eofloop;
+ }
+ }
+ if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
+ errUnescapedAmpersandInterpretedAsCharacterReference();
+ } else {
+ errNotSemicolonTerminated();
+ }
+ }
+
+ /*
+ * Otherwise, return a character token for the character
+ * corresponding to the entity name (as given by the
+ * second column of the named character references
+ * table).
+ */
+ @Const @NoLength char[] val = NamedCharacters.VALUES[candidate];
+ if (
+ // [NOCPP[
+ val.length == 1
+ // ]NOCPP]
+ // CPPONLY: val[1] == 0
+ ) {
+ emitOrAppendOne(val, returnState);
+ } else {
+ emitOrAppendTwo(val, returnState);
+ }
+ // this is so complicated!
+ if (charRefBufMark < charRefBufLen) { // characters were buffered beyond the matched name: pass them on after the expansion
+ if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
+ appendStrBuf(charRefBuf, charRefBufMark,
+ charRefBufLen - charRefBufMark);
+ } else {
+ tokenHandler.characters(charRefBuf, charRefBufMark,
+ charRefBufLen - charRefBufMark);
+ }
+ }
+ charRefBufLen = 0;
+ state = returnState;
+ continue eofloop;
+ /*
+ * If the markup contains I'm &notit; I tell you, the
+ * entity is parsed as "not", as in, I'm ¬it; I tell
+ * you. But if the markup was I'm &notin; I tell you,
+ * the entity would be parsed as "notin;", resulting in
+ * I'm ∉ I tell you.
+ */
+ }
+ case CONSUME_NCR:
+ case DECIMAL_NRC_LOOP:
+ case HEX_NCR_LOOP:
+ /*
+ * If no characters match the range, then don't consume any
+ * characters (and unconsume the U+0023 NUMBER SIGN
+ * character and, if appropriate, the X character). This is
+ * a parse error; nothing is returned.
+ *
+ * Otherwise, if the next character is a U+003B SEMICOLON,
+ * consume that too. If it isn't, there is a parse error.
+ */
+ if (!seenDigits) {
+ errNoDigitsInNCR();
+ emitOrAppendCharRefBuf(returnState);
+ state = returnState;
+ continue;
+ } else {
+ errCharRefLacksSemicolon();
+ }
+ // WARNING previous state sets reconsume
+ handleNcrValue(returnState);
+ state = returnState;
+ continue;
+ case CDATA_RSQB:
+ tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1); // length 1: only the first element of RSQB_RSQB
+ break eofloop;
+ case CDATA_RSQB_RSQB:
+ tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2); // length 2: both elements
+ break eofloop;
+ case DATA:
+ default:
+ break eofloop;
+ }
+ }
+ // case DATA:
+ /*
+ * EOF Emit an end-of-file token.
+ */
+ tokenHandler.eof();
+ return;
+ }
+
+ private void emitDoctypeToken(int pos) throws SAXException { // passes the collected doctype fields to tokenHandler.doctype() and then clears them
+ cstart = pos + 1; // cstart is moved to just past pos
+ tokenHandler.doctype(doctypeName, publicIdentifier, systemIdentifier,
+ forceQuirks);
+ // It is OK and sufficient to release these here, since
+ // there's no way out of the doctype states than through paths
+ // that call this method.
+ doctypeName = null;
+ Portability.releaseString(publicIdentifier);
+ publicIdentifier = null;
+ Portability.releaseString(systemIdentifier);
+ systemIdentifier = null;
+ }
+
+ @Inline protected char checkChar(@NoLength char[] buf, int pos)
+ throws SAXException {
+ return buf[pos]; // plain array read in this class; the method is protected and non-final, so a subclass may override it
+ }
+
+ /**
+ * Forwards an internal encoding declaration to the registered
+ * <code>EncodingDeclarationHandler</code>, if one has been set.
+ *
+ * @param internalCharset
+ *            the charset name handed through to the handler unchanged
+ * @return the handler's return value, or <code>false</code> when no
+ *         handler is registered
+ * @throws SAXException
+ *             propagated from the handler call
+ */
+ public boolean internalEncodingDeclaration(String internalCharset)
+ throws SAXException {
+ if (encodingDeclarationHandler == null) {
+ return false;
+ }
+ return encodingDeclarationHandler.internalEncodingDeclaration(internalCharset);
+ }
+
+ /**
+ * Delivers the first two elements of <code>val</code> to the destination
+ * selected by <code>returnState</code>.
+ *
+ * @param val
+ *            array whose elements 0 and 1 are delivered
+ * @param returnState
+ *            when it shares no bits with
+ *            <code>DATA_AND_RCDATA_MASK</code> the characters go straight
+ *            to the token handler; otherwise they are appended to the
+ *            string buffer one at a time
+ * @throws SAXException
+ *             propagated from the calls made here
+ */
+ private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState)
+ throws SAXException {
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ tokenHandler.characters(val, 0, 2);
+ } else {
+ appendStrBuf(val[0]);
+ appendStrBuf(val[1]);
+ }
+ }
+
+ /**
+ * Delivers the first element of <code>val</code> to the destination
+ * selected by <code>returnState</code>.
+ *
+ * @param val
+ *            array whose element 0 is delivered
+ * @param returnState
+ *            when it shares no bits with
+ *            <code>DATA_AND_RCDATA_MASK</code> the character goes straight
+ *            to the token handler; otherwise it is appended to the string
+ *            buffer
+ * @throws SAXException
+ *             propagated from the calls made here
+ */
+ private void emitOrAppendOne(@Const @NoLength char[] val, int returnState)
+ throws SAXException {
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ tokenHandler.characters(val, 0, 1);
+ } else {
+ appendStrBuf(val[0]);
+ }
+ }
+
+ public void end() throws SAXException { // drops the string buffer, releases held identifiers and names, and calls tokenHandler.endTokenization()
+ strBuf = null;
+ doctypeName = null;
+ if (systemIdentifier != null) {
+ Portability.releaseString(systemIdentifier);
+ systemIdentifier = null;
+ }
+ if (publicIdentifier != null) {
+ Portability.releaseString(publicIdentifier);
+ publicIdentifier = null;
+ }
+ if (tagName != null) {
+ tagName.release();
+ tagName = null;
+ }
+ if (attributeName != null) {
+ attributeName.release();
+ attributeName = null;
+ }
+ tokenHandler.endTokenization(); // note: called before the attributes holder is dealt with below
+ if (attributes != null) {
+ // [NOCPP[
+ attributes = null;
+ // ]NOCPP]
+ // CPPONLY: attributes.clear(mappingLangToXmlLang);
+ }
+ }
+
+ public void requestSuspension() { // only raises the shouldSuspend flag; nothing is suspended inside this method
+ shouldSuspend = true;
+ }
+
+ // [NOCPP[
+
+ public void becomeConfident() { // sets the confident flag to true
+ confident = true;
+ }
+
+ /**
+ * Reports whether the next character is on a new line. This
+ * implementation always returns <code>false</code>.
+ *
+ * @return <code>false</code>, unconditionally
+ */
+ public boolean isNextCharOnNewLine() {
+ return false;
+ }
+
+ public boolean isPrevCR() { // exposes the current value of the lastCR field
+ return lastCR;
+ }
+
+ /**
+ * Returns the line. This implementation does not read any field and
+ * always returns <code>-1</code>.
+ *
+ * @return <code>-1</code>, unconditionally
+ */
+ public int getLine() {
+ return -1;
+ }
+
+ /**
+ * Returns the col. This implementation does not read any field and
+ * always returns <code>-1</code>.
+ *
+ * @return <code>-1</code>, unconditionally
+ */
+ public int getCol() {
+ return -1;
+ }
+
+ // ]NOCPP]
+
+ public boolean isInDataState() { // true when the saved state (stateSave) equals DATA
+ return (stateSave == DATA);
+ }
+
+ /**
+ * Puts the tokenizer back into the <code>DATA</code> state. The string
+ * buffer is cleared, the bookkeeping fields are set back to fixed starting
+ * values, the doctype fields are reinitialized, and any tag name or
+ * attribute name still held is released. The attribute holder is deleted
+ * as well, but only when <code>newAttributesEachTime</code> is set.
+ */
+ public void resetToDataState() {
+ // Buffers and saved state.
+ clearStrBufAfterUse();
+ charRefBufLen = 0;
+ stateSave = Tokenizer.DATA;
+ // line = 1; XXX line numbers
+ lastCR = false;
+ index = 0;
+ forceQuirks = false;
+ additional = '\u0000';
+ // Named character reference lookup.
+ entCol = -1;
+ firstCharKey = -1;
+ lo = 0;
+ hi = 0; // will always be overwritten before use anyway
+ candidate = -1;
+ charRefBufMark = 0;
+ // Numeric character reference accumulation.
+ value = 0;
+ seenDigits = false;
+ endTag = false;
+ shouldSuspend = false;
+ initDoctypeFields();
+ // Release whatever token parts are still referenced.
+ if (tagName != null) {
+ tagName.release();
+ tagName = null;
+ }
+ if (attributeName != null) {
+ attributeName.release();
+ attributeName = null;
+ }
+ if (newAttributesEachTime && attributes != null) {
+ Portability.delete(attributes);
+ attributes = null;
+ }
+ }
+
+ public void loadState(Tokenizer other) throws SAXException { // copies other's tokenizer state into this instance; shouldSuspend is reset rather than copied
+ strBufLen = other.strBufLen;
+ if (strBufLen > strBuf.length) { // replace the buffer only when the existing one is too small
+ strBuf = new char[strBufLen];
+ }
+ System.arraycopy(other.strBuf, 0, strBuf, 0, strBufLen);
+
+ charRefBufLen = other.charRefBufLen;
+ System.arraycopy(other.charRefBuf, 0, charRefBuf, 0, charRefBufLen); // no resize here: assumes this charRefBuf is at least as large as other's -- confirm where charRefBuf is allocated
+
+ stateSave = other.stateSave;
+ returnStateSave = other.returnStateSave;
+ endTagExpectation = other.endTagExpectation;
+ endTagExpectationAsArray = other.endTagExpectationAsArray;
+ // line = 1; XXX line numbers
+ lastCR = other.lastCR;
+ index = other.index;
+ forceQuirks = other.forceQuirks;
+ additional = other.additional;
+ entCol = other.entCol;
+ firstCharKey = other.firstCharKey;
+ lo = other.lo;
+ hi = other.hi;
+ candidate = other.candidate;
+ charRefBufMark = other.charRefBufMark;
+ value = other.value;
+ seenDigits = other.seenDigits;
+ endTag = other.endTag;
+ shouldSuspend = false; // not taken from other
+
+ if (other.doctypeName == null) {
+ doctypeName = null;
+ } else {
+ doctypeName = Portability.newLocalFromLocal(other.doctypeName,
+ interner);
+ }
+
+ Portability.releaseString(systemIdentifier); // the old value is released before it is replaced
+ if (other.systemIdentifier == null) {
+ systemIdentifier = null;
+ } else {
+ systemIdentifier = Portability.newStringFromString(other.systemIdentifier);
+ }
+
+ Portability.releaseString(publicIdentifier); // the old value is released before it is replaced
+ if (other.publicIdentifier == null) {
+ publicIdentifier = null;
+ } else {
+ publicIdentifier = Portability.newStringFromString(other.publicIdentifier);
+ }
+
+ if (tagName != null) {
+ tagName.release();
+ }
+ if (other.tagName == null) {
+ tagName = null;
+ } else {
+ tagName = other.tagName.cloneElementName(interner); // obtained through cloneElementName rather than by sharing other's reference
+ }
+
+ if (attributeName != null) {
+ attributeName.release();
+ }
+ if (other.attributeName == null) {
+ attributeName = null;
+ } else {
+ attributeName = other.attributeName.cloneAttributeName(interner); // obtained through cloneAttributeName rather than by sharing other's reference
+ }
+
+ Portability.delete(attributes); // called without a null check, unlike resetToDataState()
+ if (other.attributes == null) {
+ attributes = null;
+ } else {
+ attributes = other.attributes.cloneAttributes(interner);
+ }
+ }
+
+ public void initializeWithoutStarting() throws SAXException { // resets per-parse fields and finishes with resetToDataState()
+ confident = false;
+ strBuf = null;
+ line = 1;
+ // CPPONLY: attributeLine = 1;
+ // [NOCPP[
+ html4 = false;
+ metaBoundaryPassed = false;
+ wantsComments = tokenHandler.wantsComments();
+ if (!newAttributesEachTime) { // in this mode a single HtmlAttributes instance is created up front
+ attributes = new HtmlAttributes(mappingLangToXmlLang);
+ }
+ // ]NOCPP]
+ resetToDataState();
+ }
+
+ protected void errGarbageAfterLtSlash() throws SAXException { // like the other hook methods in this run, the body is empty here; being protected and non-final, a subclass can override it
+ }
+
+ protected void errLtSlashGt() throws SAXException {
+ }
+
+ protected void errWarnLtSlashInRcdata() throws SAXException {
+ }
+
+ protected void errHtml4LtSlashInRcdata(char folded) throws SAXException {
+ }
+
+ protected void errCharRefLacksSemicolon() throws SAXException {
+ }
+
+ protected void errNoDigitsInNCR() throws SAXException {
+ }
+
+ protected void errGtInSystemId() throws SAXException {
+ }
+
+ protected void errGtInPublicId() throws SAXException {
+ }
+
+ protected void errNamelessDoctype() throws SAXException {
+ }
+
+ protected void errConsecutiveHyphens() throws SAXException {
+ }
+
+ protected void errPrematureEndOfComment() throws SAXException {
+ }
+
+ protected void errBogusComment() throws SAXException {
+ }
+
+ protected void errUnquotedAttributeValOrNull(char c) throws SAXException {
+ }
+
+ protected void errSlashNotFollowedByGt() throws SAXException {
+ }
+
+ protected void errHtml4XmlVoidSyntax() throws SAXException {
+ }
+
+ protected void errNoSpaceBetweenAttributes() throws SAXException {
+ }
+
+ protected void errHtml4NonNameInUnquotedAttribute(char c)
+ throws SAXException {
+ }
+
+ protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c)
+ throws SAXException {
+ }
+
+ protected void errAttributeValueMissing() throws SAXException {
+ }
+
+ protected void errBadCharBeforeAttributeNameOrNull(char c)
+ throws SAXException {
+ }
+
+ protected void errEqualsSignBeforeAttributeName() throws SAXException {
+ }
+
+ protected void errBadCharAfterLt(char c) throws SAXException {
+ }
+
+ protected void errLtGt() throws SAXException {
+ }
+
+ protected void errProcessingInstruction() throws SAXException {
+ }
+
+ protected void errUnescapedAmpersandInterpretedAsCharacterReference()
+ throws SAXException {
+ }
+
+ protected void errNotSemicolonTerminated() throws SAXException {
+ }
+
+ protected void errNoNamedCharacterMatch() throws SAXException {
+ }
+
+ protected void errQuoteBeforeAttributeName(char c) throws SAXException {
+ }
+
+ protected void errQuoteOrLtInAttributeNameOrNull(char c)
+ throws SAXException {
+ }
+
+ protected void errExpectedPublicId() throws SAXException {
+ }
+
+ protected void errBogusDoctype() throws SAXException {
+ }
+
+ protected void maybeWarnPrivateUseAstral() throws SAXException {
+ }
+
+ protected void maybeWarnPrivateUse(char ch) throws SAXException {
+ }
+
+ protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs)
+ throws SAXException {
+ }
+
+ protected void maybeErrSlashInEndTag(boolean selfClosing)
+ throws SAXException {
+ }
+
+ protected char errNcrNonCharacter(char ch) throws SAXException {
+ return ch; // returns the argument unchanged here; handleNcrValue emits whatever this method returns
+ }
+
+ protected void errAstralNonCharacter(int ch) throws SAXException {
+ }
+
+ protected void errNcrSurrogate() throws SAXException {
+ }
+
+ protected char errNcrControlChar(char ch) throws SAXException {
+ return ch; // returns the argument unchanged here; handleNcrValue emits whatever this method returns
+ }
+
+ protected void errNcrCr() throws SAXException {
+ }
+
+ protected void errNcrInC1Range() throws SAXException {
+ }
+
+ protected void errEofInPublicId() throws SAXException {
+ }
+
+ protected void errEofInComment() throws SAXException {
+ }
+
+ protected void errEofInDoctype() throws SAXException {
+ }
+
+ protected void errEofInAttributeValue() throws SAXException {
+ }
+
+ protected void errEofInAttributeName() throws SAXException {
+ }
+
+ protected void errEofWithoutGt() throws SAXException {
+ }
+
+ protected void errEofInTagName() throws SAXException {
+ }
+
+ protected void errEofInEndTag() throws SAXException {
+ }
+
+ protected void errEofAfterLt() throws SAXException {
+ }
+
+ protected void errNcrOutOfRange() throws SAXException {
+ }
+
+ protected void errNcrUnassigned() throws SAXException {
+ }
+
+ protected void errDuplicateAttribute() throws SAXException {
+ }
+
+ protected void errEofInSystemId() throws SAXException {
+ }
+
+ protected void errExpectedSystemId() throws SAXException {
+ }
+
+ protected void errMissingSpaceBeforeDoctypeName() throws SAXException {
+ }
+
+ protected void errHyphenHyphenBang() throws SAXException {
+ }
+
+ protected void errNcrControlChar() throws SAXException { // no-argument overload; handleNcrValue calls it for the value 0x7F
+ }
+
+ protected void errNcrZero() throws SAXException {
+ }
+
+ protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote()
+ throws SAXException {
+ }
+
+ protected void errNoSpaceBetweenPublicAndSystemIds() throws SAXException {
+ }
+
+ protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote()
+ throws SAXException {
+ }
+
+ protected void noteAttributeWithoutValue() throws SAXException {
+ }
+
+ protected void noteUnquotedAttributeValue() throws SAXException {
+ }
+
+ /**
+ * Sets the encodingDeclarationHandler. While the handler is
+ * <code>null</code>, <code>internalEncodingDeclaration</code> returns
+ * <code>false</code> without forwarding the call.
+ *
+ * @param encodingDeclarationHandler
+ *            the encodingDeclarationHandler to set
+ */
+ public void setEncodingDeclarationHandler(
+ EncodingDeclarationHandler encodingDeclarationHandler) {
+ this.encodingDeclarationHandler = encodingDeclarationHandler;
+ }
+
+ void destructor() { // deletes the attributes holder through Portability.delete and clears the field
+ // The translator will write refcount tracing stuff here
+ Portability.delete(attributes);
+ attributes = null;
+ }
+
+ // [NOCPP[
+
+ /**
+ * Sets an offset to be added to the position reported to
+ * <code>TransitionHandler</code>. The method body in this class is
+ * empty, so the argument is ignored here.
+ *
+ * @param offset the offset
+ */
+ public void setTransitionBaseOffset(int offset) {
+
+ }
+
+ // ]NOCPP]
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java
new file mode 100644
index 000000000..de7d8478d
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java
@@ -0,0 +1,6553 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007-2015 Mozilla Foundation
+ * Copyright (c) 2018-2020 Moonchild Productions
+ * Copyright (c) 2020 Binary Outcast
+ * Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla
+ * Foundation, and Opera Software ASA.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * The comments following this one that use the same comment syntax as this
+ * comment are quotes from the WHATWG HTML 5 spec as of 27 June 2007
+ * amended as of June 28 2007.
+ * That document came with this statement:
+ * "© Copyright 2004-2007 Apple Computer, Inc., Mozilla Foundation, and
+ * Opera Software ASA. You are granted a license to use, reproduce and
+ * create derivative works of this document."
+ */
+
+package nu.validator.htmlparser.impl;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+import nu.validator.htmlparser.annotation.Auto;
+import nu.validator.htmlparser.annotation.Const;
+import nu.validator.htmlparser.annotation.IdType;
+import nu.validator.htmlparser.annotation.Inline;
+import nu.validator.htmlparser.annotation.Literal;
+import nu.validator.htmlparser.annotation.Local;
+import nu.validator.htmlparser.annotation.NoLength;
+import nu.validator.htmlparser.annotation.NsUri;
+import nu.validator.htmlparser.common.DoctypeExpectation;
+import nu.validator.htmlparser.common.DocumentMode;
+import nu.validator.htmlparser.common.DocumentModeHandler;
+import nu.validator.htmlparser.common.Interner;
+import nu.validator.htmlparser.common.TokenHandler;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+
+public abstract class TreeBuilder<T> implements TokenHandler,
+ TreeBuilderState<T> {
+
+ /**
+ * Array version of U+FFFD.
+ */
+ private static final @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
+
+ // Start dispatch groups
+
+ final static int OTHER = 0;
+
+ final static int A = 1;
+
+ final static int BASE = 2;
+
+ final static int BODY = 3;
+
+ final static int BR = 4;
+
+ final static int BUTTON = 5;
+
+ final static int CAPTION = 6;
+
+ final static int COL = 7;
+
+ final static int COLGROUP = 8;
+
+ final static int FORM = 9;
+
+ final static int FRAME = 10;
+
+ final static int FRAMESET = 11;
+
+ final static int IMAGE = 12;
+
+ final static int INPUT = 13;
+
+ final static int ISINDEX = 14;
+
+ final static int LI = 15;
+
+ final static int LINK_OR_BASEFONT_OR_BGSOUND = 16;
+
+ final static int MATH = 17;
+
+ final static int META = 18;
+
+ final static int SVG = 19;
+
+ final static int HEAD = 20;
+
+ final static int HR = 22;
+
+ final static int HTML = 23;
+
+ final static int NOBR = 24;
+
+ final static int NOFRAMES = 25;
+
+ final static int NOSCRIPT = 26;
+
+ final static int OPTGROUP = 27;
+
+ final static int OPTION = 28;
+
+ final static int P = 29;
+
+ final static int PLAINTEXT = 30;
+
+ final static int SCRIPT = 31;
+
+ final static int SELECT = 32;
+
+ final static int STYLE = 33;
+
+ final static int TABLE = 34;
+
+ final static int TEXTAREA = 35;
+
+ final static int TITLE = 36;
+
+ final static int TR = 37;
+
+ final static int XMP = 38;
+
+ final static int TBODY_OR_THEAD_OR_TFOOT = 39;
+
+ final static int TD_OR_TH = 40;
+
+ final static int DD_OR_DT = 41;
+
+ final static int H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 = 42;
+
+ final static int MARQUEE_OR_APPLET = 43;
+
+ final static int PRE_OR_LISTING = 44;
+
+ final static int B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U = 45;
+
+ final static int UL_OR_OL_OR_DL = 46;
+
+ final static int IFRAME = 47;
+
+ final static int EMBED = 48;
+
+ final static int AREA_OR_WBR = 49;
+
+ final static int DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU = 50;
+
+ final static int ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY = 51;
+
+ final static int RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR = 52;
+
+ final static int RB_OR_RTC = 53;
+
+ final static int PARAM_OR_SOURCE_OR_TRACK = 55;
+
+ final static int MGLYPH_OR_MALIGNMARK = 56;
+
+ final static int MI_MO_MN_MS_MTEXT = 57;
+
+ final static int ANNOTATION_XML = 58;
+
+ final static int FOREIGNOBJECT_OR_DESC = 59;
+
+ final static int NOEMBED = 60;
+
+ final static int FIELDSET = 61;
+
+ final static int OUTPUT = 62;
+
+ final static int OBJECT = 63;
+
+ final static int FONT = 64;
+
+ final static int KEYGEN = 65;
+
+ final static int MENUITEM = 66;
+
+ final static int TEMPLATE = 67;
+
+ final static int IMG = 68;
+
+ final static int RT_OR_RP = 69;
+
+ // start insertion modes
+
+ private static final int IN_ROW = 0;
+
+ private static final int IN_TABLE_BODY = 1;
+
+ private static final int IN_TABLE = 2;
+
+ private static final int IN_CAPTION = 3;
+
+ private static final int IN_CELL = 4;
+
+ private static final int FRAMESET_OK = 5;
+
+ private static final int IN_BODY = 6;
+
+ private static final int IN_HEAD = 7;
+
+ private static final int IN_HEAD_NOSCRIPT = 8;
+
+ // no fall-through
+
+ private static final int IN_COLUMN_GROUP = 9;
+
+ // no fall-through
+
+ private static final int IN_SELECT_IN_TABLE = 10;
+
+ private static final int IN_SELECT = 11;
+
+ // no fall-through
+
+ private static final int AFTER_BODY = 12;
+
+ // no fall-through
+
+ private static final int IN_FRAMESET = 13;
+
+ private static final int AFTER_FRAMESET = 14;
+
+ // no fall-through
+
+ private static final int INITIAL = 15;
+
+ // could add fall-through
+
+ private static final int BEFORE_HTML = 16;
+
+ // could add fall-through
+
+ private static final int BEFORE_HEAD = 17;
+
+ // no fall-through
+
+ private static final int AFTER_HEAD = 18;
+
+ // no fall-through
+
+ private static final int AFTER_AFTER_BODY = 19;
+
+ // no fall-through
+
+ private static final int AFTER_AFTER_FRAMESET = 20;
+
+ // no fall-through
+
+ private static final int TEXT = 21;
+
+ private static final int IN_TEMPLATE = 22;
+
+ // start charset states
+
+ private static final int CHARSET_INITIAL = 0;
+
+ private static final int CHARSET_C = 1;
+
+ private static final int CHARSET_H = 2;
+
+ private static final int CHARSET_A = 3;
+
+ private static final int CHARSET_R = 4;
+
+ private static final int CHARSET_S = 5;
+
+ private static final int CHARSET_E = 6;
+
+ private static final int CHARSET_T = 7;
+
+ private static final int CHARSET_EQUALS = 8;
+
+ private static final int CHARSET_SINGLE_QUOTED = 9;
+
+ private static final int CHARSET_DOUBLE_QUOTED = 10;
+
+ private static final int CHARSET_UNQUOTED = 11;
+
+ // end pseudo enums
+
+ // [NOCPP[
+
+ // Public identifiers of the HTML 4.0 and HTML 4.01 doctypes.
+ // The entries must stay in ascending String order because
+ // isHtml4Doctype() looks them up with Arrays.binarySearch().
+ private final static String[] HTML4_PUBLIC_IDS = {
+ "-//W3C//DTD HTML 4.0 Frameset//EN",
+ "-//W3C//DTD HTML 4.0 Transitional//EN",
+ "-//W3C//DTD HTML 4.0//EN", "-//W3C//DTD HTML 4.01 Frameset//EN",
+ "-//W3C//DTD HTML 4.01 Transitional//EN",
+ "-//W3C//DTD HTML 4.01//EN" };
+
+ // ]NOCPP]
+
+ @Literal private final static String[] QUIRKY_PUBLIC_IDS = {
+ "+//silmaril//dtd html pro v0r11 19970101//",
+ "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+ "-//as//dtd html 3.0 aswedit + extensions//",
+ "-//ietf//dtd html 2.0 level 1//",
+ "-//ietf//dtd html 2.0 level 2//",
+ "-//ietf//dtd html 2.0 strict level 1//",
+ "-//ietf//dtd html 2.0 strict level 2//",
+ "-//ietf//dtd html 2.0 strict//",
+ "-//ietf//dtd html 2.0//",
+ "-//ietf//dtd html 2.1e//",
+ "-//ietf//dtd html 3.0//",
+ "-//ietf//dtd html 3.2 final//",
+ "-//ietf//dtd html 3.2//",
+ "-//ietf//dtd html 3//",
+ "-//ietf//dtd html level 0//",
+ "-//ietf//dtd html level 1//",
+ "-//ietf//dtd html level 2//",
+ "-//ietf//dtd html level 3//",
+ "-//ietf//dtd html strict level 0//",
+ "-//ietf//dtd html strict level 1//",
+ "-//ietf//dtd html strict level 2//",
+ "-//ietf//dtd html strict level 3//",
+ "-//ietf//dtd html strict//",
+ "-//ietf//dtd html//",
+ "-//metrius//dtd metrius presentational//",
+ "-//microsoft//dtd internet explorer 2.0 html strict//",
+ "-//microsoft//dtd internet explorer 2.0 html//",
+ "-//microsoft//dtd internet explorer 2.0 tables//",
+ "-//microsoft//dtd internet explorer 3.0 html strict//",
+ "-//microsoft//dtd internet explorer 3.0 html//",
+ "-//microsoft//dtd internet explorer 3.0 tables//",
+ "-//netscape comm. corp.//dtd html//",
+ "-//netscape comm. corp.//dtd strict html//",
+ "-//o'reilly and associates//dtd html 2.0//",
+ "-//o'reilly and associates//dtd html extended 1.0//",
+ "-//o'reilly and associates//dtd html extended relaxed 1.0//",
+ "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
+ "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
+ "-//spyglass//dtd html 2.0 extended//",
+ "-//sq//dtd html 2.0 hotmetal + extensions//",
+ "-//sun microsystems corp.//dtd hotjava html//",
+ "-//sun microsystems corp.//dtd hotjava strict html//",
+ "-//w3c//dtd html 3 1995-03-24//", "-//w3c//dtd html 3.2 draft//",
+ "-//w3c//dtd html 3.2 final//", "-//w3c//dtd html 3.2//",
+ "-//w3c//dtd html 3.2s draft//", "-//w3c//dtd html 4.0 frameset//",
+ "-//w3c//dtd html 4.0 transitional//",
+ "-//w3c//dtd html experimental 19960712//",
+ "-//w3c//dtd html experimental 970421//", "-//w3c//dtd w3 html//",
+ "-//w3o//dtd w3 html 3.0//", "-//webtechs//dtd mozilla html 2.0//",
+ "-//webtechs//dtd mozilla html//" };
+
+ private static final int NOT_FOUND_ON_STACK = Integer.MAX_VALUE;
+
+ // [NOCPP[
+
+ private static final @Local String HTML_LOCAL = "html";
+
+ // ]NOCPP]
+
+ private int mode = INITIAL;
+
+ private int originalMode = INITIAL;
+
+ /**
+ * Used only when moving back to IN_BODY.
+ */
+ private boolean framesetOk = true;
+
+ protected Tokenizer tokenizer;
+
+ // [NOCPP[
+
+ protected ErrorHandler errorHandler;
+
+ private DocumentModeHandler documentModeHandler;
+
+ private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;
+
+ private LocatorImpl firstCommentLocation;
+
+ // ]NOCPP]
+
+ private boolean scriptingEnabled = false;
+
+ private boolean needToDropLF;
+
+ // [NOCPP[
+
+ private boolean wantingComments;
+
+ // ]NOCPP]
+
+ private boolean fragment;
+
+ private @Local String contextName;
+
+ private @NsUri String contextNamespace;
+
+ private T contextNode;
+
+ /**
+ * Stack of template insertion modes
+ */
+ private @Auto int[] templateModeStack;
+
+ /**
+ * Current template mode stack pointer.
+ */
+ private int templateModePtr = -1;
+
+ private @Auto StackNode<T>[] stack;
+
+ private int currentPtr = -1;
+
+ private @Auto StackNode<T>[] listOfActiveFormattingElements;
+
+ private int listPtr = -1;
+
+ private T formPointer;
+
+ private T headPointer;
+
+ /**
+ * Used to work around Gecko limitations. Not used in Java.
+ */
+ private T deepTreeSurrogateParent;
+
+ protected @Auto char[] charBuffer;
+
+ protected int charBufferLen = 0;
+
+ private boolean quirks = false;
+
+ private boolean isSrcdocDocument = false;
+
+ // [NOCPP[
+
+ private boolean reportingDoctype = true;
+
+ private XmlViolationPolicy namePolicy = XmlViolationPolicy.ALTER_INFOSET;
+
+ private final Map<String, LocatorImpl> idLocations = new HashMap<String, LocatorImpl>();
+
+ private boolean html4;
+
+ // ]NOCPP]
+
+ /**
+  * Creates a tree builder whose fragment flag is initially cleared.
+  */
+ protected TreeBuilder() {
+     this.fragment = false;
+ }
+
+ /**
+ * Reports a condition that would make the infoset incompatible with XML
+ * 1.0 as fatal. The implementation in this class has an empty body.
+ *
+ * @throws SAXException
+ * @throws SAXParseException
+ */
+ protected void fatal() throws SAXException {
+ }
+
+ // [NOCPP[
+
+ /**
+  * Wraps the given exception in a SAXParseException located at the
+  * tokenizer, passes it to the error handler as a fatal error when a
+  * handler is set, and then always throws the wrapping exception.
+  *
+  * @param e
+  *            the underlying cause
+  * @throws SAXException
+  *             always
+  */
+ protected final void fatal(Exception e) throws SAXException {
+     final SAXParseException wrapped = new SAXParseException(
+             e.getMessage(), tokenizer, e);
+     if (errorHandler == null) {
+         throw wrapped;
+     }
+     errorHandler.fatalError(wrapped);
+     throw wrapped;
+ }
+
+ /**
+  * Builds a SAXParseException with the given message located at the
+  * tokenizer, passes it to the error handler as a fatal error when a
+  * handler is set, and then always throws it.
+  *
+  * @param s
+  *            the message
+  * @throws SAXException
+  *             always
+  */
+ final void fatal(String s) throws SAXException {
+     final SAXParseException failure = new SAXParseException(s, tokenizer);
+     if (errorHandler == null) {
+         throw failure;
+     }
+     errorHandler.fatalError(failure);
+     throw failure;
+ }
+
+ /**
+  * Reports a Parse Error. Does nothing when no error handler is set;
+  * otherwise delegates to {@link #errNoCheck(String)}.
+  *
+  * @param message
+  *            the message
+  * @throws SAXException
+  */
+ final void err(String message) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck(message);
+     }
+ }
+
+ /**
+ * Reports a Parse Error without checking if an error handler is present.
+ *
+ * @param message
+ * the message
+ * @throws SAXException
+ */
+ final void errNoCheck(String message) throws SAXException {
+ SAXParseException spe = new SAXParseException(message, tokenizer);
+ errorHandler.error(spe);
+ }
+
+ /**
+  * Reports every stack entry above <code>eltPos</code> as unclosed,
+  * walking from the current node downwards. Does nothing when the stack
+  * is empty.
+  *
+  * @param eltPos
+  *            the stack index at which reporting stops (exclusive)
+  * @throws SAXException
+  */
+ private void errListUnclosedStartTags(int eltPos) throws SAXException {
+     if (currentPtr == -1) {
+         return;
+     }
+     int pos = currentPtr;
+     while (pos > eltPos) {
+         reportUnclosedElementNameAndLocation(pos);
+         pos--;
+     }
+ }
+
+ /**
+ * Reports the name and location of an unclosed element.
+ *
+ * @throws SAXException
+ */
+ private final void reportUnclosedElementNameAndLocation(int pos) throws SAXException {
+ StackNode<T> node = stack[pos];
+ if (node.isOptionalEndTag()) {
+ return;
+ }
+ TaintableLocatorImpl locator = node.getLocator();
+ if (locator.isTainted()) {
+ return;
+ }
+ locator.markTainted();
+ SAXParseException spe = new SAXParseException(
+ "Unclosed element \u201C" + node.popName + "\u201D.", locator);
+ errorHandler.error(spe);
+ }
+
+ /**
+  * Reports a warning located at the tokenizer. Does nothing when no error
+  * handler is set.
+  *
+  * @param message
+  *            the message
+  * @throws SAXException
+  */
+ final void warn(String message) throws SAXException {
+     if (errorHandler != null) {
+         errorHandler.warning(new SAXParseException(message, tokenizer));
+     }
+ }
+
+ /**
+  * Reports a warning with an explicit locator. Does nothing when no error
+  * handler is set.
+  *
+  * @param message
+  *            the message
+  * @param locator
+  *            the location to attach to the warning
+  * @throws SAXException
+  */
+ final void warn(String message, Locator locator) throws SAXException {
+     if (errorHandler != null) {
+         errorHandler.warning(new SAXParseException(message, locator));
+     }
+ }
+
+ // ]NOCPP]
+
+ /**
+ * Prepares this tree builder for a new parse driven by the given
+ * tokenizer. Allocates fresh stacks, resets the stack pointers and element
+ * pointers, calls <code>start(fragment)</code> and then sets up the initial
+ * state: in the fragment case a root node matching the context element is
+ * pushed onto the stack and the tokenizer state is chosen from the context
+ * name; otherwise the insertion mode is set to INITIAL.
+ *
+ * @param self
+ *            the tokenizer that will feed this tree builder
+ * @throws SAXException
+ */
+ @SuppressWarnings("unchecked") public final void startTokenization(Tokenizer self) throws SAXException {
+ tokenizer = self;
+ stack = new StackNode[64];
+ templateModeStack = new int[64];
+ listOfActiveFormattingElements = new StackNode[64];
+ needToDropLF = false;
+ originalMode = INITIAL;
+ templateModePtr = -1;
+ currentPtr = -1;
+ listPtr = -1;
+ formPointer = null;
+ headPointer = null;
+ deepTreeSurrogateParent = null;
+ // [NOCPP[
+ html4 = false;
+ idLocations.clear();
+ wantingComments = wantsComments();
+ firstCommentLocation = null;
+ // ]NOCPP]
+ start(fragment);
+ charBufferLen = 0;
+ charBuffer = null;
+ framesetOk = true;
+ if (fragment) {
+ T elt;
+ if (contextNode != null) {
+ elt = contextNode;
+ } else {
+ elt = createHtmlElementSetAsRoot(tokenizer.emptyAttributes());
+ }
+ // When the context node is not in the HTML namespace, contrary
+ // to the spec, the first node on the stack is not set to "html"
+ // in the HTML namespace. Instead, it is set to a node that has
+ // the characteristics of the appropriate "adjusted current node".
+ // This way, there is no need to perform "adjusted current node"
+ // checks during tree construction. Instead, it's sufficient to
+ // just look at the current node. However, this also means that it
+ // is not safe to treat "html" in the HTML namespace as a sentinel
+ // that ends stack popping. Instead, stack popping loops that are
+ // meant not to pop the first element on the stack need to check
+ // for currentPtr becoming zero.
+ // NOTE(review): the == comparisons on contextNamespace and
+ // contextName below are reference comparisons; presumably these
+ // @NsUri/@Local strings are interned - confirm.
+ if (contextNamespace == "http://www.w3.org/2000/svg") {
+ ElementName elementName = ElementName.SVG;
+ if ("title" == contextName || "desc" == contextName
+ || "foreignObject" == contextName) {
+ // These elements are all alike and we don't care about
+ // the exact name.
+ elementName = ElementName.FOREIGNOBJECT;
+ }
+ // This is the SVG variant of the StackNode constructor.
+ StackNode<T> node = new StackNode<T>(elementName,
+ elementName.camelCaseName, elt
+ // [NOCPP[
+ , errorHandler == null ? null
+ : new TaintableLocatorImpl(tokenizer)
+ // ]NOCPP]
+ );
+ currentPtr++;
+ stack[currentPtr] = node;
+ tokenizer.setState(Tokenizer.DATA);
+ // The frameset-ok flag is set even though <frameset> never
+ // ends up being allowed as HTML frameset in the fragment case.
+ mode = FRAMESET_OK;
+ } else if (contextNamespace == "http://www.w3.org/1998/Math/MathML") {
+ ElementName elementName = ElementName.MATH;
+ if ("mi" == contextName || "mo" == contextName
+ || "mn" == contextName || "ms" == contextName
+ || "mtext" == contextName) {
+ // These elements are all alike and we don't care about
+ // the exact name.
+ elementName = ElementName.MTEXT;
+ } else if ("annotation-xml" == contextName) {
+ elementName = ElementName.ANNOTATION_XML;
+ // Blink does not check the encoding attribute of the
+ // annotation-xml element innerHTML is being set on.
+ // Let's do the same at least until
+ // https://www.w3.org/Bugs/Public/show_bug.cgi?id=26783
+ // is resolved.
+ }
+ // This is the MathML variant of the StackNode constructor.
+ StackNode<T> node = new StackNode<T>(elementName, elt,
+ elementName.name, false
+ // [NOCPP[
+ , errorHandler == null ? null
+ : new TaintableLocatorImpl(tokenizer)
+ // ]NOCPP]
+ );
+ currentPtr++;
+ stack[currentPtr] = node;
+ tokenizer.setState(Tokenizer.DATA);
+ // The frameset-ok flag is set even though <frameset> never
+ // ends up being allowed as HTML frameset in the fragment case.
+ mode = FRAMESET_OK;
+ } else { // html
+ StackNode<T> node = new StackNode<T>(ElementName.HTML, elt
+ // [NOCPP[
+ , errorHandler == null ? null
+ : new TaintableLocatorImpl(tokenizer)
+ // ]NOCPP]
+ );
+ currentPtr++;
+ stack[currentPtr] = node;
+ if ("template" == contextName) {
+ pushTemplateMode(IN_TEMPLATE);
+ }
+ resetTheInsertionMode();
+ formPointer = getFormPointerForContext(contextNode);
+ // Pick the tokenizer state that matches the content model of
+ // the context element.
+ if ("title" == contextName || "textarea" == contextName) {
+ tokenizer.setState(Tokenizer.RCDATA);
+ } else if ("style" == contextName || "xmp" == contextName
+ || "iframe" == contextName || "noembed" == contextName
+ || "noframes" == contextName
+ || (scriptingEnabled && "noscript" == contextName)) {
+ tokenizer.setState(Tokenizer.RAWTEXT);
+ } else if ("plaintext" == contextName) {
+ tokenizer.setState(Tokenizer.PLAINTEXT);
+ } else if ("script" == contextName) {
+ tokenizer.setState(Tokenizer.SCRIPT_DATA);
+ } else {
+ tokenizer.setState(Tokenizer.DATA);
+ }
+ }
+ // The context is consumed once the fragment setup is done.
+ contextName = null;
+ contextNode = null;
+ } else {
+ mode = INITIAL;
+ // If we are viewing XML source, put a foreign element permanently
+ // on the stack so that cdataSectionAllowed() returns true.
+ // CPPONLY: if (tokenizer.isViewingXmlSource()) {
+ // CPPONLY: T elt = createElement("http://www.w3.org/2000/svg",
+ // CPPONLY: "svg",
+ // CPPONLY: tokenizer.emptyAttributes(), null);
+ // CPPONLY: StackNode<T> node = new StackNode<T>(ElementName.SVG,
+ // CPPONLY: "svg",
+ // CPPONLY: elt);
+ // CPPONLY: currentPtr++;
+ // CPPONLY: stack[currentPtr] = node;
+ // CPPONLY: }
+ }
+ }
+
+ /**
+ * Handles a DOCTYPE token. When the builder is not in foreign content and
+ * the insertion mode is INITIAL, the doctype is appended to the document
+ * (if doctype reporting is on), the document mode (quirks, almost
+ * standards or standards) is determined and reported according to the
+ * configured doctype expectation, and the insertion mode becomes
+ * BEFORE_HTML. In every other situation the token is reported as a stray
+ * doctype and ignored.
+ *
+ * @param name
+ *            the doctype name, may be <code>null</code>
+ * @param publicIdentifier
+ *            the public identifier, may be <code>null</code>
+ * @param systemIdentifier
+ *            the system identifier, may be <code>null</code>
+ * @param forceQuirks
+ *            whether the tokenizer requested quirks mode
+ * @throws SAXException
+ */
+ public final void doctype(@Local String name, String publicIdentifier,
+ String systemIdentifier, boolean forceQuirks) throws SAXException {
+ needToDropLF = false;
+ if (!isInForeign() && mode == INITIAL) {
+ // [NOCPP[
+ if (reportingDoctype) {
+ // ]NOCPP]
+ String emptyString = Portability.newEmptyString();
+ appendDoctypeToDocument(name == null ? "" : name,
+ publicIdentifier == null ? emptyString
+ : publicIdentifier,
+ systemIdentifier == null ? emptyString
+ : systemIdentifier);
+ Portability.releaseString(emptyString);
+ // [NOCPP[
+ }
+ switch (doctypeExpectation) {
+ case HTML:
+ // ]NOCPP]
+ if (isQuirky(name, publicIdentifier, systemIdentifier,
+ forceQuirks)) {
+ errQuirkyDoctype();
+ documentModeInternal(DocumentMode.QUIRKS_MODE,
+ publicIdentifier, systemIdentifier, false);
+ } else if (isAlmostStandards(publicIdentifier,
+ systemIdentifier)) {
+ // [NOCPP[
+ if (firstCommentLocation != null) {
+ warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
+ firstCommentLocation);
+ }
+ // ]NOCPP]
+ errAlmostStandardsDoctype();
+ documentModeInternal(
+ DocumentMode.ALMOST_STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, false);
+ } else {
+ // [NOCPP[
+ if (firstCommentLocation != null) {
+ warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
+ firstCommentLocation);
+ }
+ if ((Portability.literalEqualsString(
+ "-//W3C//DTD HTML 4.0//EN", publicIdentifier) && (systemIdentifier == null || Portability.literalEqualsString(
+ "http://www.w3.org/TR/REC-html40/strict.dtd",
+ systemIdentifier)))
+ || (Portability.literalEqualsString(
+ "-//W3C//DTD HTML 4.01//EN",
+ publicIdentifier) && (systemIdentifier == null || Portability.literalEqualsString(
+ "http://www.w3.org/TR/html4/strict.dtd",
+ systemIdentifier)))
+ || (Portability.literalEqualsString(
+ "-//W3C//DTD XHTML 1.0 Strict//EN",
+ publicIdentifier) && Portability.literalEqualsString(
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd",
+ systemIdentifier))
+ || (Portability.literalEqualsString(
+ "-//W3C//DTD XHTML 1.1//EN",
+ publicIdentifier) && Portability.literalEqualsString(
+ "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd",
+ systemIdentifier))
+
+ ) {
+ warn("Obsolete doctype. Expected \u201C<!DOCTYPE html>\u201D.");
+ } else if (!((systemIdentifier == null || Portability.literalEqualsString(
+ "about:legacy-compat", systemIdentifier)) && publicIdentifier == null)) {
+ err("Legacy doctype. Expected \u201C<!DOCTYPE html>\u201D.");
+ }
+ // ]NOCPP]
+ documentModeInternal(DocumentMode.STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, false);
+ }
+ // [NOCPP[
+ break;
+ case HTML401_STRICT:
+ html4 = true;
+ tokenizer.turnOnAdditionalHtml4Errors();
+ if (isQuirky(name, publicIdentifier, systemIdentifier,
+ forceQuirks)) {
+ err("Quirky doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
+ documentModeInternal(DocumentMode.QUIRKS_MODE,
+ publicIdentifier, systemIdentifier, true);
+ } else if (isAlmostStandards(publicIdentifier,
+ systemIdentifier)) {
+ if (firstCommentLocation != null) {
+ warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
+ firstCommentLocation);
+ }
+ err("Almost standards mode doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
+ documentModeInternal(
+ DocumentMode.ALMOST_STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, true);
+ } else {
+ if (firstCommentLocation != null) {
+ warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
+ firstCommentLocation);
+ }
+ if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) {
+ if (!"http://www.w3.org/TR/html4/strict.dtd".equals(systemIdentifier)) {
+ warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
+ }
+ } else {
+ err("The doctype was not the HTML 4.01 Strict doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
+ }
+ documentModeInternal(DocumentMode.STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, true);
+ }
+ break;
+ case HTML401_TRANSITIONAL:
+ html4 = true;
+ tokenizer.turnOnAdditionalHtml4Errors();
+ if (isQuirky(name, publicIdentifier, systemIdentifier,
+ forceQuirks)) {
+ err("Quirky doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
+ documentModeInternal(DocumentMode.QUIRKS_MODE,
+ publicIdentifier, systemIdentifier, true);
+ } else if (isAlmostStandards(publicIdentifier,
+ systemIdentifier)) {
+ if (firstCommentLocation != null) {
+ warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
+ firstCommentLocation);
+ }
+ if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)
+ && systemIdentifier != null) {
+ if (!"http://www.w3.org/TR/html4/loose.dtd".equals(systemIdentifier)) {
+ warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
+ }
+ } else {
+ err("The doctype was not a non-quirky HTML 4.01 Transitional doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
+ }
+ documentModeInternal(
+ DocumentMode.ALMOST_STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, true);
+ } else {
+ if (firstCommentLocation != null) {
+ warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
+ firstCommentLocation);
+ }
+ err("The doctype was not the HTML 4.01 Transitional doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
+ documentModeInternal(DocumentMode.STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, true);
+ }
+ break;
+ case AUTO:
+ html4 = isHtml4Doctype(publicIdentifier);
+ if (html4) {
+ tokenizer.turnOnAdditionalHtml4Errors();
+ }
+ if (isQuirky(name, publicIdentifier, systemIdentifier,
+ forceQuirks)) {
+ err("Quirky doctype. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
+ documentModeInternal(DocumentMode.QUIRKS_MODE,
+ publicIdentifier, systemIdentifier, html4);
+ } else if (isAlmostStandards(publicIdentifier,
+ systemIdentifier)) {
+ if (firstCommentLocation != null) {
+ warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
+ firstCommentLocation);
+ }
+ if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)) {
+ if (!"http://www.w3.org/TR/html4/loose.dtd".equals(systemIdentifier)) {
+ warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
+ }
+ } else {
+ err("Almost standards mode doctype. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
+ }
+ documentModeInternal(
+ DocumentMode.ALMOST_STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, html4);
+ } else {
+ if (firstCommentLocation != null) {
+ warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
+ firstCommentLocation);
+ }
+ if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) {
+ if (!"http://www.w3.org/TR/html4/strict.dtd".equals(systemIdentifier)) {
+ warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
+ }
+ } else if ("-//W3C//DTD XHTML 1.0 Strict//EN".equals(publicIdentifier)) {
+ if (!"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd".equals(systemIdentifier)) {
+ warn("The doctype did not contain the system identifier prescribed by the XHTML 1.0 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\u201D.");
+ }
+ // The XHTML 1.1 public identifier starts with "-//", like the
+ // other W3C identifiers compared in this method; without the
+ // leading hyphen a real XHTML 1.1 doctype never matched here.
+ } else if ("-//W3C//DTD XHTML 1.1//EN".equals(publicIdentifier)) {
+ if (!"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd".equals(systemIdentifier)) {
+ warn("The doctype did not contain the system identifier prescribed by the XHTML 1.1 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\u201D.");
+ }
+ } else if (!((systemIdentifier == null || Portability.literalEqualsString(
+ "about:legacy-compat", systemIdentifier)) && publicIdentifier == null)) {
+ err("Unexpected doctype. Expected, e.g., \u201C<!DOCTYPE html>\u201D.");
+ }
+ documentModeInternal(DocumentMode.STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, html4);
+ }
+ break;
+ case NO_DOCTYPE_ERRORS:
+ if (isQuirky(name, publicIdentifier, systemIdentifier,
+ forceQuirks)) {
+ documentModeInternal(DocumentMode.QUIRKS_MODE,
+ publicIdentifier, systemIdentifier, false);
+ } else if (isAlmostStandards(publicIdentifier,
+ systemIdentifier)) {
+ documentModeInternal(
+ DocumentMode.ALMOST_STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, false);
+ } else {
+ documentModeInternal(DocumentMode.STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, false);
+ }
+ break;
+ }
+ // ]NOCPP]
+
+ /*
+ *
+ * Then, switch to the root element mode of the tree construction
+ * stage.
+ */
+ mode = BEFORE_HTML;
+ return;
+ }
+ /*
+ * A DOCTYPE token Parse error.
+ */
+ errStrayDoctype();
+ /*
+ * Ignore the token.
+ */
+ return;
+ }
+
+ // [NOCPP[
+
+ /**
+  * Tells whether the given public identifier is one of the entries of
+  * <code>HTML4_PUBLIC_IDS</code>. The lookup is a binary search.
+  *
+  * @param publicIdentifier
+  *            the public identifier from the doctype, may be
+  *            <code>null</code>
+  * @return <code>true</code> if the identifier was found,
+  *         <code>false</code> otherwise (including for <code>null</code>)
+  */
+ private boolean isHtml4Doctype(String publicIdentifier) {
+     if (publicIdentifier == null) {
+         return false;
+     }
+     int index = Arrays.binarySearch(TreeBuilder.HTML4_PUBLIC_IDS,
+             publicIdentifier);
+     return index >= 0;
+ }
+
+ // ]NOCPP]
+
+ /**
+  * Handles a comment token. Outside foreign content the comment goes to
+  * the document in the INITIAL, BEFORE_HTML, AFTER_AFTER_BODY and
+  * AFTER_AFTER_FRAMESET modes and to the first element on the stack in
+  * the AFTER_BODY mode; in every other case it is appended to the
+  * current node.
+  *
+  * @param buf
+  *            buffer holding the comment data
+  * @param start
+  *            offset of the first character of the comment in the buffer
+  * @param length
+  *            number of characters in the comment
+  * @throws SAXException
+  */
+ public final void comment(@NoLength char[] buf, int start, int length)
+         throws SAXException {
+     needToDropLF = false;
+     // [NOCPP[
+     if (firstCommentLocation == null) {
+         // Remember where the first comment was seen.
+         firstCommentLocation = new LocatorImpl(tokenizer);
+     }
+     if (!wantingComments) {
+         return;
+     }
+     // ]NOCPP]
+     if (!isInForeign()) {
+         if (mode == INITIAL || mode == BEFORE_HTML
+                 || mode == AFTER_AFTER_BODY
+                 || mode == AFTER_AFTER_FRAMESET) {
+             /*
+              * Append a Comment node to the Document object with the data
+              * attribute set to the data given in the comment token.
+              */
+             appendCommentToDocument(buf, start, length);
+             return;
+         }
+         if (mode == AFTER_BODY) {
+             /*
+              * Append a Comment node to the first element in the stack of
+              * open elements (the html element), with the data attribute
+              * set to the data given in the comment token.
+              */
+             flushCharacters();
+             appendComment(stack[0].node, buf, start, length);
+             return;
+         }
+     }
+     /*
+      * Append a Comment node to the current node with the data attribute
+      * set to the data given in the comment token.
+      */
+     flushCharacters();
+     appendComment(stack[currentPtr].node, buf, start, length);
+ }
+
+ /**
+ * Handles a run of character tokens. A pending "drop LF" request is
+ * honored first. The IN_BODY, IN_CELL, IN_CAPTION, TEXT and table modes
+ * are handled as a whole-run fast path; in every other mode the run is
+ * scanned character by character and dispatched on the insertion mode,
+ * with <code>i--; continue;</code> used to reprocess a character after a
+ * mode change.
+ *
+ * @param buf
+ *            buffer holding the characters
+ * @param start
+ *            offset of the first character of the run in the buffer
+ * @param length
+ *            number of characters in the run
+ * @throws SAXException
+ * @see nu.validator.htmlparser.common.TokenHandler#characters(char[], int,
+ * int)
+ */
+ public final void characters(@Const @NoLength char[] buf, int start, int length)
+ throws SAXException {
+ // Note: Can't attach error messages to EOF in C++ yet
+
+ // CPPONLY: if (tokenizer.isViewingXmlSource()) {
+ // CPPONLY: return;
+ // CPPONLY: }
+ if (needToDropLF) {
+ needToDropLF = false;
+ if (buf[start] == '\n') {
+ start++;
+ length--;
+ if (length == 0) {
+ return;
+ }
+ }
+ }
+
+ // optimize the most common case
+ switch (mode) {
+ case IN_BODY:
+ case IN_CELL:
+ case IN_CAPTION:
+ if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) {
+ reconstructTheActiveFormattingElements();
+ }
+ // fall through
+ case TEXT:
+ accumulateCharacters(buf, start, length);
+ return;
+ case IN_TABLE:
+ case IN_TABLE_BODY:
+ case IN_ROW:
+ accumulateCharactersForced(buf, start, length);
+ return;
+ default:
+ int end = start + length;
+ charactersloop: for (int i = start; i < end; i++) {
+ switch (buf[i]) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ case '\u000C':
+ /*
+ * A character token that is one of U+0009 CHARACTER
+ * TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED
+ * (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+ */
+ switch (mode) {
+ case INITIAL:
+ case BEFORE_HTML:
+ case BEFORE_HEAD:
+ /*
+ * Ignore the token.
+ */
+ start = i + 1;
+ continue;
+ case IN_HEAD:
+ case IN_HEAD_NOSCRIPT:
+ case AFTER_HEAD:
+ case IN_COLUMN_GROUP:
+ case IN_FRAMESET:
+ case AFTER_FRAMESET:
+ /*
+ * Append the character to the current node.
+ */
+ continue;
+ case FRAMESET_OK:
+ case IN_TEMPLATE:
+ case IN_BODY:
+ case IN_CELL:
+ case IN_CAPTION:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ start = i;
+ }
+
+ /*
+ * Reconstruct the active formatting
+ * elements, if any.
+ */
+ if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) {
+ flushCharacters();
+ reconstructTheActiveFormattingElements();
+ }
+ /*
+ * Append the token's character to the
+ * current node.
+ */
+ break charactersloop;
+ case IN_SELECT:
+ case IN_SELECT_IN_TABLE:
+ break charactersloop;
+ case IN_TABLE:
+ case IN_TABLE_BODY:
+ case IN_ROW:
+ accumulateCharactersForced(buf, i, 1);
+ start = i + 1;
+ continue;
+ case AFTER_BODY:
+ case AFTER_AFTER_BODY:
+ case AFTER_AFTER_FRAMESET:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ start = i;
+ }
+ /*
+ * Reconstruct the active formatting
+ * elements, if any.
+ */
+ flushCharacters();
+ reconstructTheActiveFormattingElements();
+ /*
+ * Append the token's character to the
+ * current node.
+ */
+ continue;
+ }
+ // A whitespace character in a mode not listed above falls
+ // through to the non-whitespace handling below.
+ default:
+ /*
+ * A character token that is not one of U+0009
+ * CHARACTER TABULATION, U+000A LINE FEED (LF), U+000C
+ * FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or
+ * U+0020 SPACE
+ */
+ switch (mode) {
+ case INITIAL:
+ /*
+ * Parse error.
+ */
+ // [NOCPP[
+ switch (doctypeExpectation) {
+ case AUTO:
+ err("Non-space characters found without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
+ break;
+ case HTML:
+ // XXX figure out a way to report this in the Gecko View Source case
+ err("Non-space characters found without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D.");
+ break;
+ case HTML401_STRICT:
+ err("Non-space characters found without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
+ break;
+ case HTML401_TRANSITIONAL:
+ err("Non-space characters found without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
+ break;
+ case NO_DOCTYPE_ERRORS:
+ }
+ // ]NOCPP]
+ /*
+ *
+ * Set the document to quirks mode.
+ */
+ documentModeInternal(
+ DocumentMode.QUIRKS_MODE, null,
+ null, false);
+ /*
+ * Then, switch to the root element mode of
+ * the tree construction stage
+ */
+ mode = BEFORE_HTML;
+ /*
+ * and reprocess the current token.
+ */
+ i--;
+ continue;
+ case BEFORE_HTML:
+ /*
+ * Create an HTMLElement node with the tag
+ * name html, in the HTML namespace. Append
+ * it to the Document object.
+ */
+ // No need to flush characters here,
+ // because there's nothing to flush.
+ appendHtmlElementToDocumentAndPush();
+ /* Switch to the main mode */
+ mode = BEFORE_HEAD;
+ /*
+ * reprocess the current token.
+ */
+ i--;
+ continue;
+ case BEFORE_HEAD:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ start = i;
+ }
+ /*
+ * Act as if a start tag token with the tag
+ * name "head" and no attributes had been
+ * seen,
+ */
+ flushCharacters();
+ appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES);
+ mode = IN_HEAD;
+ /*
+ * then reprocess the current token.
+ *
+ * This will result in an empty head element
+ * being generated, with the current token
+ * being reprocessed in the "after head"
+ * insertion mode.
+ */
+ i--;
+ continue;
+ case IN_HEAD:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ start = i;
+ }
+ /*
+ * Act as if an end tag token with the tag
+ * name "head" had been seen,
+ */
+ flushCharacters();
+ pop();
+ mode = AFTER_HEAD;
+ /*
+ * and reprocess the current token.
+ */
+ i--;
+ continue;
+ case IN_HEAD_NOSCRIPT:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ start = i;
+ }
+ /*
+ * Parse error. Act as if an end tag with
+ * the tag name "noscript" had been seen
+ */
+ errNonSpaceInNoscriptInHead();
+ flushCharacters();
+ pop();
+ mode = IN_HEAD;
+ /*
+ * and reprocess the current token.
+ */
+ i--;
+ continue;
+ case AFTER_HEAD:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ start = i;
+ }
+ /*
+ * Act as if a start tag token with the tag
+ * name "body" and no attributes had been
+ * seen,
+ */
+ flushCharacters();
+ appendToCurrentNodeAndPushBodyElement();
+ mode = FRAMESET_OK;
+ /*
+ * and then reprocess the current token.
+ */
+ i--;
+ continue;
+ case FRAMESET_OK:
+ framesetOk = false;
+ mode = IN_BODY;
+ i--;
+ continue;
+ case IN_TEMPLATE:
+ case IN_BODY:
+ case IN_CELL:
+ case IN_CAPTION:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ start = i;
+ }
+ /*
+ * Reconstruct the active formatting
+ * elements, if any.
+ */
+ if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) {
+ flushCharacters();
+ reconstructTheActiveFormattingElements();
+ }
+ /*
+ * Append the token's character to the
+ * current node.
+ */
+ break charactersloop;
+ case IN_TABLE:
+ case IN_TABLE_BODY:
+ case IN_ROW:
+ accumulateCharactersForced(buf, i, 1);
+ start = i + 1;
+ continue;
+ case IN_COLUMN_GROUP:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ start = i;
+ }
+ /*
+ * Act as if an end tag with the tag name
+ * "colgroup" had been seen, and then, if
+ * that token wasn't ignored, reprocess the
+ * current token.
+ */
+ if (currentPtr == 0 || stack[currentPtr].getGroup() ==
+ TreeBuilder.TEMPLATE) {
+ errNonSpaceInColgroupInFragment();
+ start = i + 1;
+ continue;
+ }
+ flushCharacters();
+ pop();
+ mode = IN_TABLE;
+ i--;
+ continue;
+ case IN_SELECT:
+ case IN_SELECT_IN_TABLE:
+ break charactersloop;
+ case AFTER_BODY:
+ errNonSpaceAfterBody();
+ fatal();
+ mode = framesetOk ? FRAMESET_OK : IN_BODY;
+ i--;
+ continue;
+ case IN_FRAMESET:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ // start index is adjusted below.
+ }
+ /*
+ * Parse error.
+ */
+ errNonSpaceInFrameset();
+ /*
+ * Ignore the token.
+ */
+ start = i + 1;
+ continue;
+ case AFTER_FRAMESET:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ // start index is adjusted below.
+ }
+ /*
+ * Parse error.
+ */
+ errNonSpaceAfterFrameset();
+ /*
+ * Ignore the token.
+ */
+ start = i + 1;
+ continue;
+ case AFTER_AFTER_BODY:
+ /*
+ * Parse error.
+ */
+ errNonSpaceInTrailer();
+ /*
+ * Switch back to the main mode and
+ * reprocess the token.
+ */
+ mode = framesetOk ? FRAMESET_OK : IN_BODY;
+ i--;
+ continue;
+ case AFTER_AFTER_FRAMESET:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ // start index is adjusted below.
+ }
+ /*
+ * Parse error.
+ */
+ errNonSpaceInTrailer();
+ /*
+ * Ignore the token.
+ */
+ start = i + 1;
+ continue;
+ }
+ }
+ }
+ // Accumulate whatever part of the run has not been consumed yet.
+ if (start < end) {
+ accumulateCharacters(buf, start, end - start);
+ }
+ }
+ }
+
+ /**
+ * @see nu.validator.htmlparser.common.TokenHandler#zeroOriginatingReplacementCharacter()
+ */
+ public void zeroOriginatingReplacementCharacter() throws SAXException {
+ if (mode == TEXT) {
+ accumulateCharacters(REPLACEMENT_CHARACTER, 0, 1);
+ return;
+ }
+ if (currentPtr >= 0) {
+ if (isSpecialParentInForeign(stack[currentPtr])) {
+ return;
+ }
+ accumulateCharacters(REPLACEMENT_CHARACTER, 0, 1);
+ }
+ }
+ /**
+ * Processes the end-of-file token.
+ * <p>
+ * Pending characters are flushed first. The method then loops on the
+ * current insertion mode: some modes adjust the stack, assign a new value
+ * to <code>mode</code> and <code>continue</code>, so that EOF is reprocessed
+ * in the new mode; the remaining modes leave the loop with
+ * <code>break eofloop</code>. Once the loop has ended, every stack entry
+ * above index 0 is popped with <code>popOnEof()</code>, and the entry at
+ * index 0 is popped as well unless <code>fragment</code> is set.
+ * <p>
+ * The regions between the NOCPP markers only report errors; they do not
+ * change the stack or the mode.
+ *
+ * @throws SAXException
+ *             declared by this method; presumably propagated from the
+ *             error-reporting and tree-building helpers called here
+ */
+ public final void eof() throws SAXException {
+ flushCharacters(); // flush pending character data before handling EOF
+ // Note: Can't attach error messages to EOF in C++ yet
+ eofloop: for (;;) { // one pass per mode; "continue" reprocesses EOF in the newly assigned mode
+ switch (mode) {
+ case INITIAL:
+ /*
+ * Parse error.
+ */
+ // [NOCPP[
+ switch (doctypeExpectation) {
+ case AUTO:
+ err("End of file seen without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
+ break;
+ case HTML:
+ err("End of file seen without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D.");
+ break;
+ case HTML401_STRICT:
+ err("End of file seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
+ break;
+ case HTML401_TRANSITIONAL:
+ err("End of file seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
+ break;
+ case NO_DOCTYPE_ERRORS: // no message is emitted for this expectation
+ }
+ // ]NOCPP]
+ /*
+ *
+ * Set the document to quirks mode.
+ */
+ documentModeInternal(DocumentMode.QUIRKS_MODE, null, null,
+ false);
+ /*
+ * Then, switch to the root element mode of the tree
+ * construction stage
+ */
+ mode = BEFORE_HTML;
+ /*
+ * and reprocess the current token.
+ */
+ continue;
+ case BEFORE_HTML:
+ /*
+ * Create an HTMLElement node with the tag name html, in the
+ * HTML namespace. Append it to the Document object.
+ */
+ appendHtmlElementToDocumentAndPush();
+ // XXX application cache manifest
+ /* Switch to the main mode */
+ mode = BEFORE_HEAD;
+ /*
+ * reprocess the current token.
+ */
+ continue;
+ case BEFORE_HEAD:
+ appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES); // head is created with no attributes
+ mode = IN_HEAD;
+ continue;
+ case IN_HEAD:
+ // [NOCPP[
+ if (errorHandler != null && currentPtr > 1) { // report only when something sits above index 1
+ errEofWithUnclosedElements();
+ }
+ // ]NOCPP]
+ while (currentPtr > 0) { // pop every entry above index 0
+ popOnEof();
+ }
+ mode = AFTER_HEAD;
+ continue;
+ case IN_HEAD_NOSCRIPT:
+ // [NOCPP[
+ errEofWithUnclosedElements();
+ // ]NOCPP]
+ while (currentPtr > 1) { // pop down to index 1 (presumably the head element; confirm)
+ popOnEof();
+ }
+ mode = IN_HEAD;
+ continue;
+ case AFTER_HEAD:
+ appendToCurrentNodeAndPushBodyElement();
+ mode = IN_BODY;
+ continue;
+ case IN_TABLE_BODY:
+ case IN_ROW:
+ case IN_TABLE:
+ case IN_SELECT_IN_TABLE:
+ case IN_SELECT:
+ case IN_COLUMN_GROUP:
+ case FRAMESET_OK:
+ case IN_CAPTION:
+ case IN_CELL:
+ case IN_BODY:
+ // [NOCPP[
+ // i > 0 to stop in time in the foreign fragment case.
+ openelementloop: for (int i = currentPtr; i > 0; i--) { // scan from the top of the stack down to index 1
+ int group = stack[i].getGroup();
+ switch (group) {
+ case DD_OR_DT:
+ case LI:
+ case P:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TD_OR_TH:
+ case BODY:
+ case HTML:
+ break; // these groups produce no error here; keep scanning
+ default:
+ errEofWithUnclosedElements(); // first entry of any other group: report once and stop scanning
+ break openelementloop;
+ }
+ }
+ // ]NOCPP]
+
+ if (isTemplateModeStackEmpty()) { // empty template mode stack: leave the mode loop
+ break eofloop;
+ }
+
+ // fall through to IN_TEMPLATE
+ case IN_TEMPLATE:
+ int eltPos = findLast("template");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { // no "template" entry found on the stack
+ assert fragment;
+ break eofloop;
+ }
+ if (errorHandler != null) {
+ errUnclosedElements(eltPos, "template");
+ }
+ while (currentPtr >= eltPos) { // pop up to and including the entry at eltPos
+ pop();
+ }
+ clearTheListOfActiveFormattingElementsUpToTheLastMarker();
+ popTemplateMode();
+ resetTheInsertionMode();
+
+ // Reprocess token.
+ continue;
+ case TEXT:
+ // [NOCPP[
+ if (errorHandler != null) {
+ errNoCheck("End of file seen when expecting text or an end tag.");
+ errListUnclosedStartTags(0);
+ }
+ // ]NOCPP]
+ // XXX mark script as already executed
+ if (originalMode == AFTER_HEAD) { // one extra pop in this case (presumably a re-pushed head; confirm)
+ popOnEof();
+ }
+ popOnEof();
+ mode = originalMode; // continue EOF handling in the mode saved in originalMode
+ continue;
+ case IN_FRAMESET:
+ // [NOCPP[
+ if (errorHandler != null && currentPtr > 0) {
+ errEofWithUnclosedElements();
+ }
+ // ]NOCPP]
+ break eofloop;
+ case AFTER_BODY:
+ case AFTER_FRAMESET:
+ case AFTER_AFTER_BODY:
+ case AFTER_AFTER_FRAMESET:
+ default:
+ // [NOCPP[
+ if (currentPtr == 0) { // This silliness is here to poison
+ // buggy compiler optimizations in
+ // GWT
+ System.currentTimeMillis();
+ }
+ // ]NOCPP]
+ break eofloop;
+ }
+ }
+ while (currentPtr > 0) { // after the mode loop: pop every entry above index 0
+ popOnEof();
+ }
+ if (!fragment) { // the entry at index 0 is popped only when fragment is false
+ popOnEof();
+ }
+ /* Stop parsing. */
+ }
+
+ /**
+  * Tears down the state held by this builder once tokenization is over.
+  * <p>
+  * The form, head, surrogate-parent and template-mode references are
+  * cleared; every node still on the stack and every non-null entry in the
+  * list of active formatting elements is released; both arrays and the
+  * character buffer are dropped; and finally end() is called.
+  *
+  * @throws SAXException
+  *             declared by this method; presumably propagated from end()
+  * @see nu.validator.htmlparser.common.TokenHandler#endTokenization()
+  */
+ public final void endTokenization() throws SAXException {
+     formPointer = null;
+     headPointer = null;
+     deepTreeSurrogateParent = null;
+     templateModeStack = null;
+     if (stack != null) {
+         // Release from the top of the stack down to index 0; currentPtr
+         // ends at -1.
+         for (; currentPtr > -1; currentPtr--) {
+             stack[currentPtr].release();
+         }
+         stack = null;
+     }
+     if (listOfActiveFormattingElements != null) {
+         for (; listPtr > -1; listPtr--) {
+             StackNode<T> entry = listOfActiveFormattingElements[listPtr];
+             // Null slots are skipped (presumably markers; confirm).
+             if (entry != null) {
+                 entry.release();
+             }
+         }
+         listOfActiveFormattingElements = null;
+     }
+     // [NOCPP[
+     idLocations.clear();
+     // ]NOCPP]
+     charBuffer = null;
+     end();
+ }
+
+ public final void startTag(ElementName elementName,
+ HtmlAttributes attributes, boolean selfClosing) throws SAXException {
+ flushCharacters();
+
+ // [NOCPP[
+ if (errorHandler != null) {
+ // ID uniqueness
+ @IdType String id = attributes.getId();
+ if (id != null) {
+ LocatorImpl oldLoc = idLocations.get(id);
+ if (oldLoc != null) {
+ err("Duplicate ID \u201C" + id + "\u201D.");
+ errorHandler.warning(new SAXParseException(
+ "The first occurrence of ID \u201C" + id
+ + "\u201D was here.", oldLoc));
+ } else {
+ idLocations.put(id, new LocatorImpl(tokenizer));
+ }
+ }
+ }
+ // ]NOCPP]
+
+ int eltPos;
+ needToDropLF = false;
+ starttagloop: for (;;) {
+ int group = elementName.getGroup();
+ @Local String name = elementName.name;
+ if (isInForeign()) {
+ StackNode<T> currentNode = stack[currentPtr];
+ @NsUri String currNs = currentNode.ns;
+ if (!(currentNode.isHtmlIntegrationPoint() || (currNs == "http://www.w3.org/1998/Math/MathML" && ((currentNode.getGroup() == MI_MO_MN_MS_MTEXT && group != MGLYPH_OR_MALIGNMARK) || (currentNode.getGroup() == ANNOTATION_XML && group == SVG))))) {
+ switch (group) {
+ case B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U:
+ case DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU:
+ case BODY:
+ case BR:
+ case RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR:
+ case DD_OR_DT:
+ case UL_OR_OL_OR_DL:
+ case EMBED:
+ case IMG:
+ case H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6:
+ case HEAD:
+ case HR:
+ case LI:
+ case META:
+ case NOBR:
+ case P:
+ case PRE_OR_LISTING:
+ case TABLE:
+ case FONT:
+ // re-check FONT to deal with the special case
+ if (!(group == FONT && !(attributes.contains(AttributeName.COLOR)
+ || attributes.contains(AttributeName.FACE) || attributes.contains(AttributeName.SIZE)))) {
+ errHtmlStartTagInForeignContext(name);
+ if (!fragment) {
+ while (!isSpecialParentInForeign(stack[currentPtr])) {
+ pop();
+ }
+ continue starttagloop;
+ } // else fall thru
+ }
+ // else fall thru
+ default:
+ if ("http://www.w3.org/2000/svg" == currNs) {
+ attributes.adjustForSvg();
+ if (selfClosing) {
+ appendVoidElementToCurrentMayFosterSVG(
+ elementName, attributes);
+ selfClosing = false;
+ } else {
+ appendToCurrentNodeAndPushElementMayFosterSVG(
+ elementName, attributes);
+ }
+ attributes = null; // CPP
+ break starttagloop;
+ } else {
+ attributes.adjustForMath();
+ if (selfClosing) {
+ appendVoidElementToCurrentMayFosterMathML(
+ elementName, attributes);
+ selfClosing = false;
+ } else {
+ appendToCurrentNodeAndPushElementMayFosterMathML(
+ elementName, attributes);
+ }
+ attributes = null; // CPP
+ break starttagloop;
+ }
+ } // switch
+ } // foreignObject / annotation-xml
+ }
+ switch (mode) {
+ case IN_TEMPLATE:
+ switch (group) {
+ case COL:
+ popTemplateMode();
+ pushTemplateMode(IN_COLUMN_GROUP);
+ mode = IN_COLUMN_GROUP;
+ // Reprocess token.
+ continue;
+ case CAPTION:
+ case COLGROUP:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ popTemplateMode();
+ pushTemplateMode(IN_TABLE);
+ mode = IN_TABLE;
+ // Reprocess token.
+ continue;
+ case TR:
+ popTemplateMode();
+ pushTemplateMode(IN_TABLE_BODY);
+ mode = IN_TABLE_BODY;
+ // Reprocess token.
+ continue;
+ case TD_OR_TH:
+ popTemplateMode();
+ pushTemplateMode(IN_ROW);
+ mode = IN_ROW;
+ // Reprocess token.
+ continue;
+ case META:
+ checkMetaCharset(attributes);
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case TITLE:
+ startTagTitleInHead(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case BASE:
+ case LINK_OR_BASEFONT_OR_BGSOUND:
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case SCRIPT:
+ startTagScriptInHead(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case NOFRAMES:
+ case STYLE:
+ startTagGenericRawText(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case TEMPLATE:
+ startTagTemplateInHead(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ popTemplateMode();
+ pushTemplateMode(IN_BODY);
+ mode = IN_BODY;
+ // Reprocess token.
+ continue;
+ }
+ case IN_ROW:
+ switch (group) {
+ case TD_OR_TH:
+ clearStackBackTo(findLastOrRoot(TreeBuilder.TR));
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ mode = IN_CELL;
+ insertMarker();
+ attributes = null; // CPP
+ break starttagloop;
+ case CAPTION:
+ case COL:
+ case COLGROUP:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TR:
+ eltPos = findLastOrRoot(TreeBuilder.TR);
+ if (eltPos == 0) {
+ assert fragment || isTemplateContents();
+ errNoTableRowToClose();
+ break starttagloop;
+ }
+ clearStackBackTo(eltPos);
+ pop();
+ mode = IN_TABLE_BODY;
+ continue;
+ default:
+ // fall through to IN_TABLE
+ }
+ case IN_TABLE_BODY:
+ switch (group) {
+ case TR:
+ clearStackBackTo(findLastInTableScopeOrRootTemplateTbodyTheadTfoot());
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ mode = IN_ROW;
+ attributes = null; // CPP
+ break starttagloop;
+ case TD_OR_TH:
+ errStartTagInTableBody(name);
+ clearStackBackTo(findLastInTableScopeOrRootTemplateTbodyTheadTfoot());
+ appendToCurrentNodeAndPushElement(
+ ElementName.TR,
+ HtmlAttributes.EMPTY_ATTRIBUTES);
+ mode = IN_ROW;
+ continue;
+ case CAPTION:
+ case COL:
+ case COLGROUP:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ eltPos = findLastInTableScopeOrRootTemplateTbodyTheadTfoot();
+ if (eltPos == 0 || stack[eltPos].getGroup() == TEMPLATE) {
+ assert fragment || isTemplateContents();
+ errStrayStartTag(name);
+ break starttagloop;
+ } else {
+ clearStackBackTo(eltPos);
+ pop();
+ mode = IN_TABLE;
+ continue;
+ }
+ default:
+ // fall through to IN_TABLE
+ }
+ case IN_TABLE:
+ intableloop: for (;;) {
+ switch (group) {
+ case CAPTION:
+ clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE));
+ insertMarker();
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ mode = IN_CAPTION;
+ attributes = null; // CPP
+ break starttagloop;
+ case COLGROUP:
+ clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE));
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ mode = IN_COLUMN_GROUP;
+ attributes = null; // CPP
+ break starttagloop;
+ case COL:
+ clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE));
+ appendToCurrentNodeAndPushElement(
+ ElementName.COLGROUP,
+ HtmlAttributes.EMPTY_ATTRIBUTES);
+ mode = IN_COLUMN_GROUP;
+ continue starttagloop;
+ case TBODY_OR_THEAD_OR_TFOOT:
+ clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE));
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ mode = IN_TABLE_BODY;
+ attributes = null; // CPP
+ break starttagloop;
+ case TR:
+ case TD_OR_TH:
+ clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE));
+ appendToCurrentNodeAndPushElement(
+ ElementName.TBODY,
+ HtmlAttributes.EMPTY_ATTRIBUTES);
+ mode = IN_TABLE_BODY;
+ continue starttagloop;
+ case TEMPLATE:
+ // fall through to IN_HEAD
+ break intableloop;
+ case TABLE:
+ errTableSeenWhileTableOpen();
+ eltPos = findLastInTableScope(name);
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ assert fragment || isTemplateContents();
+ break starttagloop;
+ }
+ generateImpliedEndTags();
+ if (errorHandler != null && !isCurrent("table")) {
+ errNoCheckUnclosedElementsOnStack();
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ resetTheInsertionMode();
+ continue starttagloop;
+ case SCRIPT:
+ // XXX need to manage much more stuff
+ // here if
+ // supporting
+ // document.write()
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.SCRIPT_DATA, elementName);
+ attributes = null; // CPP
+ break starttagloop;
+ case STYLE:
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.RAWTEXT, elementName);
+ attributes = null; // CPP
+ break starttagloop;
+ case INPUT:
+ errStartTagInTable(name);
+ if (!Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+ "hidden",
+ attributes.getValue(AttributeName.TYPE))) {
+ break intableloop;
+ }
+ appendVoidElementToCurrent(
+ name, attributes,
+ formPointer);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case FORM:
+ if (formPointer != null || isTemplateContents()) {
+ errFormWhenFormOpen();
+ break starttagloop;
+ } else {
+ errStartTagInTable(name);
+ appendVoidFormToCurrent(attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ }
+ default:
+ errStartTagInTable(name);
+ // fall through to IN_BODY
+ break intableloop;
+ }
+ }
+ case IN_CAPTION:
+ switch (group) {
+ case CAPTION:
+ case COL:
+ case COLGROUP:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TR:
+ case TD_OR_TH:
+ errStrayStartTag(name);
+ eltPos = findLastInTableScope("caption");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ break starttagloop;
+ }
+ generateImpliedEndTags();
+ if (errorHandler != null && currentPtr != eltPos) {
+ errNoCheckUnclosedElementsOnStack();
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ clearTheListOfActiveFormattingElementsUpToTheLastMarker();
+ mode = IN_TABLE;
+ continue;
+ default:
+ // fall through to IN_BODY
+ }
+ case IN_CELL:
+ switch (group) {
+ case CAPTION:
+ case COL:
+ case COLGROUP:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TR:
+ case TD_OR_TH:
+ eltPos = findLastInTableScopeTdTh();
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errNoCellToClose();
+ break starttagloop;
+ } else {
+ closeTheCell(eltPos);
+ continue;
+ }
+ default:
+ // fall through to IN_BODY
+ }
+ case FRAMESET_OK:
+ switch (group) {
+ case FRAMESET:
+ if (mode == FRAMESET_OK) {
+ if (currentPtr == 0 || stack[1].getGroup() != BODY) {
+ assert fragment || isTemplateContents();
+ errStrayStartTag(name);
+ break starttagloop;
+ } else {
+ errFramesetStart();
+ detachFromParent(stack[1].node);
+ while (currentPtr > 0) {
+ pop();
+ }
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ mode = IN_FRAMESET;
+ attributes = null; // CPP
+ break starttagloop;
+ }
+ } else {
+ errStrayStartTag(name);
+ break starttagloop;
+ }
+ // NOT falling through!
+ case PRE_OR_LISTING:
+ case LI:
+ case DD_OR_DT:
+ case BUTTON:
+ case MARQUEE_OR_APPLET:
+ case OBJECT:
+ case TABLE:
+ case AREA_OR_WBR:
+ case BR:
+ case EMBED:
+ case IMG:
+ case INPUT:
+ case KEYGEN:
+ case HR:
+ case TEXTAREA:
+ case XMP:
+ case IFRAME:
+ case SELECT:
+ if (mode == FRAMESET_OK
+ && !(group == INPUT && Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+ "hidden",
+ attributes.getValue(AttributeName.TYPE)))) {
+ framesetOk = false;
+ mode = IN_BODY;
+ }
+ // fall through to IN_BODY
+ default:
+ // fall through to IN_BODY
+ }
+ case IN_BODY:
+ inbodyloop: for (;;) {
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case BASE:
+ case LINK_OR_BASEFONT_OR_BGSOUND:
+ case META:
+ case STYLE:
+ case SCRIPT:
+ case TITLE:
+ case TEMPLATE:
+ // Fall through to IN_HEAD
+ break inbodyloop;
+ case BODY:
+ if (currentPtr == 0 || stack[1].getGroup() != BODY || isTemplateContents()) {
+ assert fragment || isTemplateContents();
+ errStrayStartTag(name);
+ break starttagloop;
+ }
+ errFooSeenWhenFooOpen(name);
+ framesetOk = false;
+ if (mode == FRAMESET_OK) {
+ mode = IN_BODY;
+ }
+ if (addAttributesToBody(attributes)) {
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case P:
+ case DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU:
+ case UL_OR_OL_OR_DL:
+ case ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY:
+ implicitlyCloseP();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6:
+ implicitlyCloseP();
+ if (stack[currentPtr].getGroup() == H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6) {
+ errHeadingWhenHeadingOpen();
+ pop();
+ }
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case FIELDSET:
+ implicitlyCloseP();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes, formPointer);
+ attributes = null; // CPP
+ break starttagloop;
+ case PRE_OR_LISTING:
+ implicitlyCloseP();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ needToDropLF = true;
+ attributes = null; // CPP
+ break starttagloop;
+ case FORM:
+ if (formPointer != null && !isTemplateContents()) {
+ errFormWhenFormOpen();
+ break starttagloop;
+ } else {
+ implicitlyCloseP();
+ appendToCurrentNodeAndPushFormElementMayFoster(attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ }
+ case LI:
+ case DD_OR_DT:
+ eltPos = currentPtr;
+ for (;;) {
+ StackNode<T> node = stack[eltPos]; // weak
+ // ref
+ if (node.getGroup() == group) { // LI or
+ // DD_OR_DT
+ generateImpliedEndTagsExceptFor(node.name);
+ if (errorHandler != null
+ && eltPos != currentPtr) {
+ errUnclosedElementsImplied(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ break;
+ } else if (eltPos == 0 || (node.isSpecial()
+ && (node.ns != "http://www.w3.org/1999/xhtml"
+ || (node.name != "p"
+ && node.name != "address"
+ && node.name != "div")))) {
+ break;
+ }
+ eltPos--;
+ }
+ implicitlyCloseP();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case PLAINTEXT:
+ implicitlyCloseP();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.PLAINTEXT, elementName);
+ attributes = null; // CPP
+ break starttagloop;
+ case A:
+ int activeAPos = findInListOfActiveFormattingElementsContainsBetweenEndAndLastMarker("a");
+ if (activeAPos != -1) {
+ errFooSeenWhenFooOpen(name);
+ StackNode<T> activeA = listOfActiveFormattingElements[activeAPos];
+ activeA.retain();
+ adoptionAgencyEndTag("a");
+ removeFromStack(activeA);
+ activeAPos = findInListOfActiveFormattingElements(activeA);
+ if (activeAPos != -1) {
+ removeFromListOfActiveFormattingElements(activeAPos);
+ }
+ activeA.release();
+ }
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushFormattingElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U:
+ case FONT:
+ reconstructTheActiveFormattingElements();
+ maybeForgetEarlierDuplicateFormattingElement(elementName.name, attributes);
+ appendToCurrentNodeAndPushFormattingElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case NOBR:
+ reconstructTheActiveFormattingElements();
+ if (TreeBuilder.NOT_FOUND_ON_STACK != findLastInScope("nobr")) {
+ errFooSeenWhenFooOpen(name);
+ adoptionAgencyEndTag("nobr");
+ reconstructTheActiveFormattingElements();
+ }
+ appendToCurrentNodeAndPushFormattingElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case BUTTON:
+ eltPos = findLastInScope(name);
+ if (eltPos != TreeBuilder.NOT_FOUND_ON_STACK) {
+ errFooSeenWhenFooOpen(name);
+ generateImpliedEndTags();
+ if (errorHandler != null
+ && !isCurrent(name)) {
+ errUnclosedElementsImplied(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ continue starttagloop;
+ } else {
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes, formPointer);
+ attributes = null; // CPP
+ break starttagloop;
+ }
+ case OBJECT:
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes, formPointer);
+ insertMarker();
+ attributes = null; // CPP
+ break starttagloop;
+ case MARQUEE_OR_APPLET:
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ insertMarker();
+ attributes = null; // CPP
+ break starttagloop;
+ case TABLE:
+ // The only quirk. Blame Hixie and
+ // Acid2.
+ if (!quirks) {
+ implicitlyCloseP();
+ }
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ mode = IN_TABLE;
+ attributes = null; // CPP
+ break starttagloop;
+ case BR:
+ case EMBED:
+ case AREA_OR_WBR:
+ reconstructTheActiveFormattingElements();
+ // FALL THROUGH to PARAM_OR_SOURCE_OR_TRACK
+ // CPPONLY: case MENUITEM:
+ case PARAM_OR_SOURCE_OR_TRACK:
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case HR:
+ implicitlyCloseP();
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case IMAGE:
+ errImage();
+ elementName = ElementName.IMG;
+ continue starttagloop;
+ case IMG:
+ case KEYGEN:
+ case INPUT:
+ reconstructTheActiveFormattingElements();
+ appendVoidElementToCurrentMayFoster(
+ name, attributes,
+ formPointer);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case ISINDEX:
+ errIsindex();
+ if (formPointer != null && !isTemplateContents()) {
+ break starttagloop;
+ }
+ implicitlyCloseP();
+ HtmlAttributes formAttrs = new HtmlAttributes(0);
+ int actionIndex = attributes.getIndex(AttributeName.ACTION);
+ if (actionIndex > -1) {
+ formAttrs.addAttribute(
+ AttributeName.ACTION,
+ attributes.getValueNoBoundsCheck(actionIndex)
+ // [NOCPP[
+ , XmlViolationPolicy.ALLOW
+ // ]NOCPP]
+ // CPPONLY: , attributes.getLineNoBoundsCheck(actionIndex)
+ );
+ }
+ appendToCurrentNodeAndPushFormElementMayFoster(formAttrs);
+ appendVoidElementToCurrentMayFoster(
+ ElementName.HR,
+ HtmlAttributes.EMPTY_ATTRIBUTES);
+ appendToCurrentNodeAndPushElementMayFoster(
+ ElementName.LABEL,
+ HtmlAttributes.EMPTY_ATTRIBUTES);
+ int promptIndex = attributes.getIndex(AttributeName.PROMPT);
+ if (promptIndex > -1) {
+ @Auto char[] prompt = Portability.newCharArrayFromString(attributes.getValueNoBoundsCheck(promptIndex));
+ appendCharacters(stack[currentPtr].node,
+ prompt, 0, prompt.length);
+ } else {
+ appendIsindexPrompt(stack[currentPtr].node);
+ }
+ HtmlAttributes inputAttributes = new HtmlAttributes(
+ 0);
+ inputAttributes.addAttribute(
+ AttributeName.NAME,
+ Portability.newStringFromLiteral("isindex")
+ // [NOCPP[
+ , XmlViolationPolicy.ALLOW
+ // ]NOCPP]
+ // CPPONLY: , tokenizer.getLineNumber()
+ );
+ for (int i = 0; i < attributes.getLength(); i++) {
+ AttributeName attributeQName = attributes.getAttributeNameNoBoundsCheck(i);
+ if (AttributeName.NAME == attributeQName
+ || AttributeName.PROMPT == attributeQName) {
+ attributes.releaseValue(i);
+ } else if (AttributeName.ACTION != attributeQName) {
+ inputAttributes.addAttribute(
+ attributeQName,
+ attributes.getValueNoBoundsCheck(i)
+ // [NOCPP[
+ , XmlViolationPolicy.ALLOW
+ // ]NOCPP]
+ // CPPONLY: , attributes.getLineNoBoundsCheck(i)
+ );
+ }
+ }
+ attributes.clearWithoutReleasingContents();
+ appendVoidElementToCurrentMayFoster(
+ "input",
+ inputAttributes, formPointer);
+ pop(); // label
+ appendVoidElementToCurrentMayFoster(
+ ElementName.HR,
+ HtmlAttributes.EMPTY_ATTRIBUTES);
+ pop(); // form
+
+ if (!isTemplateContents()) {
+ formPointer = null;
+ }
+
+ selfClosing = false;
+ // Portability.delete(formAttrs);
+ // Portability.delete(inputAttributes);
+ // Don't delete attributes, they are deleted
+ // later
+ break starttagloop;
+ case TEXTAREA:
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes, formPointer);
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.RCDATA, elementName);
+ originalMode = mode;
+ mode = TEXT;
+ needToDropLF = true;
+ attributes = null; // CPP
+ break starttagloop;
+ case XMP:
+ implicitlyCloseP();
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.RAWTEXT, elementName);
+ attributes = null; // CPP
+ break starttagloop;
+ case NOSCRIPT:
+ if (!scriptingEnabled) {
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ } else {
+ // fall through
+ }
+ case NOFRAMES:
+ case IFRAME:
+ case NOEMBED:
+ startTagGenericRawText(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case SELECT:
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes, formPointer);
+ switch (mode) {
+ case IN_TABLE:
+ case IN_CAPTION:
+ case IN_COLUMN_GROUP:
+ case IN_TABLE_BODY:
+ case IN_ROW:
+ case IN_CELL:
+ mode = IN_SELECT_IN_TABLE;
+ break;
+ default:
+ mode = IN_SELECT;
+ break;
+ }
+ attributes = null; // CPP
+ break starttagloop;
+ case OPTGROUP:
+ case OPTION:
+ if (isCurrent("option")) {
+ pop();
+ }
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case RB_OR_RTC:
+ eltPos = findLastInScope("ruby");
+ if (eltPos != NOT_FOUND_ON_STACK) {
+ generateImpliedEndTags();
+ }
+ if (eltPos != currentPtr) {
+ if (eltPos == NOT_FOUND_ON_STACK) {
+ errStartTagSeenWithoutRuby(name);
+ } else {
+ errUnclosedChildrenInRuby();
+ }
+ }
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case RT_OR_RP:
+ eltPos = findLastInScope("ruby");
+ if (eltPos != NOT_FOUND_ON_STACK) {
+ generateImpliedEndTagsExceptFor("rtc");
+ }
+ if (eltPos != currentPtr) {
+ if (!isCurrent("rtc")) {
+ if (eltPos == NOT_FOUND_ON_STACK) {
+ errStartTagSeenWithoutRuby(name);
+ } else {
+ errUnclosedChildrenInRuby();
+ }
+ }
+ }
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case MATH:
+ reconstructTheActiveFormattingElements();
+ attributes.adjustForMath();
+ if (selfClosing) {
+ appendVoidElementToCurrentMayFosterMathML(
+ elementName, attributes);
+ selfClosing = false;
+ } else {
+ appendToCurrentNodeAndPushElementMayFosterMathML(
+ elementName, attributes);
+ }
+ attributes = null; // CPP
+ break starttagloop;
+ case SVG:
+ reconstructTheActiveFormattingElements();
+ attributes.adjustForSvg();
+ if (selfClosing) {
+ appendVoidElementToCurrentMayFosterSVG(
+ elementName,
+ attributes);
+ selfClosing = false;
+ } else {
+ appendToCurrentNodeAndPushElementMayFosterSVG(
+ elementName, attributes);
+ }
+ attributes = null; // CPP
+ break starttagloop;
+ case CAPTION:
+ case COL:
+ case COLGROUP:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TR:
+ case TD_OR_TH:
+ case FRAME:
+ case FRAMESET:
+ case HEAD:
+ errStrayStartTag(name);
+ break starttagloop;
+ case OUTPUT:
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes, formPointer);
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ }
+ }
+ case IN_HEAD:
+ inheadloop: for (;;) {
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case BASE:
+ case LINK_OR_BASEFONT_OR_BGSOUND:
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case META:
+ // Fall through to IN_HEAD_NOSCRIPT
+ break inheadloop;
+ case TITLE:
+ startTagTitleInHead(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case NOSCRIPT:
+ if (scriptingEnabled) {
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.RAWTEXT, elementName);
+ } else {
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ mode = IN_HEAD_NOSCRIPT;
+ }
+ attributes = null; // CPP
+ break starttagloop;
+ case SCRIPT:
+ startTagScriptInHead(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case STYLE:
+ case NOFRAMES:
+ startTagGenericRawText(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case HEAD:
+ /* Parse error. */
+ errFooSeenWhenFooOpen(name);
+ /* Ignore the token. */
+ break starttagloop;
+ case TEMPLATE:
+ startTagTemplateInHead(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ pop();
+ mode = AFTER_HEAD;
+ continue starttagloop;
+ }
+ }
+ case IN_HEAD_NOSCRIPT:
+ switch (group) {
+ case HTML:
+ // XXX did Hixie really mean to omit "base"
+ // here?
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case LINK_OR_BASEFONT_OR_BGSOUND:
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case META:
+ checkMetaCharset(attributes);
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case STYLE:
+ case NOFRAMES:
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.RAWTEXT, elementName);
+ attributes = null; // CPP
+ break starttagloop;
+ case HEAD:
+ errFooSeenWhenFooOpen(name);
+ break starttagloop;
+ case NOSCRIPT:
+ errFooSeenWhenFooOpen(name);
+ break starttagloop;
+ default:
+ errBadStartTagInHead(name);
+ pop();
+ mode = IN_HEAD;
+ continue;
+ }
+ case IN_COLUMN_GROUP:
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case COL:
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case TEMPLATE:
+ startTagTemplateInHead(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ if (currentPtr == 0 || stack[currentPtr].getGroup() == TEMPLATE) {
+ assert fragment || isTemplateContents();
+ errGarbageInColgroup();
+ break starttagloop;
+ }
+ pop();
+ mode = IN_TABLE;
+ continue;
+ }
+ case IN_SELECT_IN_TABLE:
+ switch (group) {
+ case CAPTION:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TR:
+ case TD_OR_TH:
+ case TABLE:
+ errStartTagWithSelectOpen(name);
+ eltPos = findLastInTableScope("select");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ assert fragment;
+ break starttagloop; // http://www.w3.org/Bugs/Public/show_bug.cgi?id=8375
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ resetTheInsertionMode();
+ continue;
+ default:
+ // fall through to IN_SELECT
+ }
+ case IN_SELECT:
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case OPTION:
+ if (isCurrent("option")) {
+ pop();
+ }
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case OPTGROUP:
+ if (isCurrent("option")) {
+ pop();
+ }
+ if (isCurrent("optgroup")) {
+ pop();
+ }
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case SELECT:
+ errStartSelectWhereEndSelectExpected();
+ eltPos = findLastInTableScope(name);
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ assert fragment;
+ errNoSelectInTableScope();
+ break starttagloop;
+ } else {
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ resetTheInsertionMode();
+ break starttagloop;
+ }
+ case INPUT:
+ case TEXTAREA:
+ case KEYGEN:
+ errStartTagWithSelectOpen(name);
+ eltPos = findLastInTableScope("select");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ assert fragment;
+ break starttagloop;
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ resetTheInsertionMode();
+ continue;
+ case SCRIPT:
+ startTagScriptInHead(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case TEMPLATE:
+ startTagTemplateInHead(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ errStrayStartTag(name);
+ break starttagloop;
+ }
+ case AFTER_BODY:
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ default:
+ errStrayStartTag(name);
+ mode = framesetOk ? FRAMESET_OK : IN_BODY;
+ continue;
+ }
+ case IN_FRAMESET:
+ switch (group) {
+ case FRAMESET:
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case FRAME:
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ // fall through to AFTER_FRAMESET
+ }
+ case AFTER_FRAMESET:
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case NOFRAMES:
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.RAWTEXT, elementName);
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ errStrayStartTag(name);
+ break starttagloop;
+ }
+ case INITIAL:
+ /*
+ * Parse error.
+ */
+ // [NOCPP[
+ switch (doctypeExpectation) {
+ case AUTO:
+ err("Start tag seen without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
+ break;
+ case HTML:
+ // ]NOCPP]
+ errStartTagWithoutDoctype();
+ // [NOCPP[
+ break;
+ case HTML401_STRICT:
+ err("Start tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
+ break;
+ case HTML401_TRANSITIONAL:
+ err("Start tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
+ break;
+ case NO_DOCTYPE_ERRORS:
+ }
+ // ]NOCPP]
+ /*
+ *
+ * Set the document to quirks mode.
+ */
+ documentModeInternal(DocumentMode.QUIRKS_MODE, null, null,
+ false);
+ /*
+ * Then, switch to the root element mode of the tree
+ * construction stage
+ */
+ mode = BEFORE_HTML;
+ /*
+ * and reprocess the current token.
+ */
+ continue;
+ case BEFORE_HTML:
+ switch (group) {
+ case HTML:
+ // optimize error check and streaming SAX by
+ // hoisting
+ // "html" handling here.
+ if (attributes == HtmlAttributes.EMPTY_ATTRIBUTES) {
+ // This has the right magic side effect
+ // that
+ // it
+ // makes attributes in SAX Tree mutable.
+ appendHtmlElementToDocumentAndPush();
+ } else {
+ appendHtmlElementToDocumentAndPush(attributes);
+ }
+ // XXX application cache should fire here
+ mode = BEFORE_HEAD;
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ /*
+ * Create an HTMLElement node with the tag name
+ * html, in the HTML namespace. Append it to the
+ * Document object.
+ */
+ appendHtmlElementToDocumentAndPush();
+ /* Switch to the main mode */
+ mode = BEFORE_HEAD;
+ /*
+ * reprocess the current token.
+ */
+ continue;
+ }
+ case BEFORE_HEAD:
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case HEAD:
+ /*
+ * A start tag whose tag name is "head"
+ *
+ * Create an element for the token.
+ *
+ * Set the head element pointer to this new element
+ * node.
+ *
+ * Append the new element to the current node and
+ * push it onto the stack of open elements.
+ */
+ appendToCurrentNodeAndPushHeadElement(attributes);
+ /*
+ * Change the insertion mode to "in head".
+ */
+ mode = IN_HEAD;
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ /*
+ * Any other start tag token
+ *
+ * Act as if a start tag token with the tag name
+ * "head" and no attributes had been seen,
+ */
+ appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES);
+ mode = IN_HEAD;
+ /*
+ * then reprocess the current token.
+ *
+ * This will result in an empty head element being
+ * generated, with the current token being
+ * reprocessed in the "after head" insertion mode.
+ */
+ continue;
+ }
+ case AFTER_HEAD:
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case BODY:
+ if (attributes.getLength() == 0) {
+ // This has the right magic side effect
+ // that
+ // it
+ // makes attributes in SAX Tree mutable.
+ appendToCurrentNodeAndPushBodyElement();
+ } else {
+ appendToCurrentNodeAndPushBodyElement(attributes);
+ }
+ framesetOk = false;
+ mode = IN_BODY;
+ attributes = null; // CPP
+ break starttagloop;
+ case FRAMESET:
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ mode = IN_FRAMESET;
+ attributes = null; // CPP
+ break starttagloop;
+ case TEMPLATE:
+ errFooBetweenHeadAndBody(name);
+ pushHeadPointerOntoStack();
+ StackNode<T> headOnStack = stack[currentPtr];
+ startTagTemplateInHead(elementName, attributes);
+ removeFromStack(headOnStack);
+ attributes = null; // CPP
+ break starttagloop;
+ case BASE:
+ case LINK_OR_BASEFONT_OR_BGSOUND:
+ errFooBetweenHeadAndBody(name);
+ pushHeadPointerOntoStack();
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ pop(); // head
+ attributes = null; // CPP
+ break starttagloop;
+ case META:
+ errFooBetweenHeadAndBody(name);
+ checkMetaCharset(attributes);
+ pushHeadPointerOntoStack();
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ pop(); // head
+ attributes = null; // CPP
+ break starttagloop;
+ case SCRIPT:
+ errFooBetweenHeadAndBody(name);
+ pushHeadPointerOntoStack();
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.SCRIPT_DATA, elementName);
+ attributes = null; // CPP
+ break starttagloop;
+ case STYLE:
+ case NOFRAMES:
+ errFooBetweenHeadAndBody(name);
+ pushHeadPointerOntoStack();
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.RAWTEXT, elementName);
+ attributes = null; // CPP
+ break starttagloop;
+ case TITLE:
+ errFooBetweenHeadAndBody(name);
+ pushHeadPointerOntoStack();
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.RCDATA, elementName);
+ attributes = null; // CPP
+ break starttagloop;
+ case HEAD:
+ errStrayStartTag(name);
+ break starttagloop;
+ default:
+ appendToCurrentNodeAndPushBodyElement();
+ mode = FRAMESET_OK;
+ continue;
+ }
+ case AFTER_AFTER_BODY:
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ default:
+ errStrayStartTag(name);
+ fatal();
+ mode = framesetOk ? FRAMESET_OK : IN_BODY;
+ continue;
+ }
+ case AFTER_AFTER_FRAMESET:
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case NOFRAMES:
+ startTagGenericRawText(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ errStrayStartTag(name);
+ break starttagloop;
+ }
+ case TEXT:
+ assert false;
+ break starttagloop; // Avoid infinite loop if the assertion
+ // fails
+ }
+ }
+ if (selfClosing) {
+ errSelfClosing();
+ }
+ // CPPONLY: if (mBuilder == null && attributes != HtmlAttributes.EMPTY_ATTRIBUTES) {
+ // CPPONLY: Portability.delete(attributes);
+ // CPPONLY: }
+ }
+
+ private void startTagTitleInHead(ElementName elementName, HtmlAttributes attributes) throws SAXException { // Inserts the element via the MayFoster append helper, then switches to TEXT mode with RCDATA tokenization.
+ appendToCurrentNodeAndPushElementMayFoster(elementName, attributes);
+ originalMode = mode; // saved first: the TEXT case of endTag() restores it with mode = originalMode
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(Tokenizer.RCDATA, elementName); // tokenizer state RCDATA; elementName is passed as the end tag to expect
+ }
+
+ private void startTagGenericRawText(ElementName elementName, HtmlAttributes attributes) throws SAXException { // Inserts the element via the MayFoster append helper, then switches to TEXT mode with RAWTEXT tokenization (used by startTag for STYLE/NOFRAMES).
+ appendToCurrentNodeAndPushElementMayFoster(elementName, attributes);
+ originalMode = mode; // saved first: the TEXT case of endTag() restores it with mode = originalMode
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(Tokenizer.RAWTEXT, elementName); // tokenizer state RAWTEXT; elementName is passed as the end tag to expect
+ }
+
+ private void startTagScriptInHead(ElementName elementName, HtmlAttributes attributes) throws SAXException { // Inserts the element via the MayFoster append helper, then switches to TEXT mode with SCRIPT_DATA tokenization (used by startTag for SCRIPT).
+ // XXX need to manage much more stuff here if supporting document.write()
+ appendToCurrentNodeAndPushElementMayFoster(elementName, attributes);
+ originalMode = mode; // saved first: the TEXT case of endTag() restores it with mode = originalMode
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(Tokenizer.SCRIPT_DATA, elementName); // tokenizer state SCRIPT_DATA; elementName is passed as the end tag to expect
+ }
+
+ private void startTagTemplateInHead(ElementName elementName, HtmlAttributes attributes) throws SAXException { // Start tag handling used by startTag for TEMPLATE: pushes the element, then enters IN_TEMPLATE.
+ appendToCurrentNodeAndPushElement(elementName, attributes);
+ insertMarker(); // NOTE(review): presumably a marker in the list of active formatting elements -- confirm against insertMarker()
+ framesetOk = false;
+ originalMode = mode; // previous mode is recorded before switching
+ mode = IN_TEMPLATE;
+ pushTemplateMode(IN_TEMPLATE); // IN_TEMPLATE is also pushed via pushTemplateMode (see isTemplateModeStackEmpty / templateModePtr)
+ }
+
+ /**
+  * Reports whether a <code>template</code> entry can be found on the stack
+  * of open elements.
+  *
+  * @return <code>true</code> when <code>findLast("template")</code> yields
+  *         anything other than <code>TreeBuilder.NOT_FOUND_ON_STACK</code>
+  */
+ private boolean isTemplateContents() {
+     int templatePos = findLast("template");
+     return templatePos != TreeBuilder.NOT_FOUND_ON_STACK;
+ }
+
+ /**
+  * Reports whether the template mode stack holds no entries.
+  *
+  * @return <code>true</code> when <code>templateModePtr</code> is
+  *         <code>-1</code>
+  */
+ private boolean isTemplateModeStackEmpty() {
+     boolean noTemplateModes = (templateModePtr == -1);
+     return noTemplateModes;
+ }
+
+ /**
+  * Tests a stack node against three conditions, in this order: its
+  * namespace is XHTML, it is an HTML integration point, or it is a MathML
+  * node whose group is <code>MI_MO_MN_MS_MTEXT</code>.
+  *
+  * <p>
+  * Namespace strings are compared by identity (<code>==</code>), as
+  * elsewhere in this class.
+  *
+  * @param stackNode
+  *            the node to examine
+  * @return <code>true</code> if any of the three conditions holds
+  */
+ private boolean isSpecialParentInForeign(StackNode<T> stackNode) {
+     @NsUri String ns = stackNode.ns;
+     if ("http://www.w3.org/1999/xhtml" == ns) {
+         return true;
+     }
+     if (stackNode.isHtmlIntegrationPoint()) {
+         return true;
+     }
+     if ("http://www.w3.org/1998/Math/MathML" == ns) {
+         return stackNode.getGroup() == MI_MO_MN_MS_MTEXT;
+     }
+     return false;
+ }
+
+ /**
+ *
+ * <p>
+ * C++ memory note: The return value must be released.
+ *
+ * @return
+ * @throws SAXException
+ * @throws StopSniffingException
+ */
+ public static String extractCharsetFromContent(String attributeValue
+ // CPPONLY: , TreeBuilder tb
+ ) {
+ // This is a bit ugly. Converting the string to char array in order to
+ // make the portability layer smaller.
+ int charsetState = CHARSET_INITIAL;
+ int start = -1;
+ int end = -1;
+ @Auto char[] buffer = Portability.newCharArrayFromString(attributeValue);
+
+ charsetloop: for (int i = 0; i < buffer.length; i++) {
+ char c = buffer[i];
+ switch (charsetState) {
+ case CHARSET_INITIAL:
+ switch (c) {
+ case 'c':
+ case 'C':
+ charsetState = CHARSET_C;
+ continue;
+ default:
+ continue;
+ }
+ case CHARSET_C:
+ switch (c) {
+ case 'h':
+ case 'H':
+ charsetState = CHARSET_H;
+ continue;
+ default:
+ charsetState = CHARSET_INITIAL;
+ continue;
+ }
+ case CHARSET_H:
+ switch (c) {
+ case 'a':
+ case 'A':
+ charsetState = CHARSET_A;
+ continue;
+ default:
+ charsetState = CHARSET_INITIAL;
+ continue;
+ }
+ case CHARSET_A:
+ switch (c) {
+ case 'r':
+ case 'R':
+ charsetState = CHARSET_R;
+ continue;
+ default:
+ charsetState = CHARSET_INITIAL;
+ continue;
+ }
+ case CHARSET_R:
+ switch (c) {
+ case 's':
+ case 'S':
+ charsetState = CHARSET_S;
+ continue;
+ default:
+ charsetState = CHARSET_INITIAL;
+ continue;
+ }
+ case CHARSET_S:
+ switch (c) {
+ case 'e':
+ case 'E':
+ charsetState = CHARSET_E;
+ continue;
+ default:
+ charsetState = CHARSET_INITIAL;
+ continue;
+ }
+ case CHARSET_E:
+ switch (c) {
+ case 't':
+ case 'T':
+ charsetState = CHARSET_T;
+ continue;
+ default:
+ charsetState = CHARSET_INITIAL;
+ continue;
+ }
+ case CHARSET_T:
+ switch (c) {
+ case '\t':
+ case '\n':
+ case '\u000C':
+ case '\r':
+ case ' ':
+ continue;
+ case '=':
+ charsetState = CHARSET_EQUALS;
+ continue;
+ default:
+ return null;
+ }
+ case CHARSET_EQUALS:
+ switch (c) {
+ case '\t':
+ case '\n':
+ case '\u000C':
+ case '\r':
+ case ' ':
+ continue;
+ case '\'':
+ start = i + 1;
+ charsetState = CHARSET_SINGLE_QUOTED;
+ continue;
+ case '\"':
+ start = i + 1;
+ charsetState = CHARSET_DOUBLE_QUOTED;
+ continue;
+ default:
+ start = i;
+ charsetState = CHARSET_UNQUOTED;
+ continue;
+ }
+ case CHARSET_SINGLE_QUOTED:
+ switch (c) {
+ case '\'':
+ end = i;
+ break charsetloop;
+ default:
+ continue;
+ }
+ case CHARSET_DOUBLE_QUOTED:
+ switch (c) {
+ case '\"':
+ end = i;
+ break charsetloop;
+ default:
+ continue;
+ }
+ case CHARSET_UNQUOTED:
+ switch (c) {
+ case '\t':
+ case '\n':
+ case '\u000C':
+ case '\r':
+ case ' ':
+ case ';':
+ end = i;
+ break charsetloop;
+ default:
+ continue;
+ }
+ }
+ }
+ String charset = null;
+ if (start != -1) {
+ if (end == -1) {
+ end = buffer.length;
+ }
+ charset = Portability.newStringFromBuffer(buffer, start, end
+ - start
+ // CPPONLY: , tb
+ );
+ }
+ return charset;
+ }
+
+ /**
+  * Looks for an encoding declaration on a <code>meta</code> element and
+  * reports it to the tokenizer.
+  *
+  * <p>
+  * A <code>charset</code> attribute, when present, is used and nothing else
+  * is examined. Otherwise the <code>content</code> attribute is consulted,
+  * but only if <code>http-equiv</code> equals "content-type" (ASCII
+  * case-insensitive). Whenever
+  * <code>tokenizer.internalEncodingDeclaration</code> returns
+  * <code>true</code>, <code>requestSuspension()</code> is called.
+  *
+  * @param attributes
+  *            the attributes of the <code>meta</code> start tag
+  * @throws SAXException
+  *             propagated from the tokenizer callback
+  */
+ private void checkMetaCharset(HtmlAttributes attributes)
+         throws SAXException {
+     String declared = attributes.getValue(AttributeName.CHARSET);
+     if (declared != null) {
+         if (tokenizer.internalEncodingDeclaration(declared)) {
+             requestSuspension();
+         }
+         return;
+     }
+     String httpEquiv = attributes.getValue(AttributeName.HTTP_EQUIV);
+     boolean isContentType = Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+             "content-type", httpEquiv);
+     if (!isContentType) {
+         return;
+     }
+     String content = attributes.getValue(AttributeName.CONTENT);
+     if (content == null) {
+         return;
+     }
+     String extract = TreeBuilder.extractCharsetFromContent(content
+     // CPPONLY: , this
+     );
+     // From here on, do not return before the extracted string has been
+     // released.
+     if (extract != null && tokenizer.internalEncodingDeclaration(extract)) {
+         requestSuspension();
+     }
+     Portability.releaseString(extract);
+ }
+
+ public final void endTag(ElementName elementName) throws SAXException { /*
+ * Processes one end tag token.
+ *
+ * Pending characters are flushed and needToDropLF is cleared first. The
+ * token is then handled inside endtagloop:
+ *
+ * - While isInForeign() is true, the stack is searched downward from the
+ *   current node for an entry with the same name; if found, everything
+ *   from the current node through that entry is popped and processing
+ *   ends. The search stops once an XHTML-namespace entry is reached, and
+ *   handling then proceeds by insertion mode.
+ * - Otherwise handling is selected by switch (mode) and, within each mode,
+ *   by the element group. Many cases deliberately fall through to the
+ *   following mode case.
+ *
+ * "continue" re-runs the loop so the same token is reprocessed (normally
+ * after mode was changed); "break endtagloop" ends processing.
+ *
+ * Parameter elementName: the name of the end tag being processed.
+ * Throws SAXException: declared by this method and by the helpers it calls.
+ */
+ flushCharacters();
+ needToDropLF = false;
+ int eltPos;
+ int group = elementName.getGroup();
+ @Local String name = elementName.name;
+ endtagloop: for (;;) { // each iteration (re)processes the same end tag under the current mode
+ if (isInForeign()) {
+ if (stack[currentPtr].name != name) {
+ if (currentPtr == 0) {
+ errStrayEndTag(name);
+ } else {
+ errEndTagDidNotMatchCurrentOpenElement(name, stack[currentPtr].popName);
+ }
+ }
+ eltPos = currentPtr;
+ for (;;) {
+ if (eltPos == 0) {
+ assert fragment: "We can get this close to the root of the stack in foreign content only in the fragment case.";
+ break endtagloop;
+ }
+ if (stack[eltPos].name == name) {
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ break endtagloop;
+ }
+ if (stack[--eltPos].ns == "http://www.w3.org/1999/xhtml") { // next entry down is in the XHTML namespace: stop the foreign search and dispatch by mode
+ break;
+ }
+ }
+ }
+ switch (mode) {
+ case IN_TEMPLATE:
+ switch (group) {
+ case TEMPLATE:
+ // fall through to IN_HEAD
+ break; // leaves only the inner switch; execution continues into the following mode cases
+ default:
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case IN_ROW:
+ switch (group) {
+ case TR:
+ eltPos = findLastOrRoot(TreeBuilder.TR);
+ if (eltPos == 0) {
+ assert fragment || isTemplateContents();
+ errNoTableRowToClose();
+ break endtagloop;
+ }
+ clearStackBackTo(eltPos);
+ pop();
+ mode = IN_TABLE_BODY;
+ break endtagloop;
+ case TABLE:
+ eltPos = findLastOrRoot(TreeBuilder.TR);
+ if (eltPos == 0) {
+ assert fragment || isTemplateContents();
+ errNoTableRowToClose();
+ break endtagloop;
+ }
+ clearStackBackTo(eltPos);
+ pop();
+ mode = IN_TABLE_BODY;
+ continue; // row closed; reprocess the end tag in IN_TABLE_BODY
+ case TBODY_OR_THEAD_OR_TFOOT:
+ if (findLastInTableScope(name) == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ eltPos = findLastOrRoot(TreeBuilder.TR);
+ if (eltPos == 0) {
+ assert fragment || isTemplateContents();
+ errNoTableRowToClose();
+ break endtagloop;
+ }
+ clearStackBackTo(eltPos);
+ pop();
+ mode = IN_TABLE_BODY;
+ continue; // row closed; reprocess the end tag in IN_TABLE_BODY
+ case BODY:
+ case CAPTION:
+ case COL:
+ case COLGROUP:
+ case HTML:
+ case TD_OR_TH:
+ errStrayEndTag(name);
+ break endtagloop;
+ default:
+ // fall through to IN_TABLE
+ }
+ case IN_TABLE_BODY:
+ switch (group) {
+ case TBODY_OR_THEAD_OR_TFOOT:
+ eltPos = findLastOrRoot(name);
+ if (eltPos == 0) {
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ clearStackBackTo(eltPos);
+ pop();
+ mode = IN_TABLE;
+ break endtagloop;
+ case TABLE:
+ eltPos = findLastInTableScopeOrRootTemplateTbodyTheadTfoot();
+ if (eltPos == 0 || stack[eltPos].getGroup() == TEMPLATE) {
+ assert fragment || isTemplateContents();
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ clearStackBackTo(eltPos);
+ pop();
+ mode = IN_TABLE;
+ continue; // table section closed; reprocess the end tag in IN_TABLE
+ case BODY:
+ case CAPTION:
+ case COL:
+ case COLGROUP:
+ case HTML:
+ case TD_OR_TH:
+ case TR:
+ errStrayEndTag(name);
+ break endtagloop;
+ default:
+ // fall through to IN_TABLE
+ }
+ case IN_TABLE:
+ switch (group) {
+ case TABLE:
+ eltPos = findLast("table");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ assert fragment || isTemplateContents();
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ resetTheInsertionMode();
+ break endtagloop;
+ case BODY:
+ case CAPTION:
+ case COL:
+ case COLGROUP:
+ case HTML:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TD_OR_TH:
+ case TR:
+ errStrayEndTag(name);
+ break endtagloop;
+ case TEMPLATE:
+ // fall through to IN_HEAD
+ break; // leaves only the inner switch; execution continues into the following mode cases
+ default:
+ errStrayEndTag(name);
+ // fall through to IN_BODY
+ }
+ case IN_CAPTION:
+ switch (group) {
+ case CAPTION:
+ eltPos = findLastInTableScope("caption");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ break endtagloop;
+ }
+ generateImpliedEndTags();
+ if (errorHandler != null && currentPtr != eltPos) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ clearTheListOfActiveFormattingElementsUpToTheLastMarker();
+ mode = IN_TABLE;
+ break endtagloop;
+ case TABLE:
+ errTableClosedWhileCaptionOpen();
+ eltPos = findLastInTableScope("caption");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ break endtagloop;
+ }
+ generateImpliedEndTags();
+ if (errorHandler != null && currentPtr != eltPos) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ clearTheListOfActiveFormattingElementsUpToTheLastMarker();
+ mode = IN_TABLE;
+ continue; // caption closed; reprocess the end tag in IN_TABLE
+ case BODY:
+ case COL:
+ case COLGROUP:
+ case HTML:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TD_OR_TH:
+ case TR:
+ errStrayEndTag(name);
+ break endtagloop;
+ default:
+ // fall through to IN_BODY
+ }
+ case IN_CELL:
+ switch (group) {
+ case TD_OR_TH:
+ eltPos = findLastInTableScope(name);
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ generateImpliedEndTags();
+ if (errorHandler != null && !isCurrent(name)) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ clearTheListOfActiveFormattingElementsUpToTheLastMarker();
+ mode = IN_ROW;
+ break endtagloop;
+ case TABLE:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TR:
+ if (findLastInTableScope(name) == TreeBuilder.NOT_FOUND_ON_STACK) {
+ assert name == "tbody" || name == "tfoot" || name == "thead" || fragment || isTemplateContents();
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ closeTheCell(findLastInTableScopeTdTh());
+ continue; // reprocess after closeTheCell(); NOTE(review): presumably closeTheCell() changes mode -- confirm
+ case BODY:
+ case CAPTION:
+ case COL:
+ case COLGROUP:
+ case HTML:
+ errStrayEndTag(name);
+ break endtagloop;
+ default:
+ // fall through to IN_BODY
+ }
+ case FRAMESET_OK:
+ case IN_BODY:
+ switch (group) {
+ case BODY:
+ if (!isSecondOnStackBody()) {
+ assert fragment || isTemplateContents();
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ assert currentPtr >= 1;
+ if (errorHandler != null) { // the scan below only reports an error, so it is skipped when there is no error handler
+ uncloseloop1: for (int i = 2; i <= currentPtr; i++) {
+ switch (stack[i].getGroup()) {
+ case DD_OR_DT:
+ case LI:
+ case OPTGROUP:
+ case OPTION: // is this possible?
+ case P:
+ case RB_OR_RTC:
+ case RT_OR_RP:
+ case TD_OR_TH:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ break;
+ default:
+ errEndWithUnclosedElements(name);
+ break uncloseloop1;
+ }
+ }
+ }
+ mode = AFTER_BODY;
+ break endtagloop;
+ case HTML:
+ if (!isSecondOnStackBody()) {
+ assert fragment || isTemplateContents();
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ if (errorHandler != null) { // the scan below only reports an error, so it is skipped when there is no error handler
+ uncloseloop2: for (int i = 0; i <= currentPtr; i++) {
+ switch (stack[i].getGroup()) {
+ case DD_OR_DT:
+ case LI:
+ case P:
+ case RB_OR_RTC:
+ case RT_OR_RP:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TD_OR_TH:
+ case BODY:
+ case HTML:
+ break;
+ default:
+ errEndWithUnclosedElements(name);
+ break uncloseloop2;
+ }
+ }
+ }
+ mode = AFTER_BODY;
+ continue; // reprocess the end tag in AFTER_BODY
+ case DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU:
+ case UL_OR_OL_OR_DL:
+ case PRE_OR_LISTING:
+ case FIELDSET:
+ case BUTTON:
+ case ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY:
+ eltPos = findLastInScope(name);
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errStrayEndTag(name);
+ } else {
+ generateImpliedEndTags();
+ if (errorHandler != null && !isCurrent(name)) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ }
+ break endtagloop;
+ case FORM:
+ if (!isTemplateContents()) {
+ if (formPointer == null) {
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ formPointer = null;
+ eltPos = findLastInScope(name);
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ generateImpliedEndTags();
+ if (errorHandler != null && !isCurrent(name)) {
+ errUnclosedElements(eltPos, name);
+ }
+ removeFromStack(eltPos); // unlike the template branch below, this does not run a pop() loop down to eltPos
+ break endtagloop;
+ } else {
+ eltPos = findLastInScope(name);
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ generateImpliedEndTags();
+ if (errorHandler != null && !isCurrent(name)) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ break endtagloop;
+ }
+ case P:
+ eltPos = findLastInButtonScope("p");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errNoElementToCloseButEndTagSeen("p");
+ // XXX Can the 'in foreign' case happen anymore?
+ if (isInForeign()) {
+ errHtmlStartTagInForeignContext(name);
+ // Check for currentPtr for the fragment
+ // case.
+ while (currentPtr >= 0 && stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") {
+ pop();
+ }
+ }
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ HtmlAttributes.EMPTY_ATTRIBUTES); // no open p in button scope: an attribute-less element is appended for the stray end tag
+ break endtagloop;
+ }
+ generateImpliedEndTagsExceptFor("p");
+ assert eltPos != TreeBuilder.NOT_FOUND_ON_STACK;
+ if (errorHandler != null && eltPos != currentPtr) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ break endtagloop;
+ case LI:
+ eltPos = findLastInListScope(name);
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errNoElementToCloseButEndTagSeen(name);
+ } else {
+ generateImpliedEndTagsExceptFor(name);
+ if (errorHandler != null
+ && eltPos != currentPtr) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ }
+ break endtagloop;
+ case DD_OR_DT:
+ eltPos = findLastInScope(name);
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errNoElementToCloseButEndTagSeen(name);
+ } else {
+ generateImpliedEndTagsExceptFor(name);
+ if (errorHandler != null
+ && eltPos != currentPtr) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ }
+ break endtagloop;
+ case H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6:
+ eltPos = findLastInScopeHn();
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errStrayEndTag(name);
+ } else {
+ generateImpliedEndTags();
+ if (errorHandler != null && !isCurrent(name)) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ }
+ break endtagloop;
+ case OBJECT:
+ case MARQUEE_OR_APPLET:
+ eltPos = findLastInScope(name);
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errStrayEndTag(name);
+ } else {
+ generateImpliedEndTags();
+ if (errorHandler != null && !isCurrent(name)) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ clearTheListOfActiveFormattingElementsUpToTheLastMarker();
+ }
+ break endtagloop;
+ case BR:
+ errEndTagBr();
+ if (isInForeign()) {
+ // XXX can this happen anymore?
+ errHtmlStartTagInForeignContext(name);
+ // Check for currentPtr for the fragment
+ // case.
+ while (currentPtr >= 0 && stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") {
+ pop();
+ }
+ }
+ reconstructTheActiveFormattingElements();
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ HtmlAttributes.EMPTY_ATTRIBUTES); // the br end tag results in an attribute-less element being appended
+ break endtagloop;
+ case TEMPLATE:
+ // fall through to IN_HEAD;
+ break; // leaves only the inner switch; execution continues into case IN_HEAD
+ case AREA_OR_WBR:
+ // CPPONLY: case MENUITEM:
+ case PARAM_OR_SOURCE_OR_TRACK:
+ case EMBED:
+ case IMG:
+ case IMAGE:
+ case INPUT:
+ case KEYGEN: // XXX??
+ case HR:
+ case ISINDEX:
+ case IFRAME:
+ case NOEMBED: // XXX???
+ case NOFRAMES: // XXX??
+ case SELECT:
+ case TABLE:
+ case TEXTAREA: // XXX??
+ errStrayEndTag(name);
+ break endtagloop;
+ case NOSCRIPT:
+ if (scriptingEnabled) {
+ errStrayEndTag(name);
+ break endtagloop;
+ } else {
+ // fall through
+ }
+ case A:
+ case B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U:
+ case FONT:
+ case NOBR:
+ if (adoptionAgencyEndTag(name)) { // done when adoptionAgencyEndTag() returns true; otherwise the default branch below handles the tag
+ break endtagloop;
+ }
+ // else handle like any other tag
+ default:
+ if (isCurrent(name)) {
+ pop();
+ break endtagloop;
+ }
+
+ eltPos = currentPtr;
+ for (;;) { // walk down the stack until a matching XHTML element, a special element, or the root is found
+ StackNode<T> node = stack[eltPos];
+ if (node.ns == "http://www.w3.org/1999/xhtml" && node.name == name) {
+ generateImpliedEndTags();
+ if (errorHandler != null
+ && !isCurrent(name)) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ break endtagloop;
+ } else if (eltPos == 0 || node.isSpecial()) {
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ eltPos--;
+ }
+ }
+ case IN_HEAD:
+ switch (group) {
+ case HEAD:
+ pop();
+ mode = AFTER_HEAD;
+ break endtagloop;
+ case BR:
+ case HTML:
+ case BODY:
+ pop();
+ mode = AFTER_HEAD;
+ continue; // reprocess the end tag in AFTER_HEAD
+ case TEMPLATE:
+ endTagTemplateInHead();
+ break endtagloop;
+ default:
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case IN_HEAD_NOSCRIPT:
+ switch (group) {
+ case NOSCRIPT:
+ pop();
+ mode = IN_HEAD;
+ break endtagloop;
+ case BR:
+ errStrayEndTag(name);
+ pop();
+ mode = IN_HEAD;
+ continue; // reprocess the end tag in IN_HEAD
+ default:
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case IN_COLUMN_GROUP:
+ switch (group) {
+ case COLGROUP:
+ if (currentPtr == 0 || stack[currentPtr].getGroup() ==
+ TreeBuilder.TEMPLATE) {
+ assert fragment || isTemplateContents();
+ errGarbageInColgroup();
+ break endtagloop;
+ }
+ pop();
+ mode = IN_TABLE;
+ break endtagloop;
+ case COL:
+ errStrayEndTag(name);
+ break endtagloop;
+ case TEMPLATE:
+ endTagTemplateInHead();
+ break endtagloop;
+ default:
+ if (currentPtr == 0 || stack[currentPtr].getGroup() ==
+ TreeBuilder.TEMPLATE) {
+ assert fragment || isTemplateContents();
+ errGarbageInColgroup();
+ break endtagloop;
+ }
+ pop();
+ mode = IN_TABLE;
+ continue; // current node popped; reprocess the end tag in IN_TABLE
+ }
+ case IN_SELECT_IN_TABLE:
+ switch (group) {
+ case CAPTION:
+ case TABLE:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TR:
+ case TD_OR_TH:
+ errEndTagSeenWithSelectOpen(name);
+ if (findLastInTableScope(name) != TreeBuilder.NOT_FOUND_ON_STACK) {
+ eltPos = findLastInTableScope("select");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ assert fragment;
+ break endtagloop; // http://www.w3.org/Bugs/Public/show_bug.cgi?id=8375
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ resetTheInsertionMode();
+ continue; // select closed; reprocess the end tag in the mode chosen by resetTheInsertionMode()
+ } else {
+ break endtagloop;
+ }
+ default:
+ // fall through to IN_SELECT
+ }
+ case IN_SELECT:
+ switch (group) {
+ case OPTION:
+ if (isCurrent("option")) {
+ pop();
+ break endtagloop;
+ } else {
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case OPTGROUP:
+ if (isCurrent("option")
+ && "optgroup" == stack[currentPtr - 1].name) { // an open option directly inside an optgroup is closed first
+ pop();
+ }
+ if (isCurrent("optgroup")) {
+ pop();
+ } else {
+ errStrayEndTag(name);
+ }
+ break endtagloop;
+ case SELECT:
+ eltPos = findLastInTableScope("select");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ assert fragment;
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ resetTheInsertionMode();
+ break endtagloop;
+ case TEMPLATE:
+ endTagTemplateInHead();
+ break endtagloop;
+ default:
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case AFTER_BODY:
+ switch (group) {
+ case HTML:
+ if (fragment) {
+ errStrayEndTag(name);
+ break endtagloop;
+ } else {
+ mode = AFTER_AFTER_BODY;
+ break endtagloop;
+ }
+ default:
+ errEndTagAfterBody();
+ mode = framesetOk ? FRAMESET_OK : IN_BODY;
+ continue; // reprocess the end tag in the body mode selected above
+ }
+ case IN_FRAMESET:
+ switch (group) {
+ case FRAMESET:
+ if (currentPtr == 0) {
+ assert fragment;
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ pop();
+ if ((!fragment) && !isCurrent("frameset")) {
+ mode = AFTER_FRAMESET;
+ }
+ break endtagloop;
+ default:
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case AFTER_FRAMESET:
+ switch (group) {
+ case HTML:
+ mode = AFTER_AFTER_FRAMESET;
+ break endtagloop;
+ default:
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case INITIAL:
+ /*
+ * Parse error.
+ */
+ // [NOCPP[
+ switch (doctypeExpectation) {
+ case AUTO:
+ err("End tag seen without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
+ break;
+ case HTML:
+ // ]NOCPP]
+ errEndTagSeenWithoutDoctype();
+ // [NOCPP[
+ break;
+ case HTML401_STRICT:
+ err("End tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
+ break;
+ case HTML401_TRANSITIONAL:
+ err("End tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
+ break;
+ case NO_DOCTYPE_ERRORS:
+ }
+ // ]NOCPP]
+ /*
+ *
+ * Set the document to quirks mode.
+ */
+ documentModeInternal(DocumentMode.QUIRKS_MODE, null, null,
+ false);
+ /*
+ * Then, switch to the root element mode of the tree
+ * construction stage
+ */
+ mode = BEFORE_HTML;
+ /*
+ * and reprocess the current token.
+ */
+ continue;
+ case BEFORE_HTML:
+ switch (group) {
+ case HEAD:
+ case BR:
+ case HTML:
+ case BODY:
+ /*
+ * Create an HTMLElement node with the tag name
+ * html, in the HTML namespace. Append it to the
+ * Document object.
+ */
+ appendHtmlElementToDocumentAndPush();
+ /* Switch to the main mode */
+ mode = BEFORE_HEAD;
+ /*
+ * reprocess the current token.
+ */
+ continue;
+ default:
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case BEFORE_HEAD:
+ switch (group) {
+ case HEAD:
+ case BR:
+ case HTML:
+ case BODY:
+ appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES);
+ mode = IN_HEAD;
+ continue; // head element created with no attributes; reprocess the end tag in IN_HEAD
+ default:
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case AFTER_HEAD:
+ switch (group) {
+ case TEMPLATE:
+ endTagTemplateInHead();
+ break endtagloop;
+ case HTML:
+ case BODY:
+ case BR:
+ appendToCurrentNodeAndPushBodyElement();
+ mode = FRAMESET_OK;
+ continue; // body element created; reprocess the end tag in FRAMESET_OK
+ default:
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case AFTER_AFTER_BODY:
+ errStrayEndTag(name);
+ mode = framesetOk ? FRAMESET_OK : IN_BODY;
+ continue; // reprocess the end tag in the body mode selected above
+ case AFTER_AFTER_FRAMESET:
+ errStrayEndTag(name);
+ break endtagloop;
+ case TEXT:
+ // XXX need to manage insertion point here
+ pop();
+ if (originalMode == AFTER_HEAD) { // start tags handled in AFTER_HEAD call pushHeadPointerOntoStack() before pushing the element; remove that extra entry as well
+ silentPop();
+ }
+ mode = originalMode; // restore the mode saved when TEXT mode was entered
+ break endtagloop;
+ }
+ } // endtagloop
+ }
+
+ /**
+  * Handles a {@code template} end tag. When no XHTML {@code template}
+  * element is found on the stack the tag is reported as stray. Otherwise
+  * implied end tags are generated, the stack is popped down through the
+  * template, the list of active formatting elements is cleared up to the
+  * last marker, the template mode is popped and the insertion mode is reset.
+  *
+  * @throws SAXException
+  */
+ private void endTagTemplateInHead() throws SAXException {
+     int templatePos = findLast("template");
+     if (templatePos != TreeBuilder.NOT_FOUND_ON_STACK) {
+         generateImpliedEndTags();
+         if (errorHandler != null) {
+             if (!isCurrent("template")) {
+                 errUnclosedElements(templatePos, "template");
+             }
+         }
+         while (templatePos <= currentPtr) {
+             pop();
+         }
+         clearTheListOfActiveFormattingElementsUpToTheLastMarker();
+         popTemplateMode();
+         resetTheInsertionMode();
+     } else {
+         errStrayEndTag("template");
+     }
+ }
+
+ /**
+  * Scans the stack from the current node down to index 1 for a node whose
+  * group is TBODY_OR_THEAD_OR_TFOOT or TEMPLATE.
+  *
+  * @return the index of the first such node encountered, or 0 if none is
+  *         found above the root
+  */
+ private int findLastInTableScopeOrRootTemplateTbodyTheadTfoot() {
+     int pos = currentPtr;
+     while (pos > 0) {
+         int group = stack[pos].getGroup();
+         if (group == TreeBuilder.TEMPLATE
+                 || group == TreeBuilder.TBODY_OR_THEAD_OR_TFOOT) {
+             return pos;
+         }
+         pos--;
+     }
+     return 0;
+ }
+
+ /**
+  * Finds the topmost stack entry (index 0 excluded) that is in the XHTML
+  * namespace and has the given name. Names are compared by reference, as
+  * elsewhere in this file.
+  *
+  * @param name the local name to look for
+  * @return the stack index, or TreeBuilder.NOT_FOUND_ON_STACK
+  */
+ private int findLast(@Local String name) {
+     int pos = currentPtr;
+     while (pos > 0) {
+         StackNode<T> candidate = stack[pos];
+         if (candidate.name == name
+                 && candidate.ns == "http://www.w3.org/1999/xhtml") {
+             return pos;
+         }
+         pos--;
+     }
+     return TreeBuilder.NOT_FOUND_ON_STACK;
+ }
+
+ /**
+  * Finds the topmost XHTML element with the given name, giving up as soon
+  * as an XHTML {@code table} or {@code template} element is met first.
+  * Index 0 is never examined.
+  *
+  * @param name the local name to look for
+  * @return the stack index, or TreeBuilder.NOT_FOUND_ON_STACK
+  */
+ private int findLastInTableScope(@Local String name) {
+     for (int pos = currentPtr; pos > 0; pos--) {
+         StackNode<T> node = stack[pos];
+         if (node.ns != "http://www.w3.org/1999/xhtml") {
+             continue;
+         }
+         if (node.name == name) {
+             return pos;
+         }
+         if (node.name == "table" || node.name == "template") {
+             return TreeBuilder.NOT_FOUND_ON_STACK;
+         }
+     }
+     return TreeBuilder.NOT_FOUND_ON_STACK;
+ }
+
+ /**
+  * Finds the topmost XHTML element with the given name, giving up when an
+  * XHTML {@code button} element or any node for which {@code isScoping()}
+  * holds is met first. Index 0 is never examined.
+  *
+  * @param name the local name to look for
+  * @return the stack index, or TreeBuilder.NOT_FOUND_ON_STACK
+  */
+ private int findLastInButtonScope(@Local String name) {
+     int pos = currentPtr;
+     while (pos > 0) {
+         StackNode<T> node = stack[pos];
+         boolean inHtmlNs = node.ns == "http://www.w3.org/1999/xhtml";
+         if (inHtmlNs && node.name == name) {
+             return pos;
+         }
+         if ((inHtmlNs && node.name == "button") || node.isScoping()) {
+             return TreeBuilder.NOT_FOUND_ON_STACK;
+         }
+         pos--;
+     }
+     return TreeBuilder.NOT_FOUND_ON_STACK;
+ }
+
+ /**
+  * Finds the topmost XHTML element with the given name, giving up when a
+  * node for which {@code isScoping()} holds is met first. Index 0 is never
+  * examined.
+  *
+  * @param name the local name to look for
+  * @return the stack index, or TreeBuilder.NOT_FOUND_ON_STACK
+  */
+ private int findLastInScope(@Local String name) {
+     int pos = currentPtr;
+     while (pos > 0) {
+         StackNode<T> node = stack[pos];
+         if (node.ns == "http://www.w3.org/1999/xhtml" && node.name == name) {
+             return pos;
+         }
+         if (node.isScoping()) {
+             break;
+         }
+         pos--;
+     }
+     return TreeBuilder.NOT_FOUND_ON_STACK;
+ }
+
+ /**
+  * Finds the topmost XHTML element with the given name, giving up when an
+  * XHTML {@code ul} or {@code ol} element or any node for which
+  * {@code isScoping()} holds is met first. Index 0 is never examined.
+  *
+  * @param name the local name to look for
+  * @return the stack index, or TreeBuilder.NOT_FOUND_ON_STACK
+  */
+ private int findLastInListScope(@Local String name) {
+     int pos = currentPtr;
+     while (pos > 0) {
+         StackNode<T> node = stack[pos];
+         boolean inHtmlNs = node.ns == "http://www.w3.org/1999/xhtml";
+         if (inHtmlNs && node.name == name) {
+             return pos;
+         }
+         if (inHtmlNs && (node.name == "ul" || node.name == "ol")) {
+             return TreeBuilder.NOT_FOUND_ON_STACK;
+         }
+         if (node.isScoping()) {
+             return TreeBuilder.NOT_FOUND_ON_STACK;
+         }
+         pos--;
+     }
+     return TreeBuilder.NOT_FOUND_ON_STACK;
+ }
+
+ /**
+  * Finds the topmost node whose group is H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6,
+  * giving up when a node for which {@code isScoping()} holds is met first.
+  * Index 0 is never examined.
+  *
+  * @return the stack index, or TreeBuilder.NOT_FOUND_ON_STACK
+  */
+ private int findLastInScopeHn() {
+     int pos = currentPtr;
+     while (pos > 0) {
+         StackNode<T> node = stack[pos];
+         if (node.getGroup() == TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6) {
+             return pos;
+         }
+         if (node.isScoping()) {
+             break;
+         }
+         pos--;
+     }
+     return TreeBuilder.NOT_FOUND_ON_STACK;
+ }
+
+ /**
+  * Pops the current node repeatedly while its group is one of P, LI,
+  * DD_OR_DT, OPTION, OPTGROUP, RB_OR_RTC or RT_OR_RP, stopping early
+  * (without popping) when the current node is the XHTML element with the
+  * given name.
+  *
+  * @param name the local name of the element that must not be popped
+  * @throws SAXException
+  */
+ private void generateImpliedEndTagsExceptFor(@Local String name)
+         throws SAXException {
+     for (;;) {
+         StackNode<T> top = stack[currentPtr];
+         switch (top.getGroup()) {
+             case P:
+             case LI:
+             case DD_OR_DT:
+             case OPTION:
+             case OPTGROUP:
+             case RB_OR_RTC:
+             case RT_OR_RP:
+                 break;
+             default:
+                 return;
+         }
+         if (top.ns == "http://www.w3.org/1999/xhtml" && top.name == name) {
+             return;
+         }
+         pop();
+     }
+ }
+
+ /**
+  * Pops the current node repeatedly while its group is one of P, LI,
+  * DD_OR_DT, OPTION, OPTGROUP, RB_OR_RTC or RT_OR_RP.
+  *
+  * @throws SAXException
+  */
+ private void generateImpliedEndTags() throws SAXException {
+     for (;;) {
+         switch (stack[currentPtr].getGroup()) {
+             case P:
+             case LI:
+             case DD_OR_DT:
+             case OPTION:
+             case OPTGROUP:
+             case RB_OR_RTC:
+             case RT_OR_RP:
+                 break;
+             default:
+                 return;
+         }
+         pop();
+     }
+ }
+
+ /**
+  * @return {@code true} iff the stack holds at least two nodes and the node
+  *         at index 1 has the BODY group
+  */
+ private boolean isSecondOnStackBody() {
+     if (currentPtr < 1) {
+         return false;
+     }
+     return stack[1].getGroup() == TreeBuilder.BODY;
+ }
+
+ /**
+  * Records the document mode in {@code quirks} and forwards it to the
+  * document mode handler, if one is set. When {@code isSrcdocDocument} is
+  * set, the requested mode is ignored: {@code quirks} is cleared, the
+  * handler is told STANDARDS_MODE and the method returns without calling
+  * {@code documentMode(...)}.
+  *
+  * @param m the document mode to apply
+  * @param publicIdentifier the public identifier passed on to the handler
+  * @param systemIdentifier the system identifier passed on to the handler
+  * @param html4SpecificAdditionalErrorChecks flag passed on to the handler
+  * @throws SAXException
+  */
+ private void documentModeInternal(DocumentMode m, String publicIdentifier,
+ String systemIdentifier, boolean html4SpecificAdditionalErrorChecks)
+ throws SAXException {
+
+ if (isSrcdocDocument) {
+ // Srcdoc documents are always rendered in standards mode.
+ quirks = false;
+ if (documentModeHandler != null) {
+ documentModeHandler.documentMode(
+ DocumentMode.STANDARDS_MODE
+ // [NOCPP[
+ , null, null, false
+ // ]NOCPP]
+ );
+ }
+ return;
+ }
+
+ // Only QUIRKS_MODE sets the quirks flag; every other mode clears it.
+ quirks = (m == DocumentMode.QUIRKS_MODE);
+ if (documentModeHandler != null) {
+ documentModeHandler.documentMode(
+ m
+ // [NOCPP[
+ , publicIdentifier, systemIdentifier,
+ html4SpecificAdditionalErrorChecks
+ // ]NOCPP]
+ );
+ }
+ // [NOCPP[
+ documentMode(m, publicIdentifier, systemIdentifier,
+ html4SpecificAdditionalErrorChecks);
+ // ]NOCPP]
+ }
+
+ /**
+  * Tells whether the given doctype identifiers match one of the public
+  * identifiers tested here: the XHTML 1.0 Transitional/Frameset ones
+  * always, the HTML 4.01 Transitional/Frameset ones only when a system
+  * identifier is present. Comparison ignores ASCII case.
+  *
+  * @param publicIdentifier the doctype public identifier
+  * @param systemIdentifier the doctype system identifier, may be null
+  * @return {@code true} iff one of the identifiers matched
+  */
+ private boolean isAlmostStandards(String publicIdentifier,
+         String systemIdentifier) {
+     if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+             "-//w3c//dtd xhtml 1.0 transitional//en", publicIdentifier)
+             || Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                     "-//w3c//dtd xhtml 1.0 frameset//en", publicIdentifier)) {
+         return true;
+     }
+     if (systemIdentifier == null) {
+         return false;
+     }
+     return Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+             "-//w3c//dtd html 4.01 transitional//en", publicIdentifier)
+             || Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                     "-//w3c//dtd html 4.01 frameset//en", publicIdentifier);
+ }
+
+ /**
+  * Tells whether a doctype matches one of the conditions tested here:
+  * forced quirks, a name other than HTML_LOCAL, a public identifier that
+  * starts with an entry of QUIRKY_PUBLIC_IDS or equals one of three fixed
+  * literals, an HTML 4.01 Transitional/Frameset public identifier without a
+  * system identifier, or the IBM XHTML transitional system identifier.
+  *
+  * @param name the doctype name
+  * @param publicIdentifier the doctype public identifier, may be null
+  * @param systemIdentifier the doctype system identifier, may be null
+  * @param forceQuirks whether quirks were forced
+  * @return {@code true} iff one of the conditions matched
+  */
+ private boolean isQuirky(@Local String name, String publicIdentifier,
+         String systemIdentifier, boolean forceQuirks) {
+     if (forceQuirks || name != HTML_LOCAL) {
+         return true;
+     }
+     if (publicIdentifier != null) {
+         int idx = 0;
+         while (idx < TreeBuilder.QUIRKY_PUBLIC_IDS.length) {
+             if (Portability.lowerCaseLiteralIsPrefixOfIgnoreAsciiCaseString(
+                     TreeBuilder.QUIRKY_PUBLIC_IDS[idx], publicIdentifier)) {
+                 return true;
+             }
+             idx++;
+         }
+         if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                 "-//w3o//dtd w3 html strict 3.0//en//", publicIdentifier)) {
+             return true;
+         }
+         if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                 "-/w3c/dtd html 4.0 transitional/en", publicIdentifier)) {
+             return true;
+         }
+         if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                 "html", publicIdentifier)) {
+             return true;
+         }
+     }
+     if (systemIdentifier == null) {
+         return Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                 "-//w3c//dtd html 4.01 transitional//en", publicIdentifier)
+                 || Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                         "-//w3c//dtd html 4.01 frameset//en", publicIdentifier);
+     }
+     return Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+             "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd",
+             systemIdentifier);
+ }
+
+ /**
+  * Closes the cell at the given stack position: generates implied end
+  * tags, reports unclosed elements if the cell is not the current node,
+  * pops the stack down through the cell, clears the list of active
+  * formatting elements up to the last marker and switches to IN_ROW.
+  *
+  * @param eltPos the stack index of the cell element
+  * @throws SAXException
+  */
+ private void closeTheCell(int eltPos) throws SAXException {
+     generateImpliedEndTags();
+     if (errorHandler != null && currentPtr != eltPos) {
+         errUnclosedElementsCell(eltPos);
+     }
+     while (eltPos <= currentPtr) {
+         pop();
+     }
+     clearTheListOfActiveFormattingElementsUpToTheLastMarker();
+     mode = IN_ROW;
+ }
+
+ /**
+  * Finds the topmost XHTML {@code td} or {@code th} element, giving up as
+  * soon as an XHTML {@code table} or {@code template} element is met
+  * first. Index 0 is never examined.
+  *
+  * @return the stack index, or TreeBuilder.NOT_FOUND_ON_STACK
+  */
+ private int findLastInTableScopeTdTh() {
+     for (int pos = currentPtr; pos > 0; pos--) {
+         StackNode<T> node = stack[pos];
+         if (node.ns != "http://www.w3.org/1999/xhtml") {
+             continue;
+         }
+         @Local String name = node.name;
+         if (name == "td" || name == "th") {
+             return pos;
+         }
+         if (name == "table" || name == "template") {
+             return TreeBuilder.NOT_FOUND_ON_STACK;
+         }
+     }
+     return TreeBuilder.NOT_FOUND_ON_STACK;
+ }
+
+ /**
+  * Pops nodes until the node at {@code eltPos} is the current node. The
+  * node at {@code eltPos} itself is kept. Popping also stops early at an
+  * XHTML TEMPLATE-group node when the target's group is TABLE,
+  * TBODY_OR_THEAD_OR_TFOOT or TR, or when {@code eltPos} is 0.
+  *
+  * @param eltPos the stack index to clear back to
+  * @throws SAXException
+  */
+ private void clearStackBackTo(int eltPos) throws SAXException {
+     int targetGroup = stack[eltPos].getGroup();
+     boolean stopAtTemplate = targetGroup == TABLE
+             || targetGroup == TBODY_OR_THEAD_OR_TFOOT
+             || targetGroup == TR
+             || eltPos == 0;
+     // Strictly greater: the element at eltPos stays on the stack.
+     while (currentPtr > eltPos) {
+         StackNode<T> top = stack[currentPtr];
+         if (stopAtTemplate
+                 && top.ns == "http://www.w3.org/1999/xhtml"
+                 && top.getGroup() == TEMPLATE) {
+             return;
+         }
+         pop();
+     }
+ }
+
+ /**
+  * Sets {@code mode} by walking the stack from the current node towards
+  * index 0 and picking the mode that corresponds to the first node whose
+  * name (or namespace) matches one of the cases below. At index 0, when
+  * parsing a fragment, the context element's name and namespace are used
+  * instead of the root node's, unless the context is an XHTML
+  * {@code td}/{@code th}, in which case the mode is derived from
+  * {@code framesetOk} right away.
+  */
+ private void resetTheInsertionMode() {
+ StackNode<T> node;
+ @Local String name;
+ @NsUri String ns;
+ for (int i = currentPtr; i >= 0; i--) {
+ node = stack[i];
+ name = node.name;
+ ns = node.ns;
+ if (i == 0) {
+ if (!(contextNamespace == "http://www.w3.org/1999/xhtml" && (contextName == "td" || contextName == "th"))) {
+ if (fragment) {
+ // Make sure we are parsing a fragment otherwise the context element doesn't make sense.
+ name = contextName;
+ ns = contextNamespace;
+ }
+ } else {
+ mode = framesetOk ? FRAMESET_OK : IN_BODY; // XXX from Hixie's email
+ return;
+ }
+ }
+ if ("select" == name) {
+ // Look from this position towards the root: an XHTML template ends
+ // the search, an XHTML table selects IN_SELECT_IN_TABLE. Index 0 is
+ // not examined by this inner loop.
+ int ancestorIndex = i;
+ while (ancestorIndex > 0) {
+ StackNode<T> ancestor = stack[ancestorIndex--];
+ if ("http://www.w3.org/1999/xhtml" == ancestor.ns) {
+ if ("template" == ancestor.name) {
+ break;
+ }
+ if ("table" == ancestor.name) {
+ mode = IN_SELECT_IN_TABLE;
+ return;
+ }
+ }
+ }
+ mode = IN_SELECT;
+ return;
+ } else if ("td" == name || "th" == name) {
+ mode = IN_CELL;
+ return;
+ } else if ("tr" == name) {
+ mode = IN_ROW;
+ return;
+ } else if ("tbody" == name || "thead" == name || "tfoot" == name) {
+ mode = IN_TABLE_BODY;
+ return;
+ } else if ("caption" == name) {
+ mode = IN_CAPTION;
+ return;
+ } else if ("colgroup" == name) {
+ mode = IN_COLUMN_GROUP;
+ return;
+ } else if ("table" == name) {
+ mode = IN_TABLE;
+ return;
+ } else if ("http://www.w3.org/1999/xhtml" != ns) {
+ // Note: the name tests above do not look at the namespace; this is
+ // the first point at which a non-XHTML node ends the walk.
+ mode = framesetOk ? FRAMESET_OK : IN_BODY;
+ return;
+ } else if ("template" == name) {
+ assert templateModePtr >= 0;
+ mode = templateModeStack[templateModePtr];
+ return;
+ } else if ("head" == name) {
+ if (name == contextName) {
+ mode = framesetOk ? FRAMESET_OK : IN_BODY; // really
+ } else {
+ mode = IN_HEAD;
+ }
+ return;
+ } else if ("body" == name) {
+ mode = framesetOk ? FRAMESET_OK : IN_BODY;
+ return;
+ } else if ("frameset" == name) {
+ // TODO: Fragment case. Add error reporting.
+ mode = IN_FRAMESET;
+ return;
+ } else if ("html" == name) {
+ if (headPointer == null) {
+ // TODO: Fragment case. Add error reporting.
+ mode = BEFORE_HEAD;
+ } else {
+ mode = AFTER_HEAD;
+ }
+ return;
+ } else if (i == 0) {
+ // Nothing matched all the way down to the root.
+ mode = framesetOk ? FRAMESET_OK : IN_BODY;
+ return;
+ }
+ }
+ }
+
+ /**
+  * Closes an open {@code p} element found by
+  * {@code findLastInButtonScope("p")}, if there is one: generates implied
+  * end tags except for {@code p}, reports unclosed elements when the
+  * {@code p} is not the current node, then pops the stack down through it.
+  * Does nothing when no such element is found.
+  *
+  * @throws SAXException
+  */
+ private void implicitlyCloseP() throws SAXException {
+     int pPos = findLastInButtonScope("p");
+     if (pPos != TreeBuilder.NOT_FOUND_ON_STACK) {
+         generateImpliedEndTagsExceptFor("p");
+         if (errorHandler != null && currentPtr != pPos) {
+             errUnclosedElementsImplied(pPos, "p");
+         }
+         while (pPos <= currentPtr) {
+             pop();
+         }
+     }
+ }
+
+ /**
+  * Nulls out the stack slot at {@code currentPtr}. Always returns
+  * {@code true} so that it can be invoked from an {@code assert}
+  * statement, as the callers in this file do.
+  *
+  * @return always {@code true}
+  */
+ private boolean debugOnlyClearLastStackSlot() {
+ stack[currentPtr] = null;
+ return true;
+ }
+
+ /**
+  * Nulls out the slot at {@code listPtr} in the list of active formatting
+  * elements. Always returns {@code true} so that it can be invoked from an
+  * {@code assert} statement, as the callers in this file do.
+  *
+  * @return always {@code true}
+  */
+ private boolean debugOnlyClearLastListSlot() {
+ listOfActiveFormattingElements[listPtr] = null;
+ return true;
+ }
+
+ /**
+  * Pushes an insertion mode onto the template mode stack, growing the
+  * backing array by 64 slots when it is full.
+  *
+  * @param mode the insertion mode to push
+  */
+ private void pushTemplateMode(int mode) {
+     ++templateModePtr;
+     int capacity = templateModeStack.length;
+     if (capacity == templateModePtr) {
+         int[] grown = new int[capacity + 64];
+         System.arraycopy(templateModeStack, 0, grown, 0, capacity);
+         templateModeStack = grown;
+     }
+     templateModeStack[templateModePtr] = mode;
+ }
+
+ @SuppressWarnings("unchecked") private void push(StackNode<T> node) throws SAXException {
+ currentPtr++;
+ if (currentPtr == stack.length) {
+ StackNode<T>[] newStack = new StackNode[stack.length + 64];
+ System.arraycopy(stack, 0, newStack, 0, stack.length);
+ stack = newStack;
+ }
+ stack[currentPtr] = node;
+ elementPushed(node.ns, node.popName, node.node);
+ }
+
+ @SuppressWarnings("unchecked") private void silentPush(StackNode<T> node) throws SAXException {
+ currentPtr++;
+ if (currentPtr == stack.length) {
+ StackNode<T>[] newStack = new StackNode[stack.length + 64];
+ System.arraycopy(stack, 0, newStack, 0, stack.length);
+ stack = newStack;
+ }
+ stack[currentPtr] = node;
+ }
+
+ @SuppressWarnings("unchecked") private void append(StackNode<T> node) {
+ listPtr++;
+ if (listPtr == listOfActiveFormattingElements.length) {
+ StackNode<T>[] newList = new StackNode[listOfActiveFormattingElements.length + 64];
+ System.arraycopy(listOfActiveFormattingElements, 0, newList, 0,
+ listOfActiveFormattingElements.length);
+ listOfActiveFormattingElements = newList;
+ }
+ listOfActiveFormattingElements[listPtr] = node;
+ }
+
+ /**
+  * Appends a marker to the list of active formatting elements. A marker
+  * is represented by a {@code null} entry.
+  */
+ @Inline private void insertMarker() {
+ append(null);
+ }
+
+ /**
+  * Removes entries from the end of the list of active formatting
+  * elements, releasing each one, up to and including the last marker
+  * ({@code null} entry). If there is no marker the whole list is emptied.
+  */
+ private void clearTheListOfActiveFormattingElementsUpToTheLastMarker() {
+     for (; listPtr > -1; --listPtr) {
+         StackNode<T> entry = listOfActiveFormattingElements[listPtr];
+         if (entry == null) {
+             // Drop the marker itself and stop.
+             --listPtr;
+             return;
+         }
+         entry.release();
+     }
+ }
+
+ /**
+  * Tells whether the current node is the XHTML element with the given
+  * name. Both namespace and name are compared by reference.
+  *
+  * @param name the local name to compare against
+  * @return {@code true} iff the current node matches
+  */
+ @Inline private boolean isCurrent(@Local String name) {
+ return stack[currentPtr].ns == "http://www.w3.org/1999/xhtml" &&
+ name == stack[currentPtr].name;
+ }
+
+ /**
+  * Removes the node at the given stack position. The current node is
+  * removed with {@code pop()}. For any other position {@code fatal()} is
+  * called, the node is released and the nodes above it are shifted down
+  * by one slot.
+  *
+  * @param pos the stack index of the node to remove
+  * @throws SAXException
+  */
+ private void removeFromStack(int pos) throws SAXException {
+     if (pos != currentPtr) {
+         fatal();
+         stack[pos].release();
+         int nodesAbove = currentPtr - pos;
+         System.arraycopy(stack, pos + 1, stack, pos, nodesAbove);
+         assert debugOnlyClearLastStackSlot();
+         currentPtr--;
+         return;
+     }
+     pop();
+ }
+
+ /**
+  * Removes the given node from the stack. The current node is removed
+  * with {@code pop()}. For a node further down {@code fatal()} is called,
+  * the node is released and the nodes above it are shifted down by one
+  * slot. Does nothing when the node is not on the stack.
+  *
+  * @param node the node to remove, compared by reference
+  * @throws SAXException
+  */
+ private void removeFromStack(StackNode<T> node) throws SAXException {
+     if (stack[currentPtr] == node) {
+         pop();
+     } else {
+         int pos = currentPtr - 1;
+         while (pos >= 0 && stack[pos] != node) {
+             pos--;
+         }
+         if (pos == -1) {
+             // dead code?
+             return;
+         }
+         fatal();
+         node.release();
+         System.arraycopy(stack, pos + 1, stack, pos, currentPtr - pos);
+         // After the shift the old top slot still holds a duplicate
+         // reference; clear it in assertion-enabled runs, consistently
+         // with removeFromStack(int).
+         assert debugOnlyClearLastStackSlot();
+         currentPtr--;
+     }
+ }
+
+ /**
+  * Removes the entry at the given position from the list of active
+  * formatting elements, releasing it and shifting later entries down by
+  * one slot.
+  *
+  * @param pos the list index of the entry to remove; must not refer to a
+  *        marker ({@code null} entry)
+  */
+ private void removeFromListOfActiveFormattingElements(int pos) {
+     assert listOfActiveFormattingElements[pos] != null;
+     listOfActiveFormattingElements[pos].release();
+     if (pos != listPtr) {
+         assert pos < listPtr;
+         System.arraycopy(listOfActiveFormattingElements, pos + 1,
+                 listOfActiveFormattingElements, pos, listPtr - pos);
+     }
+     assert debugOnlyClearLastListSlot();
+     listPtr--;
+ }
+
+ /**
+  * Adoption agency algorithm.
+  *
+  * The outer loop below runs at most 8 times; each pass looks up the
+  * formatting element in the list of active formatting elements, locates
+  * it and its furthest block on the stack, and re-parents nodes between
+  * them.
+  *
+  * @param name subject as described in the specified algorithm.
+  * @return Returns true if the algorithm has completed and there is nothing remaining to
+  * be done. Returns false if the algorithm needs to "act as described in the 'any other
+  * end tag' entry" as described in the specified algorithm.
+  * @throws SAXException
+  */
+ private boolean adoptionAgencyEndTag(@Local String name) throws SAXException {
+ // This check intends to ensure that for properly nested tags, closing tags will match
+ // against the stack instead of the listOfActiveFormattingElements.
+ if (stack[currentPtr].ns == "http://www.w3.org/1999/xhtml" &&
+ stack[currentPtr].name == name &&
+ findInListOfActiveFormattingElements(stack[currentPtr]) == -1) {
+ // If the current element matches the name but isn't on the list of active
+ // formatting elements, then it is possible that the list was mangled by the Noah's Ark
+ // clause. In this case, we want to match the end tag against the stack instead of
+ // proceeding with the AAA algorithm that may match against the list of
+ // active formatting elements (and possibly mangle the tree in unexpected ways).
+ pop();
+ return true;
+ }
+
+ // If you crash around here, perhaps some stack node variable claimed to
+ // be a weak ref isn't.
+ for (int i = 0; i < 8; ++i) {
+ // Search the list backwards for an entry with this name, stopping at
+ // the last marker (null entry).
+ int formattingEltListPos = listPtr;
+ while (formattingEltListPos > -1) {
+ StackNode<T> listNode = listOfActiveFormattingElements[formattingEltListPos]; // weak ref
+ if (listNode == null) {
+ formattingEltListPos = -1;
+ break;
+ } else if (listNode.name == name) {
+ break;
+ }
+ formattingEltListPos--;
+ }
+ if (formattingEltListPos == -1) {
+ return false;
+ }
+ // this *looks* like a weak ref to the list of formatting elements
+ StackNode<T> formattingElt = listOfActiveFormattingElements[formattingEltListPos];
+ // Locate the formatting element on the stack, noting whether a
+ // scoping node lies above it.
+ int formattingEltStackPos = currentPtr;
+ boolean inScope = true;
+ while (formattingEltStackPos > -1) {
+ StackNode<T> node = stack[formattingEltStackPos]; // weak ref
+ if (node == formattingElt) {
+ break;
+ } else if (node.isScoping()) {
+ inScope = false;
+ }
+ formattingEltStackPos--;
+ }
+ if (formattingEltStackPos == -1) {
+ // In the list but not on the stack: report and drop the list entry.
+ errNoElementToCloseButEndTagSeen(name);
+ removeFromListOfActiveFormattingElements(formattingEltListPos);
+ return true;
+ }
+ if (!inScope) {
+ errNoElementToCloseButEndTagSeen(name);
+ return true;
+ }
+ // stackPos now points to the formatting element and it is in scope
+ if (formattingEltStackPos != currentPtr) {
+ errEndTagViolatesNestingRules(name);
+ }
+ // The furthest block is the first node above the formatting element
+ // for which isSpecial() holds.
+ int furthestBlockPos = formattingEltStackPos + 1;
+ while (furthestBlockPos <= currentPtr) {
+ StackNode<T> node = stack[furthestBlockPos]; // weak ref
+ assert furthestBlockPos > 0: "How is formattingEltStackPos + 1 not > 0?";
+ if (node.isSpecial()) {
+ break;
+ }
+ furthestBlockPos++;
+ }
+ if (furthestBlockPos > currentPtr) {
+ // no furthest block
+ while (currentPtr >= formattingEltStackPos) {
+ pop();
+ }
+ removeFromListOfActiveFormattingElements(formattingEltListPos);
+ return true;
+ }
+ StackNode<T> commonAncestor = stack[formattingEltStackPos - 1]; // weak ref
+ StackNode<T> furthestBlock = stack[furthestBlockPos]; // weak ref
+ // detachFromParent(furthestBlock.node); XXX AAA CHANGE
+ int bookmark = formattingEltListPos;
+ int nodePos = furthestBlockPos;
+ StackNode<T> lastNode = furthestBlock; // weak ref
+ // Inner loop: walk down the stack from the furthest block to the
+ // formatting element; j counts the iterations.
+ int j = 0;
+ for (;;) {
+ ++j;
+ nodePos--;
+ if (nodePos == formattingEltStackPos) {
+ break;
+ }
+ StackNode<T> node = stack[nodePos]; // weak ref
+ int nodeListPos = findInListOfActiveFormattingElements(node);
+
+ // From the fourth iteration on, nodes found in the list are removed
+ // from the list (and then, below, from the stack).
+ if (j > 3 && nodeListPos != -1) {
+ removeFromListOfActiveFormattingElements(nodeListPos);
+
+ // Adjust the indices into the list to account
+ // for the removal of nodeListPos.
+ if (nodeListPos <= formattingEltListPos) {
+ formattingEltListPos--;
+ }
+ if (nodeListPos <= bookmark) {
+ bookmark--;
+ }
+
+ // Update position to reflect removal from list.
+ nodeListPos = -1;
+ }
+
+ if (nodeListPos == -1) {
+ assert formattingEltStackPos < nodePos;
+ assert bookmark < nodePos;
+ assert furthestBlockPos > nodePos;
+ removeFromStack(nodePos); // node is now a bad pointer in C++
+ furthestBlockPos--;
+ continue;
+ }
+ // now node is both on stack and in the list
+ if (nodePos == furthestBlockPos) {
+ bookmark = nodeListPos + 1;
+ }
+ // if (hasChildren(node.node)) { XXX AAA CHANGE
+ assert node == listOfActiveFormattingElements[nodeListPos];
+ assert node == stack[nodePos];
+ // Replace node, both on the stack and in the list, with a fresh
+ // clone that takes over the original's attributes.
+ T clone = createElement("http://www.w3.org/1999/xhtml",
+ node.name, node.attributes.cloneAttributes(null), commonAncestor.node);
+ StackNode<T> newNode = new StackNode<T>(node.getFlags(), node.ns,
+ node.name, clone, node.popName, node.attributes
+ // [NOCPP[
+ , node.getLocator()
+ // ]NOCPP]
+ ); // creation ownership goes to stack
+ node.dropAttributes(); // adopt ownership to newNode
+ stack[nodePos] = newNode;
+ newNode.retain(); // retain for list
+ listOfActiveFormattingElements[nodeListPos] = newNode;
+ node.release(); // release from stack
+ node.release(); // release from list
+ node = newNode;
+ // } XXX AAA CHANGE
+ detachFromParent(lastNode.node);
+ appendElement(lastNode.node, node.node);
+ lastNode = node;
+ }
+ // Re-attach the last node under the common ancestor, or into the
+ // foster parent when the common ancestor is foster-parenting.
+ if (commonAncestor.isFosterParenting()) {
+ fatal();
+ detachFromParent(lastNode.node);
+ insertIntoFosterParent(lastNode.node);
+ } else {
+ detachFromParent(lastNode.node);
+ appendElement(lastNode.node, commonAncestor.node);
+ }
+ // Clone the formatting element, move the furthest block's children
+ // into the clone and append the clone to the furthest block.
+ T clone = createElement("http://www.w3.org/1999/xhtml",
+ formattingElt.name,
+ formattingElt.attributes.cloneAttributes(null), furthestBlock.node);
+ StackNode<T> formattingClone = new StackNode<T>(
+ formattingElt.getFlags(), formattingElt.ns,
+ formattingElt.name, clone, formattingElt.popName,
+ formattingElt.attributes
+ // [NOCPP[
+ , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+ // ]NOCPP]
+ ); // Ownership transfers to stack below
+ formattingElt.dropAttributes(); // transfer ownership to
+ // formattingClone
+ appendChildrenToNewParent(furthestBlock.node, clone);
+ appendElement(clone, furthestBlock.node);
+ // Swap the old formatting element for its clone: in the list at the
+ // bookmark, and on the stack right after the furthest block.
+ removeFromListOfActiveFormattingElements(formattingEltListPos);
+ insertIntoListOfActiveFormattingElements(formattingClone, bookmark);
+ assert formattingEltStackPos < furthestBlockPos;
+ removeFromStack(formattingEltStackPos);
+ // furthestBlockPos is now off by one and points to the slot after
+ // it
+ insertIntoStack(formattingClone, furthestBlockPos);
+ }
+ return true;
+ }
+
+ /**
+  * Inserts a node into the stack at the given position. Inserting just
+  * past the current node is a regular {@code push}; otherwise the nodes
+  * from {@code position} upwards are shifted up by one slot and
+  * {@code elementPushed} is not called.
+  *
+  * @param node the node to insert
+  * @param position the stack index the node should occupy
+  * @throws SAXException
+  */
+ private void insertIntoStack(StackNode<T> node, int position)
+         throws SAXException {
+     assert currentPtr + 1 < stack.length;
+     assert position <= currentPtr + 1;
+     if (position != currentPtr + 1) {
+         int nodesToShift = (currentPtr - position) + 1;
+         System.arraycopy(stack, position, stack, position + 1,
+                 nodesToShift);
+         currentPtr++;
+         stack[position] = node;
+     } else {
+         push(node);
+     }
+ }
+
+ /**
+  * Inserts an entry into the list of active formatting elements at the
+  * given position, retaining it and shifting the entries from
+  * {@code bookmark} onwards up by one slot.
+  *
+  * @param formattingClone the entry to insert
+  * @param bookmark the list index the entry should occupy
+  */
+ private void insertIntoListOfActiveFormattingElements(
+         StackNode<T> formattingClone, int bookmark) {
+     formattingClone.retain();
+     assert listPtr + 1 < listOfActiveFormattingElements.length;
+     int entriesToShift = (listPtr - bookmark) + 1;
+     if (entriesToShift > 0) {
+         System.arraycopy(listOfActiveFormattingElements, bookmark,
+                 listOfActiveFormattingElements, bookmark + 1,
+                 entriesToShift);
+     }
+     listPtr++;
+     listOfActiveFormattingElements[bookmark] = formattingClone;
+ }
+
+ /**
+  * Searches the list of active formatting elements from the end for the
+  * given node, compared by reference.
+  *
+  * @param node the node to look for
+  * @return the list index of the node, or -1 when it is not in the list
+  */
+ private int findInListOfActiveFormattingElements(StackNode<T> node) {
+     int pos = listPtr;
+     while (pos >= 0) {
+         if (listOfActiveFormattingElements[pos] == node) {
+             return pos;
+         }
+         pos--;
+     }
+     return -1;
+ }
+
+ /**
+  * Searches the list of active formatting elements from the end for an
+  * entry with the given name, stopping at the last marker ({@code null}
+  * entry).
+  *
+  * @param name the local name to look for
+  * @return the list index of the entry, or -1 when a marker or the start
+  *         of the list is reached first
+  */
+ private int findInListOfActiveFormattingElementsContainsBetweenEndAndLastMarker(
+         @Local String name) {
+     int pos = listPtr;
+     while (pos >= 0) {
+         StackNode<T> entry = listOfActiveFormattingElements[pos];
+         if (entry == null) {
+             break;
+         }
+         if (entry.name == name) {
+             return pos;
+         }
+         pos--;
+     }
+     return -1;
+ }
+
+
+ /**
+  * Counts the entries after the last marker in the list of active
+  * formatting elements that have the given name and equal attributes.
+  * When there are at least three, the earliest of them is removed from
+  * the list.
+  *
+  * @param name the local name to match
+  * @param attributes the attributes to match via {@code equalsAnother}
+  * @throws SAXException
+  */
+ private void maybeForgetEarlierDuplicateFormattingElement(
+         @Local String name, HtmlAttributes attributes) throws SAXException {
+     int earliestMatch = -1;
+     int matches = 0;
+     int pos = listPtr;
+     while (pos >= 0) {
+         StackNode<T> entry = listOfActiveFormattingElements[pos];
+         if (entry == null) {
+             break;
+         }
+         if (entry.name == name && entry.attributes.equalsAnother(attributes)) {
+             matches++;
+             earliestMatch = pos;
+         }
+         pos--;
+     }
+     if (matches >= 3) {
+         removeFromListOfActiveFormattingElements(earliestMatch);
+     }
+ }
+
+ /**
+  * Finds the topmost XHTML element with the given name, falling back to
+  * the root position.
+  *
+  * @param name the local name to look for
+  * @return the stack index of the element, or 0 when none is found above
+  *         index 0
+  */
+ private int findLastOrRoot(@Local String name) {
+     int pos = currentPtr;
+     while (pos > 0) {
+         StackNode<T> candidate = stack[pos];
+         if (candidate.name == name
+                 && candidate.ns == "http://www.w3.org/1999/xhtml") {
+             return pos;
+         }
+         pos--;
+     }
+     return 0;
+ }
+
+ /**
+  * Finds the topmost node with the given group, falling back to the root
+  * position.
+  *
+  * @param group the element group to look for
+  * @return the stack index of the node, or 0 when none is found above
+  *         index 0
+  */
+ private int findLastOrRoot(int group) {
+     int pos = currentPtr;
+     while (pos > 0) {
+         if (group == stack[pos].getGroup()) {
+             return pos;
+         }
+         pos--;
+     }
+     return 0;
+ }
+
+ /**
+  * Tries to add the given attributes to the body element, which is
+  * expected at stack index 1.
+  *
+  * @param attributes the attributes
+  * @return <code>true</code> iff the stack has a BODY-group node at index
+  *         1 and the attributes were handed to it
+  * @throws SAXException
+  */
+ private boolean addAttributesToBody(HtmlAttributes attributes)
+         throws SAXException {
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     // ]NOCPP]
+     if (currentPtr < 1) {
+         return false;
+     }
+     StackNode<T> second = stack[1];
+     if (second.getGroup() != TreeBuilder.BODY) {
+         return false;
+     }
+     addAttributesToElement(second.node, attributes);
+     return true;
+ }
+
+ /**
+  * Adds the given attributes to the element at the bottom of the stack
+  * (index 0), after checking them against the XHTML namespace.
+  *
+  * @param attributes the attributes
+  * @throws SAXException
+  */
+ private void addAttributesToHtml(HtmlAttributes attributes)
+ throws SAXException {
+ // [NOCPP[
+ checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+ // ]NOCPP]
+ addAttributesToElement(stack[0].node, attributes);
+ }
+
+ /**
+  * Pushes a new HEAD stack node wrapping {@code headPointer} back onto
+  * the stack via {@code silentPush}, so {@code elementPushed} is not
+  * called. Asserts that a head pointer exists and that the mode is
+  * AFTER_HEAD; {@code fatal()} is called before the push.
+  *
+  * @throws SAXException
+  */
+ private void pushHeadPointerOntoStack() throws SAXException {
+ assert headPointer != null;
+ assert mode == AFTER_HEAD;
+ fatal();
+ silentPush(new StackNode<T>(ElementName.HEAD, headPointer
+ // [NOCPP[
+ , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+ // ]NOCPP]
+ ));
+ }
+
+ /**
+  * Re-creates, as children of the current node, the trailing entries of
+  * the list of active formatting elements that are no longer on the
+  * stack. Does nothing when the list is empty, or when its last entry is
+  * a marker or is still on the stack.
+  *
+  * @throws SAXException
+  */
+ private void reconstructTheActiveFormattingElements() throws SAXException {
+ if (listPtr == -1) {
+ return;
+ }
+ StackNode<T> mostRecent = listOfActiveFormattingElements[listPtr];
+ if (mostRecent == null || isInStack(mostRecent)) {
+ return;
+ }
+ // Rewind to just before the earliest trailing entry that needs
+ // reconstructing: stop at the start of the list, at a marker, or at an
+ // entry that is still on the stack.
+ int entryPos = listPtr;
+ for (;;) {
+ entryPos--;
+ if (entryPos == -1) {
+ break;
+ }
+ if (listOfActiveFormattingElements[entryPos] == null) {
+ break;
+ }
+ if (isInStack(listOfActiveFormattingElements[entryPos])) {
+ break;
+ }
+ }
+ // Advance again, cloning each entry under the then-current node and
+ // replacing the list entry with the clone.
+ while (entryPos < listPtr) {
+ entryPos++;
+ StackNode<T> entry = listOfActiveFormattingElements[entryPos];
+ StackNode<T> currentNode = stack[currentPtr];
+
+ T clone;
+ if (currentNode.isFosterParenting()) {
+ clone = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", entry.name,
+ entry.attributes.cloneAttributes(null));
+ } else {
+ clone = createElement("http://www.w3.org/1999/xhtml", entry.name,
+ entry.attributes.cloneAttributes(null), currentNode.node);
+ appendElement(clone, currentNode.node);
+ }
+
+ StackNode<T> entryClone = new StackNode<T>(entry.getFlags(),
+ entry.ns, entry.name, clone, entry.popName,
+ entry.attributes
+ // [NOCPP[
+ , entry.getLocator()
+ // ]NOCPP]
+ );
+
+ entry.dropAttributes(); // transfer ownership to entryClone
+
+ push(entryClone);
+ // stack takes ownership of the local variable
+ listOfActiveFormattingElements[entryPos] = entryClone;
+ // overwriting the old entry on the list, so release & retain
+ entry.release();
+ entryClone.retain();
+ }
+ }
+
+ /**
+  * Inserts an existing child node at the foster parent location. When
+  * the topmost TEMPLATE-group node is at or above the topmost TABLE-group
+  * node, the child is appended to that template node; otherwise it is
+  * foster-parented relative to the table node and the node just below it
+  * on the stack.
+  *
+  * @param child the node to insert
+  * @throws SAXException
+  */
+ private void insertIntoFosterParent(T child) throws SAXException {
+     int tablePos = findLastOrRoot(TreeBuilder.TABLE);
+     int templatePos = findLastOrRoot(TreeBuilder.TEMPLATE);
+
+     if (templatePos < tablePos) {
+         T table = stack[tablePos].node;
+         insertFosterParentedChild(child, table, stack[tablePos - 1].node);
+     } else {
+         appendElement(child, stack[templatePos].node);
+     }
+ }
+
+ /**
+  * Convenience overload that delegates to the four-argument variant with
+  * a {@code null} form.
+  *
+  * @param ns the namespace URI of the element to create
+  * @param name the local name of the element to create
+  * @param attributes the attributes for the new element
+  * @return the created element
+  * @throws SAXException
+  */
+ private T createAndInsertFosterParentedElement(@NsUri String ns, @Local String name,
+ HtmlAttributes attributes) throws SAXException {
+ return createAndInsertFosterParentedElement(ns, name, attributes, null);
+ }
+
+ /**
+  * Creates an element and inserts it at the foster parent location. When
+  * the topmost TEMPLATE-group node is at or above the topmost TABLE-group
+  * node, the element is created for and appended to that template node;
+  * otherwise creation and insertion are delegated to the overload taking
+  * the table node and the node just below it on the stack.
+  *
+  * @param ns the namespace URI of the element to create
+  * @param name the local name of the element to create
+  * @param attributes the attributes for the new element
+  * @param form the form argument passed on to element creation
+  * @return the created element
+  * @throws SAXException
+  */
+ private T createAndInsertFosterParentedElement(@NsUri String ns, @Local String name,
+         HtmlAttributes attributes, T form) throws SAXException {
+     int tablePos = findLastOrRoot(TreeBuilder.TABLE);
+     int templatePos = findLastOrRoot(TreeBuilder.TEMPLATE);
+
+     if (templatePos < tablePos) {
+         T table = stack[tablePos].node;
+         return createAndInsertFosterParentedElement(ns, name, attributes, form, table, stack[tablePos - 1].node);
+     }
+
+     T child = createElement(ns, name, attributes, form, stack[templatePos].node);
+     appendElement(child, stack[templatePos].node);
+     return child;
+ }
+
+ /**
+  * Tells whether the given node is on the stack, compared by reference.
+  *
+  * @param node the node to look for
+  * @return {@code true} iff the node occupies some slot from index 0 up to
+  *         the current node
+  */
+ private boolean isInStack(StackNode<T> node) {
+     int pos = currentPtr;
+     while (pos >= 0) {
+         if (node == stack[pos]) {
+             return true;
+         }
+         pos--;
+     }
+     return false;
+ }
+
+ /**
+  * Pops the template mode stack by decrementing its pointer. The backing
+  * array is left untouched.
+  */
+ private void popTemplateMode() {
+ templateModePtr--;
+ }
+
+ /**
+  * Pops the current node off the stack, notifies {@code elementPopped}
+  * and releases the node.
+  *
+  * @throws SAXException
+  */
+ private void pop() throws SAXException {
+ StackNode<T> node = stack[currentPtr];
+ // The slot is nulled only in assertion-enabled runs.
+ assert debugOnlyClearLastStackSlot();
+ currentPtr--;
+ elementPopped(node.ns, node.popName, node.node);
+ node.release();
+ }
+
+ /**
+  * Pops the current node off the stack and releases it. Unlike
+  * {@code pop}, {@code elementPopped} is not called.
+  *
+  * @throws SAXException
+  */
+ private void silentPop() throws SAXException {
+ StackNode<T> node = stack[currentPtr];
+ // The slot is nulled only in assertion-enabled runs.
+ assert debugOnlyClearLastStackSlot();
+ currentPtr--;
+ node.release();
+ }
+
+ /**
+  * Pops the current node off the stack like {@code pop}, additionally
+  * calling {@code markMalformedIfScript} on the node before
+  * {@code elementPopped}.
+  *
+  * @throws SAXException
+  */
+ private void popOnEof() throws SAXException {
+ StackNode<T> node = stack[currentPtr];
+ // The slot is nulled only in assertion-enabled runs.
+ assert debugOnlyClearLastStackSlot();
+ currentPtr--;
+ markMalformedIfScript(node.node);
+ elementPopped(node.ns, node.popName, node.node);
+ node.release();
+ }
+
+ // [NOCPP[
+ /**
+  * Reports errors for the xmlns attributes held by {@code attributes}
+  * when an error handler is installed, then lets the attribute holder
+  * process non-NCName attribute names according to the name policy.
+  *
+  * For each xmlns attribute: {@code xmlns} must equal {@code ns} (and is
+  * an error outright when {@code html4} is set); outside the XHTML
+  * namespace {@code xmlns:xlink} must equal the XLink namespace URI; any
+  * other xmlns attribute is an error. Depending on {@code namePolicy} a
+  * warning or a fatal error about XML 1.0 serializability follows.
+  *
+  * @param attributes the attributes to check
+  * @param ns the namespace URI an {@code xmlns} attribute value is
+  *        required to equal
+  * @throws SAXException
+  */
+ private void checkAttributes(HtmlAttributes attributes, @NsUri String ns)
+         throws SAXException {
+     if (errorHandler != null) {
+         int len = attributes.getXmlnsLength();
+         for (int i = 0; i < len; i++) {
+             AttributeName name = attributes.getXmlnsAttributeName(i);
+             if (name == AttributeName.XMLNS) {
+                 if (html4) {
+                     err("Attribute \u201Cxmlns\u201D not allowed here. (HTML4-only error.)");
+                 } else {
+                     String xmlns = attributes.getXmlnsValue(i);
+                     if (!ns.equals(xmlns)) {
+                         err("Bad value \u201C"
+                                 + xmlns
+                                 + "\u201D for the attribute \u201Cxmlns\u201D (only \u201C"
+                                 + ns + "\u201D permitted here).");
+                         switch (namePolicy) {
+                             case ALTER_INFOSET:
+                                 // fall through
+                             case ALLOW:
+                                 warn("Attribute \u201Cxmlns\u201D is not serializable as XML 1.0.");
+                                 break;
+                             case FATAL:
+                                 fatal("Attribute \u201Cxmlns\u201D is not serializable as XML 1.0.");
+                                 break;
+                         }
+                     }
+                 }
+             } else if (ns != "http://www.w3.org/1999/xhtml"
+                     && name == AttributeName.XMLNS_XLINK) {
+                 String xmlns = attributes.getXmlnsValue(i);
+                 if (!"http://www.w3.org/1999/xlink".equals(xmlns)) {
+                     // The attribute being reported is xmlns:xlink; the
+                     // message previously misnamed it as xmlns:link.
+                     err("Bad value \u201C"
+                             + xmlns
+                             + "\u201D for the attribute \u201Cxmlns:xlink\u201D (only \u201Chttp://www.w3.org/1999/xlink\u201D permitted here).");
+                     switch (namePolicy) {
+                         case ALTER_INFOSET:
+                             // fall through
+                         case ALLOW:
+                             warn("Attribute \u201Cxmlns:xlink\u201D with a value other than \u201Chttp://www.w3.org/1999/xlink\u201D is not serializable as XML 1.0 without changing document semantics.");
+                             break;
+                         case FATAL:
+                             fatal("Attribute \u201Cxmlns:xlink\u201D with a value other than \u201Chttp://www.w3.org/1999/xlink\u201D is not serializable as XML 1.0 without changing document semantics.");
+                             break;
+                     }
+                 }
+             } else {
+                 err("Attribute \u201C" + attributes.getXmlnsLocalName(i)
+                         + "\u201D not allowed here.");
+                 switch (namePolicy) {
+                     case ALTER_INFOSET:
+                         // fall through
+                     case ALLOW:
+                         warn("Attribute with the local name \u201C"
+                                 + attributes.getXmlnsLocalName(i)
+                                 + "\u201D is not serializable as XML 1.0.");
+                         break;
+                     case FATAL:
+                         fatal("Attribute with the local name \u201C"
+                                 + attributes.getXmlnsLocalName(i)
+                                 + "\u201D is not serializable as XML 1.0.");
+                         break;
+                 }
+             }
+         }
+     }
+     // Runs regardless of whether an error handler is installed.
+     attributes.processNonNcNames(this, namePolicy);
+ }
+
+ /**
+  * Returns the name to report for an element. An NCName is returned
+  * unchanged. Otherwise the outcome depends on {@code namePolicy}: ALLOW
+  * warns and returns the name as is, ALTER_INFOSET warns and returns the
+  * escaped name, FATAL calls {@code fatal} with the same message.
+  *
+  * @param name the element's local name
+  * @return the name to use, or {@code null} if no case returned a name
+  * @throws SAXException
+  */
+ private String checkPopName(@Local String name) throws SAXException {
+     if (NCName.isNCName(name)) {
+         return name;
+     }
+     String message = "Element name \u201C" + name
+             + "\u201D cannot be represented as XML 1.0.";
+     switch (namePolicy) {
+         case ALLOW:
+             warn(message);
+             return name;
+         case ALTER_INFOSET:
+             warn(message);
+             return NCName.escapeName(name);
+         case FATAL:
+             fatal(message);
+     }
+     return null; // keep compiler happy
+ }
+
+ // ]NOCPP]
+
+ /**
+  * Creates the root {@code html} element with the given attributes via
+  * {@code createHtmlElementSetAsRoot} and pushes a stack node for it.
+  *
+  * @param attributes the attributes for the root element
+  * @throws SAXException
+  */
+ private void appendHtmlElementToDocumentAndPush(HtmlAttributes attributes)
+ throws SAXException {
+ // [NOCPP[
+ checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+ // ]NOCPP]
+ T elt = createHtmlElementSetAsRoot(attributes);
+ StackNode<T> node = new StackNode<T>(ElementName.HTML,
+ elt
+ // [NOCPP[
+ , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+ // ]NOCPP]
+ );
+ push(node);
+ }
+
+ /**
+  * Convenience overload that creates and pushes the root {@code html}
+  * element with the attributes returned by
+  * {@code tokenizer.emptyAttributes()}.
+  *
+  * @throws SAXException
+  */
+ private void appendHtmlElementToDocumentAndPush() throws SAXException {
+ appendHtmlElementToDocumentAndPush(tokenizer.emptyAttributes());
+ }
+
+ /**
+  * Creates a {@code head} element under the current stack node, records it
+  * in {@code headPointer} and pushes a stack node for it.
+  *
+  * @param attributes the attributes for the new element
+  * @throws SAXException propagated from the callbacks invoked here
+  */
+ private void appendToCurrentNodeAndPushHeadElement(HtmlAttributes attributes)
+         throws SAXException {
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     // ]NOCPP]
+     T parent = stack[currentPtr].node;
+     T head = createElement("http://www.w3.org/1999/xhtml", "head", attributes, parent);
+     appendElement(head, parent);
+     headPointer = head;
+     StackNode<T> headEntry = new StackNode<T>(ElementName.HEAD,
+             head
+             // [NOCPP[
+             , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+             // ]NOCPP]
+     );
+     push(headEntry);
+ }
+
+ private void appendToCurrentNodeAndPushBodyElement(HtmlAttributes attributes)
+ throws SAXException { // thin wrapper that fixes the element name to ElementName.BODY
+ appendToCurrentNodeAndPushElement(ElementName.BODY,
+ attributes);
+ }
+
+ private void appendToCurrentNodeAndPushBodyElement() throws SAXException { // overload for a body element created without attributes
+ appendToCurrentNodeAndPushBodyElement(tokenizer.emptyAttributes()); // delegates, using the attribute holder returned by tokenizer.emptyAttributes()
+ }
+
+ /**
+  * Creates a {@code form} element, inserting it either under the current
+  * stack node or through the foster-parenting path, updates
+  * {@code formPointer} unless {@code isTemplateContents()} is true, and
+  * pushes a stack node for the element.
+  *
+  * @param attributes the attributes for the new element
+  * @throws SAXException propagated from the callbacks invoked here
+  */
+ private void appendToCurrentNodeAndPushFormElementMayFoster(
+         HtmlAttributes attributes) throws SAXException {
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     // ]NOCPP]
+
+     StackNode<T> top = stack[currentPtr];
+     T form;
+     if (!top.isFosterParenting()) {
+         form = createElement("http://www.w3.org/1999/xhtml", "form", attributes, top.node);
+         appendElement(form, top.node);
+     } else {
+         fatal();
+         form = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", "form", attributes);
+     }
+
+     if (!isTemplateContents()) {
+         formPointer = form;
+     }
+
+     StackNode<T> formEntry = new StackNode<T>(ElementName.FORM,
+             form
+             // [NOCPP[
+             , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+             // ]NOCPP]
+     );
+     push(formEntry);
+ }
+
+ /**
+  * Creates an HTML formatting element, inserts it (directly or through the
+  * foster-parenting path), pushes a stack node for it and also appends that
+  * node via {@code append}. The stack node keeps a clone of the attributes.
+  *
+  * @param elementName the element to create
+  * @param attributes the attributes for the new element
+  * @throws SAXException propagated from the callbacks invoked here
+  */
+ private void appendToCurrentNodeAndPushFormattingElementMayFoster(
+         ElementName elementName, HtmlAttributes attributes)
+         throws SAXException {
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     // ]NOCPP]
+     // This method can't be called for custom elements
+     // The clone is taken first: attributes must not be read after calling
+     // createElement, because createElement may delete attributes in C++.
+     HtmlAttributes attributesCopy = attributes.cloneAttributes(null);
+     StackNode<T> top = stack[currentPtr];
+     T created;
+     if (!top.isFosterParenting()) {
+         created = createElement("http://www.w3.org/1999/xhtml", elementName.name, attributes, top.node);
+         appendElement(created, top.node);
+     } else {
+         fatal();
+         created = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", elementName.name, attributes);
+     }
+     StackNode<T> formatting = new StackNode<T>(elementName, created, attributesCopy
+             // [NOCPP[
+             , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+             // ]NOCPP]
+     );
+     push(formatting);
+     append(formatting);
+     formatting.retain(); // append doesn't retain itself
+ }
+
+ /**
+  * Creates an HTML element under the current stack node (no foster
+  * parenting) and pushes a stack node for it. For
+  * {@code ElementName.TEMPLATE} the stack node refers to the value returned
+  * by {@code getDocumentFragmentForTemplate} instead of the element itself.
+  *
+  * @param elementName the element to create
+  * @param attributes the attributes for the new element
+  * @throws SAXException propagated from the callbacks invoked here
+  */
+ private void appendToCurrentNodeAndPushElement(ElementName elementName,
+         HtmlAttributes attributes)
+         throws SAXException {
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     // ]NOCPP]
+     // This method can't be called for custom elements
+     T parent = stack[currentPtr].node;
+     T stackTarget = createElement("http://www.w3.org/1999/xhtml", elementName.name, attributes, parent);
+     appendElement(stackTarget, parent);
+     if (ElementName.TEMPLATE == elementName) {
+         stackTarget = getDocumentFragmentForTemplate(stackTarget);
+     }
+     StackNode<T> entry = new StackNode<T>(elementName, stackTarget
+             // [NOCPP[
+             , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+             // ]NOCPP]
+     );
+     push(entry);
+ }
+
+ /**
+  * Creates an HTML element, inserts it (directly or through the
+  * foster-parenting path) and pushes a stack node for it. In the Java-only
+  * part, custom element names are passed through {@code checkPopName}.
+  *
+  * @param elementName the element to create
+  * @param attributes the attributes for the new element
+  * @throws SAXException propagated from the callbacks invoked here
+  */
+ private void appendToCurrentNodeAndPushElementMayFoster(ElementName elementName,
+         HtmlAttributes attributes)
+         throws SAXException {
+     @Local String popName = elementName.name;
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     if (elementName.isCustom()) {
+         popName = checkPopName(popName);
+     }
+     // ]NOCPP]
+     StackNode<T> top = stack[currentPtr];
+     T created;
+     if (!top.isFosterParenting()) {
+         created = createElement("http://www.w3.org/1999/xhtml", popName, attributes, top.node);
+         appendElement(created, top.node);
+     } else {
+         fatal();
+         created = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", popName, attributes);
+     }
+     StackNode<T> entry = new StackNode<T>(elementName, created, popName
+             // [NOCPP[
+             , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+             // ]NOCPP]
+     );
+     push(entry);
+ }
+
+ /**
+  * Creates a MathML-namespace element, inserts it (directly or through the
+  * foster-parenting path) and pushes a stack node for it. The node is
+  * flagged as an HTML integration point when the element is
+  * {@code annotation-xml} and {@code annotationXmlEncodingPermitsHtml}
+  * accepts its attributes.
+  *
+  * @param elementName the element to create
+  * @param attributes the attributes for the new element
+  * @throws SAXException propagated from the callbacks invoked here
+  */
+ private void appendToCurrentNodeAndPushElementMayFosterMathML(
+         ElementName elementName, HtmlAttributes attributes)
+         throws SAXException {
+     @Local String popName = elementName.name;
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1998/Math/MathML");
+     if (elementName.isCustom()) {
+         popName = checkPopName(popName);
+     }
+     // ]NOCPP]
+     // Decide this before createElement(): attributes must not be read after
+     // calling createElement(), since createElement may delete the object in C++.
+     boolean markAsHtmlIntegrationPoint = ElementName.ANNOTATION_XML == elementName
+             && annotationXmlEncodingPermitsHtml(attributes);
+     StackNode<T> top = stack[currentPtr];
+     T created;
+     if (!top.isFosterParenting()) {
+         created = createElement("http://www.w3.org/1998/Math/MathML", popName, attributes, top.node);
+         appendElement(created, top.node);
+     } else {
+         fatal();
+         created = createAndInsertFosterParentedElement("http://www.w3.org/1998/Math/MathML", popName, attributes);
+     }
+     StackNode<T> entry = new StackNode<T>(elementName, created, popName,
+             markAsHtmlIntegrationPoint
+             // [NOCPP[
+             , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+             // ]NOCPP]
+     );
+     push(entry);
+ }
+
+ // [NOCPP[
+ T getDocumentFragmentForTemplate(T template) { // Java-only default (inside a NOCPP region): hands back the template element itself
+ return template;
+ }
+
+ T getFormPointerForContext(T context) { // Java-only default (inside a NOCPP region): ignores the context and always yields null
+ return null;
+ }
+ // ]NOCPP]
+
+ /**
+  * Tells whether the {@code encoding} attribute value equals
+  * {@code application/xhtml+xml} or {@code text/html}, compared with
+  * {@code Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString}.
+  *
+  * @param attributes the attributes to inspect
+  * @return {@code false} when the attribute is absent or matches neither
+  *         literal, {@code true} otherwise
+  */
+ private boolean annotationXmlEncodingPermitsHtml(HtmlAttributes attributes) {
+     String encoding = attributes.getValue(AttributeName.ENCODING);
+     if (encoding != null) {
+         if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                 "application/xhtml+xml", encoding)) {
+             return true;
+         }
+         return Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                 "text/html", encoding);
+     }
+     return false;
+ }
+
+ /**
+  * Creates an SVG-namespace element named by
+  * {@code elementName.camelCaseName}, inserts it (directly or through the
+  * foster-parenting path) and pushes a stack node for it.
+  *
+  * @param elementName the element to create
+  * @param attributes the attributes for the new element
+  * @throws SAXException propagated from the callbacks invoked here
+  */
+ private void appendToCurrentNodeAndPushElementMayFosterSVG(
+         ElementName elementName, HtmlAttributes attributes)
+         throws SAXException {
+     @Local String popName = elementName.camelCaseName;
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/2000/svg");
+     if (elementName.isCustom()) {
+         popName = checkPopName(popName);
+     }
+     // ]NOCPP]
+     StackNode<T> top = stack[currentPtr];
+     T created;
+     if (!top.isFosterParenting()) {
+         created = createElement("http://www.w3.org/2000/svg", popName, attributes, top.node);
+         appendElement(created, top.node);
+     } else {
+         fatal();
+         created = createAndInsertFosterParentedElement("http://www.w3.org/2000/svg", popName, attributes);
+     }
+     // Note the argument order: this StackNode constructor takes popName before the node.
+     StackNode<T> entry = new StackNode<T>(elementName, popName, created
+             // [NOCPP[
+             , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+             // ]NOCPP]
+     );
+     push(entry);
+ }
+
+ /**
+  * Creates an HTML element that may have a form owner, inserts it (directly
+  * or through the foster-parenting path) and pushes a stack node for it.
+  * The form owner is dropped (null) when {@code form} is null, when parsing
+  * a fragment, or when {@code isTemplateContents()} is true.
+  *
+  * @param elementName the element to create
+  * @param attributes the attributes for the new element
+  * @param form the candidate form owner, may be null
+  * @throws SAXException propagated from the callbacks invoked here
+  */
+ private void appendToCurrentNodeAndPushElementMayFoster(ElementName elementName,
+         HtmlAttributes attributes, T form)
+         throws SAXException {
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     // ]NOCPP]
+     // Can't be called for custom elements
+     T formOwner = (form == null || fragment || isTemplateContents()) ? null : form;
+     StackNode<T> top = stack[currentPtr];
+     T created;
+     if (!top.isFosterParenting()) {
+         created = createElement("http://www.w3.org/1999/xhtml", elementName.name,
+                 attributes, formOwner, top.node);
+         appendElement(created, top.node);
+     } else {
+         fatal();
+         created = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", elementName.name,
+                 attributes, formOwner);
+     }
+     StackNode<T> entry = new StackNode<T>(elementName, created
+             // [NOCPP[
+             , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+             // ]NOCPP]
+     );
+     push(entry);
+ }
+
+ /**
+  * Creates an HTML element that may have a form owner, inserts it (directly
+  * or through the foster-parenting path) and immediately reports it as
+  * pushed and popped, without placing it on the stack.
+  *
+  * @param name the local name of the element
+  * @param attributes the attributes for the new element
+  * @param form the candidate form owner, may be null; ignored when parsing
+  *        a fragment or when {@code isTemplateContents()} is true
+  * @throws SAXException propagated from the callbacks invoked here
+  */
+ private void appendVoidElementToCurrentMayFoster(
+         @Local String name, HtmlAttributes attributes, T form) throws SAXException {
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     // ]NOCPP]
+     // Can't be called for custom elements
+     T formOwner = (form == null || fragment || isTemplateContents()) ? null : form;
+     StackNode<T> top = stack[currentPtr];
+     T created;
+     if (!top.isFosterParenting()) {
+         created = createElement("http://www.w3.org/1999/xhtml", name,
+                 attributes, formOwner, top.node);
+         appendElement(created, top.node);
+     } else {
+         fatal();
+         created = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", name,
+                 attributes, formOwner);
+     }
+     elementPushed("http://www.w3.org/1999/xhtml", name, created);
+     elementPopped("http://www.w3.org/1999/xhtml", name, created);
+ }
+
+ /**
+  * Creates an HTML element, inserts it (directly or through the
+  * foster-parenting path) and immediately reports it as pushed and popped,
+  * without placing it on the stack.
+  *
+  * @param elementName the element to create
+  * @param attributes the attributes for the new element
+  * @throws SAXException propagated from the callbacks invoked here
+  */
+ private void appendVoidElementToCurrentMayFoster(
+         ElementName elementName, HtmlAttributes attributes)
+         throws SAXException {
+     @Local String popName = elementName.name;
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     if (elementName.isCustom()) {
+         popName = checkPopName(popName);
+     }
+     // ]NOCPP]
+     StackNode<T> top = stack[currentPtr];
+     T created;
+     if (!top.isFosterParenting()) {
+         created = createElement("http://www.w3.org/1999/xhtml", popName, attributes, top.node);
+         appendElement(created, top.node);
+     } else {
+         fatal();
+         created = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", popName, attributes);
+     }
+     elementPushed("http://www.w3.org/1999/xhtml", popName, created);
+     elementPopped("http://www.w3.org/1999/xhtml", popName, created);
+ }
+
+ /**
+  * Creates an SVG-namespace element named by
+  * {@code elementName.camelCaseName}, inserts it (directly or through the
+  * foster-parenting path) and immediately reports it as pushed and popped,
+  * without placing it on the stack.
+  *
+  * @param elementName the element to create
+  * @param attributes the attributes for the new element
+  * @throws SAXException propagated from the callbacks invoked here
+  */
+ private void appendVoidElementToCurrentMayFosterSVG(
+         ElementName elementName, HtmlAttributes attributes)
+         throws SAXException {
+     @Local String popName = elementName.camelCaseName;
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/2000/svg");
+     if (elementName.isCustom()) {
+         popName = checkPopName(popName);
+     }
+     // ]NOCPP]
+     StackNode<T> top = stack[currentPtr];
+     T created;
+     if (!top.isFosterParenting()) {
+         created = createElement("http://www.w3.org/2000/svg", popName, attributes, top.node);
+         appendElement(created, top.node);
+     } else {
+         fatal();
+         created = createAndInsertFosterParentedElement("http://www.w3.org/2000/svg", popName, attributes);
+     }
+     elementPushed("http://www.w3.org/2000/svg", popName, created);
+     elementPopped("http://www.w3.org/2000/svg", popName, created);
+ }
+
+ /**
+  * Creates a MathML-namespace element, inserts it (directly or through the
+  * foster-parenting path) and immediately reports it as pushed and popped,
+  * without placing it on the stack.
+  *
+  * @param elementName the element to create
+  * @param attributes the attributes for the new element
+  * @throws SAXException propagated from the callbacks invoked here
+  */
+ private void appendVoidElementToCurrentMayFosterMathML(
+         ElementName elementName, HtmlAttributes attributes)
+         throws SAXException {
+     @Local String popName = elementName.name;
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1998/Math/MathML");
+     if (elementName.isCustom()) {
+         popName = checkPopName(popName);
+     }
+     // ]NOCPP]
+     StackNode<T> top = stack[currentPtr];
+     T created;
+     if (!top.isFosterParenting()) {
+         created = createElement("http://www.w3.org/1998/Math/MathML", popName, attributes, top.node);
+         appendElement(created, top.node);
+     } else {
+         fatal();
+         created = createAndInsertFosterParentedElement("http://www.w3.org/1998/Math/MathML", popName, attributes);
+     }
+     elementPushed("http://www.w3.org/1998/Math/MathML", popName, created);
+     elementPopped("http://www.w3.org/1998/Math/MathML", popName, created);
+ }
+
+ /**
+  * Creates an HTML element that may have a form owner under the current
+  * stack node (no foster parenting) and immediately reports it as pushed
+  * and popped, without placing it on the stack.
+  *
+  * @param name the local name of the element
+  * @param attributes the attributes for the new element
+  * @param form the candidate form owner, may be null; ignored when parsing
+  *        a fragment or when {@code isTemplateContents()} is true
+  * @throws SAXException propagated from the callbacks invoked here
+  */
+ private void appendVoidElementToCurrent(
+         @Local String name, HtmlAttributes attributes, T form) throws SAXException {
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     // ]NOCPP]
+     // Can't be called for custom elements
+     T parent = stack[currentPtr].node;
+     T formOwner = (form == null || fragment || isTemplateContents()) ? null : form;
+     T created = createElement("http://www.w3.org/1999/xhtml", name, attributes,
+             formOwner, parent);
+     appendElement(created, parent);
+     elementPushed("http://www.w3.org/1999/xhtml", name, created);
+     elementPopped("http://www.w3.org/1999/xhtml", name, created);
+ }
+
+ /**
+  * Creates a {@code form} element under the current stack node, stores it
+  * in {@code formPointer} and immediately reports it as pushed and popped,
+  * without placing it on the stack.
+  *
+  * @param attributes the attributes for the new element
+  * @throws SAXException propagated from the callbacks invoked here
+  */
+ private void appendVoidFormToCurrent(HtmlAttributes attributes) throws SAXException {
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     // ]NOCPP]
+     T parent = stack[currentPtr].node;
+     T form = createElement("http://www.w3.org/1999/xhtml", "form",
+             attributes, parent);
+     formPointer = form;
+     // ownership transferred to form pointer
+     appendElement(form, parent);
+     elementPushed("http://www.w3.org/1999/xhtml", "form", form);
+     elementPopped("http://www.w3.org/1999/xhtml", "form", form);
+ }
+
+ // [NOCPP[
+
+ private final void accumulateCharactersForced(@Const @NoLength char[] buf,
+ int start, int length) throws SAXException { // performs no capacity check itself; charBuffer must already be large enough (see ensureBufferSpace)
+ System.arraycopy(buf, start, charBuffer, charBufferLen, length); // copy to the end of the pending characters
+ charBufferLen += length;
+ }
+
+ /**
+  * Makes sure {@code charBuffer} can hold the characters already pending
+  * plus {@code inputLength} more, allocating or growing it as needed.
+  *
+  * @param inputLength the number of additional characters to make room for
+  * @throws SAXException declared by the overridden method; not thrown here
+  */
+ @Override public void ensureBufferSpace(int inputLength)
+         throws SAXException {
+     // TODO: Unify Tokenizer.strBuf and TreeBuilder.charBuffer so that
+     // this method becomes unnecessary.
+     int required = charBufferLen + inputLength;
+     if (charBuffer == null) {
+         // Add an arbitrary small value to avoid immediate reallocation
+         // once there are a few characters in the buffer.
+         charBuffer = new char[required + 128];
+         return;
+     }
+     if (required > charBuffer.length) {
+         // HotSpot reportedly allocates memory with 8-byte accuracy, so
+         // there's no point in trying to do math here to avoid slop.
+         // Maybe we should add some small constant to the required size
+         // here but not doing that without profiling. In C++ with jemalloc,
+         // the corresponding method should do math to round up here
+         // to avoid slop.
+         char[] grown = new char[required];
+         System.arraycopy(charBuffer, 0, grown, 0, charBufferLen);
+         charBuffer = grown;
+     }
+ }
+
+ // ]NOCPP]
+
+ protected void accumulateCharacters(@Const @NoLength char[] buf, int start,
+ int length) throws SAXException { // forwards the characters straight to appendCharacters on the current stack node
+ appendCharacters(stack[currentPtr].node, buf, start, length);
+ }
+
+ // ------------------------------- //
+
+ protected final void requestSuspension() { // convenience for subclasses: relays the request to the tokenizer
+ tokenizer.requestSuspension();
+ }
+
+ protected abstract T createElement(@NsUri String ns, @Local String name,
+ HtmlAttributes attributes, T intendedParent) throws SAXException; // subclass hook; callers in this class attach the returned element themselves via appendElement
+
+ /**
+  * Creates an element that may have a form owner. This default
+  * implementation ignores {@code form} and delegates to the four-argument
+  * {@code createElement}; subclasses may override it to make use of the
+  * form owner.
+  *
+  * The namespace given by the caller is forwarded as-is (previously the
+  * HTML namespace was hard-coded and {@code ns} was silently ignored), which
+  * matches how the form-owner overload of
+  * {@code createAndInsertFosterParentedElement} forwards its namespace.
+  *
+  * @param ns the namespace URI of the element
+  * @param name the local name of the element
+  * @param attributes the attributes for the new element
+  * @param form the form owner, may be null; unused by this implementation
+  * @param intendedParent the node the caller is going to append the element to
+  * @return the element returned by the four-argument {@code createElement}
+  * @throws SAXException propagated from the four-argument {@code createElement}
+  */
+ protected T createElement(@NsUri String ns, @Local String name,
+         HtmlAttributes attributes, T form, T intendedParent) throws SAXException {
+     return createElement(ns, name, attributes, intendedParent);
+ }
+
+ protected abstract T createHtmlElementSetAsRoot(HtmlAttributes attributes)
+ throws SAXException; // subclass hook; its result is pushed as the ElementName.HTML stack node by appendHtmlElementToDocumentAndPush
+
+ protected abstract void detachFromParent(T element) throws SAXException; // subclass hook; presumably removes the element from its parent -- confirm against implementations
+
+ protected abstract boolean hasChildren(T element) throws SAXException; // subclass hook; presumably tells whether the element has child nodes -- confirm against implementations
+
+ protected abstract void appendElement(T child, T newParent)
+ throws SAXException; // subclass hook; called in this class right after createElement with (new element, current stack node)
+
+ protected abstract void appendChildrenToNewParent(T oldParent, T newParent)
+ throws SAXException; // subclass hook; presumably moves the children of oldParent under newParent -- confirm against implementations
+
+ protected abstract void insertFosterParentedChild(T child, T table,
+ T stackParent) throws SAXException; // subclass hook for foster-parenting a node; exact placement rules are up to the implementation -- confirm there
+
+ // We don't generate CPP code for this method because it is not used in generated CPP
+ // code. Instead, the form owner version of this method is called with a null form owner.
+ // [NOCPP[
+
+ protected abstract T createAndInsertFosterParentedElement(@NsUri String ns, @Local String name,
+ HtmlAttributes attributes, T table, T stackParent) throws SAXException; // Java-only subclass hook (inside a NOCPP region); the form-owner overload delegates to it by default
+
+ // ]NOCPP]
+
+ /**
+  * Creates and foster-parents an element that may have a form owner. This
+  * default implementation ignores {@code form} and delegates to the
+  * overload without a form owner; subclasses may override it.
+  *
+  * @param ns the namespace URI of the element
+  * @param name the local name of the element
+  * @param attributes the attributes for the new element
+  * @param form the form owner, may be null; unused by this implementation
+  * @param table passed through to the delegate
+  * @param stackParent passed through to the delegate
+  * @return the element returned by the delegate
+  * @throws SAXException propagated from the delegate
+  */
+ protected T createAndInsertFosterParentedElement(@NsUri String ns, @Local String name,
+         HtmlAttributes attributes, T form, T table, T stackParent) throws SAXException {
+     return createAndInsertFosterParentedElement(ns, name, attributes, table, stackParent);
+ }
+
+ protected abstract void insertFosterParentedCharacters(
+ @NoLength char[] buf, int start, int length, T table, T stackParent)
+ throws SAXException; // subclass hook; flushCharacters passes the table stack node and the node of the stack entry just below it
+
+ protected abstract void appendCharacters(T parent, @NoLength char[] buf,
+ int start, int length) throws SAXException; // subclass hook; receives the range buf[start, start + length) to add under parent
+
+ protected abstract void appendIsindexPrompt(T parent) throws SAXException; // subclass hook; presumably adds the isindex prompt text under parent -- confirm against implementations
+
+ protected abstract void appendComment(T parent, @NoLength char[] buf,
+ int start, int length) throws SAXException; // subclass hook; presumably adds a comment built from buf[start, start + length) under parent -- confirm
+
+ protected abstract void appendCommentToDocument(@NoLength char[] buf,
+ int start, int length) throws SAXException; // subclass hook; presumably adds a comment built from buf[start, start + length) at document level -- confirm
+
+ protected abstract void addAttributesToElement(T element,
+ HtmlAttributes attributes) throws SAXException; // subclass hook; presumably merges attributes into an existing element -- confirm against implementations
+
+ protected void markMalformedIfScript(T elt) throws SAXException { // optional hook: does nothing by default, subclasses may override
+
+ }
+
+ protected void start(boolean fragmentMode) throws SAXException { // optional hook: does nothing by default, subclasses may override
+
+ }
+
+ protected void end() throws SAXException { // optional hook: does nothing by default, subclasses may override
+
+ }
+
+ protected void appendDoctypeToDocument(@Local String name,
+ String publicIdentifier, String systemIdentifier)
+ throws SAXException { // optional hook: does nothing by default, subclasses may override
+
+ }
+
+ protected void elementPushed(@NsUri String ns, @Local String name, T node)
+ throws SAXException { // optional hook: does nothing by default; the appendVoid* methods call it just before elementPopped
+
+ }
+
+ protected void elementPopped(@NsUri String ns, @Local String name, T node)
+ throws SAXException { // optional hook: does nothing by default; the appendVoid* methods call it right after elementPushed
+
+ }
+
+ // [NOCPP[
+
+ protected void documentMode(DocumentMode m, String publicIdentifier,
+ String systemIdentifier, boolean html4SpecificAdditionalErrorChecks)
+ throws SAXException { // Java-only optional hook (inside a NOCPP region): does nothing by default
+
+ }
+
+ /**
+ * Returns the {@code wantingComments} flag, which
+ * {@code setIgnoringComments} sets to the inverse of its argument.
+ *
+ * @return the current value of {@code wantingComments}
+ * @see nu.validator.htmlparser.common.TokenHandler#wantsComments()
+ */
+ public boolean wantsComments() {
+ return wantingComments;
+ }
+
+ public void setIgnoringComments(boolean ignoreComments) { // stored inverted: the field tracks whether comments are wanted
+ wantingComments = !ignoreComments;
+ }
+
+ /**
+ * Sets the errorHandler. Code in this class checks the field against
+ * <code>null</code> before building locators and some error messages, so
+ * <code>null</code> is an accepted value here.
+ *
+ * @param errorHandler
+ * the errorHandler to set, or <code>null</code>
+ */
+ public final void setErrorHandler(ErrorHandler errorHandler) {
+ this.errorHandler = errorHandler;
+ }
+
+ /**
+ * Returns the errorHandler most recently stored by
+ * <code>setErrorHandler</code>.
+ *
+ * @return the errorHandler, possibly <code>null</code>
+ */
+ public ErrorHandler getErrorHandler() {
+ return errorHandler;
+ }
+
+ /**
+  * Configures the fragment context using the HTML namespace, no context
+  * node and quirks mode off. Fragment mode is enabled exactly when
+  * {@code context} is non-null.
+  *
+  * The argument MUST be an interned string or <code>null</code>.
+  *
+  * @param context the local name of the context element, or
+  *        <code>null</code>
+  */
+ public final void setFragmentContext(@Local String context) {
+     this.contextName = context;
+     this.fragment = (context != null);
+     this.contextNamespace = "http://www.w3.org/1999/xhtml";
+     this.contextNode = null;
+     this.quirks = false;
+ }
+
+ // ]NOCPP]
+
+ /**
+ * Answers with the result of <code>isInForeign()</code>, i.e. whether the
+ * current stack node is outside the HTML namespace.
+ *
+ * @return the value of <code>isInForeign()</code>
+ * @see nu.validator.htmlparser.common.TokenHandler#cdataSectionAllowed()
+ */
+ @Inline public boolean cdataSectionAllowed() throws SAXException {
+ return isInForeign();
+ }
+
+ private boolean isInForeign() { // false when the stack is empty (currentPtr < 0)
+ return currentPtr >= 0
+ && stack[currentPtr].ns != "http://www.w3.org/1999/xhtml"; // reference comparison; presumably namespace strings are interned -- confirm
+ }
+
+ /**
+  * Returns the negation of {@code isSpecialParentInForeign} for the current
+  * stack node, or {@code false} when the stack is empty.
+  *
+  * @return {@code true} only if there is a current node and
+  *         {@code isSpecialParentInForeign} rejects it
+  */
+ private boolean isInForeignButNotHtmlOrMathTextIntegrationPoint() {
+     return currentPtr >= 0 && !isSpecialParentInForeign(stack[currentPtr]);
+ }
+
+ /**
+  * Configures the fragment context from an explicit name, namespace, node
+  * and quirks flag. Fragment mode is enabled exactly when {@code context}
+  * is non-null.
+  *
+  * The argument MUST be an interned string or <code>null</code>.
+  *
+  * @param context the local name of the context element, or
+  *        <code>null</code>
+  * @param ns the HTML, SVG or MathML namespace, compared by reference; may
+  *        be <code>null</code> only together with {@code context}
+  * @param node the context node to remember
+  * @param quirks the quirks flag to store
+  * @throws IllegalArgumentException (Java only) if {@code ns} is not one of
+  *         the accepted values
+  */
+ public final void setFragmentContext(@Local String context,
+         @NsUri String ns, T node, boolean quirks) {
+     // [NOCPP[
+     boolean nothingGiven = context == null && ns == null;
+     boolean knownNamespace = "http://www.w3.org/1999/xhtml" == ns
+             || "http://www.w3.org/2000/svg" == ns
+             || "http://www.w3.org/1998/Math/MathML" == ns;
+     if (!nothingGiven && !knownNamespace) {
+         throw new IllegalArgumentException(
+                 "The namespace must be the HTML, SVG or MathML namespace (or null when the local name is null). Got: "
+                         + ns);
+     }
+     // ]NOCPP]
+     this.contextName = context;
+     this.contextNamespace = ns;
+     this.contextNode = node;
+     this.fragment = (context != null);
+     this.quirks = quirks;
+ }
+
+ protected final T currentNode() { // node of the top stack entry; indexes stack[currentPtr] without checking for an empty stack
+ return stack[currentPtr].node;
+ }
+
+ /**
+ * Returns the scriptingEnabled flag as last stored by
+ * <code>setScriptingEnabled</code>.
+ *
+ * @return the scriptingEnabled
+ */
+ public boolean isScriptingEnabled() {
+ return scriptingEnabled;
+ }
+
+ /**
+ * Sets the scriptingEnabled flag; this method only stores the value.
+ *
+ * @param scriptingEnabled
+ * the scriptingEnabled to set
+ */
+ public void setScriptingEnabled(boolean scriptingEnabled) {
+ this.scriptingEnabled = scriptingEnabled;
+ }
+
+ public void setIsSrcdocDocument(boolean isSrcdocDocument) { // when true, the doctype-related error reporters in this class stay silent
+ this.isSrcdocDocument = isSrcdocDocument;
+ }
+
+ // [NOCPP[
+
+ /**
+ * Sets the doctypeExpectation (Java only; this setter sits inside a NOCPP
+ * region). The value is merely stored here.
+ *
+ * @param doctypeExpectation
+ * the doctypeExpectation to set
+ */
+ public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
+ this.doctypeExpectation = doctypeExpectation;
+ }
+
+ public void setNamePolicy(XmlViolationPolicy namePolicy) { // policy consulted by checkPopName and the attribute checks for names that are not XML 1.0 NCNames
+ this.namePolicy = namePolicy;
+ }
+
+ /**
+ * Sets the documentModeHandler (Java only; this setter sits inside a NOCPP
+ * region). The value is merely stored here.
+ *
+ * @param documentModeHandler
+ * the documentModeHandler to set
+ */
+ public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
+ this.documentModeHandler = documentModeHandler;
+ }
+
+ /**
+ * Sets the reportingDoctype flag (Java only; this setter sits inside a
+ * NOCPP region). The value is merely stored here.
+ *
+ * @param reportingDoctype
+ * the reportingDoctype to set
+ */
+ public void setReportingDoctype(boolean reportingDoctype) {
+ this.reportingDoctype = reportingDoctype;
+ }
+
+ // ]NOCPP]
+
+ /**
+  * Flushes the pending characters. Public for document.write use cases only.
+  *
+  * Normally the buffered characters are appended to the current node. In
+  * the table-related modes, when the buffer holds a non-whitespace
+  * character, an error is reported, the active formatting elements are
+  * reconstructed, and the characters go to the new current node, to the
+  * nearest template, or are foster-parented relative to the nearest table.
+  * The buffer length is reset to zero afterwards.
+  *
+  * @throws SAXException
+  */
+ public final void flushCharacters() throws SAXException {
+     if (charBufferLen <= 0) {
+         return;
+     }
+     boolean inTableMode = mode == IN_TABLE || mode == IN_TABLE_BODY || mode == IN_ROW;
+     if (inTableMode && charBufferContainsNonWhitespace()) {
+         errNonSpaceInTable();
+         reconstructTheActiveFormattingElements();
+         if (!stack[currentPtr].isFosterParenting()) {
+             // reconstructing gave us a new current node
+             appendCharacters(currentNode(), charBuffer, 0,
+                     charBufferLen);
+             charBufferLen = 0;
+             return;
+         }
+
+         int tableIndex = findLastOrRoot(TreeBuilder.TABLE);
+         int templateIndex = findLastOrRoot(TreeBuilder.TEMPLATE);
+
+         if (templateIndex >= tableIndex) {
+             appendCharacters(stack[templateIndex].node, charBuffer, 0, charBufferLen);
+         } else {
+             StackNode<T> tableEntry = stack[tableIndex];
+             insertFosterParentedCharacters(charBuffer, 0, charBufferLen,
+                     tableEntry.node, stack[tableIndex - 1].node);
+         }
+         charBufferLen = 0;
+         return;
+     }
+     appendCharacters(currentNode(), charBuffer, 0, charBufferLen);
+     charBufferLen = 0;
+ }
+
+ /**
+  * Scans the pending characters for anything other than space, tab, line
+  * feed, carriage return or form feed.
+  *
+  * @return {@code true} as soon as such a character is found within the
+  *         first {@code charBufferLen} entries, {@code false} otherwise
+  */
+ private boolean charBufferContainsNonWhitespace() {
+     for (int i = 0; i < charBufferLen; i++) {
+         char c = charBuffer[i];
+         if (c != ' ' && c != '\t' && c != '\n' && c != '\r' && c != '\u000C') {
+             return true;
+         }
+     }
+     return false;
+ }
+
+ /**
+  * Creates a comparable snapshot of the tree builder state. Snapshot
+  * creation is only supported immediately after a script end tag has been
+  * processed. In C++ the caller is responsible for calling
+  * <code>delete</code> on the returned object.
+  *
+  * Entries of the list of active formatting elements are copied first;
+  * stack entries that also occur in that list share the copied node (and
+  * retain it), all other stack entries get a fresh copy without attributes.
+  *
+  * @return a snapshot.
+  * @throws SAXException
+  */
+ @SuppressWarnings("unchecked") public TreeBuilderState<T> newSnapshot()
+         throws SAXException {
+     StackNode<T>[] formattingCopy = new StackNode[listPtr + 1];
+     for (int i = 0; i < formattingCopy.length; i++) {
+         StackNode<T> source = listOfActiveFormattingElements[i];
+         if (source == null) {
+             formattingCopy[i] = null;
+         } else {
+             StackNode<T> duplicate = new StackNode<T>(source.getFlags(), source.ns,
+                     source.name, source.node, source.popName,
+                     source.attributes.cloneAttributes(null)
+                     // [NOCPP[
+                     , source.getLocator()
+                     // ]NOCPP]
+             );
+             formattingCopy[i] = duplicate;
+         }
+     }
+     StackNode<T>[] openCopy = new StackNode[currentPtr + 1];
+     for (int i = 0; i < openCopy.length; i++) {
+         StackNode<T> source = stack[i];
+         int formattingIndex = findInListOfActiveFormattingElements(source);
+         if (formattingIndex != -1) {
+             openCopy[i] = formattingCopy[formattingIndex];
+             openCopy[i].retain();
+         } else {
+             StackNode<T> duplicate = new StackNode<T>(source.getFlags(), source.ns,
+                     source.name, source.node, source.popName,
+                     null
+                     // [NOCPP[
+                     , source.getLocator()
+                     // ]NOCPP]
+             );
+             openCopy[i] = duplicate;
+         }
+     }
+     int[] templateModesCopy = new int[templateModePtr + 1];
+     System.arraycopy(templateModeStack, 0, templateModesCopy, 0,
+             templateModesCopy.length);
+     return new StateSnapshot<T>(openCopy, formattingCopy, templateModesCopy, formPointer,
+             headPointer, deepTreeSurrogateParent, mode, originalMode, framesetOk,
+             needToDropLF, quirks);
+ }
+
+ /**
+  * Compares the current tree builder state with a snapshot. Lengths,
+  * pointers, modes and flags must be equal, and the nodes referenced by the
+  * stack and the list of active formatting elements must be identical
+  * (compared by reference), as must the template mode stack entries.
+  *
+  * @param snapshot the state to compare against
+  * @return {@code true} if everything compared matches
+  */
+ public boolean snapshotMatches(TreeBuilderState<T> snapshot) {
+     StackNode<T>[] otherStack = snapshot.getStack();
+     int otherStackLen = snapshot.getStackLength();
+     StackNode<T>[] otherList = snapshot.getListOfActiveFormattingElements();
+     int otherListLen = snapshot.getListOfActiveFormattingElementsLength();
+     int[] otherTemplateModes = snapshot.getTemplateModeStack();
+     int otherTemplateModesLen = snapshot.getTemplateModeStackLength();
+
+     if (otherStackLen != currentPtr + 1
+             || otherListLen != listPtr + 1
+             || otherTemplateModesLen != templateModePtr + 1) {
+         return false;
+     }
+     if (formPointer != snapshot.getFormPointer()
+             || headPointer != snapshot.getHeadPointer()
+             || deepTreeSurrogateParent != snapshot.getDeepTreeSurrogateParent()) {
+         return false;
+     }
+     if (mode != snapshot.getMode()
+             || originalMode != snapshot.getOriginalMode()
+             || framesetOk != snapshot.isFramesetOk()
+             || needToDropLF != snapshot.isNeedToDropLF()
+             || quirks != snapshot.isQuirks()) { // maybe just assert quirks
+         return false;
+     }
+     for (int i = otherListLen - 1; i >= 0; i--) {
+         StackNode<T> theirs = otherList[i];
+         StackNode<T> ours = listOfActiveFormattingElements[i];
+         if (theirs == null || ours == null) {
+             if (theirs != ours) {
+                 // exactly one side holds a null entry at this position
+                 return false;
+             }
+             continue;
+         }
+         if (theirs.node != ours.node) {
+             return false; // it's possible that this condition is overly
+             // strict
+         }
+     }
+     for (int i = otherStackLen - 1; i >= 0; i--) {
+         if (otherStack[i].node != stack[i].node) {
+             return false;
+         }
+     }
+     for (int i = otherTemplateModesLen - 1; i >= 0; i--) {
+         if (otherTemplateModes[i] != templateModeStack[i]) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ @SuppressWarnings("unchecked") public void loadState(
+ TreeBuilderState<T> snapshot, Interner interner)
+ throws SAXException { // replaces this builder's stacks, pointers, modes and flags with copies taken from the snapshot
+ StackNode<T>[] stackCopy = snapshot.getStack();
+ int stackLen = snapshot.getStackLength();
+ StackNode<T>[] listCopy = snapshot.getListOfActiveFormattingElements();
+ int listLen = snapshot.getListOfActiveFormattingElementsLength();
+ int[] templateModeStackCopy = snapshot.getTemplateModeStack();
+ int templateModeStackLen = snapshot.getTemplateModeStackLength();
+
+ for (int i = 0; i <= listPtr; i++) { // release the current formatting entries before they are overwritten
+ if (listOfActiveFormattingElements[i] != null) {
+ listOfActiveFormattingElements[i].release();
+ }
+ }
+ if (listOfActiveFormattingElements.length < listLen) { // grow only when the existing array is too small
+ listOfActiveFormattingElements = new StackNode[listLen];
+ }
+ listPtr = listLen - 1; // must be set before findInArray runs below, which scans from listPtr
+
+ for (int i = 0; i <= currentPtr; i++) { // release the current stack entries before they are overwritten
+ stack[i].release();
+ }
+ if (stack.length < stackLen) {
+ stack = new StackNode[stackLen];
+ }
+ currentPtr = stackLen - 1;
+
+ if (templateModeStack.length < templateModeStackLen) {
+ templateModeStack = new int[templateModeStackLen];
+ }
+ templateModePtr = templateModeStackLen - 1;
+
+ for (int i = 0; i < listLen; i++) { // rebuild the formatting list from fresh copies, cloning attributes
+ StackNode<T> node = listCopy[i];
+ if (node != null) {
+ StackNode<T> newNode = new StackNode<T>(node.getFlags(), node.ns,
+ Portability.newLocalFromLocal(node.name, interner), node.node,
+ Portability.newLocalFromLocal(node.popName, interner),
+ node.attributes.cloneAttributes(null)
+ // [NOCPP[
+ , node.getLocator()
+ // ]NOCPP]
+ );
+ listOfActiveFormattingElements[i] = newNode;
+ } else {
+ listOfActiveFormattingElements[i] = null;
+ }
+ }
+ for (int i = 0; i < stackLen; i++) { // rebuild the stack; entries also present in the snapshot's list share the new list node
+ StackNode<T> node = stackCopy[i];
+ int listIndex = findInArray(node, listCopy);
+ if (listIndex == -1) {
+ StackNode<T> newNode = new StackNode<T>(node.getFlags(), node.ns,
+ Portability.newLocalFromLocal(node.name, interner), node.node,
+ Portability.newLocalFromLocal(node.popName, interner),
+ null
+ // [NOCPP[
+ , node.getLocator()
+ // ]NOCPP]
+ );
+ stack[i] = newNode;
+ } else {
+ stack[i] = listOfActiveFormattingElements[listIndex];
+ stack[i].retain(); // the shared node is now referenced from both the list and the stack
+ }
+ }
+ System.arraycopy(templateModeStackCopy, 0, templateModeStack, 0, templateModeStackLen);
+ formPointer = snapshot.getFormPointer();
+ headPointer = snapshot.getHeadPointer();
+ deepTreeSurrogateParent = snapshot.getDeepTreeSurrogateParent();
+ mode = snapshot.getMode();
+ originalMode = snapshot.getOriginalMode();
+ framesetOk = snapshot.isFramesetOk();
+ needToDropLF = snapshot.isNeedToDropLF();
+ quirks = snapshot.isQuirks();
+ }
+
+ /**
+  * Searches {@code arr} for {@code node} by reference, walking from index
+  * {@code listPtr} down to zero. Note that the upper bound comes from the
+  * {@code listPtr} field, not from {@code arr.length}.
+  *
+  * @param node the node to look for
+  * @param arr the array to search
+  * @return the highest matching index, or -1 if there is none
+  */
+ private int findInArray(StackNode<T> node, StackNode<T>[] arr) {
+     int index = listPtr;
+     while (index >= 0) {
+         if (arr[index] == node) {
+             return index;
+         }
+         index--;
+     }
+     return -1;
+ }
+
+ /**
+ * Returns the current value of the <code>formPointer</code> field.
+ *
+ * @return the form pointer, possibly <code>null</code>
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getFormPointer()
+ */
+ public T getFormPointer() {
+ return formPointer;
+ }
+
+ /**
+ * Returns the headPointer, i.e. the element most recently recorded by
+ * <code>appendToCurrentNodeAndPushHeadElement</code> or restored by
+ * <code>loadState</code>.
+ *
+ * @return the headPointer
+ */
+ public T getHeadPointer() {
+ return headPointer;
+ }
+
+ /**
+ * Returns the deepTreeSurrogateParent field unchanged.
+ *
+ * @return the deepTreeSurrogateParent
+ */
+ public T getDeepTreeSurrogateParent() {
+ return deepTreeSurrogateParent;
+ }
+
+ /**
+ * Returns the internal array itself, not a copy. Only the first
+ * <code>getListOfActiveFormattingElementsLength()</code> entries are in use.
+ *
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getListOfActiveFormattingElements()
+ */
+ public StackNode<T>[] getListOfActiveFormattingElements() {
+ return listOfActiveFormattingElements;
+ }
+
+ /**
+ * Returns the internal stack array itself, not a copy. Only the first
+ * <code>getStackLength()</code> entries are in use.
+ *
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getStack()
+ */
+ public StackNode<T>[] getStack() {
+ return stack;
+ }
+
+ /**
+ * Returns the internal template mode array itself, not a copy. Only the
+ * first <code>getTemplateModeStackLength()</code> entries are in use.
+ *
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getTemplateModeStack()
+ */
+ public int[] getTemplateModeStack() {
+ return templateModeStack;
+ }
+
+ /**
+ * Returns the mode, an <code>int</code> that this class compares against
+ * constants such as <code>IN_TABLE</code>.
+ *
+ * @return the mode
+ */
+ public int getMode() {
+ return mode;
+ }
+
+ /**
+ * Returns the originalMode field unchanged.
+ *
+ * @return the originalMode
+ */
+ public int getOriginalMode() {
+ return originalMode;
+ }
+
+ /**
+ * Returns the framesetOk flag unchanged.
+ *
+ * @return the framesetOk
+ */
+ public boolean isFramesetOk() {
+ return framesetOk;
+ }
+
+ /**
+ * Returns the needToDropLF flag unchanged.
+ *
+ * @return the needToDropLF
+ */
+ public boolean isNeedToDropLF() {
+ return needToDropLF;
+ }
+
+ /**
+ * Returns the quirks flag, which is set by the
+ * <code>setFragmentContext</code> overloads and by <code>loadState</code>.
+ *
+ * @return the quirks
+ */
+ public boolean isQuirks() {
+ return quirks;
+ }
+
+ /**
+ * Returns the number of list entries in use, computed as
+ * <code>listPtr + 1</code>.
+ *
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getListOfActiveFormattingElementsLength()
+ */
+ public int getListOfActiveFormattingElementsLength() {
+ return listPtr + 1;
+ }
+
+ /**
+ * Returns the number of stack entries in use, computed as
+ * <code>currentPtr + 1</code>.
+ *
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getStackLength()
+ */
+ public int getStackLength() {
+ return currentPtr + 1;
+ }
+
+ /**
+ * Returns the number of template mode entries in use, computed as
+ * <code>templateModePtr + 1</code>.
+ *
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getTemplateModeStackLength()
+ */
+ public int getTemplateModeStackLength() {
+ return templateModePtr + 1;
+ }
+
+ /**
+ * Reports a stray start tag through <code>err</code>, quoting the tag name
+ * in the message.
+ * @param name the name of the stray tag
+ *
+ * @throws SAXException
+ */
+ private void errStrayStartTag(@Local String name) throws SAXException {
+ err("Stray start tag \u201C" + name + "\u201D.");
+ }
+
+ /**
+ * Reports a stray end tag through <code>err</code>, quoting the tag name
+ * in the message.
+ * @param name the name of the stray tag
+ *
+ * @throws SAXException
+ */
+ private void errStrayEndTag(@Local String name) throws SAXException {
+ err("Stray end tag \u201C" + name + "\u201D.");
+ }
+
+ /**
+ * Reports a state when elements expected to be closed were not, then
+ * lists the unclosed start tags above <code>eltPos</code>.
+ *
+ * The message is sent with <code>errNoCheck</code>; presumably callers are
+ * expected to have verified that an error handler is set -- confirm.
+ *
+ * @param eltPos the position of the start tag on the stack of the element
+ * being closed.
+ * @param name the name of the end tag
+ *
+ * @throws SAXException
+ */
+ private void errUnclosedElements(int eltPos, @Local String name) throws SAXException {
+ errNoCheck("End tag \u201C" + name + "\u201D seen, but there were open elements.");
+ errListUnclosedStartTags(eltPos);
+ }
+
+ /**
+ * Reports a state when elements were expected to be closed ahead of an
+ * implied end tag but were not, then lists the unclosed start tags above
+ * <code>eltPos</code>.
+ *
+ * The message is sent with <code>errNoCheck</code>; presumably callers are
+ * expected to have verified that an error handler is set -- confirm.
+ *
+ * @param eltPos the position of the start tag on the stack of the element
+ * being closed.
+ * @param name the name of the end tag
+ *
+ * @throws SAXException
+ */
+ private void errUnclosedElementsImplied(int eltPos, String name) throws SAXException {
+ errNoCheck("End tag \u201C" + name + "\u201D implied, but there were open elements.");
+ errListUnclosedStartTags(eltPos);
+ }
+
+ /**
+ * Reports a state when elements were expected to be closed ahead of an
+ * implied table cell close but were not, then lists the unclosed start
+ * tags above <code>eltPos</code>.
+ *
+ * The message is sent with <code>errNoCheck</code>; presumably callers are
+ * expected to have verified that an error handler is set -- confirm.
+ *
+ * @param eltPos the position of the start tag on the stack of the element
+ * being closed.
+ * @throws SAXException
+ */
+ private void errUnclosedElementsCell(int eltPos) throws SAXException {
+ errNoCheck("A table cell was implicitly closed, but there were open elements.");
+ errListUnclosedStartTags(eltPos);
+ }
+
+ private void errStrayDoctype() throws SAXException { // fixed-message reporter, sent through err()
+ err("Stray doctype.");
+ }
+
+ /**
+  * Reports an almost-standards-mode doctype through {@code err}, unless
+  * {@code isSrcdocDocument} is set, in which case nothing is reported.
+  *
+  * @throws SAXException propagated from {@code err}
+  */
+ private void errAlmostStandardsDoctype() throws SAXException {
+     if (isSrcdocDocument) {
+         return;
+     }
+     err("Almost standards mode doctype. Expected \u201C<!DOCTYPE html>\u201D.");
+ }
+
+ /**
+  * Emits the "Quirky doctype" error, unless <code>isSrcdocDocument</code>
+  * is set, in which case nothing is reported.
+  *
+  * @throws SAXException if reporting through <code>err</code> throws
+  */
+ private void errQuirkyDoctype() throws SAXException {
+     if (isSrcdocDocument) {
+         return;
+     }
+     err("Quirky doctype. Expected \u201C<!DOCTYPE html>\u201D.");
+ }
+
+ private void errNonSpaceInTrailer() throws SAXException {
+ err("Non-space character in page trailer.");
+ }
+
+ private void errNonSpaceAfterFrameset() throws SAXException {
+ err("Non-space after \u201Cframeset\u201D.");
+ }
+
+ private void errNonSpaceInFrameset() throws SAXException {
+ err("Non-space in \u201Cframeset\u201D.");
+ }
+
+ private void errNonSpaceAfterBody() throws SAXException {
+ err("Non-space character after body.");
+ }
+
+ private void errNonSpaceInColgroupInFragment() throws SAXException {
+ err("Non-space in \u201Ccolgroup\u201D when parsing fragment.");
+ }
+
+ private void errNonSpaceInNoscriptInHead() throws SAXException {
+ err("Non-space character inside \u201Cnoscript\u201D inside \u201Chead\u201D.");
+ }
+
+ /**
+  * Emits the error for an element found between "head" and "body". Does
+  * nothing (and builds no message) when <code>errorHandler</code> is
+  * <code>null</code>.
+  *
+  * @param name the element name quoted in the message
+  * @throws SAXException if reporting through <code>errNoCheck</code> throws
+  */
+ private void errFooBetweenHeadAndBody(@Local String name) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("\u201C" + name + "\u201D element between \u201Chead\u201D and \u201Cbody\u201D.");
+     }
+ }
+
+ /**
+  * Emits the "Start tag seen without seeing a doctype first" error, unless
+  * <code>isSrcdocDocument</code> is set, in which case nothing is reported.
+  *
+  * @throws SAXException if reporting through <code>err</code> throws
+  */
+ private void errStartTagWithoutDoctype() throws SAXException {
+     if (isSrcdocDocument) {
+         return;
+     }
+     err("Start tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D.");
+ }
+
+ private void errNoSelectInTableScope() throws SAXException {
+ err("No \u201Cselect\u201D in table scope.");
+ }
+
+ private void errStartSelectWhereEndSelectExpected() throws SAXException {
+ err("\u201Cselect\u201D start tag where end tag expected.");
+ }
+
+ /**
+  * Emits the error for a start tag seen while "select" is open. Does
+  * nothing when <code>errorHandler</code> is <code>null</code>.
+  *
+  * @param name the start tag name quoted in the message
+  * @throws SAXException if reporting through <code>errNoCheck</code> throws
+  */
+ private void errStartTagWithSelectOpen(@Local String name)
+         throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("\u201C" + name + "\u201D start tag with \u201Cselect\u201D open.");
+     }
+ }
+
+ /**
+  * Emits the "Bad start tag in ... in head" error. Does nothing when
+  * <code>errorHandler</code> is <code>null</code>.
+  *
+  * @param name the name quoted in the message
+  * @throws SAXException if reporting through <code>errNoCheck</code> throws
+  */
+ private void errBadStartTagInHead(@Local String name) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("Bad start tag in \u201C" + name + "\u201D in \u201Chead\u201D.");
+     }
+ }
+
+ private void errImage() throws SAXException {
+ err("Saw a start tag \u201Cimage\u201D.");
+ }
+
+ private void errIsindex() throws SAXException {
+ err("\u201Cisindex\u201D seen.");
+ }
+
+ /**
+  * Emits the error for a start tag seen while an element of the same type
+  * was already open. Does nothing when <code>errorHandler</code> is
+  * <code>null</code>.
+  *
+  * @param name the start tag name quoted in the message
+  * @throws SAXException if reporting through <code>errNoCheck</code> throws
+  */
+ private void errFooSeenWhenFooOpen(@Local String name) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("An \u201C" + name + "\u201D start tag seen but an element of the same type was already open.");
+     }
+ }
+
+ private void errHeadingWhenHeadingOpen() throws SAXException {
+ err("Heading cannot be a child of another heading.");
+ }
+
+ private void errFramesetStart() throws SAXException {
+ err("\u201Cframeset\u201D start tag seen.");
+ }
+
+ private void errNoCellToClose() throws SAXException {
+ err("No cell to close.");
+ }
+
+ /**
+  * Emits the error for a start tag seen in "table". Does nothing when
+  * <code>errorHandler</code> is <code>null</code>.
+  *
+  * @param name the start tag name quoted in the message
+  * @throws SAXException if reporting through <code>errNoCheck</code> throws
+  */
+ private void errStartTagInTable(@Local String name) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("Start tag \u201C" + name + "\u201D seen in \u201Ctable\u201D.");
+     }
+ }
+
+ private void errFormWhenFormOpen() throws SAXException {
+ err("Saw a \u201Cform\u201D start tag, but there was already an active \u201Cform\u201D element. Nested forms are not allowed. Ignoring the tag.");
+ }
+
+ private void errTableSeenWhileTableOpen() throws SAXException {
+ err("Start tag for \u201Ctable\u201D seen but the previous \u201Ctable\u201D is still open.");
+ }
+
+ /**
+  * Emits the error for a start tag in a table body. Does nothing when
+  * <code>errorHandler</code> is <code>null</code>.
+  *
+  * @param name the start tag name quoted in the message
+  * @throws SAXException if reporting through <code>errNoCheck</code> throws
+  */
+ private void errStartTagInTableBody(@Local String name) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("\u201C" + name + "\u201D start tag in table body.");
+     }
+ }
+
+ /**
+  * Emits the "End tag seen without seeing a doctype first" error, unless
+  * <code>isSrcdocDocument</code> is set, in which case nothing is reported.
+  *
+  * @throws SAXException if reporting through <code>err</code> throws
+  */
+ private void errEndTagSeenWithoutDoctype() throws SAXException {
+     if (isSrcdocDocument) {
+         return;
+     }
+     err("End tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D.");
+ }
+
+ private void errEndTagAfterBody() throws SAXException {
+ err("Saw an end tag after \u201Cbody\u201D had been closed.");
+ }
+
+ /**
+  * Emits the error for an end tag seen while "select" is open. Does
+  * nothing when <code>errorHandler</code> is <code>null</code>.
+  *
+  * @param name the end tag name quoted in the message
+  * @throws SAXException if reporting through <code>errNoCheck</code> throws
+  */
+ private void errEndTagSeenWithSelectOpen(@Local String name) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("\u201C" + name + "\u201D end tag with \u201Cselect\u201D open.");
+     }
+ }
+
+ private void errGarbageInColgroup() throws SAXException {
+ err("Garbage in \u201Ccolgroup\u201D fragment.");
+ }
+
+ private void errEndTagBr() throws SAXException {
+ err("End tag \u201Cbr\u201D.");
+ }
+
+ /**
+  * Emits the error for an end tag seen while no element of that name is in
+  * scope. Does nothing when <code>errorHandler</code> is <code>null</code>.
+  *
+  * @param name the element/end tag name, quoted twice in the message
+  * @throws SAXException if reporting through <code>errNoCheck</code> throws
+  */
+ private void errNoElementToCloseButEndTagSeen(@Local String name)
+         throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("No \u201C" + name + "\u201D element in scope but a \u201C" + name + "\u201D end tag seen.");
+     }
+ }
+
+ /**
+  * Emits the error for an HTML start tag in a foreign namespace context.
+  * Does nothing when <code>errorHandler</code> is <code>null</code>.
+  *
+  * @param name the start tag name quoted in the message
+  * @throws SAXException if reporting through <code>errNoCheck</code> throws
+  */
+ private void errHtmlStartTagInForeignContext(@Local String name)
+         throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("HTML start tag \u201C" + name + "\u201D in a foreign namespace context.");
+     }
+ }
+
+ private void errTableClosedWhileCaptionOpen() throws SAXException {
+ err("\u201Ctable\u201D closed but \u201Ccaption\u201D was still open.");
+ }
+
+ private void errNoTableRowToClose() throws SAXException {
+ err("No table row to close.");
+ }
+
+ /**
+  * Emits the error for misplaced non-space characters inside a table.
+  *
+  * @throws SAXException if reporting through <code>err</code> throws
+  */
+ private void errNonSpaceInTable() throws SAXException {
+     // Message previously read "insided"; corrected to "inside".
+     err("Misplaced non-space characters inside a table.");
+ }
+
+ /**
+  * Emits the error for unclosed children in "ruby". Does nothing when
+  * <code>errorHandler</code> is <code>null</code>.
+  *
+  * @throws SAXException if reporting through <code>errNoCheck</code> throws
+  */
+ private void errUnclosedChildrenInRuby() throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("Unclosed children in \u201Cruby\u201D.");
+     }
+ }
+
+ /**
+  * Emits the error for a start tag seen without a "ruby" element being
+  * open. Does nothing when <code>errorHandler</code> is <code>null</code>.
+  *
+  * @param name the start tag name quoted in the message
+  * @throws SAXException if reporting through <code>errNoCheck</code> throws
+  */
+ private void errStartTagSeenWithoutRuby(@Local String name) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("Start tag \u201C" + name + "\u201D seen without a \u201Cruby\u201D element being open.");
+     }
+ }
+
+ /**
+  * Emits the error for self-closing syntax used on a non-void HTML element.
+  * Does nothing when <code>errorHandler</code> is <code>null</code>.
+  *
+  * @throws SAXException if reporting through <code>errNoCheck</code> throws
+  */
+ private void errSelfClosing() throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("Self-closing syntax (\u201C/>\u201D) used on a non-void HTML element. Ignoring the slash and treating as a start tag.");
+     }
+ }
+
+ private void errNoCheckUnclosedElementsOnStack() throws SAXException {
+ errNoCheck("Unclosed elements on stack.");
+ }
+
+ /**
+  * Emits the error for an end tag whose name did not match the name of the
+  * current open element. Does nothing when <code>errorHandler</code> is
+  * <code>null</code>.
+  *
+  * @param name the end tag name quoted in the message
+  * @param currOpenName the name of the current open element, quoted in
+  *        the message
+  * @throws SAXException if reporting through <code>errNoCheck</code> throws
+  */
+ private void errEndTagDidNotMatchCurrentOpenElement(@Local String name,
+         @Local String currOpenName) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("End tag \u201C" + name
+                 + "\u201D did not match the name of the current open element (\u201C"
+                 + currOpenName + "\u201D).");
+     }
+ }
+
+ /**
+  * Emits the error for an end tag that violates nesting rules. Does
+  * nothing when <code>errorHandler</code> is <code>null</code>.
+  *
+  * @param name the end tag name quoted in the message
+  * @throws SAXException if reporting through <code>errNoCheck</code> throws
+  */
+ private void errEndTagViolatesNestingRules(@Local String name) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("End tag \u201C" + name + "\u201D violates nesting rules.");
+     }
+ }
+
+ /**
+  * Emits the "End of file seen and there were open elements." error and
+  * then lists the unclosed start tags starting from stack position 0.
+  * Does nothing when <code>errorHandler</code> is <code>null</code>.
+  *
+  * @throws SAXException if reporting throws
+  */
+ private void errEofWithUnclosedElements() throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("End of file seen and there were open elements.");
+         // Position 0: report every element still on the stack.
+         errListUnclosedStartTags(0);
+     }
+ }
+
+ /**
+  * Emits the error for an end tag seen while there were unclosed elements,
+  * and then lists the unclosed start tags starting from stack position 0.
+  * Does nothing when <code>errorHandler</code> is <code>null</code>.
+  *
+  * @param name
+  *            the name of the end tag, quoted in the message
+  * @throws SAXException if reporting throws
+  */
+ private void errEndWithUnclosedElements(@Local String name) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("End tag for \u201C" + name + "\u201D seen, but there were unclosed elements.");
+         // Position 0: report every element still on the stack.
+         errListUnclosedStartTags(0);
+     }
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilderState.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilderState.java
new file mode 100644
index 000000000..c4e2d4afb
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilderState.java
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2009-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+/**
+ * Interface for exposing the state of the HTML5 tree builder so that the
+ * interface can be implemented by the tree builder itself and by snapshots.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface TreeBuilderState<T> {
+
+    /**
+     * Gives access to the stack.
+     *
+     * @return the stack array
+     */
+    public StackNode<T>[] getStack();
+
+    /**
+     * Gives access to the list of active formatting elements.
+     *
+     * @return the array backing the list of active formatting elements
+     */
+    public StackNode<T>[] getListOfActiveFormattingElements();
+
+    /**
+     * Gives access to the stack of template insertion modes.
+     *
+     * @return the array backing the stack of template insertion modes
+     */
+    public int[] getTemplateModeStack();
+
+    /**
+     * Gives access to the form pointer.
+     *
+     * @return the form pointer
+     */
+    public T getFormPointer();
+
+    /**
+     * Gives access to the head pointer.
+     *
+     * @return the head pointer
+     */
+    public T getHeadPointer();
+
+    /**
+     * Gives access to the deep tree surrogate parent.
+     *
+     * @return the deep tree surrogate parent
+     */
+    public T getDeepTreeSurrogateParent();
+
+    /**
+     * Reports the mode.
+     *
+     * @return the mode
+     */
+    public int getMode();
+
+    /**
+     * Reports the original mode.
+     *
+     * @return the original mode
+     */
+    public int getOriginalMode();
+
+    /**
+     * Reports the frameset-ok flag.
+     *
+     * @return the value of the frameset-ok flag
+     */
+    public boolean isFramesetOk();
+
+    /**
+     * Reports the need-to-drop-LF flag.
+     *
+     * @return the value of the need-to-drop-LF flag
+     */
+    public boolean isNeedToDropLF();
+
+    /**
+     * Reports the quirks flag.
+     *
+     * @return the value of the quirks flag
+     */
+    public boolean isQuirks();
+
+    /**
+     * Reports the length of the stack.
+     *
+     * @return the length of the stack
+     */
+    public int getStackLength();
+
+    /**
+     * Reports the length of the list of active formatting elements.
+     *
+     * @return the length of the list of active formatting elements
+     */
+    public int getListOfActiveFormattingElementsLength();
+
+    /**
+     * Reports the length of the stack of template insertion modes.
+     *
+     * @return the length of the stack of template insertion modes
+     */
+    public int getTemplateModeStackLength();
+} \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/UTF16Buffer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/UTF16Buffer.java
new file mode 100644
index 000000000..35f1ac055
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/UTF16Buffer.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2008-2010 Mozilla Foundation
+ * Copyright (c) 2018-2020 Moonchild Productions
+ * Copyright (c) 2020 Binary Outcast
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.NoLength;
+
+/**
+ * A UTF-16 buffer that knows the start and end indices of its unconsumed
+ * content.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class UTF16Buffer {
+
+    /**
+     * The backing store of the buffer. May be larger than the logical content
+     * of this <code>UTF16Buffer</code>.
+     */
+    private final @NoLength char[] buffer;
+
+    /**
+     * The index of the first unconsumed character in the backing buffer.
+     */
+    private int start;
+
+    /**
+     * The index of the slot immediately after the last character in the backing
+     * buffer that is part of the logical content of this
+     * <code>UTF16Buffer</code>.
+     */
+    private int end;
+
+    //[NOCPP[
+
+    /**
+     * Constructor for wrapping an existing UTF-16 code unit array.
+     *
+     * @param buffer
+     *            the backing buffer
+     * @param start
+     *            the index of the first character to consume
+     * @param end
+     *            the index immediately after the last character to consume
+     */
+    public UTF16Buffer(@NoLength char[] buffer, int start, int end) {
+        this.buffer = buffer;
+        this.start = start;
+        this.end = end;
+    }
+
+    // ]NOCPP]
+
+    /**
+     * Returns the start index.
+     *
+     * @return the start index
+     */
+    public int getStart() {
+        return start;
+    }
+
+    /**
+     * Sets the start index.
+     *
+     * @param start
+     *            the start index
+     */
+    public void setStart(int start) {
+        this.start = start;
+    }
+
+    /**
+     * Returns the backing buffer.
+     *
+     * @return the backing buffer
+     */
+    public @NoLength char[] getBuffer() {
+        return buffer;
+    }
+
+    /**
+     * Returns the end index.
+     *
+     * @return the end index
+     */
+    public int getEnd() {
+        return end;
+    }
+
+    /**
+     * Checks if the buffer has data left.
+     *
+     * @return <code>true</code> if there's data left
+     */
+    public boolean hasMore() {
+        return start < end;
+    }
+
+    /**
+     * Returns <code>end - start</code>.
+     *
+     * @return <code>end - start</code>
+     */
+    public int getLength() {
+        return end - start;
+    }
+
+    /**
+     * Adjusts the start index to skip over the first character if it is a line
+     * feed and the previous character was a carriage return. Does nothing when
+     * there is no unconsumed content.
+     *
+     * @param lastWasCR
+     *            whether the previous character was a carriage return
+     */
+    public void adjust(boolean lastWasCR) {
+        // Only look at buffer[start] when it is part of the logical content;
+        // otherwise it is stale data or lies past the end of the backing array.
+        if (lastWasCR && start < end && buffer[start] == '\n') {
+            start++;
+        }
+    }
+
+    /**
+     * Sets the end index.
+     *
+     * @param end
+     *            the end index
+     */
+    public void setEnd(int end) {
+        this.end = end;
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/package.html
new file mode 100644
index 000000000..6d029a13e
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/package.html
@@ -0,0 +1,30 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>This package contains the bulk of parser internals. Only implementors of
+additional tree builders or token handlers should look here.</p>
+</body>
+</html> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/BomSniffer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/BomSniffer.java
new file mode 100644
index 000000000..42d7a837f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/BomSniffer.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.io;
+
+import java.io.IOException;
+
+import nu.validator.htmlparser.common.ByteReadable;
+
+/**
+ * The BOM sniffing part of the HTML5 encoding sniffing algorithm.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class BomSniffer {
+
+    /** The byte source whose leading bytes are inspected. */
+    private final ByteReadable source;
+
+    /**
+     * Creates a sniffer that reads from the given byte source.
+     *
+     * @param source
+     *            the byte source to read the potential byte order mark from
+     */
+    public BomSniffer(final ByteReadable source) {
+        this.source = source;
+    }
+
+    /**
+     * Reads up to three bytes from the source and matches them against the
+     * UTF-8 (EF BB BF), UTF-16LE (FF FE) and UTF-16BE (FE FF) byte order
+     * marks. Reading stops at the first byte that rules out a match.
+     *
+     * @return the encoding indicated by the byte order mark, or
+     *         <code>null</code> if the bytes read do not form one
+     * @throws IOException
+     *             if reading from the source throws
+     */
+    Encoding sniff() throws IOException {
+        final int first = source.readByte();
+        if (first == 0xEF) {
+            // UTF-8: the third byte is only read if the second one matched.
+            if (source.readByte() == 0xBB && source.readByte() == 0xBF) {
+                return Encoding.UTF8;
+            }
+            return null;
+        }
+        if (first == 0xFF) {
+            // little-endian UTF-16
+            return source.readByte() == 0xFE ? Encoding.UTF16LE : null;
+        }
+        if (first == 0xFE) {
+            // big-endian UTF-16
+            return source.readByte() == 0xFF ? Encoding.UTF16BE : null;
+        }
+        return null;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Confidence.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Confidence.java
new file mode 100644
index 000000000..1a2d49746
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Confidence.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.io;
+
+/**
+ * How sure the driver is about the character encoding in use.
+ */
+public enum Confidence {
+    /** The encoding may still change. */
+    TENTATIVE,
+    /** The encoding is settled. */
+    CERTAIN
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Driver.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Driver.java
new file mode 100644
index 000000000..f0b0cc55d
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Driver.java
@@ -0,0 +1,597 @@
+/*
+ * Copyright (c) 2005, 2006, 2007 Henri Sivonen
+ * Copyright (c) 2007-2013 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.io;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.nio.charset.UnsupportedCharsetException;
+
+import nu.validator.htmlparser.common.CharacterHandler;
+import nu.validator.htmlparser.common.EncodingDeclarationHandler;
+import nu.validator.htmlparser.common.Heuristics;
+import nu.validator.htmlparser.common.TransitionHandler;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.extra.NormalizationChecker;
+import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
+import nu.validator.htmlparser.impl.Tokenizer;
+import nu.validator.htmlparser.impl.UTF16Buffer;
+import nu.validator.htmlparser.rewindable.RewindableInputStream;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+public class Driver implements EncodingDeclarationHandler {
+
+ /**
+ * The input UTF-16 code unit stream. If a byte stream was given, this
+ * object is an instance of <code>HtmlInputStreamReader</code>.
+ */
+ private Reader reader;
+
+ /**
+ * The reference to the rewindable byte stream. <code>null</code> if
+ * prohibited or no longer needed.
+ */
+ private RewindableInputStream rewindableInputStream;
+
+ /**
+ * Whether <code>runStates</code> should skip a U+FEFF found as the very
+ * first character read. Reset to <code>true</code> at the start of each
+ * <code>tokenize</code> call.
+ */
+ private boolean swallowBom;
+
+ /**
+ * The encoding currently in effect for the byte stream, or
+ * <code>null</code> if none has been established (for example when no
+ * external declaration was given). Cleared at the end of
+ * <code>tokenize</code>.
+ */
+ private Encoding characterEncoding;
+
+ /**
+ * Whether <code>tokenize</code> may wrap the byte stream in a
+ * <code>RewindableInputStream</code> when no external encoding is known.
+ */
+ private boolean allowRewinding = true;
+
+ /**
+ * The heuristics setting handed to <code>HtmlInputStreamReader</code>
+ * when no external encoding is known.
+ */
+ private Heuristics heuristics = Heuristics.NONE;
+
+ /**
+ * The tokenizer driven by this object; this driver registers itself as
+ * its encoding declaration handler in the constructor.
+ */
+ private final Tokenizer tokenizer;
+
+ /**
+ * Set to <code>TENTATIVE</code> at the start of <code>tokenize</code> and
+ * to <code>CERTAIN</code> by <code>becomeConfident</code>.
+ */
+ private Confidence confidence;
+
+ /**
+ * Used for NFC checking if non-<code>null</code>, source code capture,
+ * etc.
+ */
+ private CharacterHandler[] characterHandlers = new CharacterHandler[0];
+
+ /**
+  * Creates a driver for the given tokenizer and registers this driver as
+  * the tokenizer's encoding declaration handler.
+  *
+  * @param tokenizer
+  *            the tokenizer to drive
+  */
+ public Driver(Tokenizer tokenizer) {
+     this.tokenizer = tokenizer;
+     this.tokenizer.setEncodingDeclarationHandler(this);
+ }
+
+ /**
+  * Tells whether rewinding the byte stream is allowed.
+  *
+  * @return <code>true</code> if rewinding is allowed
+  */
+ public boolean isAllowRewinding() {
+     return this.allowRewinding;
+ }
+
+ /**
+  * Chooses whether rewinding the byte stream is allowed.
+  *
+  * @param allowRewinding
+  *            <code>true</code> to allow rewinding
+  */
+ public void setAllowRewinding(final boolean allowRewinding) {
+     this.allowRewinding = allowRewinding;
+ }
+
+ /**
+  * Turns NFC checking on or off.
+  *
+  * Turning checking on registers a new <code>NormalizationChecker</code>
+  * (wired to the tokenizer's current error handler) as a character
+  * handler. Turning it off removes every registered
+  * <code>NormalizationChecker</code>, so that
+  * {@link #isCheckingNormalization()} reports <code>false</code>
+  * afterwards. Calling this with the state already in effect is a no-op.
+  *
+  * @param enable
+  *            <code>true</code> if checking on
+  */
+ public void setCheckingNormalization(boolean enable) {
+     if (enable == isCheckingNormalization()) {
+         return; // already in the requested state
+     }
+     if (enable) {
+         NormalizationChecker normalizationChecker = new NormalizationChecker(tokenizer);
+         normalizationChecker.setErrorHandler(tokenizer.getErrorHandler());
+         // The checker only takes effect once it is registered as a handler.
+         addCharacterHandler(normalizationChecker);
+         return;
+     }
+     // Count the survivors first so the new array has no trailing nulls
+     // even if more than one checker was registered.
+     int kept = 0;
+     for (int i = 0; i < characterHandlers.length; i++) {
+         if (!(characterHandlers[i] instanceof NormalizationChecker)) {
+             kept++;
+         }
+     }
+     CharacterHandler[] newHandlers = new CharacterHandler[kept];
+     int j = 0;
+     for (int i = 0; i < characterHandlers.length; i++) {
+         CharacterHandler ch = characterHandlers[i];
+         if (!(ch instanceof NormalizationChecker)) {
+             newHandlers[j] = ch;
+             j++;
+         }
+     }
+     characterHandlers = newHandlers;
+ }
+
+ /**
+  * Appends a character handler to the end of the handler array.
+  *
+  * @param characterHandler
+  *            the handler to add; must not be <code>null</code>
+  * @throws IllegalArgumentException
+  *             if <code>characterHandler</code> is <code>null</code>
+  */
+ public void addCharacterHandler(CharacterHandler characterHandler) {
+     if (characterHandler == null) {
+         throw new IllegalArgumentException("Null argument.");
+     }
+     final int oldLength = characterHandlers.length;
+     final CharacterHandler[] grown = new CharacterHandler[oldLength + 1];
+     System.arraycopy(characterHandlers, 0, grown, 0, oldLength);
+     grown[oldLength] = characterHandler;
+     characterHandlers = grown;
+ }
+
+ /**
+  * Tells whether a <code>NormalizationChecker</code> is among the
+  * registered character handlers.
+  *
+  * @return <code>true</code> if checking on
+  */
+ public boolean isCheckingNormalization() {
+     for (CharacterHandler handler : characterHandlers) {
+         if (handler instanceof NormalizationChecker) {
+             return true;
+         }
+     }
+     return false;
+ }
+
+ /**
+ * Runs the tokenization. This is the main entry point.
+ *
+ * Uses the character stream of the input source if there is one and
+ * otherwise wraps the byte stream in an <code>HtmlInputStreamReader</code>.
+ * If a <code>ReparseException</code> is thrown while running, the byte
+ * stream is rewound and tokenization starts over with the encoding then
+ * stored in <code>characterEncoding</code>.
+ *
+ * @param is
+ * the input source
+ * @throws SAXException
+ * on fatal error (if configured to treat XML violations as
+ * fatal) or if the token handler threw
+ * @throws IOException
+ * if the stream threw
+ * @throws IllegalArgumentException
+ * if <code>is</code> is <code>null</code>
+ */
+ public void tokenize(InputSource is) throws SAXException, IOException {
+ if (is == null) {
+ throw new IllegalArgumentException("InputSource was null.");
+ }
+ // Reset per-run state before looking at the input.
+ tokenizer.start();
+ confidence = Confidence.TENTATIVE;
+ swallowBom = true;
+ rewindableInputStream = null;
+ tokenizer.initLocation(is.getPublicId(), is.getSystemId());
+ this.reader = is.getCharacterStream();
+ // May set swallowBom to false; yields null when no usable external
+ // encoding was declared.
+ this.characterEncoding = encodingFromExternalDeclaration(is.getEncoding());
+ if (this.reader == null) {
+ // No character stream supplied: fall back to the byte stream.
+ InputStream inputStream = is.getByteStream();
+ if (inputStream == null) {
+ throw new SAXException("Both streams in InputSource were null.");
+ }
+ if (this.characterEncoding == null) {
+ // Encoding unknown: keep the stream rewindable (if allowed) so that
+ // a later encoding change can restart from the beginning.
+ if (allowRewinding) {
+ inputStream = rewindableInputStream = new RewindableInputStream(
+ inputStream);
+ }
+ this.reader = new HtmlInputStreamReader(inputStream,
+ tokenizer.getErrorHandler(), tokenizer, this, heuristics);
+ } else {
+ if (this.characterEncoding != Encoding.UTF8) {
+ warnWithoutLocation("Legacy encoding \u201C"
+ + this.characterEncoding.getCanonName()
+ + "\u201D used. Documents should use UTF-8.");
+ }
+ becomeConfident();
+ this.reader = new HtmlInputStreamReader(inputStream,
+ tokenizer.getErrorHandler(), tokenizer, this, this.characterEncoding);
+ }
+ } else {
+ // A character stream was supplied, so there is nothing to sniff.
+ becomeConfident();
+ }
+ // Holds the first throwable seen so it can be rethrown after cleanup.
+ Throwable t = null;
+ try {
+ for (;;) {
+ try {
+ for (int i = 0; i < characterHandlers.length; i++) {
+ CharacterHandler ch = characterHandlers[i];
+ ch.start();
+ }
+ runStates();
+ break;
+ } catch (ReparseException e) {
+ // internalEncodingDeclaration stores the new encoding in
+ // characterEncoding before throwing ReparseException.
+ if (rewindableInputStream == null) {
+ tokenizer.fatal("Changing encoding at this point would need non-streamable behavior.");
+ } else {
+ rewindableInputStream.rewind();
+ becomeConfident();
+ this.reader = new HtmlInputStreamReader(
+ rewindableInputStream, tokenizer.getErrorHandler(), tokenizer,
+ this, this.characterEncoding);
+ }
+ continue;
+ }
+ }
+ } catch (Throwable tr) {
+ t = tr;
+ } finally {
+ // NOTE(review): if tokenizer.end() or a handler's end() throws, the
+ // statements after it in this try (including reader.close()) are
+ // skipped -- confirm this is acceptable.
+ try {
+ tokenizer.end();
+ characterEncoding = null;
+ for (int i = 0; i < characterHandlers.length; i++) {
+ CharacterHandler ch = characterHandlers[i];
+ ch.end();
+ }
+ reader.close();
+ reader = null;
+ rewindableInputStream = null;
+ } catch (Throwable tr) {
+ if (t == null) {
+ t = tr;
+ } // else drop the later throwable
+ }
+ // Rethrow with the most specific type the method signature permits.
+ if (t != null) {
+ if (t instanceof IOException) {
+ throw (IOException) t;
+ } else if (t instanceof SAXException) {
+ throw (SAXException) t;
+ } else if (t instanceof RuntimeException) {
+ throw (RuntimeException) t;
+ } else if (t instanceof Error) {
+ throw (Error) t;
+ } else {
+ // impossible
+ throw new RuntimeException(t);
+ }
+ }
+ }
+ }
+
+ /**
+  * Clears the <code>swallowBom</code> flag so that a leading U+FEFF is not
+  * skipped by <code>runStates</code>.
+  */
+ void dontSwallowBom() {
+     this.swallowBom = false;
+ }
+
+ /**
+ * Reads the whole <code>reader</code> in chunks of up to 2048 UTF-16 code
+ * units, passing each chunk first to every character handler and then to
+ * the tokenizer, and finally signals end of file to the tokenizer.
+ *
+ * The first chunk is handled separately so that a leading U+FEFF can be
+ * skipped when <code>swallowBom</code> is set.
+ *
+ * @throws SAXException
+ * if the tokenizer or a character handler throws it
+ * @throws IOException
+ * if reading from <code>reader</code> throws
+ */
+ private void runStates() throws SAXException, IOException {
+ char[] buffer = new char[2048];
+ UTF16Buffer bufr = new UTF16Buffer(buffer, 0, 0);
+ // Carries the result of the previous tokenizeBuffer call so that
+ // bufr.adjust can skip a line feed that follows a carriage return.
+ boolean lastWasCR = false;
+ int len = -1;
+ if ((len = reader.read(buffer)) != -1) {
+ assert len > 0;
+ int streamOffset = 0;
+ int offset = 0;
+ int length = len;
+ if (swallowBom) {
+ if (buffer[0] == '\uFEFF') {
+ // Skip the BOM; the base offset is shifted by one to match.
+ streamOffset = -1;
+ offset = 1;
+ length--;
+ }
+ }
+ if (length > 0) {
+ for (int i = 0; i < characterHandlers.length; i++) {
+ CharacterHandler ch = characterHandlers[i];
+ ch.characters(buffer, offset, length);
+ }
+ tokenizer.setTransitionBaseOffset(streamOffset);
+ bufr.setStart(offset);
+ bufr.setEnd(offset + length);
+ // tokenizeBuffer may return before the chunk is fully consumed, so
+ // keep calling it until nothing is left.
+ while (bufr.hasMore()) {
+ bufr.adjust(lastWasCR);
+ lastWasCR = false;
+ if (bufr.hasMore()) {
+ lastWasCR = tokenizer.tokenizeBuffer(bufr);
+ }
+ }
+ }
+ streamOffset = length;
+ // Remaining chunks: same handling, without the BOM special case.
+ while ((len = reader.read(buffer)) != -1) {
+ assert len > 0;
+ for (int i = 0; i < characterHandlers.length; i++) {
+ CharacterHandler ch = characterHandlers[i];
+ ch.characters(buffer, 0, len);
+ }
+ tokenizer.setTransitionBaseOffset(streamOffset);
+ bufr.setStart(0);
+ bufr.setEnd(len);
+ while (bufr.hasMore()) {
+ bufr.adjust(lastWasCR);
+ lastWasCR = false;
+ if (bufr.hasMore()) {
+ lastWasCR = tokenizer.tokenizeBuffer(bufr);
+ }
+ }
+ streamOffset += len;
+ }
+ }
+ tokenizer.eof();
+ }
+
+ /**
+  * Stores the given encoding as the current character encoding and, when
+  * the given confidence is <code>CERTAIN</code>, becomes confident.
+  *
+  * @param encoding
+  *            the encoding to store
+  * @param confidence
+  *            how certain the caller is about the encoding
+  */
+ public void setEncoding(Encoding encoding, Confidence confidence) {
+     characterEncoding = encoding;
+     final boolean certain = (confidence == Confidence.CERTAIN);
+     if (certain) {
+         becomeConfident();
+     }
+ }
+
+ /**
+  * Handles an encoding declaration found inside the document.
+  *
+  * UTF-16 variants are reported as an error and treated as UTF-8. If the
+  * declared encoding differs from the one in use and the driver is not yet
+  * certain, the new encoding is stored in <code>characterEncoding</code>
+  * and a <code>ReparseException</code> is thrown.
+  *
+  * @param internalCharset
+  *            the encoding label from the document
+  * @return <code>false</code> if the label is unsupported or names an
+  *         encoding that is not an ASCII superset; <code>true</code>
+  *         otherwise
+  * @throws SAXException
+  *             if error reporting throws
+  */
+ public boolean internalEncodingDeclaration(String internalCharset)
+         throws SAXException {
+     try {
+         internalCharset = Encoding.toAsciiLowerCase(internalCharset);
+         Encoding cs;
+         if ("utf-16".equals(internalCharset)
+                 || "utf-16be".equals(internalCharset)
+                 || "utf-16le".equals(internalCharset)) {
+             tokenizer.errTreeBuilder("Internal encoding declaration specified \u201C"
+                     + internalCharset
+                     + "\u201D which is not an ASCII superset. Continuing as if the encoding had been \u201Cutf-8\u201D.");
+             cs = Encoding.UTF8;
+             internalCharset = "utf-8";
+         } else {
+             cs = Encoding.forName(internalCharset);
+         }
+         Encoding actual = cs.getActualHtmlEncoding();
+         if (actual == null) {
+             actual = cs;
+         }
+         if (!actual.isAsciiSuperset()) {
+             tokenizer.errTreeBuilder("Internal encoding declaration specified \u201C"
+                     + internalCharset
+                     + "\u201D which is not an ASCII superset. Not changing the encoding.");
+             return false;
+         }
+         if (characterEncoding == null) {
+             // Reader case
+             return true;
+         }
+         if (characterEncoding == actual) {
+             becomeConfident();
+             return true;
+         }
+         // From here on the declared encoding differs from the one in use.
+         if (confidence == Confidence.CERTAIN) {
+             tokenizer.errTreeBuilder("Internal encoding declaration \u201C"
+                     + internalCharset
+                     + "\u201D disagrees with the actual encoding of the document (\u201C"
+                     + characterEncoding.getCanonName() + "\u201D).");
+         } else {
+             Encoding newEnc = whineAboutEncodingAndReturnActual(
+                     internalCharset, cs);
+             tokenizer.errTreeBuilder("Changing character encoding \u201C"
+                     + internalCharset + "\u201D and reparsing.");
+             characterEncoding = newEnc;
+             throw new ReparseException();
+         }
+         return true;
+     } catch (UnsupportedCharsetException e) {
+         // Message previously misspelled "character" as "chararacter".
+         tokenizer.errTreeBuilder("Internal encoding declaration named an unsupported character encoding \u201C"
+                 + internalCharset + "\u201D.");
+         return false;
+     }
+ }
+
+ /**
+ * Commits to the current character encoding: tells the rewindable input
+ * stream (if there is one) that it will not be rewound, sets the
+ * confidence to <code>CERTAIN</code> and notifies the tokenizer.
+ */
+ private void becomeConfident() {
+ if (rewindableInputStream != null) {
+ rewindableInputStream.willNotRewind();
+ }
+ confidence = Confidence.CERTAIN;
+ tokenizer.becomeConfident();
+ }
+
+ /**
+ * Sets the encoding sniffing heuristics. The value is only stored in the
+ * <code>heuristics</code> field here; nothing is sniffed by this call.
+ *
+ * @param heuristics
+ *            the heuristics to set
+ */
+ public void setHeuristics(Heuristics heuristics) {
+ this.heuristics = heuristics;
+ }
+
+ /**
+  * Sends a warning to the tokenizer's error handler with the line and
+  * column both set to -1. Does nothing when no error handler is installed.
+  *
+  * @param message
+  *            the warning text
+  * @throws SAXException
+  *             if the error handler throws
+  */
+ protected void warnWithoutLocation(String message) throws SAXException {
+     ErrorHandler handler = tokenizer.getErrorHandler();
+     if (handler != null) {
+         handler.warning(new SAXParseException(message, null,
+                 tokenizer.getSystemId(), -1, -1));
+     }
+ }
+
+ /**
+  * Resolves an externally declared encoding label to the encoding that
+  * should be used.
+  *
+  * The label is ASCII-lowercased before lookup. <code>swallowBom</code> is
+  * cleared when the label resolves to "utf-16" or "utf-32" and set when
+  * the label is unsupported.
+  *
+  * @param encoding
+  *            the declared label, or <code>null</code>
+  * @return the encoding to use, or <code>null</code> if no label was given
+  *         or the label is unsupported (an error is reported in the latter
+  *         case)
+  * @throws SAXException
+  *             if an error handler throws
+  */
+ protected Encoding encodingFromExternalDeclaration(String encoding)
+         throws SAXException {
+     if (encoding == null) {
+         return null;
+     }
+     encoding = Encoding.toAsciiLowerCase(encoding);
+     try {
+         Encoding declared = Encoding.forName(encoding);
+         String canon = declared.getCanonName();
+         if ("utf-16".equals(canon) || "utf-32".equals(canon)) {
+             swallowBom = false;
+         }
+         return whineAboutEncodingAndReturnActual(encoding, declared);
+     } catch (UnsupportedCharsetException e) {
+         tokenizer.err("Unsupported character encoding name: \u201C" + encoding
+                 + "\u201D. Will sniff.");
+         swallowBom = true;
+         return null;
+     }
+ }
+
+ /**
+  * Reports what is wrong with a declared encoding label and returns the
+  * encoding that will be used for it.
+  *
+  * Errors are raised for unregistered encodings and for labels that are
+  * not the canonical name; warnings are raised for discouraged,
+  * EBCDIC-like or obscure encodings and when a replacement encoding is
+  * substituted.
+  *
+  * @param encoding
+  *            the label as declared (used in the messages)
+  * @param cs
+  *            the encoding the label resolved to
+  * @return the replacement encoding of <code>cs</code> if it has one,
+  *         otherwise <code>cs</code> itself
+  * @throws SAXException
+  *             if an error handler throws
+  */
+ protected Encoding whineAboutEncodingAndReturnActual(String encoding,
+         Encoding cs) throws SAXException {
+     if (cs.isRegistered()) {
+         String preferred = cs.getCanonName();
+         if (!preferred.equals(encoding)) {
+             tokenizer.err("The encoding \u201C"
+                     + encoding
+                     + "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C"
+                     + preferred + "\u201D. (Charmod C024)");
+         }
+     } else if (encoding.startsWith("x-")) {
+         tokenizer.err("The encoding \u201C"
+                 + encoding
+                 + "\u201D is not an IANA-registered encoding. (Charmod C022)");
+     } else {
+         tokenizer.err("The encoding \u201C"
+                 + encoding
+                 + "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)");
+     }
+
+     // At most one of the following advisory warnings is emitted.
+     if (cs.isShouldNot()) {
+         tokenizer.warn("Authors should not use the character encoding \u201C"
+                 + encoding
+                 + "\u201D. It is recommended to use \u201CUTF-8\u201D.");
+     } else if (cs.isLikelyEbcdic()) {
+         tokenizer.warn("Authors should not use EBCDIC-based encodings. It is recommended to use \u201CUTF-8\u201D.");
+     } else if (cs.isObscure()) {
+         tokenizer.warn("The character encoding \u201C"
+                 + encoding
+                 + "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
+     }
+
+     Encoding replacement = cs.getActualHtmlEncoding();
+     if (replacement != null) {
+         tokenizer.warn("Using \u201C" + replacement.getCanonName()
+                 + "\u201D instead of the declared encoding \u201C"
+                 + encoding + "\u201D.");
+         return replacement;
+     }
+     return cs;
+ }
+
+ /**
+ * Thrown by {@link #internalEncodingDeclaration(String)} after it has
+ * replaced <code>characterEncoding</code>, to signal that the document has
+ * to be parsed again.
+ */
+ private class ReparseException extends SAXException {
+
+ }
+
+ /**
+ * Forwards the meta boundary notification to the tokenizer.
+ */
+ void notifyAboutMetaBoundary() {
+ tokenizer.notifyAboutMetaBoundary();
+ }
+
+ /**
+ * Forwards the comment policy to the tokenizer.
+ *
+ * @param commentPolicy
+ *            the policy handed to the tokenizer
+ * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
+ tokenizer.setCommentPolicy(commentPolicy);
+ }
+
+ /**
+ * Forwards the policy for non-XML characters in content to the tokenizer.
+ *
+ * @param contentNonXmlCharPolicy
+ *            the policy handed to the tokenizer
+ * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setContentNonXmlCharPolicy(
+ XmlViolationPolicy contentNonXmlCharPolicy) {
+ tokenizer.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
+ }
+
+ /**
+ * Forwards the content space policy to the tokenizer.
+ *
+ * @param contentSpacePolicy
+ *            the policy handed to the tokenizer
+ * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
+ tokenizer.setContentSpacePolicy(contentSpacePolicy);
+ }
+
+ /**
+  * Installs the error handler on the tokenizer and on every registered
+  * character handler that is a <code>NormalizationChecker</code>.
+  *
+  * @param eh
+  *            the error handler to install
+  * @see nu.validator.htmlparser.impl.Tokenizer#setErrorHandler(org.xml.sax.ErrorHandler)
+  */
+ public void setErrorHandler(ErrorHandler eh) {
+     tokenizer.setErrorHandler(eh);
+     for (CharacterHandler handler : characterHandlers) {
+         if (handler instanceof NormalizationChecker) {
+             ((NormalizationChecker) handler).setErrorHandler(eh);
+         }
+     }
+ }
+
+ /**
+  * Passes the transition handler to the tokenizer, which is only possible
+  * when the tokenizer is an <code>ErrorReportingTokenizer</code>.
+  *
+  * @param transitionHandler
+  *            the handler to install; <code>null</code> is accepted for any
+  *            tokenizer
+  * @throws IllegalStateException
+  *             if a non-null handler is given for a plain tokenizer
+  */
+ public void setTransitionHandler(TransitionHandler transitionHandler) {
+     if (!(tokenizer instanceof ErrorReportingTokenizer)) {
+         if (transitionHandler != null) {
+             throw new IllegalStateException("Attempt to set a transition handler on a plain tokenizer.");
+         }
+         return;
+     }
+     ((ErrorReportingTokenizer) tokenizer).setTransitionHandler(transitionHandler);
+ }
+
+ /**
+ * Forwards the flag to the tokenizer.
+ *
+ * @param html4ModeCompatibleWithXhtml1Schemata
+ *            the value handed to the tokenizer
+ * @see nu.validator.htmlparser.impl.Tokenizer#setHtml4ModeCompatibleWithXhtml1Schemata(boolean)
+ */
+ public void setHtml4ModeCompatibleWithXhtml1Schemata(
+ boolean html4ModeCompatibleWithXhtml1Schemata) {
+ tokenizer.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+ }
+
+ /**
+ * Forwards the flag to the tokenizer.
+ *
+ * @param mappingLangToXmlLang
+ *            the value handed to the tokenizer
+ * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
+ */
+ public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
+ tokenizer.setMappingLangToXmlLang(mappingLangToXmlLang);
+ }
+
+ /**
+ * Forwards the name policy to the tokenizer.
+ *
+ * @param namePolicy
+ *            the policy handed to the tokenizer
+ * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setNamePolicy(XmlViolationPolicy namePolicy) {
+ tokenizer.setNamePolicy(namePolicy);
+ }
+
+ /**
+ * Forwards the xmlns policy to the tokenizer.
+ *
+ * @param xmlnsPolicy
+ *            the policy handed to the tokenizer
+ * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
+ tokenizer.setXmlnsPolicy(xmlnsPolicy);
+ }
+
+ /**
+  * Returns the canonical name of the character encoding currently in use.
+  *
+  * @return the canonical encoding name, or <code>null</code> when no
+  *         encoding has been recorded
+  * @throws SAXException
+  *             declared for API compatibility; not thrown by this
+  *             implementation
+  */
+ public String getCharacterEncoding() throws SAXException {
+     // characterEncoding is null in the "Reader case" that
+     // internalEncodingDeclaration() explicitly allows for, so guard
+     // against dereferencing it.
+     return characterEncoding == null ? null : characterEncoding.getCanonName();
+ }
+
+ /**
+ * Returns the tokenizer, which serves as the document locator.
+ *
+ * @return the tokenizer
+ */
+ public Locator getDocumentLocator() {
+ return tokenizer;
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Encoding.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Encoding.java
new file mode 100644
index 000000000..3bbc606fa
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Encoding.java
@@ -0,0 +1,395 @@
+/*
+ * Copyright (c) 2006 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.io;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderMalfunctionError;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.UnsupportedCharsetException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+
+public class Encoding {
+
+ // The constants below are assigned in the static initializer through
+ // forName(), which throws UnsupportedCharsetException if the lookup fails.
+ public static final Encoding UTF8;
+
+ public static final Encoding UTF16;
+
+ public static final Encoding UTF16LE;
+
+ public static final Encoding UTF16BE;
+
+ public static final Encoding WINDOWS1252;
+
+ // Name keys flagged by isShouldNot(). Searched with Arrays.binarySearch,
+ // so the entries must stay in ascending String order.
+ private static String[] SHOULD_NOT = { "jisx02121990", "xjis0208" };
+
+ // Name keys of charsets that are never registered as encodings. isBanned()
+ // searches this array with Arrays.binarySearch, so the entries MUST stay
+ // in ascending String order; an out-of-order entry is silently not found.
+ private static String[] BANNED = { "bocu1", "cesu8", "compoundtext",
+         "iscii91", "macarabic", "maccentraleurroman", "maccroatian",
+         "maccyrillic", "macdevanagari", "macfarsi", "macgreek",
+         "macgujarati", "macgurmukhi", "machebrew", "macicelandic",
+         "macroman", "macromanian", "macthai", "macturkish", "macukranian",
+         "scsu", "utf32", "utf32be", "utf32le", "utf7", "ximapmailboxname",
+         "xjisautodetect", "xutf16bebom", "xutf16lebom",
+         "xutf16oppositeendian", "xutf16platformendian", "xutf32bebom",
+         "xutf32lebom", "xutf32oppositeendian", "xutf32platformendian" };
+
+ // Name keys that isObscure() does not consider obscure. Searched with
+ // Arrays.binarySearch, so the entries must stay in ascending String order.
+ private static String[] NOT_OBSCURE = { "big5", "big5hkscs", "eucjp",
+ "euckr", "gb18030", "gbk", "iso2022jp", "iso2022kr", "iso88591",
+ "iso885913", "iso885915", "iso88592", "iso88593", "iso88594",
+ "iso88595", "iso88596", "iso88597", "iso88598", "iso88599",
+ "koi8r", "shiftjis", "tis620", "usascii", "utf16", "utf16be",
+ "utf16le", "utf8", "windows1250", "windows1251", "windows1252",
+ "windows1253", "windows1254", "windows1255", "windows1256",
+ "windows1257", "windows1258" };
+
+ // Lookup table behind forName(); filled by the static initializer.
+ private static Map<String, Encoding> encodingByCookedName = new HashMap<String, Encoding>();
+
+ // ASCII-lowercased name of the underlying charset.
+ private final String canonName;
+
+ // The java.nio charset that supplies the decoder and encoder.
+ private final Charset charset;
+
+ // Result of asciiMapsToBasicLatin() for this charset.
+ private final boolean asciiSuperset;
+
+ // True when the name key is not listed in NOT_OBSCURE.
+ private final boolean obscure;
+
+ // True when the name key is listed in SHOULD_NOT.
+ private final boolean shouldNot;
+
+ // Result of isLikelyEbcdic() for this charset.
+ private final boolean likelyEbcdic;
+
+ // Replacement encoding assigned by the static initializer for a few
+ // encodings; null for all others.
+ private Encoding actualHtmlEncoding = null;
+
+ // Builds the encoding table from the charsets the JVM offers, then assigns
+ // the public constants and the replacement-encoding relations.
+ static {
+ // Probe buffer: index i holds i when isAsciiSupersetnessSensitive(i),
+ // otherwise a space (0x20).
+ byte[] testBuf = new byte[0x7F];
+ for (int i = 0; i < 0x7F; i++) {
+ if (isAsciiSupersetnessSensitive(i)) {
+ testBuf[i] = (byte) i;
+ } else {
+ testBuf[i] = (byte) 0x20;
+ }
+ }
+
+ Set<Encoding> encodings = new HashSet<Encoding>();
+
+ SortedMap<String, Charset> charsets = Charset.availableCharsets();
+ for (Map.Entry<String, Charset> entry : charsets.entrySet()) {
+ Charset cs = entry.getValue();
+ String name = toNameKey(cs.name());
+ String canonName = toAsciiLowerCase(cs.name());
+ if (!isBanned(name)) {
+ name = name.intern();
+ boolean asciiSuperset = asciiMapsToBasicLatin(testBuf, cs);
+ Encoding enc = new Encoding(canonName.intern(), cs,
+ asciiSuperset, isObscure(name), isShouldNot(name),
+ isLikelyEbcdic(name, asciiSuperset));
+ encodings.add(enc);
+ // Register every alias under its name key; a later charset may
+ // overwrite an alias registered by an earlier one.
+ Set<String> aliases = cs.aliases();
+ for (String alias : aliases) {
+ encodingByCookedName.put(toNameKey(alias).intern(), enc);
+ }
+ }
+ }
+ // Overwrite possible overlapping aliases with the real things--just in
+ // case
+ for (Encoding encoding : encodings) {
+ encodingByCookedName.put(toNameKey(encoding.getCanonName()),
+ encoding);
+ }
+ // These lookups are not guarded: a missing charset makes forName()
+ // throw UnsupportedCharsetException out of the initializer.
+ UTF8 = forName("utf-8");
+ UTF16 = forName("utf-16");
+ UTF16BE = forName("utf-16be");
+ UTF16LE = forName("utf-16le");
+ WINDOWS1252 = forName("windows-1252");
+ // Each of the following is best effort: when either charset is missing,
+ // forName() throws and the relation is simply not set up.
+ try {
+ forName("iso-8859-1").actualHtmlEncoding = forName("windows-1252");
+ } catch (UnsupportedCharsetException e) {
+ }
+ try {
+ forName("iso-8859-9").actualHtmlEncoding = forName("windows-1254");
+ } catch (UnsupportedCharsetException e) {
+ }
+ try {
+ forName("iso-8859-11").actualHtmlEncoding = forName("windows-874");
+ } catch (UnsupportedCharsetException e) {
+ }
+ try {
+ forName("x-iso-8859-11").actualHtmlEncoding = forName("windows-874");
+ } catch (UnsupportedCharsetException e) {
+ }
+ try {
+ forName("tis-620").actualHtmlEncoding = forName("windows-874");
+ } catch (UnsupportedCharsetException e) {
+ }
+ try {
+ forName("gb_2312-80").actualHtmlEncoding = forName("gbk");
+ } catch (UnsupportedCharsetException e) {
+ }
+ try {
+ forName("gb2312").actualHtmlEncoding = forName("gbk");
+ } catch (UnsupportedCharsetException e) {
+ }
+ // NOTE(review): forName() normalizes its argument with toNameKey(), which
+ // drops '-' and '_'. The three keys below are stored verbatim, so they
+ // look unreachable through forName() -- confirm whether they should be
+ // passed through toNameKey() first.
+ try {
+ encodingByCookedName.put("x-x-big5", forName("big5"));
+ } catch (UnsupportedCharsetException e) {
+ }
+ try {
+ encodingByCookedName.put("euc-kr", forName("windows-949"));
+ } catch (UnsupportedCharsetException e) {
+ }
+ try {
+ encodingByCookedName.put("ks_c_5601-1987", forName("windows-949"));
+ } catch (UnsupportedCharsetException e) {
+ }
+ }
+
+ /**
+  * Tells whether a code point is one of those that must decode to itself
+  * for a charset to count as an ASCII superset.
+  *
+  * @param c
+  *            the code point to test
+  * @return <code>true</code> for 0x09-0x0D, 0x20-0x22, 0x26-0x27,
+  *         0x2C-0x3F, 0x41-0x5A and 0x61-0x7A
+  */
+ private static boolean isAsciiSupersetnessSensitive(int c) {
+     if (c >= 0x09 && c <= 0x0D) {
+         return true; // tab, LF, VT, FF, CR
+     }
+     if (c >= 0x20 && c <= 0x22) {
+         return true; // space, '!', '"'
+     }
+     if (c == 0x26 || c == 0x27) {
+         return true; // '&', '\''
+     }
+     if (c >= 0x2C && c <= 0x3F) {
+         return true; // ',' through '?', which includes the digits
+     }
+     if (c >= 0x41 && c <= 0x5A) {
+         return true; // 'A'-'Z'
+     }
+     return c >= 0x61 && c <= 0x7A; // 'a'-'z'
+ }
+
+ /**
+  * @param lowerCasePreferredIanaName
+  *            the name key to look up
+  * @return <code>true</code> unless the key is listed in NOT_OBSCURE
+  */
+ private static boolean isObscure(String lowerCasePreferredIanaName) {
+     // binarySearch returns a negative value when the key is absent.
+     return Arrays.binarySearch(NOT_OBSCURE, lowerCasePreferredIanaName) < 0;
+ }
+
+ /**
+  * @param lowerCasePreferredIanaName
+  *            the name key to look up
+  * @return <code>true</code> if the key starts with "xibm" or is listed in
+  *         BANNED
+  */
+ private static boolean isBanned(String lowerCasePreferredIanaName) {
+     return lowerCasePreferredIanaName.startsWith("xibm")
+             || Arrays.binarySearch(BANNED, lowerCasePreferredIanaName) >= 0;
+ }
+
+ /**
+  * @param lowerCasePreferredIanaName
+  *            the name key to look up
+  * @return <code>true</code> if the key is listed in SHOULD_NOT
+  */
+ private static boolean isShouldNot(String lowerCasePreferredIanaName) {
+     return Arrays.binarySearch(SHOULD_NOT, lowerCasePreferredIanaName) >= 0;
+ }
+
+ /**
+  * Decodes the probe buffer with the given charset and checks that every
+  * byte comes back as the expected character.
+  *
+  * @param testBuf
+  *            the probe bytes; index i is expected to hold i for sensitive
+  *            code points and 0x20 otherwise
+  * @param cs
+  *            the charset under test
+  * @return <code>true</code> if all 0x7F positions decode as expected;
+  *         <code>false</code> on any mismatch or decoding failure
+  */
+ private static boolean asciiMapsToBasicLatin(byte[] testBuf, Charset cs) {
+     CharsetDecoder strict = cs.newDecoder();
+     strict.onMalformedInput(CodingErrorAction.REPORT);
+     strict.onUnmappableCharacter(CodingErrorAction.REPORT);
+     Reader decoded = new InputStreamReader(new ByteArrayInputStream(testBuf), strict);
+     try {
+         for (int i = 0; i < 0x7F; i++) {
+             int expected = isAsciiSupersetnessSensitive(i) ? i : 0x20;
+             if (decoded.read() != expected) {
+                 return false;
+             }
+         }
+         return true;
+     } catch (Exception e) {
+         // Covers IOException as well as runtime failures inside the decoder.
+         return false;
+     } catch (CoderMalfunctionError e) {
+         return false;
+     }
+ }
+
+ /**
+  * Guesses whether a charset is EBCDIC-based.
+  *
+  * @param canonName
+  *            the name to inspect
+  * @param asciiSuperset
+  *            whether the charset is an ASCII superset
+  * @return <code>true</code> only for charsets that are not ASCII supersets
+  *         and whose name starts with "cp", "ibm" or "xibm"
+  */
+ private static boolean isLikelyEbcdic(String canonName,
+         boolean asciiSuperset) {
+     if (asciiSuperset) {
+         return false;
+     }
+     return canonName.startsWith("cp") || canonName.startsWith("ibm")
+             || canonName.startsWith("xibm");
+ }
+
+ /**
+  * Looks up an encoding by label. The label is normalized with
+  * {@link #toNameKey(String)} before the lookup.
+  *
+  * @param name
+  *            the encoding label
+  * @return the matching encoding, never <code>null</code>
+  * @throws UnsupportedCharsetException
+  *             if no encoding is registered for the label
+  */
+ public static Encoding forName(String name) {
+     Encoding found = encodingByCookedName.get(toNameKey(name));
+     if (found != null) {
+         return found;
+     }
+     throw new UnsupportedCharsetException(name);
+ }
+
+ public static String toNameKey(String str) {
+ if (str == null) {
+ return null;
+ }
+ int j = 0;
+ char[] buf = new char[str.length()];
+ for (int i = 0; i < str.length(); i++) {
+ char c = str.charAt(i);
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ }
+ if (!((c >= '\t' && c <= '\r') || (c >= '\u0020' && c <= '\u002F')
+ || (c >= '\u003A' && c <= '\u0040')
+ || (c >= '\u005B' && c <= '\u0060') || (c >= '\u007B' && c <= '\u007E'))) {
+ buf[j] = c;
+ j++;
+ }
+ }
+ return new String(buf, 0, j);
+ }
+
+ /**
+  * Lower-cases the ASCII letters 'A'-'Z' of a string and leaves every other
+  * character, including non-ASCII letters, untouched.
+  *
+  * @param str
+  *            the input, may be <code>null</code>
+  * @return the converted string, or <code>null</code> if <code>str</code>
+  *         is <code>null</code>
+  */
+ public static String toAsciiLowerCase(String str) {
+     if (str == null) {
+         return null;
+     }
+     char[] chars = str.toCharArray();
+     for (int i = 0; i < chars.length; i++) {
+         char c = chars[i];
+         if (c >= 'A' && c <= 'Z') {
+             chars[i] = (char) (c + 0x20);
+         }
+     }
+     return new String(chars);
+ }
+
+ /**
+ * Creates an encoding descriptor. Only the static initializer of this class
+ * calls this constructor.
+ *
+ * @param canonName
+ *            the name reported by {@link #getCanonName()}
+ * @param charset
+ *            the charset that provides the decoder and encoder
+ * @param asciiSuperset
+ *            the value reported by {@link #isAsciiSuperset()}
+ * @param obscure
+ *            the value reported by {@link #isObscure()}
+ * @param shouldNot
+ *            the value reported by {@link #isShouldNot()}
+ * @param likelyEbcdic
+ *            the value reported by {@link #isLikelyEbcdic()}
+ */
+ private Encoding(final String canonName, final Charset charset,
+ final boolean asciiSuperset, final boolean obscure,
+ final boolean shouldNot, final boolean likelyEbcdic) {
+ this.canonName = canonName;
+ this.charset = charset;
+ this.asciiSuperset = asciiSuperset;
+ this.obscure = obscure;
+ this.shouldNot = shouldNot;
+ this.likelyEbcdic = likelyEbcdic;
+ }
+
+ /**
+ * Tells whether this encoding was classified as an ASCII superset when
+ * the object was constructed.
+ *
+ * @return the asciiSuperset flag
+ */
+ public boolean isAsciiSuperset() {
+ return asciiSuperset;
+ }
+
+ /**
+ * Returns the canonical name given to this encoding at construction.
+ *
+ * @return the canonName
+ */
+ public String getCanonName() {
+ return canonName;
+ }
+
+ /**
+ * Tells whether this encoding was classified as likely EBCDIC-based when
+ * the object was constructed.
+ *
+ * @return the likelyEbcdic flag
+ */
+ public boolean isLikelyEbcdic() {
+ return likelyEbcdic;
+ }
+
+ /**
+ * Tells whether this encoding was classified as obscure when the object
+ * was constructed.
+ *
+ * @return the obscure flag
+ */
+ public boolean isObscure() {
+ return obscure;
+ }
+
+ /**
+ * Tells whether this encoding was classified as one that authors should
+ * not use when the object was constructed.
+ *
+ * @return the shouldNot flag
+ */
+ public boolean isShouldNot() {
+ return shouldNot;
+ }
+
+ /**
+ * Tells whether the encoding is treated as registered, which here means
+ * that its canonical name does not start with "x-".
+ *
+ * @return <code>false</code> if the canonical name starts with "x-"
+ */
+ public boolean isRegistered() {
+ return !canonName.startsWith("x-");
+ }
+
+ /**
+ * Delegates to the underlying charset.
+ *
+ * @return the result of <code>Charset.canEncode()</code>
+ * @see java.nio.charset.Charset#canEncode()
+ */
+ public boolean canEncode() {
+ return charset.canEncode();
+ }
+
+ /**
+ * Creates a new decoder from the underlying charset.
+ *
+ * @return the result of <code>Charset.newDecoder()</code>
+ * @see java.nio.charset.Charset#newDecoder()
+ */
+ public CharsetDecoder newDecoder() {
+ return charset.newDecoder();
+ }
+
+ /**
+ * Creates a new encoder from the underlying charset.
+ *
+ * @return the result of <code>Charset.newEncoder()</code>
+ * @see java.nio.charset.Charset#newEncoder()
+ */
+ public CharsetEncoder newEncoder() {
+ return charset.newEncoder();
+ }
+
+ /**
+ * Returns the replacement encoding that the static initializer assigned
+ * to this encoding, if any.
+ *
+ * @return the actualHtmlEncoding, or <code>null</code> if none was
+ *         assigned
+ */
+ public Encoding getActualHtmlEncoding() {
+ return actualHtmlEncoding;
+ }
+
+ /**
+  * Diagnostic entry point: prints one line per registered lookup key with
+  * the properties of the encoding it maps to.
+  *
+  * @param args
+  *            ignored
+  */
+ public static void main(String[] args) {
+     for (Map.Entry<String, Encoding> mapping : encodingByCookedName.entrySet()) {
+         Encoding e = mapping.getValue();
+         System.out.printf(
+                 "%21s: canon %21s, obs %5s, reg %5s, asc %5s, ebc %5s\n",
+                 mapping.getKey(), e.getCanonName(), e.isObscure(),
+                 e.isRegistered(), e.isAsciiSuperset(), e.isLikelyEbcdic());
+     }
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java
new file mode 100644
index 000000000..413f0d9e9
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java
@@ -0,0 +1,512 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2013 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.io;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+
+import nu.validator.htmlparser.common.ByteReadable;
+import nu.validator.htmlparser.common.Heuristics;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.extra.ChardetSniffer;
+import nu.validator.htmlparser.extra.IcuDetectorSniffer;
+import nu.validator.htmlparser.impl.Tokenizer;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+/**
+ * Be very careful with this class. It is not a general-purpose subclass of
+ * <code>Reader</code>. Instead, it is the minimal implementation that does
+ * what <code>Tokenizer</code> needs while being an instance of
+ * <code>Reader</code>.
+ *
+ * The only reason why this is a public class is that it needs to be visible to
+ * test code in another package.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class HtmlInputStreamReader extends Reader implements
+ ByteReadable, Locator {
+
+ // Upper bound on the number of bytes readByte() hands out while sniffing.
+ private static final int SNIFFING_LIMIT = 1024;
+
+ private final InputStream inputStream;
+
+ // May be null; err() and warn() then report nothing.
+ private final ErrorHandler errorHandler;
+
+ // May be null; location information is then unavailable.
+ private final Tokenizer tokenizer;
+
+ // May be null; encoding and meta boundary notifications are then skipped.
+ private final Driver driver;
+
+ private CharsetDecoder decoder = null;
+
+ // True while readByte() may be called.
+ private boolean sniffing = true;
+
+ // Number of bytes buffered in byteArray by readByte().
+ private int limit = 0;
+
+ // Index of the next byte readByte() returns.
+ private int position = 0;
+
+ // Number of bytes consumed from byteBuffer by read(char[]) so far.
+ private int bytesRead = 0;
+
+ // Set once the input stream has reported end of stream.
+ private boolean eofSeen = false;
+
+ // When true, read(char[]) fetches more bytes before decoding.
+ private boolean shouldReadBytes = false;
+
+ // Set by read(char[]) once bytesRead plus the undecoded remainder has
+ // reached SNIFFING_LIMIT.
+ private boolean charsetBoundaryPassed = false;
+
+ // Length must be >= SNIFFING_LIMIT
+ private final byte[] byteArray = new byte[4096];
+
+ // View over byteArray that is handed to the decoder.
+ private final ByteBuffer byteBuffer = ByteBuffer.wrap(byteArray);
+
+ // When set, the next read(char[]) call notifies the driver about the meta
+ // boundary before decoding.
+ private boolean needToNotifyTokenizer = false;
+
+ // Set after end of stream; read(char[]) then only flushes the decoder.
+ private boolean flushing = false;
+
+ // Location computed by calculateLineAndCol().
+ private int line = -1;
+
+ private int col = -1;
+
+ // Index in the current output buffer up to which line/col are computed;
+ // reset to 0 at the start of every read(char[]) call.
+ private int lineColPos;
+
+ // A U+FFFD that did not fit into the previous output buffer.
+ private boolean hasPendingReplacementCharacter = false;
+
+ // Line tracking state used by calculateLineAndCol().
+ private boolean nextCharOnNewLine;
+
+ private boolean prevWasCR;
+
+ /**
+ * Creates a reader that determines the character encoding by sniffing.
+ * The BOM sniffer is tried first, then the <code>meta</code> sniffer,
+ * then (depending on <code>heuristics</code>) the chardet and ICU
+ * sniffers; windows-1252 is the final fallback. The chosen encoding is
+ * reported to the driver, if there is one.
+ *
+ * @param inputStream
+ *            the byte stream to decode
+ * @param errorHandler
+ *            receives errors and warnings; may be <code>null</code>
+ * @param tokenizer
+ *            source of location information; may be <code>null</code>
+ * @param driver
+ *            told about the chosen encoding; may be <code>null</code>
+ * @param heuristics
+ *            selects which heuristic sniffers are consulted
+ * @throws IOException
+ * @throws SAXException
+ */
+ public HtmlInputStreamReader(InputStream inputStream,
+ ErrorHandler errorHandler, Tokenizer tokenizer, Driver driver,
+ Heuristics heuristics) throws SAXException, IOException {
+ this.inputStream = inputStream;
+ this.errorHandler = errorHandler;
+ this.tokenizer = tokenizer;
+ this.driver = driver;
+ this.sniffing = true;
+ Encoding encoding = (new BomSniffer(this)).sniff();
+ if (encoding == null) {
+ // No BOM: rewind the sniffing position and try the meta sniffer.
+ position = 0;
+ encoding = (new MetaSniffer(errorHandler, this)).sniff(this);
+ boolean declared = true;
+ if (encoding == null) {
+ declared = false;
+ } else if (encoding != Encoding.UTF8) {
+ warn("Legacy encoding \u201C"
+ + encoding.getCanonName()
+ + "\u201D used. Documents should use UTF-8.");
+ }
+ if (encoding == null
+ && (heuristics == Heuristics.CHARDET || heuristics == Heuristics.ALL)) {
+ encoding = (new ChardetSniffer(byteArray, limit)).sniff();
+ }
+ if (encoding == null
+ && (heuristics == Heuristics.ICU || heuristics == Heuristics.ALL)) {
+ position = 0;
+ encoding = (new IcuDetectorSniffer(this)).sniff();
+ }
+ sniffing = false;
+ if (encoding == null) {
+ encoding = Encoding.WINDOWS1252;
+ }
+ if (!declared) {
+ err("The character encoding was not declared. Proceeding using \u201C" + encoding.getCanonName() + "\u201D.");
+ }
+ if (driver != null) {
+ driver.setEncoding(encoding, Confidence.TENTATIVE);
+ }
+ } else {
+ if (encoding == Encoding.UTF8) {
+ if (driver != null) {
+ driver.setEncoding(Encoding.UTF8, Confidence.CERTAIN);
+ }
+ } else {
+ warn("Legacy encoding \u201C"
+ + encoding.getCanonName()
+ + "\u201D used. Documents should use UTF-8.");
+ // NOTE(review): the driver is told Encoding.UTF16 for every
+ // non-UTF-8 BOM result, while the decoder below is created from
+ // the sniffed encoding itself -- confirm this is intended.
+ if (driver != null) {
+ driver.setEncoding(Encoding.UTF16, Confidence.CERTAIN);
+ }
+ }
+ }
+ this.decoder = encoding.newDecoder();
+ sniffing = false;
+ // Hand the bytes buffered during sniffing over to the decoder.
+ position = 0;
+ bytesRead = 0;
+ byteBuffer.position(position);
+ byteBuffer.limit(limit);
+ initDecoder();
+ }
+
+ /**
+ * Configures the current decoder to report malformed input and
+ * unmappable characters instead of handling them itself.
+ */
+ private void initDecoder() {
+ this.decoder.onMalformedInput(CodingErrorAction.REPORT);
+ this.decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+ }
+
+ /**
+ * Creates a reader for a known encoding. No sniffing takes place; the
+ * byte buffer starts out empty and the first <code>read(char[])</code>
+ * call fetches bytes from the stream.
+ *
+ * @param inputStream
+ *            the byte stream to decode
+ * @param errorHandler
+ *            receives errors and warnings; may be <code>null</code>
+ * @param tokenizer
+ *            source of location information; may be <code>null</code>
+ * @param driver
+ *            notified about the meta boundary; may be <code>null</code>
+ * @param encoding
+ *            the encoding whose decoder is used
+ * @throws SAXException
+ * @throws IOException
+ */
+ public HtmlInputStreamReader(InputStream inputStream,
+ ErrorHandler errorHandler, Tokenizer tokenizer, Driver driver,
+ Encoding encoding) throws SAXException, IOException {
+ this.inputStream = inputStream;
+ this.errorHandler = errorHandler;
+ this.tokenizer = tokenizer;
+ this.driver = driver;
+ this.decoder = encoding.newDecoder();
+ this.sniffing = false;
+ position = 0;
+ bytesRead = 0;
+ byteBuffer.position(0);
+ byteBuffer.limit(0);
+ shouldReadBytes = true;
+ initDecoder();
+ }
+
+ /**
+ * Closes the underlying input stream.
+ */
+ @Override public void close() throws IOException {
+ inputStream.close();
+ }
+
+ /**
+ * Decodes bytes from the stream into <code>charArray</code>, starting at
+ * index 0. Malformed and unmappable byte sequences are reported through
+ * <code>err()</code> and replaced with U+FFFD.
+ *
+ * @param charArray
+ *            the output buffer; asserted to hold at least two chars
+ * @return the number of chars written, or -1 when the end of the input
+ *         has been reached and nothing was written
+ * @throws IOException
+ *             if reading fails or the error handler throws
+ */
+ @Override public int read(char[] charArray) throws IOException {
+ lineColPos = 0;
+ assert !sniffing;
+ assert charArray.length >= 2;
+ if (needToNotifyTokenizer) {
+ if (driver != null) {
+ driver.notifyAboutMetaBoundary();
+ }
+ needToNotifyTokenizer = false;
+ }
+ CharBuffer charBuffer = CharBuffer.wrap(charArray);
+ charBuffer.limit(charArray.length);
+ charBuffer.position(0);
+ if (flushing) {
+ decoder.flush(charBuffer);
+ // return -1 if zero
+ int cPos = charBuffer.position();
+ return cPos == 0 ? -1 : cPos;
+ }
+ // Emit the replacement character that did not fit last time.
+ if (hasPendingReplacementCharacter) {
+ charBuffer.put('\uFFFD');
+ hasPendingReplacementCharacter = false;
+ }
+ for (;;) {
+ if (shouldReadBytes) {
+ // Append new bytes after the undecoded remainder. Until the
+ // charset boundary has been passed, the buffer is only filled up
+ // to SNIFFING_LIMIT.
+ int oldLimit = byteBuffer.limit();
+ int readLen;
+ if (charsetBoundaryPassed) {
+ readLen = byteArray.length - oldLimit;
+ } else {
+ readLen = SNIFFING_LIMIT - oldLimit;
+ }
+ int num = inputStream.read(byteArray, oldLimit, readLen);
+ if (num == -1) {
+ eofSeen = true;
+ inputStream.close();
+ } else {
+ byteBuffer.position(0);
+ byteBuffer.limit(oldLimit + num);
+ }
+ shouldReadBytes = false;
+ }
+ boolean finalDecode = false;
+ for (;;) {
+ int oldBytePos = byteBuffer.position();
+ CoderResult cr = decoder.decode(byteBuffer, charBuffer,
+ finalDecode);
+ bytesRead += byteBuffer.position() - oldBytePos;
+ if (cr == CoderResult.OVERFLOW) {
+ // Decoder will remember surrogates
+ return charBuffer.position();
+ } else if (cr == CoderResult.UNDERFLOW) {
+ int remaining = byteBuffer.remaining();
+ if (!charsetBoundaryPassed) {
+ if (bytesRead + remaining >= SNIFFING_LIMIT) {
+ needToNotifyTokenizer = true;
+ charsetBoundaryPassed = true;
+ }
+ }
+
+ // XXX what happens if the entire byte buffer consists of
+ // a pathologically long malformed sequence?
+
+ // If the buffer was not fully consumed, there may be an
+ // incomplete byte sequence that needs to seed the next
+ // buffer.
+ if (remaining > 0) {
+ System.arraycopy(byteArray, byteBuffer.position(),
+ byteArray, 0, remaining);
+ }
+ byteBuffer.position(0);
+ byteBuffer.limit(remaining);
+ if (flushing) {
+ // The final decode was successful. Not sure if this
+ // ever happens.
+ // Let's get out in any case.
+ int cPos = charBuffer.position();
+ return cPos == 0 ? -1 : cPos;
+ } else if (eofSeen) {
+ // If there's something left, it isn't something that
+ // would be
+ // consumed in the middle of the stream. Rerun the loop
+ // once
+ // in the final mode.
+ shouldReadBytes = false;
+ finalDecode = true;
+ flushing = true;
+ continue;
+ } else {
+ // The usual stuff. Want more bytes next time.
+ shouldReadBytes = true;
+ int cPos = charBuffer.position();
+ if (cPos == 0) {
+ // No output. Read more bytes right away
+ break;
+ }
+ return cPos;
+ }
+ } else {
+ // The result is in error. No need to test.
+ // Consume the offending bytes and collect them as hex for the
+ // error message.
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < cr.length(); i++) {
+ if (i > 0) {
+ sb.append(", ");
+ }
+ sb.append('\u201C');
+ sb.append(Integer.toHexString(byteBuffer.get() & 0xFF));
+ bytesRead++;
+ sb.append('\u201D');
+ }
+ if (charBuffer.hasRemaining()) {
+ charBuffer.put('\uFFFD');
+ } else {
+ hasPendingReplacementCharacter = true;
+ }
+ calculateLineAndCol(charBuffer);
+ if (cr.isMalformed()) {
+ err("Malformed byte sequence: " + sb + ".");
+ } else if (cr.isUnmappable()) {
+ err("Unmappable byte sequence: " + sb + ".");
+ } else {
+ throw new RuntimeException(
+ "CoderResult was none of overflow, underflow, malformed or unmappable.");
+ }
+ if (finalDecode) {
+ // These were the last bytes of input. Return without
+ // relooping.
+ // return -1 if zero
+ int cPos = charBuffer.position();
+ return cPos == 0 ? -1 : cPos;
+ }
+ }
+ }
+ }
+ }
+
+ /**
+  * Advances the line and column counters over the chars that have been
+  * written to <code>charBuffer</code> since the last call, so that an
+  * error reported right afterwards carries the location of the most
+  * recently decoded char. Does nothing without a tokenizer.
+  *
+  * On the first call within a <code>read(char[])</code> invocation
+  * (<code>lineColPos == 0</code>) the counters are seeded from the
+  * tokenizer.
+  *
+  * @param charBuffer
+  *            the array-backed output buffer of the current read call
+  */
+ private void calculateLineAndCol(CharBuffer charBuffer) {
+     if (tokenizer != null) {
+         if (lineColPos == 0) {
+             line = tokenizer.getLine();
+             col = tokenizer.getCol();
+             nextCharOnNewLine = tokenizer.isNextCharOnNewLine();
+             prevWasCR = tokenizer.isPrevCR();
+         }
+
+         char[] charArray = charBuffer.array();
+         int i = lineColPos;
+         while (i < charBuffer.position()) {
+             if (nextCharOnNewLine) {
+                 line++;
+                 col = 1;
+                 nextCharOnNewLine = false;
+             } else {
+                 col++;
+             }
+
+             char c = charArray[i];
+             switch (c) {
+                 case '\r':
+                     nextCharOnNewLine = true;
+                     prevWasCR = true;
+                     break;
+                 case '\n':
+                     if (prevWasCR) {
+                         // Second half of CRLF: the CR already ended the
+                         // line, so the LF takes up no column.
+                         col--;
+                     } else {
+                         nextCharOnNewLine = true;
+                     }
+                     prevWasCR = false;
+                     break;
+                 default:
+                     // Any other char ends the "just saw CR" state. Without
+                     // this reset a later lone LF would be mistaken for the
+                     // tail of a CRLF pair and not counted as a line break.
+                     prevWasCR = false;
+                     break;
+             }
+             i++;
+         }
+         lineColPos = i;
+     }
+ }
+
+ /**
+  * Returns the next byte for the encoding sniffers, buffering it in
+  * <code>byteArray</code> so that it can be decoded later.
+  *
+  * @return the byte as a value in the range 0-255, or -1 once
+  *         <code>SNIFFING_LIMIT</code> bytes have been handed out or the
+  *         stream has ended
+  * @throws IllegalStateException
+  *             if called after sniffing has finished
+  * @throws IOException
+  *             if reading from the stream fails
+  */
+ public int readByte() throws IOException {
+     if (!sniffing) {
+         throw new IllegalStateException(
+                 "readByte() called when not in the sniffing state.");
+     }
+     if (position == SNIFFING_LIMIT) {
+         return -1;
+     }
+     if (position >= limit) {
+         // Nothing buffered at this position yet: fetch more bytes.
+         int fetched = inputStream.read(byteArray, limit, SNIFFING_LIMIT - limit);
+         if (fetched == -1) {
+             return -1;
+         }
+         limit += fetched;
+     }
+     return byteArray[position++] & 0xFF;
+ }
+
+ /**
+ * Scratch program that prints how a strict UTF-8 decoder reacts to a
+ * one-char output buffer and to input that starts with continuation
+ * bytes. Not used by the reader itself.
+ *
+ * @param args
+ *            ignored
+ */
+ public static void main(String[] args) {
+ CharsetDecoder dec = Charset.forName("UTF-8").newDecoder();
+ dec.onMalformedInput(CodingErrorAction.REPORT);
+ dec.onUnmappableCharacter(CodingErrorAction.REPORT);
+ byte[] bytes = { (byte) 0xF0, (byte) 0x9D, (byte) 0x80, (byte) 0x80 };
+ byte[] bytes2 = { (byte) 0xB8, (byte) 0x80, 0x63, 0x64, 0x65 };
+ ByteBuffer byteBuf = ByteBuffer.wrap(bytes);
+ ByteBuffer byteBuf2 = ByteBuffer.wrap(bytes2);
+ // Room for a single char only.
+ char[] chars = new char[1];
+ CharBuffer charBuf = CharBuffer.wrap(chars);
+
+ CoderResult cr = dec.decode(byteBuf, charBuf, false);
+ System.out.println(cr);
+ System.out.println(byteBuf);
+ // byteBuf.get();
+ cr = dec.decode(byteBuf2, charBuf, false);
+ System.out.println(cr);
+ System.out.println(byteBuf2);
+
+ }
+
+ /**
+  * @return the column last computed by this reader, or -1 when there is no
+  *         tokenizer
+  */
+ public int getColumnNumber() {
+     return tokenizer == null ? -1 : col;
+ }
+
+ /**
+  * @return the line last computed by this reader, or -1 when there is no
+  *         tokenizer
+  */
+ public int getLineNumber() {
+     return tokenizer == null ? -1 : line;
+ }
+
+ /**
+  * @return the tokenizer's public id, or <code>null</code> when there is no
+  *         tokenizer
+  */
+ public String getPublicId() {
+     return tokenizer == null ? null : tokenizer.getPublicId();
+ }
+
+ /**
+  * @return the tokenizer's system id, or <code>null</code> when there is no
+  *         tokenizer
+  */
+ public String getSystemId() {
+     return tokenizer == null ? null : tokenizer.getSystemId();
+ }
+
+ /**
+  * Reports an error to the error handler, using this reader as the
+  * locator. Does nothing when no error handler is set.
+  *
+  * @param message
+  *            the error text
+  * @throws IOException
+  *             wrapping the <code>SAXException</code> if the error handler
+  *             throws one
+  */
+ private void err(String message) throws IOException {
+     if (errorHandler == null) {
+         return;
+     }
+     // TODO remove wrapping when changing read() to take a CharBuffer
+     try {
+         errorHandler.error(new SAXParseException(message, this));
+     } catch (SAXException e) {
+         throw (IOException) new IOException(e.getMessage()).initCause(e);
+     }
+ }
+
+ /**
+  * Reports a warning to the error handler, using this reader as the
+  * locator. Does nothing when no error handler is set.
+  *
+  * @param message
+  *            the warning text
+  * @throws IOException
+  *             wrapping the <code>SAXException</code> if the error handler
+  *             throws one
+  */
+ private void warn(String message) throws IOException {
+     if (errorHandler == null) {
+         return;
+     }
+     // TODO remove wrapping when changing read() to take a CharBuffer
+     try {
+         errorHandler.warning(new SAXParseException(message, this));
+     } catch (SAXException e) {
+         throw (IOException) new IOException(e.getMessage()).initCause(e);
+     }
+ }
+
+ /**
+ * @return the charset of the decoder currently in use
+ */
+ public Charset getCharset() {
+ return decoder.charset();
+ }
+
+ /**
+ * Not supported; always throws. Only <code>read(char[])</code> is
+ * implemented by this class.
+ *
+ * @throws UnsupportedOperationException
+ *             always
+ * @see java.io.Reader#read()
+ */
+ @Override public int read() throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Not supported; always throws. Only <code>read(char[])</code> is
+ * implemented by this class.
+ *
+ * @throws UnsupportedOperationException
+ *             always
+ * @see java.io.Reader#read(char[], int, int)
+ */
+ @Override public int read(char[] cbuf, int off, int len) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Not supported; always throws. Only <code>read(char[])</code> is
+ * implemented by this class.
+ *
+ * @throws UnsupportedOperationException
+ *             always
+ * @see java.io.Reader#read(java.nio.CharBuffer)
+ */
+ @Override public int read(CharBuffer target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Replaces the decoder with a new one for the given encoding and
+ * configures it to report errors. Buffer positions are left as they are.
+ *
+ * @param newEnc
+ *            the encoding to decode with from now on
+ */
+ public void switchEncoding(Encoding newEnc) {
+ this.decoder = newEnc.newDecoder();
+ initDecoder();
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/MetaSniffer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/MetaSniffer.java
new file mode 100644
index 000000000..baa04e44f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/MetaSniffer.java
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.io;
+
+import java.io.IOException;
+import java.nio.charset.UnsupportedCharsetException;
+
+import nu.validator.htmlparser.common.ByteReadable;
+import nu.validator.htmlparser.impl.MetaScanner;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+public class MetaSniffer extends MetaScanner implements Locator {
+
+ private Encoding characterEncoding = null;
+
+ private final ErrorHandler errorHandler;
+
+ private final Locator locator;
+
+ private int line = 1;
+
+ private int col = 0;
+
+ private boolean prevWasCR = false;
+
+ public MetaSniffer(ErrorHandler eh, Locator locator) {
+ this.errorHandler = eh;
+ this.locator = locator;
+ this.characterEncoding = null;
+ }
+
+ /**
+ * -1 means end.
+ * @return
+ * @throws IOException
+ */
+ protected int read() throws IOException {
+ int b = readable.readByte();
+ // [NOCPP[
+ switch (b) {
+ case '\n':
+ if (!prevWasCR) {
+ line++;
+ col = 0;
+ }
+ prevWasCR = false;
+ break;
+ case '\r':
+ line++;
+ col = 0;
+ prevWasCR = true;
+ break;
+ default:
+ col++;
+ prevWasCR = false;
+ break;
+ }
+ // ]NOCPP]
+ return b;
+ }
+
+ /**
+ * Main loop.
+ *
+ * @return
+ *
+ * @throws SAXException
+ * @throws IOException
+ * @throws
+ */
+ public Encoding sniff(ByteReadable readable) throws SAXException, IOException {
+ this.readable = readable;
+ stateLoop(stateSave);
+ return characterEncoding;
+ }
+
+
+ /**
+ * @param string
+ * @throws SAXException
+ */
+ private void err(String message) throws SAXException {
+ if (errorHandler != null) {
+ SAXParseException spe = new SAXParseException(message, this);
+ errorHandler.error(spe);
+ }
+ }
+
+ /**
+ * @param string
+ * @throws SAXException
+ */
+ private void warn(String message) throws SAXException {
+ if (errorHandler != null) {
+ SAXParseException spe = new SAXParseException(message, this);
+ errorHandler.warning(spe);
+ }
+ }
+
+ public int getColumnNumber() {
+ return col;
+ }
+
+ public int getLineNumber() {
+ return line;
+ }
+
+ public String getPublicId() {
+ if (locator != null) {
+ return locator.getPublicId();
+ }
+ return null;
+ }
+
+ public String getSystemId() {
+ if (locator != null) {
+ return locator.getSystemId();
+ }
+ return null;
+ }
+
+ protected boolean tryCharset(String encoding) throws SAXException {
+ encoding = Encoding.toAsciiLowerCase(encoding);
+ try {
+ // XXX spec says only UTF-16
+ if ("utf-16".equals(encoding) || "utf-16be".equals(encoding) || "utf-16le".equals(encoding) || "utf-32".equals(encoding) || "utf-32be".equals(encoding) || "utf-32le".equals(encoding)) {
+ this.characterEncoding = Encoding.UTF8;
+ err("The internal character encoding declaration specified \u201C" + encoding + "\u201D which is not a rough superset of ASCII. Using \u201CUTF-8\u201D instead.");
+ return true;
+ } else {
+ Encoding cs = Encoding.forName(encoding);
+ String canonName = cs.getCanonName();
+ if (!cs.isAsciiSuperset()) {
+ err("The encoding \u201C"
+ + encoding
+ + "\u201D is not an ASCII superset and, therefore, cannot be used in an internal encoding declaration. Continuing the sniffing algorithm.");
+ return false;
+ }
+ if (!cs.isRegistered()) {
+ if (encoding.startsWith("x-")) {
+ err("The encoding \u201C"
+ + encoding
+ + "\u201D is not an IANA-registered encoding. (Charmod C022)");
+ } else {
+ err("The encoding \u201C"
+ + encoding
+ + "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)");
+ }
+ } else if (!cs.getCanonName().equals(encoding)) {
+ err("The encoding \u201C" + encoding
+ + "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C"
+ + canonName + "\u201D. (Charmod C024)");
+ }
+ if (cs.isShouldNot()) {
+ warn("Authors should not use the character encoding \u201C"
+ + encoding
+ + "\u201D. It is recommended to use \u201CUTF-8\u201D.");
+ } else if (cs.isObscure()) {
+ warn("The character encoding \u201C" + encoding + "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
+ }
+ Encoding actual = cs.getActualHtmlEncoding();
+ if (actual == null) {
+ this.characterEncoding = cs;
+ } else {
+ warn("Using \u201C" + actual.getCanonName() + "\u201D instead of the declared encoding \u201C" + encoding + "\u201D.");
+ this.characterEncoding = actual;
+ }
+ return true;
+ }
+ } catch (UnsupportedCharsetException e) {
+ err("Unsupported character encoding name: \u201C" + encoding + "\u201D. Will continue sniffing.");
+ }
+ return false;
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/Rewindable.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/Rewindable.java
new file mode 100644
index 000000000..47a3d5eb0
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/Rewindable.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2001-2003 Thai Open Source Software Center Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ * * Neither the name of the Thai Open Source Software Center Ltd nor
+ * the names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package nu.validator.htmlparser.rewindable;
+
+public interface Rewindable { // contract for input that can be re-read from the start
+ void willNotRewind(); // caller promises not to call rewind() again
+
+ void rewind(); // restart reading from the beginning of the input
+
+ boolean canRewind(); // whether rewind() may still be called
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/RewindableInputStream.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/RewindableInputStream.java
new file mode 100644
index 000000000..3a1cc1b91
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/RewindableInputStream.java
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2001-2003 Thai Open Source Software Center Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ * * Neither the name of the Thai Open Source Software Center Ltd nor
+ * the names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package nu.validator.htmlparser.rewindable;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+public class RewindableInputStream extends InputStream implements Rewindable { // records bytes read from the wrapped stream so rewind() can replay them
+ static class Block { // one node in a singly linked chain of recorded bytes
+ Block next; // following block in the chain, or null
+
+ final byte[] buf;
+
+ int used = 0; // number of bytes of buf filled so far
+
+ static final int MIN_SIZE = 1024;
+
+ Block(int minSize) {
+ buf = new byte[Math.max(MIN_SIZE, minSize)]; // never smaller than MIN_SIZE
+ }
+
+ Block() {
+ this(0);
+ }
+
+ void append(byte b) { // no capacity check here; read() starts a new block when the last one is full
+ buf[used++] = b;
+ }
+
+ void append(byte[] b, int off, int len) { // no capacity check here; the caller sizes the block first
+ System.arraycopy(b, off, buf, used, len);
+ used += len;
+ }
+ }
+
+ private Block head; // first recorded block; null when nothing is recorded
+
+ /**
+ * If curBlockAvail > 0, then there are curBlockAvail bytes available to be
+ * returned starting at curBlockPos in curBlock.buf.
+ */
+ private int curBlockAvail;
+
+ private Block curBlock; // block currently being replayed
+
+ private int curBlockPos;
+
+ private Block lastBlock; // block that newly read bytes are appended to
+
+ /**
+ * true unless willNotRewind has been called
+ */
+ private boolean saving = true;
+
+ private final InputStream in;
+
+ private boolean pretendClosed = false; // set by close() while still saving; the real close is deferred
+
+ /**
+ * true if we have got an EOF from the underlying InputStream
+ */
+ private boolean eof;
+
+ public RewindableInputStream(InputStream in) {
+ if (in == null)
+ throw new NullPointerException();
+ this.in = in;
+ }
+
+ public void close() throws IOException {
+ if (saving) { // still rewindable: keep the recording and the underlying stream
+ curBlockAvail = 0;
+ curBlock = null;
+ pretendClosed = true;
+ } else { // no longer rewindable: drop all references and really close
+ head = null;
+ curBlock = null;
+ lastBlock = null;
+ saving = false;
+ curBlockAvail = 0;
+ in.close();
+ }
+ }
+
+ public void rewind() {
+ if (!saving)
+ throw new IllegalStateException("rewind() after willNotRewind()");
+ pretendClosed = false;
+ if (head == null) // nothing recorded yet, so there is nothing to replay
+ return;
+ curBlock = head;
+ curBlockPos = 0;
+ curBlockAvail = curBlock.used;
+ }
+
+ public boolean canRewind() {
+ return saving;
+ }
+
+ public void willNotRewind() {
+ saving = false;
+ head = null; // curBlock is left alone, so a replay in progress can finish
+ lastBlock = null;
+ if (pretendClosed) { // close() was called earlier; perform the deferred close now
+ pretendClosed = false;
+ try {
+ in.close();
+ } catch (IOException e) { // best effort: a failure to close is ignored
+ }
+ }
+ }
+
+ public int read() throws IOException {
+ if (curBlockAvail > 0) { // replaying recorded bytes
+ int c = curBlock.buf[curBlockPos++] & 0xFF;
+ --curBlockAvail;
+ if (curBlockAvail == 0) { // current block exhausted: move to the next one, if any
+ curBlock = curBlock.next;
+ if (curBlock != null) {
+ curBlockPos = 0;
+ curBlockAvail = curBlock.used;
+ }
+ }
+ return c;
+ }
+ int c = in.read(); // nothing left to replay: read from the underlying stream
+ if (saving && c != -1) { // record the byte for a later rewind()
+ if (lastBlock == null)
+ lastBlock = head = new Block();
+ else if (lastBlock.used == lastBlock.buf.length)
+ lastBlock = lastBlock.next = new Block();
+ lastBlock.append((byte) c);
+ }
+ return c;
+ }
+
+ public int read(byte b[], int off, int len) throws IOException {
+ if (curBlockAvail == 0 && !saving) // nothing to replay or record: plain pass-through
+ return in.read(b, off, len);
+ if (b == null)
+ throw new NullPointerException();
+ if (len < 0)
+ throw new IndexOutOfBoundsException();
+ int nRead = 0;
+ if (curBlockAvail != 0) { // first drain recorded bytes, one at a time
+ for (;;) {
+ if (len == 0)
+ return nRead;
+ b[off++] = curBlock.buf[curBlockPos++];
+ --len;
+ nRead++;
+ --curBlockAvail;
+ if (curBlockAvail == 0) {
+ curBlock = curBlock.next;
+ if (curBlock == null) // recording exhausted; continue with the underlying stream
+ break;
+ curBlockAvail = curBlock.used;
+ curBlockPos = 0;
+ }
+ }
+ }
+ if (len == 0)
+ return nRead;
+ if (eof)
+ return nRead > 0 ? nRead : -1;
+ try {
+ int n = in.read(b, off, len);
+ if (n < 0) {
+ eof = true;
+ return nRead > 0 ? nRead : -1;
+ }
+ nRead += n;
+ if (saving) { // record the n bytes just read
+ if (lastBlock == null)
+ lastBlock = head = new Block(n);
+ else if (lastBlock.buf.length - lastBlock.used < n) { // does not fit in the last block
+ if (lastBlock.used != lastBlock.buf.length) { // fill the remaining space first
+ int free = lastBlock.buf.length - lastBlock.used;
+ lastBlock.append(b, off, free);
+ off += free;
+ n -= free;
+ }
+ lastBlock = lastBlock.next = new Block(n); // the rest goes into a new block of at least n bytes
+ }
+ lastBlock.append(b, off, n);
+ }
+ } catch (IOException e) { // treated as end of stream; rethrown only if nothing was read in this call
+ eof = true;
+ if (nRead == 0)
+ throw e;
+ }
+ return nRead;
+ }
+
+ public int available() throws IOException {
+ if (curBlockAvail == 0)
+ return in.available();
+ int n = curBlockAvail; // bytes left in the block being replayed
+ for (Block b = curBlock.next; b != null; b = b.next) // plus every later recorded block
+ n += b.used;
+ return n + in.available();
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlParser.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlParser.java
new file mode 100644
index 000000000..714053e70
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlParser.java
@@ -0,0 +1,1097 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.HashMap;
+
+import nu.validator.htmlparser.common.CharacterHandler;
+import nu.validator.htmlparser.common.DoctypeExpectation;
+import nu.validator.htmlparser.common.DocumentModeHandler;
+import nu.validator.htmlparser.common.Heuristics;
+import nu.validator.htmlparser.common.TokenHandler;
+import nu.validator.htmlparser.common.TransitionHandler;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
+import nu.validator.htmlparser.impl.Tokenizer;
+import nu.validator.htmlparser.impl.TreeBuilder;
+import nu.validator.htmlparser.io.Driver;
+import nu.validator.saxtree.Document;
+import nu.validator.saxtree.DocumentFragment;
+import nu.validator.saxtree.TreeParser;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.DTDHandler;
+import org.xml.sax.EntityResolver;
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXNotRecognizedException;
+import org.xml.sax.SAXNotSupportedException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.ext.LexicalHandler;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * This class implements an HTML5 parser that exposes data through the SAX2
+ * interface.
+ *
+ * <p>By default, when using the constructor without arguments, this
+ * parser treats XML 1.0-incompatible infosets as fatal errors. This
+ * corresponds to <code>FATAL</code> as the general XML violation policy.
+ * To make the parser support non-conforming HTML fully per the HTML 5 spec
+ * while on the other hand potentially violating the SAX2 API contract, set
+ * the general XML violation policy to <code>ALLOW</code>. It is possible to
+ * coerce XML 1.0-incompatible infosets into XML 1.0-compatible infosets by
+ * setting the general XML violation policy to <code>ALTER_INFOSET</code>.
+ *
+ * <p>By default, this parser doesn't do true streaming but buffers everything
+ * first. The parser can be made truly streaming by calling
+ * <code>setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL)</code>. This
+ * has the consequence that errors that require non-streamable recovery are
+ * treated as fatal.
+ *
+ * <p>By default, the parser reports the doctype through
+ * <code>LexicalHandler</code>. To make the parse events emulate the parse
+ * events for a DTDless XML document, doctype reporting through
+ * <code>LexicalHandler</code> can be turned off by calling
+ * <code>setReportingDoctype(false)</code>.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class HtmlParser implements XMLReader {
+
+ private Driver driver = null;
+
+ private TreeBuilder<?> treeBuilder = null;
+
+ private SAXStreamer saxStreamer = null; // work around javac bug
+
+ private SAXTreeBuilder saxTreeBuilder = null; // work around javac bug
+
+ private ContentHandler contentHandler = null;
+
+ private LexicalHandler lexicalHandler = null;
+
+ private DTDHandler dtdHandler = null;
+
+ private EntityResolver entityResolver = null;
+
+ private ErrorHandler errorHandler = null;
+
+ private DocumentModeHandler documentModeHandler = null;
+
+ private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;
+
+ private boolean checkingNormalization = false;
+
+ private boolean scriptingEnabled = false;
+
+ private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>();
+
+ private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW;
+
+ private boolean html4ModeCompatibleWithXhtml1Schemata = false;
+
+ private boolean mappingLangToXmlLang = false;
+
+ private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL;
+
+ private boolean reportingDoctype = true;
+
+ private ErrorHandler treeBuilderErrorHandler = null;
+
+ private Heuristics heuristics = Heuristics.NONE;
+
+ private HashMap<String, String> errorProfileMap = null;
+
+ private TransitionHandler transitionHandler = null;
+
+ /**
+ * Instantiates the parser with a fatal XML violation policy.
+ * Delegates to the one-argument constructor with <code>XmlViolationPolicy.FATAL</code>.
+ */
+ public HtmlParser() {
+ this(XmlViolationPolicy.FATAL);
+ }
+
+    /**
+     * Instantiates the parser with a specific XML violation policy.
+     * @param xmlPolicy the policy, passed on to <code>setXmlPolicy</code>
+     */
+    public HtmlParser(XmlViolationPolicy xmlPolicy) {
+        this.setXmlPolicy(xmlPolicy);
+    }
+
+    private Tokenizer newTokenizer(TokenHandler handler, boolean newAttributesEachTime) {
+        boolean needsReporting = errorHandler != null || transitionHandler != null
+                || contentNonXmlCharPolicy != XmlViolationPolicy.ALLOW;
+        if (needsReporting) {
+            ErrorReportingTokenizer reporting = new ErrorReportingTokenizer(handler, newAttributesEachTime);
+            reporting.setErrorProfile(errorProfileMap);
+            return reporting;
+        }
+        return new Tokenizer(handler, newAttributesEachTime); // plain tokenizer when nothing needs reporting
+    }
+
+    /**
+     * Creates the driver and tree builder on first use and copies this parser's settings
+     * to them. Buffers via SAXTreeBuilder under the ALLOW streamability policy, else streams.
+     */
+    private void lazyInit() {
+        if (driver != null) {
+            return; // already initialized
+        }
+        if (streamabilityViolationPolicy == XmlViolationPolicy.ALLOW) {
+            saxTreeBuilder = new SAXTreeBuilder();
+            treeBuilder = saxTreeBuilder;
+            saxStreamer = null;
+            driver = new Driver(newTokenizer(treeBuilder, true));
+        } else {
+            saxStreamer = new SAXStreamer();
+            treeBuilder = saxStreamer;
+            saxTreeBuilder = null;
+            driver = new Driver(newTokenizer(treeBuilder, false));
+        }
+        driver.setErrorHandler(errorHandler);
+        driver.setTransitionHandler(transitionHandler);
+        treeBuilder.setErrorHandler(treeBuilderErrorHandler);
+        driver.setCheckingNormalization(checkingNormalization);
+        driver.setCommentPolicy(commentPolicy);
+        driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
+        driver.setContentSpacePolicy(contentSpacePolicy);
+        driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+        driver.setMappingLangToXmlLang(mappingLangToXmlLang);
+        driver.setXmlnsPolicy(xmlnsPolicy);
+        driver.setHeuristics(heuristics);
+        for (CharacterHandler registered : characterHandlers) {
+            driver.addCharacterHandler(registered);
+        }
+        treeBuilder.setDoctypeExpectation(doctypeExpectation);
+        treeBuilder.setDocumentModeHandler(documentModeHandler);
+        treeBuilder.setIgnoringComments(lexicalHandler == null);
+        treeBuilder.setScriptingEnabled(scriptingEnabled);
+        treeBuilder.setReportingDoctype(reportingDoctype);
+        treeBuilder.setNamePolicy(namePolicy);
+        if (saxStreamer != null) {
+            saxStreamer.setContentHandler(contentHandler == null ? new DefaultHandler() : contentHandler);
+            saxStreamer.setLexicalHandler(lexicalHandler);
+            driver.setAllowRewinding(false);
+        }
+    }
+
+    /**
+     * Returns this parser's content handler field; see {@link org.xml.sax.XMLReader#getContentHandler()}.
+     */
+    public ContentHandler getContentHandler() {
+        return this.contentHandler;
+    }
+
+    /**
+     * Returns this parser's DTD handler field; see {@link org.xml.sax.XMLReader#getDTDHandler()}.
+     */
+    public DTDHandler getDTDHandler() {
+        return this.dtdHandler;
+    }
+
+    /**
+     * Returns this parser's entity resolver field; see {@link org.xml.sax.XMLReader#getEntityResolver()}.
+     */
+    public EntityResolver getEntityResolver() {
+        return this.entityResolver;
+    }
+
+    /**
+     * Returns this parser's error handler field; see {@link org.xml.sax.XMLReader#getErrorHandler()}.
+     */
+    public ErrorHandler getErrorHandler() {
+        return this.errorHandler;
+    }
+
+    /**
+     * Exposes the configuration of the emulated XML parser as well as
+     * boolean-valued configuration without using non-<code>XMLReader</code>
+     * getters directly.
+     *
+     * <p>Most features have a fixed value; the normalization-checking and
+     * <code>validator.nu</code> features reflect the corresponding getters.
+     * A <code>null</code> name is treated like any other unrecognized name.
+     *
+     * <dl>
+     * <dt><code>http://xml.org/sax/features/external-general-entities</code></dt>
+     * <dd><code>false</code></dd>
+     * <dt><code>http://xml.org/sax/features/external-parameter-entities</code></dt>
+     * <dd><code>false</code></dd>
+     * <dt><code>http://xml.org/sax/features/is-standalone</code></dt>
+     * <dd><code>true</code></dd>
+     * <dt><code>http://xml.org/sax/features/lexical-handler/parameter-entities</code></dt>
+     * <dd><code>false</code></dd>
+     * <dt><code>http://xml.org/sax/features/namespaces</code></dt>
+     * <dd><code>true</code></dd>
+     * <dt><code>http://xml.org/sax/features/namespace-prefixes</code></dt>
+     * <dd><code>false</code></dd>
+     * <dt><code>http://xml.org/sax/features/resolve-dtd-uris</code></dt>
+     * <dd><code>true</code></dd>
+     * <dt><code>http://xml.org/sax/features/string-interning</code></dt>
+     * <dd><code>true</code></dd>
+     * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt>
+     * <dd><code>isCheckingNormalization</code></dd>
+     * <dt><code>http://xml.org/sax/features/use-attributes2</code></dt>
+     * <dd><code>false</code></dd>
+     * <dt><code>http://xml.org/sax/features/use-locator2</code></dt>
+     * <dd><code>false</code></dd>
+     * <dt><code>http://xml.org/sax/features/use-entity-resolver2</code></dt>
+     * <dd><code>false</code></dd>
+     * <dt><code>http://xml.org/sax/features/validation</code></dt>
+     * <dd><code>false</code></dd>
+     * <dt><code>http://xml.org/sax/features/xmlns-uris</code></dt>
+     * <dd><code>false</code></dd>
+     * <dt><code>http://xml.org/sax/features/xml-1.1</code></dt>
+     * <dd><code>false</code></dd>
+     * <dt><code>http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata</code></dt>
+     * <dd><code>isHtml4ModeCompatibleWithXhtml1Schemata</code></dd>
+     * <dt><code>http://validator.nu/features/mapping-lang-to-xml-lang</code></dt>
+     * <dd><code>isMappingLangToXmlLang</code></dd>
+     * <dt><code>http://validator.nu/features/scripting-enabled</code></dt>
+     * <dd><code>isScriptingEnabled</code></dd>
+     * </dl>
+     *
+     * @param name feature URI string
+     * @return a value per the list above
+     * @throws SAXNotRecognizedException if the name is not in the list above
+     * @throws SAXNotSupportedException never thrown by this implementation
+     * @see org.xml.sax.XMLReader#getFeature(java.lang.String)
+     */
+    public boolean getFeature(String name) throws SAXNotRecognizedException,
+            SAXNotSupportedException {
+        // Features that reflect this parser's configuration.
+        if ("http://xml.org/sax/features/unicode-normalization-checking".equals(name)) {
+            return isCheckingNormalization(); // the checks aren't really per XML 1.1
+        }
+        if ("http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata".equals(name)) {
+            return isHtml4ModeCompatibleWithXhtml1Schemata();
+        }
+        if ("http://validator.nu/features/mapping-lang-to-xml-lang".equals(name)) {
+            return isMappingLangToXmlLang();
+        }
+        if ("http://validator.nu/features/scripting-enabled".equals(name)) {
+            return isScriptingEnabled();
+        }
+        // Fixed features that are always reported as true.
+        if ("http://xml.org/sax/features/is-standalone".equals(name)
+                || "http://xml.org/sax/features/namespaces".equals(name)
+                // default value--applicable scenario never happens
+                || "http://xml.org/sax/features/resolve-dtd-uris".equals(name)
+                || "http://xml.org/sax/features/string-interning".equals(name)) {
+            return true;
+        }
+        // Fixed features that are always reported as false.
+        if ("http://xml.org/sax/features/external-general-entities".equals(name)
+                || "http://xml.org/sax/features/external-parameter-entities".equals(name)
+                || "http://xml.org/sax/features/lexical-handler/parameter-entities".equals(name)
+                || "http://xml.org/sax/features/namespace-prefixes".equals(name)
+                || "http://xml.org/sax/features/use-attributes2".equals(name)
+                || "http://xml.org/sax/features/use-locator2".equals(name)
+                || "http://xml.org/sax/features/use-entity-resolver2".equals(name)
+                || "http://xml.org/sax/features/validation".equals(name)
+                || "http://xml.org/sax/features/xmlns-uris".equals(name)
+                || "http://xml.org/sax/features/xml-1.1".equals(name)) {
+            return false;
+        }
+        throw new SAXNotRecognizedException();
+    }
+
+    /**
+     * Allows <code>XMLReader</code>-level access to non-boolean valued getters.
+     *
+     * <p>The properties are mapped as follows:
+     *
+     * <dl>
+     * <dt><code>http://xml.org/sax/properties/document-xml-version</code></dt>
+     * <dd><code>"1.0"</code></dd>
+     * <dt><code>http://xml.org/sax/properties/lexical-handler</code></dt>
+     * <dd><code>getLexicalHandler</code></dd>
+     * <dt><code>http://validator.nu/properties/content-space-policy</code></dt>
+     * <dd><code>getContentSpacePolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/content-non-xml-char-policy</code></dt>
+     * <dd><code>getContentNonXmlCharPolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/comment-policy</code></dt>
+     * <dd><code>getCommentPolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/xmlns-policy</code></dt>
+     * <dd><code>getXmlnsPolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/name-policy</code></dt>
+     * <dd><code>getNamePolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/streamability-violation-policy</code></dt>
+     * <dd><code>getStreamabilityViolationPolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/document-mode-handler</code></dt>
+     * <dd><code>getDocumentModeHandler</code></dd>
+     * <dt><code>http://validator.nu/properties/doctype-expectation</code></dt>
+     * <dd><code>getDoctypeExpectation</code></dd>
+     * <dt><code>http://validator.nu/properties/heuristics</code></dt>
+     * <dd><code>getHeuristics</code></dd>
+     * </dl>
+     *
+     * @param name property URI string
+     * @return a value per the list above
+     * @throws SAXNotRecognizedException if the name is not a known property
+     * @throws SAXNotSupportedException for declaration-handler, dom-node, xml-string and xml-policy
+     * @see org.xml.sax.XMLReader#getProperty(java.lang.String)
+     */
+    public Object getProperty(String name) throws SAXNotRecognizedException,
+            SAXNotSupportedException {
+        if ("http://xml.org/sax/properties/declaration-handler".equals(name)) {
+            throw new SAXNotSupportedException(
+                    "This parser does not support DeclHandler.");
+        } else if ("http://xml.org/sax/properties/document-xml-version".equals(name)) {
+            return "1.0"; // Emulating an XML 1.1 parser is not supported.
+        } else if ("http://xml.org/sax/properties/dom-node".equals(name)) {
+            throw new SAXNotSupportedException(
+                    "This parser does not walk the DOM.");
+        } else if ("http://xml.org/sax/properties/lexical-handler".equals(name)) {
+            return getLexicalHandler();
+        } else if ("http://xml.org/sax/properties/xml-string".equals(name)) {
+            throw new SAXNotSupportedException(
+                    "This parser does not expose the source as a string.");
+        } else if ("http://validator.nu/properties/content-space-policy".equals(name)) {
+            return getContentSpacePolicy();
+        } else if ("http://validator.nu/properties/content-non-xml-char-policy".equals(name)) {
+            return getContentNonXmlCharPolicy();
+        } else if ("http://validator.nu/properties/comment-policy".equals(name)) {
+            return getCommentPolicy();
+        } else if ("http://validator.nu/properties/xmlns-policy".equals(name)) {
+            return getXmlnsPolicy();
+        } else if ("http://validator.nu/properties/name-policy".equals(name)) {
+            return getNamePolicy();
+        } else if ("http://validator.nu/properties/streamability-violation-policy".equals(name)) {
+            return getStreamabilityViolationPolicy();
+        } else if ("http://validator.nu/properties/document-mode-handler".equals(name)) {
+            return getDocumentModeHandler();
+        } else if ("http://validator.nu/properties/doctype-expectation".equals(name)) {
+            return getDoctypeExpectation();
+        } else if ("http://validator.nu/properties/xml-policy".equals(name)) {
+            throw new SAXNotSupportedException(
+                    "Cannot get a convenience setter.");
+        } else if ("http://validator.nu/properties/heuristics".equals(name)) {
+            return getHeuristics();
+        } else {
+            throw new SAXNotRecognizedException();
+        }
+    }
+
+    /**
+     * Parses a whole document (no fragment context); see {@link org.xml.sax.XMLReader#parse(org.xml.sax.InputSource)}.
+     */
+    public void parse(InputSource input) throws IOException, SAXException {
+        lazyInit();
+        try {
+            treeBuilder.setFragmentContext(null);
+            tokenize(input);
+        } finally {
+            // With a SAXTreeBuilder, the document it built is replayed to the
+            // handlers here; this runs even when tokenization threw.
+            Document document = (saxTreeBuilder == null) ? null : saxTreeBuilder.getDocument();
+            if (document != null) {
+                new TreeParser(contentHandler, lexicalHandler).parse(document);
+            }
+        }
+    }
+
+    /**
+     * Parses a fragment with HTML context.
+     *
+     * @param input the input to parse
+     * @param context the name of the context element (HTML namespace assumed)
+     * @throws IOException declared; raised by tokenization of the input
+     * @throws SAXException declared; raised by tokenization or by replaying the fragment
+     */
+    public void parseFragment(InputSource input, String context) throws IOException, SAXException {
+        lazyInit();
+        try {
+            treeBuilder.setFragmentContext(context.intern());
+            tokenize(input);
+        } finally {
+            if (saxTreeBuilder != null) {
+                DocumentFragment fragment = saxTreeBuilder.getDocumentFragment();
+                TreeParser replayer = new TreeParser(contentHandler, lexicalHandler);
+                replayer.parse(fragment);
+            }
+        }
+    }
+
+ /**
+  * Parses a fragment using a context element given by local name and
+  * namespace.
+  *
+  * <p>Both context strings are interned before being handed to the tree
+  * builder. After tokenization (whether or not it threw), if a SAX tree
+  * builder is in use its document fragment is replayed to the registered
+  * handlers.
+  *
+  * @param input the input to parse
+  * @param contextLocal the local name of the context element
+  * @param contextNamespace the namespace of the context element
+  * @throws IOException if reading the input fails
+  * @throws SAXException if parsing or event delivery fails
+  */
+ public void parseFragment(InputSource input, String contextLocal, String contextNamespace)
+         throws IOException, SAXException {
+     lazyInit();
+     try {
+         String internedLocal = contextLocal.intern();
+         String internedNamespace = contextNamespace.intern();
+         treeBuilder.setFragmentContext(internedLocal, internedNamespace, null, false);
+         tokenize(input);
+     } finally {
+         if (saxTreeBuilder != null) {
+             DocumentFragment built = saxTreeBuilder.getDocumentFragment();
+             TreeParser replayer = new TreeParser(contentHandler, lexicalHandler);
+             replayer.parse(built);
+         }
+     }
+ }
+
+ /**
+  * Feeds the given input source to the driver, first resolving it to an
+  * actual stream when it only carries a system identifier.
+  *
+  * <p>If the source has neither a byte stream nor a character stream, its
+  * system id is offered to the registered entity resolver (if any). When
+  * there is no resolver, the resolver returns <code>null</code>, or the
+  * resolved source still has no stream of either kind, the system id is
+  * opened directly as a URL.
+  *
+  * @param is the source to tokenize; must not be <code>null</code>
+  * @throws IllegalArgumentException if <code>is</code> is <code>null</code>
+  *         or offers no byte stream, character stream or system id
+  * @throws SAXException if the entity resolver or the tokenizer fails
+  * @throws IOException if the input cannot be opened or read
+  * @throws MalformedURLException if the system id is not a valid URL
+  */
+ private void tokenize(InputSource is) throws SAXException, IOException, MalformedURLException {
+     if (is == null) {
+         throw new IllegalArgumentException("Null input.");
+     }
+     if (is.getByteStream() == null && is.getCharacterStream() == null) {
+         String systemId = is.getSystemId();
+         if (systemId == null) {
+             throw new IllegalArgumentException("No byte stream, no character stream nor URI.");
+         }
+         InputSource resolved = null;
+         if (entityResolver != null) {
+             // A resolver may legitimately return null to request default handling.
+             resolved = entityResolver.resolveEntity(is.getPublicId(), systemId);
+         }
+         // Fall back to opening the URL only when the resolver supplied no
+         // usable stream at all; one stream of either kind is sufficient.
+         if (resolved == null
+                 || (resolved.getByteStream() == null && resolved.getCharacterStream() == null)) {
+             resolved = new InputSource();
+             resolved.setSystemId(systemId);
+             resolved.setByteStream(new URL(systemId).openStream());
+         }
+         is = resolved;
+     }
+     driver.tokenize(is);
+ }
+
+ /**
+  * Parses the document identified by a system identifier by wrapping it in
+  * an <code>InputSource</code> and delegating to
+  * {@link #parse(org.xml.sax.InputSource)}.
+  *
+  * @param systemId the system identifier (URI) of the document
+  * @throws IOException if reading the input fails
+  * @throws SAXException if parsing fails
+  * @see org.xml.sax.XMLReader#parse(java.lang.String)
+  */
+ public void parse(String systemId) throws IOException, SAXException {
+     InputSource source = new InputSource(systemId);
+     parse(source);
+ }
+
+ /**
+  * Registers the content handler.
+  *
+  * <p>If a SAX streamer already exists it is updated as well; it receives a
+  * fresh <code>DefaultHandler</code> instead of <code>null</code>.
+  *
+  * @param handler the content handler, or <code>null</code>
+  * @see org.xml.sax.XMLReader#setContentHandler(org.xml.sax.ContentHandler)
+  */
+ public void setContentHandler(ContentHandler handler) {
+     contentHandler = handler;
+     if (saxStreamer == null) {
+         return;
+     }
+     if (contentHandler == null) {
+         saxStreamer.setContentHandler(new DefaultHandler());
+     } else {
+         saxStreamer.setContentHandler(contentHandler);
+     }
+ }
+
+ /**
+  * Sets the lexical handler.
+  *
+  * <p>When a tree builder already exists it is told to ignore comments
+  * exactly when the handler is <code>null</code>, and an existing SAX
+  * streamer is given the new handler.
+  *
+  * @param handler the handler, or <code>null</code>
+  */
+ public void setLexicalHandler(LexicalHandler handler) {
+     lexicalHandler = handler;
+     if (treeBuilder == null) {
+         return;
+     }
+     boolean ignoreComments = (handler == null);
+     treeBuilder.setIgnoringComments(ignoreComments);
+     if (saxStreamer != null) {
+         saxStreamer.setLexicalHandler(handler);
+     }
+ }
+
+ /**
+  * Stores the DTD handler.
+  *
+  * @param handler the DTD handler, or <code>null</code>
+  * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
+  */
+ public void setDTDHandler(DTDHandler handler) {
+     this.dtdHandler = handler;
+ }
+
+ /**
+  * Stores the entity resolver.
+  *
+  * @param resolver the entity resolver, or <code>null</code>
+  * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
+  */
+ public void setEntityResolver(EntityResolver resolver) {
+     this.entityResolver = resolver;
+ }
+
+ /**
+  * Sets the error handler for both the parser and the tree builder, and
+  * discards the current driver.
+  *
+  * <p>NOTE(review): the driver is presumably rebuilt on the next parse by
+  * <code>lazyInit()</code>; confirm against that method.
+  *
+  * @param handler the error handler, or <code>null</code>
+  * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
+  */
+ public void setErrorHandler(ErrorHandler handler) {
+     this.errorHandler = handler;
+     this.treeBuilderErrorHandler = handler;
+     this.driver = null;
+ }
+
+ /**
+  * Stores the transition handler and discards the current driver.
+  *
+  * <p>NOTE(review): the driver is presumably rebuilt on the next parse by
+  * <code>lazyInit()</code>; confirm against that method.
+  *
+  * @param handler the transition handler, or <code>null</code>
+  */
+ public void setTransitionHandler(TransitionHandler handler) {
+     this.transitionHandler = handler;
+     this.driver = null;
+ }
+
+ /**
+  * Overrides the error handler used by the tree builder only.
+  *
+  * <p>The override is stored, and pushed to the tree builder right away
+  * when a driver currently exists.
+  *
+  * @param handler the error handler for the tree builder
+  * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
+  * @deprecated For Validator.nu internal use
+  */
+ public void setTreeBuilderErrorHandlerOverride(ErrorHandler handler) {
+     this.treeBuilderErrorHandler = handler;
+     if (driver == null) {
+         return;
+     }
+     treeBuilder.setErrorHandler(handler);
+ }
+
+ /**
+  * Sets a boolean feature without having to use non-<code>XMLReader</code>
+  * setters directly.
+  *
+  * <p>
+  * The settable features are:
+  *
+  * <dl>
+  * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt>
+  * <dd><code>setCheckingNormalization</code></dd>
+  * <dt><code>http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata</code></dt>
+  * <dd><code>setHtml4ModeCompatibleWithXhtml1Schemata</code></dd>
+  * <dt><code>http://validator.nu/features/mapping-lang-to-xml-lang</code></dt>
+  * <dd><code>setMappingLangToXmlLang</code></dd>
+  * <dt><code>http://validator.nu/features/scripting-enabled</code></dt>
+  * <dd><code>setScriptingEnabled</code></dd>
+  * </dl>
+  *
+  * <p>The remaining standard SAX features are recognized but have a fixed
+  * value; "setting" them to that value is a no-op, and any other value is
+  * rejected.
+  *
+  * @param name the feature URI
+  * @param value the requested state
+  * @throws SAXNotRecognizedException if the feature name is unknown
+  * @throws SAXNotSupportedException if the feature is fixed and
+  *         <code>value</code> differs from its fixed state
+  * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
+  */
+ public void setFeature(String name, boolean value)
+         throws SAXNotRecognizedException, SAXNotSupportedException {
+     if ("http://xml.org/sax/features/unicode-normalization-checking".equals(name)) {
+         setCheckingNormalization(value);
+     } else if ("http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata".equals(name)) {
+         setHtml4ModeCompatibleWithXhtml1Schemata(value);
+     } else if ("http://validator.nu/features/mapping-lang-to-xml-lang".equals(name)) {
+         setMappingLangToXmlLang(value);
+     } else if ("http://validator.nu/features/scripting-enabled".equals(name)) {
+         setScriptingEnabled(value);
+     } else if (isFeatureFixedAtFalse(name)) {
+         if (value) {
+             throw new SAXNotSupportedException("Cannot set " + name + ".");
+         }
+     } else if (isFeatureFixedAtTrue(name)) {
+         if (!value) {
+             throw new SAXNotSupportedException("Cannot set " + name + ".");
+         }
+     } else {
+         // Name the offending feature so the failure is diagnosable.
+         throw new SAXNotRecognizedException("Unrecognized feature: " + name);
+     }
+ }
+
+ /** Tells whether the named SAX feature is recognized but can only ever be <code>false</code>. */
+ private static boolean isFeatureFixedAtFalse(String name) {
+     return "http://xml.org/sax/features/external-general-entities".equals(name)
+             || "http://xml.org/sax/features/external-parameter-entities".equals(name)
+             || "http://xml.org/sax/features/lexical-handler/parameter-entities".equals(name)
+             || "http://xml.org/sax/features/namespace-prefixes".equals(name)
+             || "http://xml.org/sax/features/use-attributes2".equals(name)
+             || "http://xml.org/sax/features/use-locator2".equals(name)
+             || "http://xml.org/sax/features/use-entity-resolver2".equals(name)
+             || "http://xml.org/sax/features/validation".equals(name)
+             || "http://xml.org/sax/features/xmlns-uris".equals(name)
+             || "http://xml.org/sax/features/xml-1.1".equals(name);
+ }
+
+ /** Tells whether the named SAX feature is recognized but can only ever be <code>true</code>. */
+ private static boolean isFeatureFixedAtTrue(String name) {
+     return "http://xml.org/sax/features/is-standalone".equals(name)
+             || "http://xml.org/sax/features/namespaces".equals(name)
+             || "http://xml.org/sax/features/resolve-dtd-uris".equals(name)
+             || "http://xml.org/sax/features/string-interning".equals(name);
+ }
+
+ /**
+  * Sets a non-boolean property without having to use non-<code>XMLReader</code>
+  * setters directly.
+  *
+  * <dl>
+  * <dt><code>http://xml.org/sax/properties/lexical-handler</code></dt>
+  * <dd><code>setLexicalHandler</code></dd>
+  * <dt><code>http://validator.nu/properties/content-space-policy</code></dt>
+  * <dd><code>setContentSpacePolicy</code></dd>
+  * <dt><code>http://validator.nu/properties/content-non-xml-char-policy</code></dt>
+  * <dd><code>setContentNonXmlCharPolicy</code></dd>
+  * <dt><code>http://validator.nu/properties/comment-policy</code></dt>
+  * <dd><code>setCommentPolicy</code></dd>
+  * <dt><code>http://validator.nu/properties/xmlns-policy</code></dt>
+  * <dd><code>setXmlnsPolicy</code></dd>
+  * <dt><code>http://validator.nu/properties/name-policy</code></dt>
+  * <dd><code>setNamePolicy</code></dd>
+  * <dt><code>http://validator.nu/properties/streamability-violation-policy</code></dt>
+  * <dd><code>setStreamabilityViolationPolicy</code></dd>
+  * <dt><code>http://validator.nu/properties/document-mode-handler</code></dt>
+  * <dd><code>setDocumentModeHandler</code></dd>
+  * <dt><code>http://validator.nu/properties/doctype-expectation</code></dt>
+  * <dd><code>setDoctypeExpectation</code></dd>
+  * <dt><code>http://validator.nu/properties/xml-policy</code></dt>
+  * <dd><code>setXmlPolicy</code></dd>
+  * <dt><code>http://validator.nu/properties/heuristics</code></dt>
+  * <dd><code>setHeuristics</code></dd>
+  * </dl>
+  *
+  * @param name the property URI
+  * @param value the new value; must be of the type the matching setter takes
+  * @throws SAXNotRecognizedException if the property name is unknown
+  * @throws SAXNotSupportedException if the property cannot be set, or if
+  *         <code>value</code> is of the wrong type for the property
+  * @see org.xml.sax.XMLReader#setProperty(java.lang.String,
+  *      java.lang.Object)
+  */
+ public void setProperty(String name, Object value)
+         throws SAXNotRecognizedException, SAXNotSupportedException {
+     if ("http://xml.org/sax/properties/declaration-handler".equals(name)) {
+         throw new SAXNotSupportedException(
+                 "This parser does not support DeclHandler.");
+     } else if ("http://xml.org/sax/properties/document-xml-version".equals(name)) {
+         throw new SAXNotSupportedException(
+                 "Can't set document-xml-version.");
+     } else if ("http://xml.org/sax/properties/dom-node".equals(name)) {
+         throw new SAXNotSupportedException("Can't set dom-node.");
+     } else if ("http://xml.org/sax/properties/lexical-handler".equals(name)) {
+         setLexicalHandler(propertyValue(name, value, LexicalHandler.class));
+     } else if ("http://xml.org/sax/properties/xml-string".equals(name)) {
+         throw new SAXNotSupportedException("Can't set xml-string.");
+     } else if ("http://validator.nu/properties/content-space-policy".equals(name)) {
+         setContentSpacePolicy(propertyValue(name, value, XmlViolationPolicy.class));
+     } else if ("http://validator.nu/properties/content-non-xml-char-policy".equals(name)) {
+         setContentNonXmlCharPolicy(propertyValue(name, value, XmlViolationPolicy.class));
+     } else if ("http://validator.nu/properties/comment-policy".equals(name)) {
+         setCommentPolicy(propertyValue(name, value, XmlViolationPolicy.class));
+     } else if ("http://validator.nu/properties/xmlns-policy".equals(name)) {
+         setXmlnsPolicy(propertyValue(name, value, XmlViolationPolicy.class));
+     } else if ("http://validator.nu/properties/name-policy".equals(name)) {
+         setNamePolicy(propertyValue(name, value, XmlViolationPolicy.class));
+     } else if ("http://validator.nu/properties/streamability-violation-policy".equals(name)) {
+         setStreamabilityViolationPolicy(propertyValue(name, value, XmlViolationPolicy.class));
+     } else if ("http://validator.nu/properties/document-mode-handler".equals(name)) {
+         setDocumentModeHandler(propertyValue(name, value, DocumentModeHandler.class));
+     } else if ("http://validator.nu/properties/doctype-expectation".equals(name)) {
+         setDoctypeExpectation(propertyValue(name, value, DoctypeExpectation.class));
+     } else if ("http://validator.nu/properties/xml-policy".equals(name)) {
+         setXmlPolicy(propertyValue(name, value, XmlViolationPolicy.class));
+     } else if ("http://validator.nu/properties/heuristics".equals(name)) {
+         setHeuristics(propertyValue(name, value, Heuristics.class));
+     } else {
+         // Name the offending property so the failure is diagnosable.
+         throw new SAXNotRecognizedException("Unrecognized property: " + name);
+     }
+ }
+
+ /**
+  * Casts a property value to the type its setter expects, reporting a type
+  * mismatch as <code>SAXNotSupportedException</code> (the exception the SAX
+  * contract reserves for unusable values) rather than letting a
+  * <code>ClassCastException</code> escape. <code>null</code> passes through.
+  */
+ private static <T> T propertyValue(String name, Object value, Class<T> type)
+         throws SAXNotSupportedException {
+     if (value != null && !type.isInstance(value)) {
+         throw new SAXNotSupportedException("The value of " + name
+                 + " must be an instance of " + type.getName() + ".");
+     }
+     return type.cast(value);
+ }
+
+ /**
+  * Reports whether the source is being checked for NFC normalization.
+  *
+  * @return <code>true</code> if normalization checking is switched on
+  * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization()
+  */
+ public boolean isCheckingNormalization() {
+     return this.checkingNormalization;
+ }
+
+ /**
+  * Switches NFC normalization checking of the source on or off.
+  *
+  * <p>The setting is stored and, when a driver exists, forwarded to it.
+  *
+  * @param enable <code>true</code> to check normalization
+  * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean)
+  */
+ public void setCheckingNormalization(boolean enable) {
+     checkingNormalization = enable;
+     if (driver == null) {
+         return;
+     }
+     driver.setCheckingNormalization(enable);
+ }
+
+ /**
+  * Sets the policy for consecutive hyphens in comments.
+  *
+  * <p>The policy is stored and, when a driver exists, forwarded to it.
+  *
+  * @param commentPolicy the policy
+  * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+  */
+ public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
+     this.commentPolicy = commentPolicy;
+     if (driver == null) {
+         return;
+     }
+     driver.setCommentPolicy(commentPolicy);
+ }
+
+ /**
+  * Sets the policy for non-XML characters except white space.
+  *
+  * <p>The policy is stored and the current driver is discarded.
+  * NOTE(review): the driver is presumably rebuilt with the new policy on
+  * the next parse; confirm against <code>lazyInit()</code>.
+  *
+  * @param contentNonXmlCharPolicy the policy
+  * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+  */
+ public void setContentNonXmlCharPolicy(XmlViolationPolicy contentNonXmlCharPolicy) {
+     this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
+     this.driver = null;
+ }
+
+ /**
+  * Sets the policy for non-XML white space.
+  *
+  * <p>The policy is stored and, when a driver exists, forwarded to it.
+  *
+  * @param contentSpacePolicy the policy
+  * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+  */
+ public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
+     this.contentSpacePolicy = contentSpacePolicy;
+     if (driver == null) {
+         return;
+     }
+     driver.setContentSpacePolicy(contentSpacePolicy);
+ }
+
+ /**
+  * Reports whether the parser treats scripting as enabled when handling
+  * <code>noscript</code>.
+  *
+  * @return <code>true</code> if scripting is considered enabled
+  * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled()
+  */
+ public boolean isScriptingEnabled() {
+     return this.scriptingEnabled;
+ }
+
+ /**
+  * Sets whether the parser treats scripting as enabled when handling
+  * <code>noscript</code>.
+  *
+  * <p>The flag is stored and, when a tree builder exists, forwarded to it.
+  *
+  * @param scriptingEnabled <code>true</code> to enable
+  * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean)
+  */
+ public void setScriptingEnabled(boolean scriptingEnabled) {
+     this.scriptingEnabled = scriptingEnabled;
+     if (treeBuilder == null) {
+         return;
+     }
+     treeBuilder.setScriptingEnabled(scriptingEnabled);
+ }
+
+ /**
+  * Returns the currently configured doctype expectation.
+  *
+  * @return the doctype expectation
+  */
+ public DoctypeExpectation getDoctypeExpectation() {
+     return this.doctypeExpectation;
+ }
+
+ /**
+  * Sets the doctype expectation.
+  *
+  * <p>The value is stored and, when a tree builder exists, forwarded to it.
+  *
+  * @param doctypeExpectation
+  *            the doctype expectation to set
+  * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation)
+  */
+ public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
+     this.doctypeExpectation = doctypeExpectation;
+     if (treeBuilder == null) {
+         return;
+     }
+     treeBuilder.setDoctypeExpectation(doctypeExpectation);
+ }
+
+ /**
+  * Returns the currently configured document mode handler.
+  *
+  * @return the document mode handler
+  */
+ public DocumentModeHandler getDocumentModeHandler() {
+     return this.documentModeHandler;
+ }
+
+ /**
+  * Stores the document mode handler.
+  *
+  * <p>NOTE(review): unlike {@link #setDoctypeExpectation}, this setter does
+  * not forward the value to an existing tree builder; confirm that is
+  * intended.
+  *
+  * @param handler
+  *            the document mode handler to set
+  * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler)
+  */
+ public void setDocumentModeHandler(DocumentModeHandler handler) {
+     documentModeHandler = handler;
+ }
+
+ /**
+  * Returns the currently configured streamability violation policy.
+  *
+  * @return the streamability violation policy
+  */
+ public XmlViolationPolicy getStreamabilityViolationPolicy() {
+     return this.streamabilityViolationPolicy;
+ }
+
+ /**
+  * Sets the streamability violation policy.
+  *
+  * <p>The policy is stored and the current driver is discarded.
+  * NOTE(review): the driver is presumably rebuilt on the next parse;
+  * confirm against <code>lazyInit()</code>.
+  *
+  * @param policy
+  *            the streamability violation policy to set
+  */
+ public void setStreamabilityViolationPolicy(XmlViolationPolicy policy) {
+     streamabilityViolationPolicy = policy;
+     driver = null;
+ }
+
+ /**
+  * Sets whether the HTML 4 mode reports boolean attributes in a way that
+  * repeats the name in the value.
+  *
+  * <p>The flag is stored and, when a driver exists, forwarded to it.
+  *
+  * @param html4ModeCompatibleWithXhtml1Schemata the new setting
+  */
+ public void setHtml4ModeCompatibleWithXhtml1Schemata(
+         boolean html4ModeCompatibleWithXhtml1Schemata) {
+     this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
+     if (driver == null) {
+         return;
+     }
+     driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+ }
+
+ /**
+  * Returns the <code>Locator</code> during parse.
+  *
+  * <p>The locator is owned by the driver. Several setters of this class
+  * reset the driver to <code>null</code>, so no locator is available while
+  * no driver exists; <code>null</code> is returned in that case instead of
+  * failing with a <code>NullPointerException</code>.
+  *
+  * @return the <code>Locator</code>, or <code>null</code> if there is
+  *         currently no driver
+  */
+ public Locator getDocumentLocator() {
+     if (driver == null) {
+         return null;
+     }
+     return driver.getDocumentLocator();
+ }
+
+ /**
+  * Reports whether the HTML 4 mode reports boolean attributes in a way
+  * that repeats the name in the value.
+  *
+  * @return the current setting
+  */
+ public boolean isHtml4ModeCompatibleWithXhtml1Schemata() {
+     return this.html4ModeCompatibleWithXhtml1Schemata;
+ }
+
+ /**
+  * Sets whether <code>lang</code> is mapped to <code>xml:lang</code>.
+  *
+  * <p>The flag is stored and, when a driver exists, forwarded to it.
+  *
+  * @param mappingLangToXmlLang <code>true</code> to map the attribute
+  * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
+  */
+ public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
+     this.mappingLangToXmlLang = mappingLangToXmlLang;
+     if (driver == null) {
+         return;
+     }
+     driver.setMappingLangToXmlLang(mappingLangToXmlLang);
+ }
+
+ /**
+  * Reports whether <code>lang</code> is mapped to <code>xml:lang</code>.
+  *
+  * @return the current setting
+  */
+ public boolean isMappingLangToXmlLang() {
+     return this.mappingLangToXmlLang;
+ }
+
+ /**
+  * Sets the policy for the <code>xmlns</code> attribute on the root
+  * element, i.e. whether it is passed through.
+  *
+  * <p>The policy is stored and, when a driver exists, forwarded to it.
+  *
+  * @param xmlnsPolicy the policy; <code>FATAL</code> is not allowed
+  * @throws IllegalArgumentException if <code>xmlnsPolicy</code> is
+  *         <code>XmlViolationPolicy.FATAL</code>
+  * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+  */
+ public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
+     boolean fatalRequested = (xmlnsPolicy == XmlViolationPolicy.FATAL);
+     if (fatalRequested) {
+         throw new IllegalArgumentException("Can't use FATAL here.");
+     }
+     this.xmlnsPolicy = xmlnsPolicy;
+     if (driver == null) {
+         return;
+     }
+     driver.setXmlnsPolicy(xmlnsPolicy);
+ }
+
+ /**
+  * Returns the currently configured <code>xmlns</code> policy.
+  *
+  * @return the xmlns policy
+  */
+ public XmlViolationPolicy getXmlnsPolicy() {
+     return this.xmlnsPolicy;
+ }
+
+ /**
+  * Returns the currently registered lexical handler.
+  *
+  * @return the lexical handler
+  */
+ public LexicalHandler getLexicalHandler() {
+     return this.lexicalHandler;
+ }
+
+ /**
+  * Returns the currently configured comment policy.
+  *
+  * @return the comment policy
+  */
+ public XmlViolationPolicy getCommentPolicy() {
+     return this.commentPolicy;
+ }
+
+ /**
+  * Returns the currently configured policy for non-XML characters.
+  *
+  * @return the content non-XML character policy
+  */
+ public XmlViolationPolicy getContentNonXmlCharPolicy() {
+     return this.contentNonXmlCharPolicy;
+ }
+
+ /**
+  * Returns the currently configured policy for non-XML white space.
+  *
+  * @return the content space policy
+  */
+ public XmlViolationPolicy getContentSpacePolicy() {
+     return this.contentSpacePolicy;
+ }
+
+ /**
+  * Sets the doctype reporting flag.
+  *
+  * <p>The flag is stored and, when a tree builder exists, forwarded to it.
+  *
+  * @param reportingDoctype the new setting
+  * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean)
+  */
+ public void setReportingDoctype(boolean reportingDoctype) {
+     this.reportingDoctype = reportingDoctype;
+     if (treeBuilder == null) {
+         return;
+     }
+     treeBuilder.setReportingDoctype(reportingDoctype);
+ }
+
+ /**
+  * Returns the doctype reporting flag.
+  *
+  * @return the current setting
+  */
+ public boolean isReportingDoctype() {
+     return this.reportingDoctype;
+ }
+
+ /**
+  * Stores the error profile map.
+  *
+  * <p>NOTE(review): the map is only stored here; where it is consumed is
+  * not visible from this method.
+  *
+  * @param errorProfileMap the error profile map to store
+  */
+ public void setErrorProfile(HashMap<String, String> errorProfileMap) {
+     HashMap<String, String> profile = errorProfileMap;
+     this.errorProfileMap = profile;
+ }
+
+ /**
+  * Sets the policy for non-NCName element and attribute names.
+  *
+  * <p>The policy is stored and, when a driver exists, forwarded to both the
+  * driver and the tree builder.
+  *
+  * @param namePolicy the policy
+  * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+  */
+ public void setNamePolicy(XmlViolationPolicy namePolicy) {
+     this.namePolicy = namePolicy;
+     if (driver == null) {
+         return;
+     }
+     driver.setNamePolicy(namePolicy);
+     treeBuilder.setNamePolicy(namePolicy);
+ }
+
+ /**
+  * Sets the encoding sniffing heuristics.
+  *
+  * <p>The value is stored and, when a driver exists, forwarded to it.
+  *
+  * @param heuristics the heuristics to set
+  * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics)
+  */
+ public void setHeuristics(Heuristics heuristics) {
+     this.heuristics = heuristics;
+     if (driver == null) {
+         return;
+     }
+     driver.setHeuristics(heuristics);
+ }
+
+ /**
+  * Returns the currently configured encoding sniffing heuristics.
+  *
+  * @return the heuristics
+  */
+ public Heuristics getHeuristics() {
+     return heuristics;
+ }
+
+ /**
+  * Convenience setter that applies one policy to the name, xmlns, content
+  * space, content non-XML char and comment policies in one go. This does
+  * not affect the streamability policy or doctype reporting.
+  *
+  * <p>Because the xmlns policy rejects <code>FATAL</code>,
+  * <code>ALTER_INFOSET</code> is substituted for it there.
+  *
+  * @param xmlPolicy the policy to apply
+  */
+ public void setXmlPolicy(XmlViolationPolicy xmlPolicy) {
+     XmlViolationPolicy policyForXmlns = xmlPolicy;
+     if (xmlPolicy == XmlViolationPolicy.FATAL) {
+         policyForXmlns = XmlViolationPolicy.ALTER_INFOSET;
+     }
+     setNamePolicy(xmlPolicy);
+     setXmlnsPolicy(policyForXmlns);
+     setContentSpacePolicy(xmlPolicy);
+     setContentNonXmlCharPolicy(xmlPolicy);
+     setCommentPolicy(xmlPolicy);
+ }
+
+ /**
+  * Returns the policy for non-NCName element and attribute names.
+  *
+  * @return the name policy
+  */
+ public XmlViolationPolicy getNamePolicy() {
+     return this.namePolicy;
+ }
+
+ /**
+  * Does nothing; the argument is ignored.
+  *
+  * @param bogusXmlnsPolicy ignored
+  * @deprecated
+  */
+ public void setBogusXmlnsPolicy(
+ XmlViolationPolicy bogusXmlnsPolicy) {
+ }
+
+ /**
+  * Always returns <code>XmlViolationPolicy.ALTER_INFOSET</code>; the value
+  * is a constant and does not depend on any parser state.
+  *
+  * @deprecated
+  * @return <code>XmlViolationPolicy.ALTER_INFOSET</code>
+  */
+ public XmlViolationPolicy getBogusXmlnsPolicy() {
+ return XmlViolationPolicy.ALTER_INFOSET;
+ }
+
+ /**
+  * Registers an additional character handler.
+  *
+  * <p>The handler is appended to this parser's own list and, when a driver
+  * exists, added to the driver as well.
+  *
+  * @param characterHandler the handler to add
+  */
+ public void addCharacterHandler(CharacterHandler characterHandler) {
+     characterHandlers.add(characterHandler);
+     if (driver == null) {
+         return;
+     }
+     driver.addCharacterHandler(characterHandler);
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlSerializer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlSerializer.java
new file mode 100644
index 000000000..3312398d5
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlSerializer.java
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2011 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.util.Arrays;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+ * A SAX <code>ContentHandler</code> and <code>LexicalHandler</code> that
+ * writes the events it receives to a <code>Writer</code> as HTML markup.
+ *
+ * <ul>
+ * <li><code>startDocument</code> writes <code>&lt;!DOCTYPE html&gt;</code>
+ * and a newline; <code>endDocument</code> flushes and closes the writer.</li>
+ * <li>Elements outside the XHTML, SVG and MathML namespaces, and all
+ * elements nested in them, have their tags and comments dropped; character
+ * data inside them is still written.</li>
+ * <li>HTML void elements get no end tag.</li>
+ * <li>Text inside HTML raw-text elements (<code>script</code>,
+ * <code>style</code>, ...) is written without escaping. Both this and the
+ * void-element rule apply to XHTML-namespace elements only, so that SVG or
+ * MathML elements that merely share a local name are serialized as ordinary
+ * elements with escaped text.</li>
+ * </ul>
+ */
+public class HtmlSerializer implements ContentHandler, LexicalHandler {
+
+    /** Local names of HTML elements serialized without an end tag; kept sorted for binary search. */
+    private static final String[] VOID_ELEMENTS = { "area", "base", "basefont",
+            "bgsound", "br", "col", "command", "embed", "frame", "hr", "img",
+            "input", "keygen", "link", "meta", "param", "source", "track",
+            "wbr" };
+
+    /** Local names of HTML elements whose text is written unescaped; kept sorted for binary search. */
+    private static final String[] NON_ESCAPING = { "iframe", "noembed",
+            "noframes", "noscript", "plaintext", "script", "style", "xmp" };
+
+    /** Wraps a byte stream in a UTF-8 writer. */
+    private static Writer wrap(OutputStream out) {
+        try {
+            return new OutputStreamWriter(out, "UTF-8");
+        } catch (UnsupportedEncodingException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /** Depth inside an element whose tags are being suppressed (0 = not suppressing). */
+    private int ignoreLevel = 0;
+
+    /** Depth inside a raw-text element, counting the element itself (0 = escaping text). */
+    private int escapeLevel = 0;
+
+    private final Writer writer;
+
+    /**
+     * Creates a serializer that writes UTF-8 to the given stream.
+     *
+     * @param out the destination stream
+     */
+    public HtmlSerializer(OutputStream out) {
+        this(wrap(out));
+    }
+
+    /**
+     * Creates a serializer that writes to the given writer.
+     *
+     * @param out the destination writer
+     */
+    public HtmlSerializer(Writer out) {
+        this.writer = out;
+    }
+
+    /**
+     * Writes character data, escaping <code>&lt;</code>, <code>&gt;</code>,
+     * <code>&amp;</code> and U+00A0 unless inside a raw-text element.
+     */
+    public void characters(char[] ch, int start, int length)
+            throws SAXException {
+        try {
+            if (escapeLevel > 0) {
+                writer.write(ch, start, length);
+            } else {
+                for (int i = start; i < start + length; i++) {
+                    char c = ch[i];
+                    switch (c) {
+                        case '<':
+                            writer.write("&lt;");
+                            break;
+                        case '>':
+                            writer.write("&gt;");
+                            break;
+                        case '&':
+                            writer.write("&amp;");
+                            break;
+                        case '\u00A0':
+                            writer.write("&nbsp;");
+                            break;
+                        default:
+                            writer.write(c);
+                            break;
+                    }
+                }
+            }
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /** Flushes and closes the underlying writer. */
+    public void endDocument() throws SAXException {
+        try {
+            writer.flush();
+            writer.close();
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /**
+     * Writes an end tag, unless the matching start tag was suppressed or
+     * belonged to a void element.
+     */
+    public void endElement(String uri, String localName, String qName)
+            throws SAXException {
+        if (escapeLevel > 0) {
+            escapeLevel--;
+        }
+        if (ignoreLevel > 0) {
+            ignoreLevel--;
+        } else {
+            try {
+                writer.write('<');
+                writer.write('/');
+                writer.write(localName);
+                writer.write('>');
+            } catch (IOException e) {
+                throw new SAXException(e);
+            }
+        }
+    }
+
+    /** Treated exactly like {@link #characters(char[], int, int)}. */
+    public void ignorableWhitespace(char[] ch, int start, int length)
+            throws SAXException {
+        characters(ch, start, length);
+    }
+
+    /** Processing instructions are not serialized. */
+    public void processingInstruction(String target, String data)
+            throws SAXException {
+    }
+
+    /** The locator is not used. */
+    public void setDocumentLocator(Locator locator) {
+    }
+
+    /** Writes the HTML doctype followed by a newline. */
+    public void startDocument() throws SAXException {
+        try {
+            writer.write("<!DOCTYPE html>\n");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /**
+     * Writes a start tag with its serializable attributes.
+     *
+     * <p>Attributes in no namespace are always written; XLink attributes are
+     * written with an <code>xlink:</code> prefix on non-XHTML elements; of
+     * the XML-namespace attributes only <code>lang</code> is written on
+     * XHTML elements (unprefixed), while other elements get them with an
+     * <code>xml:</code> prefix. All other attributes are dropped.
+     */
+    public void startElement(String uri, String localName, String qName,
+            Attributes atts) throws SAXException {
+        if (escapeLevel > 0) {
+            escapeLevel++;
+        }
+        boolean xhtml = "http://www.w3.org/1999/xhtml".equals(uri);
+        if (ignoreLevel > 0
+                || !(xhtml || "http://www.w3.org/2000/svg".equals(uri) || "http://www.w3.org/1998/Math/MathML".equals(uri))) {
+            ignoreLevel++;
+            return;
+        }
+        try {
+            writer.write('<');
+            writer.write(localName);
+            for (int i = 0; i < atts.getLength(); i++) {
+                String attUri = atts.getURI(i);
+                String attLocal = atts.getLocalName(i);
+                if (attUri.length() == 0) {
+                    writer.write(' ');
+                } else if (!xhtml
+                        && "http://www.w3.org/1999/xlink".equals(attUri)) {
+                    writer.write(" xlink:");
+                } else if ("http://www.w3.org/XML/1998/namespace".equals(attUri)) {
+                    if (xhtml) {
+                        if ("lang".equals(attLocal)) {
+                            writer.write(' ');
+                        } else {
+                            continue;
+                        }
+                    } else {
+                        writer.write(" xml:");
+                    }
+                } else {
+                    continue;
+                }
+                writer.write(attLocal);
+                writer.write('=');
+                writer.write('"');
+                String val = atts.getValue(i);
+                for (int j = 0; j < val.length(); j++) {
+                    char c = val.charAt(j);
+                    switch (c) {
+                        case '"':
+                            writer.write("&quot;");
+                            break;
+                        case '&':
+                            writer.write("&amp;");
+                            break;
+                        case '\u00A0':
+                            writer.write("&nbsp;");
+                            break;
+                        default:
+                            writer.write(c);
+                            break;
+                    }
+                }
+                writer.write('"');
+            }
+            writer.write('>');
+            // Void elements are an HTML concept: an SVG/MathML element that
+            // happens to share the local name must still get its end tag.
+            if (xhtml && Arrays.binarySearch(VOID_ELEMENTS, localName) >= 0) {
+                // Bumping ignoreLevel makes the matching endElement a no-op.
+                ignoreLevel++;
+                return;
+            }
+            if ("pre".equals(localName) || "textarea".equals(localName)
+                    || "listing".equals(localName)) {
+                writer.write('\n');
+            }
+            // Raw-text treatment likewise applies to HTML elements only;
+            // text in e.g. an SVG script or style element is ordinary,
+            // escaped text and would not round-trip if written literally.
+            if (xhtml && escapeLevel == 0
+                    && Arrays.binarySearch(NON_ESCAPING, localName) >= 0) {
+                escapeLevel = 1;
+            }
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /**
+     * Writes a comment, unless inside a suppressed element or a raw-text
+     * element.
+     */
+    public void comment(char[] ch, int start, int length) throws SAXException {
+        if (ignoreLevel > 0 || escapeLevel > 0) {
+            return;
+        }
+        try {
+            writer.write("<!--");
+            writer.write(ch, start, length);
+            writer.write("-->");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /** No-op. */
+    public void endCDATA() throws SAXException {
+    }
+
+    /** No-op. */
+    public void endDTD() throws SAXException {
+    }
+
+    /** No-op. */
+    public void endEntity(String name) throws SAXException {
+    }
+
+    /** No-op. */
+    public void startCDATA() throws SAXException {
+    }
+
+    /** No-op; the doctype is written by {@link #startDocument()}. */
+    public void startDTD(String name, String publicId, String systemId)
+            throws SAXException {
+    }
+
+    /** No-op. */
+    public void startEntity(String name) throws SAXException {
+    }
+
+    /** No-op. */
+    public void startPrefixMapping(String prefix, String uri)
+            throws SAXException {
+    }
+
+    /** No-op. */
+    public void endPrefixMapping(String prefix) throws SAXException {
+    }
+
+    /** No-op. */
+    public void skippedEntity(String name) throws SAXException {
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java
new file mode 100644
index 000000000..33e98dbe8
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+
+/**
+ * This subclass of <code>HtmlParser</code> simply provides a no-argument
+ * constructor that calls the constructor of the superclass with the
+ * <code>ALTER_INFOSET</code> policy. This is convenient when another Java
+ * component wants an implementation of <code>XMLReader</code> with a
+ * no-argument constructor and infoset coercion is the wanted behavior.
+ * The class adds no state or behavior of its own.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class InfosetCoercingHtmlParser extends HtmlParser {
+
+ /**
+ * Creates a parser by passing <code>XmlViolationPolicy.ALTER_INFOSET</code>
+ * to the superclass' constructor.
+ */
+ public InfosetCoercingHtmlParser() {
+ super(XmlViolationPolicy.ALTER_INFOSET);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java
new file mode 100644
index 000000000..b6cb2f872
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import java.io.OutputStream;
+import java.io.Writer;
+
+import nu.validator.htmlparser.impl.NCName;
+
+import org.xml.sax.SAXException;
+
+/**
+ * An <code>XmlSerializer</code> whose <code>checkNCName</code> hook throws
+ * for every name that <code>NCName.isNCName</code> does not accept.
+ */
+public class NameCheckingXmlSerializer extends XmlSerializer {
+
+    /**
+     * Creates a serializer by handing the stream to the superclass.
+     *
+     * @param out the stream passed to the <code>XmlSerializer</code> constructor
+     */
+    public NameCheckingXmlSerializer(OutputStream out) {
+        super(out);
+    }
+
+    /**
+     * Creates a serializer by handing the writer to the superclass.
+     *
+     * @param out the writer passed to the <code>XmlSerializer</code> constructor
+     */
+    public NameCheckingXmlSerializer(Writer out) {
+        super(out);
+    }
+
+    /**
+     * Rejects names that fail the <code>NCName.isNCName</code> test.
+     *
+     * @param name the name to validate
+     * @throws SAXException if <code>NCName.isNCName(name)</code> is false
+     * @see nu.validator.htmlparser.sax.XmlSerializer#checkNCName(java.lang.String)
+     */
+    @Override
+    protected void checkNCName(String name) throws SAXException {
+        if (NCName.isNCName(name)) {
+            return;
+        }
+        throw new SAXException("Not an XML 1.0 4th ed. NCName: " + name);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXStreamer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXStreamer.java
new file mode 100644
index 000000000..07ff5da4a
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXStreamer.java
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import nu.validator.htmlparser.impl.HtmlAttributes;
+import nu.validator.htmlparser.impl.TreeBuilder;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+import org.xml.sax.ext.LexicalHandler;
+
+class SAXStreamer extends TreeBuilder<Attributes>{
+
+ private static final char[] ISINDEX_PROMPT = "This is a searchable index. Enter search keywords: ".toCharArray();
+
+ private ContentHandler contentHandler = null;
+ private LexicalHandler lexicalHandler = null;
+
+ SAXStreamer() {
+ super();
+ }
+
+ @Override
+ protected void addAttributesToElement(Attributes element, HtmlAttributes attributes) throws SAXException {
+ Attributes existingAttrs = element;
+ for (int i = 0; i < attributes.getLength(); i++) {
+ String qName = attributes.getQNameNoBoundsCheck(i);
+ if (existingAttrs.getIndex(qName) < 0) {
+ fatal();
+ }
+ }
+ }
+
+ @Override
+ protected void appendCharacters(Attributes parent, char[] buf, int start, int length) throws SAXException {
+ contentHandler.characters(buf, start, length);
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#appendIsindexPrompt(java.lang.Object)
+ */
+ @Override protected void appendIsindexPrompt(Attributes parent)
+ throws SAXException {
+ contentHandler.characters(ISINDEX_PROMPT, 0, ISINDEX_PROMPT.length);
+ }
+
+ @Override
+ protected void appendChildrenToNewParent(Attributes oldParent, Attributes newParent) throws SAXException {
+ fatal();
+ }
+
+ @Override
+ protected void appendComment(Attributes parent, char[] buf, int start, int length) throws SAXException {
+ if (lexicalHandler != null) {
+ lexicalHandler.comment(buf, start, length);
+ }
+ }
+
+ @Override
+ protected void appendCommentToDocument(char[] buf, int start, int length)
+ throws SAXException {
+ if (lexicalHandler != null) {
+ lexicalHandler.comment(buf, start, length);
+ }
+ }
+
+ @Override
+ protected Attributes createElement(String ns, String name, HtmlAttributes attributes, Attributes intendedParent) throws SAXException {
+ return attributes;
+ }
+
+ @Override
+ protected Attributes createHtmlElementSetAsRoot(HtmlAttributes attributes) throws SAXException {
+ return attributes;
+ }
+
+ @Override
+ protected void detachFromParent(Attributes element) throws SAXException {
+ fatal();
+ }
+
+ @Override
+ protected void appendElement(Attributes child, Attributes newParent) throws SAXException {
+ }
+
+ @Override
+ protected boolean hasChildren(Attributes element) throws SAXException {
+ return false;
+ }
+
+ public void setContentHandler(ContentHandler handler) {
+ contentHandler = handler;
+ }
+
+ public void setLexicalHandler(LexicalHandler handler) {
+ lexicalHandler = handler;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#appendDoctypeToDocument(java.lang.String, java.lang.String, java.lang.String)
+ */
+ @Override
+ protected void appendDoctypeToDocument(String name, String publicIdentifier, String systemIdentifier) throws SAXException {
+ if (lexicalHandler != null) {
+ lexicalHandler.startDTD(name, publicIdentifier, systemIdentifier);
+ lexicalHandler.endDTD();
+ }
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#elementPopped(String, java.lang.String, java.lang.Object)
+ */
+ @Override
+ protected void elementPopped(String ns, String name, Attributes node) throws SAXException {
+ contentHandler.endElement(ns, name, name);
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#elementPushed(String, java.lang.String, java.lang.Object)
+ */
+ @Override
+ protected void elementPushed(String ns, String name, Attributes node) throws SAXException {
+ contentHandler.startElement(ns, name, name, node);
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#end()
+ */
+ @Override
+ protected void end() throws SAXException {
+ contentHandler.endDocument();
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#start()
+ */
+ @Override
+ protected void start(boolean fragment) throws SAXException {
+ contentHandler.setDocumentLocator(tokenizer);
+ if (!fragment) {
+ contentHandler.startDocument();
+ }
+ }
+
+ protected void fatal() throws SAXException {
+ SAXParseException spe = new SAXParseException(
+ "Cannot recover after last error. Any further errors will be ignored.",
+ tokenizer);
+ if (errorHandler != null) {
+ errorHandler.fatalError(spe);
+ }
+ throw spe;
+ }
+
+ @Override
+ protected Attributes createAndInsertFosterParentedElement(String ns, String name,
+ HtmlAttributes attributes, Attributes table, Attributes stackParent) throws SAXException {
+ fatal();
+ throw new RuntimeException("Unreachable");
+ }
+
+ @Override protected void insertFosterParentedCharacters(char[] buf,
+ int start, int length, Attributes table, Attributes stackParent)
+ throws SAXException {
+ fatal();
+ }
+
+ @Override protected void insertFosterParentedChild(Attributes child,
+ Attributes table, Attributes stackParent) throws SAXException {
+ fatal();
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXTreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXTreeBuilder.java
new file mode 100644
index 000000000..ef51d2a51
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXTreeBuilder.java
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import nu.validator.htmlparser.impl.HtmlAttributes;
+import nu.validator.htmlparser.impl.TreeBuilder;
+import nu.validator.saxtree.Characters;
+import nu.validator.saxtree.Comment;
+import nu.validator.saxtree.DTD;
+import nu.validator.saxtree.Document;
+import nu.validator.saxtree.DocumentFragment;
+import nu.validator.saxtree.Element;
+import nu.validator.saxtree.Node;
+import nu.validator.saxtree.ParentNode;
+
+import org.xml.sax.SAXException;
+
+/**
+ * A <code>TreeBuilder</code> that builds a <code>nu.validator.saxtree</code>
+ * tree with <code>Element</code> as the element handle type.
+ *
+ * For foster parenting, the node in front of the most recently used table
+ * is cached so that consecutive insertions in front of the same table do
+ * not have to look it up again. The cache is dropped whenever this class
+ * detaches a node or moves children, because after such an operation the
+ * cached node may no longer be the table's previous sibling.
+ */
+class SAXTreeBuilder extends TreeBuilder<Element> {
+
+    private static final char[] ISINDEX_PROMPT = "This is a searchable index. Enter search keywords: ".toCharArray();
+
+    private Document document;
+
+    /** Table that {@link #cachedTablePreviousSibling} belongs to, or null. */
+    private Node cachedTable = null;
+
+    /** Node directly in front of {@link #cachedTable}; null means "none". */
+    private Node cachedTablePreviousSibling = null;
+
+    SAXTreeBuilder() {
+        super();
+    }
+
+    @Override
+    protected void appendComment(Element parent, char[] buf, int start, int length) {
+        parent.appendChild(new Comment(tokenizer, buf, start, length));
+    }
+
+    @Override
+    protected void appendCommentToDocument(char[] buf, int start, int length) {
+        document.appendChild(new Comment(tokenizer, buf, start, length));
+    }
+
+    @Override
+    protected void appendCharacters(Element parent, char[] buf, int start, int length) {
+        parent.appendChild(new Characters(tokenizer, buf, start, length));
+    }
+
+    /**
+     * Appends the fixed isindex prompt text to <code>parent</code>.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#appendIsindexPrompt(java.lang.Object)
+     */
+    @Override
+    protected void appendIsindexPrompt(Element parent) throws SAXException {
+        parent.appendChild(new Characters(tokenizer, ISINDEX_PROMPT, 0, ISINDEX_PROMPT.length));
+    }
+
+    @Override
+    protected boolean hasChildren(Element element) {
+        return element.getFirstChild() != null;
+    }
+
+    @Override
+    protected void appendElement(Element child, Element newParent) {
+        newParent.appendChild(child);
+    }
+
+    /** Creates the XHTML <code>html</code> element and appends it to the document. */
+    @Override
+    protected Element createHtmlElementSetAsRoot(HtmlAttributes attributes) {
+        Element newElt = new Element(tokenizer, "http://www.w3.org/1999/xhtml", "html", "html", attributes, true, null);
+        document.appendChild(newElt);
+        return newElt;
+    }
+
+    /** Merges <code>attributes</code> into the element's existing attributes. */
+    @Override
+    protected void addAttributesToElement(Element element, HtmlAttributes attributes) throws SAXException {
+        HtmlAttributes existingAttrs = (HtmlAttributes) element.getAttributes();
+        existingAttrs.merge(attributes);
+    }
+
+    /**
+     * Appends a <code>DTD</code> node to the document, using the tokenizer
+     * as both its start and its end locator.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#appendDoctypeToDocument(java.lang.String, java.lang.String, java.lang.String)
+     */
+    @Override
+    protected void appendDoctypeToDocument(String name, String publicIdentifier, String systemIdentifier) {
+        DTD dtd = new DTD(tokenizer, name, publicIdentifier, systemIdentifier);
+        dtd.setEndLocator(tokenizer);
+        document.appendChild(dtd);
+    }
+
+    /**
+     * Returns the document and clears this builder's reference to it.
+     *
+     * @return the document
+     */
+    Document getDocument() {
+        Document rv = document;
+        document = null;
+        return rv;
+    }
+
+    /**
+     * Creates a new fragment, calls <code>appendChildren</code> on it with
+     * the document's first child, and clears this builder's reference to
+     * the document.
+     *
+     * @return the new fragment
+     */
+    DocumentFragment getDocumentFragment() {
+        DocumentFragment rv = new DocumentFragment();
+        rv.appendChildren(document.getFirstChild());
+        document = null;
+        return rv;
+    }
+
+    /**
+     * Sets the document's end locator and drops the foster-parenting cache.
+     *
+     * @throws SAXException
+     * @see nu.validator.htmlparser.impl.TreeBuilder#end()
+     */
+    @Override
+    protected void end() throws SAXException {
+        document.setEndLocator(tokenizer);
+        forgetCachedTable();
+    }
+
+    /**
+     * Creates a fresh document and drops the foster-parenting cache. The
+     * <code>fragment</code> flag is not used here.
+     */
+    @Override
+    protected void start(boolean fragment) {
+        document = new Document(tokenizer);
+        forgetCachedTable();
+    }
+
+    @Override
+    protected void appendChildrenToNewParent(Element oldParent, Element newParent) throws SAXException {
+        newParent.appendChildren(oldParent);
+        // The cached table may be among the moved children, so its cached
+        // neighbour can no longer be trusted.
+        forgetCachedTable();
+    }
+
+    @Override
+    protected Element createElement(String ns, String name, HtmlAttributes attributes,
+            Element intendedParent) throws SAXException {
+        return new Element(tokenizer, ns, name, name, attributes, true, null);
+    }
+
+    /**
+     * Creates an element and inserts it directly in front of
+     * <code>table</code>, or appends it to <code>stackParent</code> when the
+     * table has no parent.
+     */
+    @Override
+    protected Element createAndInsertFosterParentedElement(String ns, String name,
+            HtmlAttributes attributes, Element table, Element stackParent) throws SAXException {
+        ParentNode parent = table.getParentNode();
+        // always an element if not null
+        Element intendedParent = parent != null ? (Element) parent : stackParent;
+        Element child = createElement(ns, name, attributes, intendedParent);
+        insertFosterParented(child, table, stackParent);
+        return child;
+    }
+
+    /**
+     * Creates a character node and inserts it directly in front of
+     * <code>table</code>, or appends it to <code>stackParent</code> when the
+     * table has no parent.
+     */
+    @Override
+    protected void insertFosterParentedCharacters(char[] buf, int start, int length,
+            Element table, Element stackParent) throws SAXException {
+        insertFosterParented(new Characters(tokenizer, buf, start, length), table, stackParent);
+    }
+
+    /**
+     * Inserts <code>child</code> directly in front of <code>table</code>, or
+     * appends it to <code>stackParent</code> when the table has no parent.
+     */
+    @Override
+    protected void insertFosterParentedChild(Element child, Element table,
+            Element stackParent) throws SAXException {
+        insertFosterParented(child, table, stackParent);
+    }
+
+    /** Shared insertion step of the three foster-parenting operations. */
+    private void insertFosterParented(Node child, Element table, Element stackParent) {
+        ParentNode parent = table.getParentNode();
+        if (parent == null) {
+            stackParent.appendChild(child);
+            return;
+        }
+        parent.insertBetween(child, previousSibling(table), table);
+        // The node just inserted is now the one in front of the table.
+        cachedTablePreviousSibling = child;
+    }
+
+    /**
+     * Returns the node in front of <code>table</code>, answering from the
+     * cache when <code>table</code> is the cached table and otherwise asking
+     * the table and caching the answer.
+     */
+    private Node previousSibling(Node table) {
+        if (table != cachedTable) {
+            cachedTable = table;
+            cachedTablePreviousSibling = table.getPreviousSibling();
+        }
+        return cachedTablePreviousSibling;
+    }
+
+    /** Drops the cached table and its cached previous sibling. */
+    private void forgetCachedTable() {
+        cachedTable = null;
+        cachedTablePreviousSibling = null;
+    }
+
+    @Override
+    protected void detachFromParent(Element element) throws SAXException {
+        element.detach();
+        // The detached node may be the cached table or the node cached as
+        // its previous sibling; force the next lookup to ask the tree.
+        forgetCachedTable();
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/XmlSerializer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/XmlSerializer.java
new file mode 100644
index 000000000..5dccf5d3a
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/XmlSerializer.java
@@ -0,0 +1,737 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CodingErrorAction;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.Set;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+public class XmlSerializer implements ContentHandler, LexicalHandler {
+
+ /**
+  * A namespace declaration: a prefix together with the URI it is bound
+  * to. Equality and hashing consider the prefix only, so a set of
+  * mappings holds at most one entry per prefix.
+  */
+ private final class PrefixMapping {
+     public final String uri;
+
+     public final String prefix;
+
+     /**
+      * @param uri the namespace URI
+      * @param prefix the prefix bound to the URI
+      */
+     public PrefixMapping(String uri, String prefix) {
+         this.prefix = prefix;
+         this.uri = uri;
+     }
+
+     /**
+      * Two mappings are equal when their prefixes are equal; the URI is
+      * ignored.
+      *
+      * @see java.lang.Object#equals(java.lang.Object)
+      */
+     @Override
+     public final boolean equals(Object obj) {
+         if (!(obj instanceof PrefixMapping)) {
+             return false;
+         }
+         return this.prefix.equals(((PrefixMapping) obj).prefix);
+     }
+
+     /**
+      * Hash of the prefix, matching {@link #equals(Object)}.
+      *
+      * @see java.lang.Object#hashCode()
+      */
+     @Override
+     public final int hashCode() {
+         return prefix.hashCode();
+     }
+
+ }
+
+ /**
+  * One entry of the serializer's stack: a namespace URI, a prefix, a
+  * qualified name, and a set of prefix mappings that starts out empty.
+  */
+ private final class StackNode {
+     public final String uri;
+
+     public final String prefix;
+
+     public final String qName;
+
+     public final Set<PrefixMapping> mappings;
+
+     /**
+      * @param uri the namespace URI
+      * @param qName the qualified name
+      * @param prefix the prefix
+      */
+     public StackNode(String uri, String qName, String prefix) {
+         this.mappings = new HashSet<PrefixMapping>();
+         this.uri = uri;
+         this.prefix = prefix;
+         this.qName = qName;
+     }
+ }
+
+ /**
+  * Preferred prefixes, keyed by namespace URI, consulted when a prefix has
+  * to be generated for an attribute namespace. Each URI appears once.
+  */
+ private final static Map<String, String> WELL_KNOWN_ATTRIBUTE_PREFIXES = new HashMap<String, String>();
+
+ static {
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put("adobe:ns:meta/", "x");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+             "http://inkscape.sourceforge.net/DTD/sodipodi-0.dtd",
+             "sodipodi");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+             "http://ns.adobe.com/AdobeIllustrator/10.0/", "i");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+             "http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/", "a");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+             "http://ns.adobe.com/Extensibility/1.0/", "x");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+             "http://ns.adobe.com/illustrator/1.0/", "illustrator");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/pdf/1.3/", "pdf");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/photoshop/1.0/",
+             "photoshop");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/tiff/1.0/",
+             "tiff");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/", "xap");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/g/",
+             "xapG");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/mm/",
+             "xapMM");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+             "http://ns.adobe.com/xap/1.0/rights/", "xapRights");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+             "http://ns.adobe.com/xap/1.0/sType/Dimensions#", "stDim");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+             "http://ns.adobe.com/xap/1.0/sType/ResourceRef#", "stRef");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/t/pg/",
+             "xapTPg");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://purl.org/dc/elements/1.1/",
+             "dc");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+             "http://schemas.microsoft.com/visio/2003/SVGExtensions/", "v");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+             "http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd",
+             "sodipodi");
+     // NOTE(review): this key has no "www." - presumably a misspelling of
+     // the XLink namespace that occurs in real documents; confirm.
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://w3.org/1999/xlink", "xlink");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://www.carto.net/attrib/",
+             "attrib");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+             "http://www.iki.fi/pav/software/textext/", "textext");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+             "http://www.inkscape.org/namespaces/inkscape", "inkscape");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+             "http://www.justsystem.co.jp/hanako13/svg", "jsh");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+             "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://www.w3.org/1999/xlink",
+             "xlink");
+     WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+             "http://www.w3.org/2001/XMLSchema-instance", "xsi");
+ }
+
+ /**
+ * Preferred prefixes, keyed by namespace URI, that
+ * <code>startElement</code> looks up when choosing the prefix for an
+ * element in a non-empty namespace. Several URIs share a prefix (for
+ * example "sch", "sodipodi", "x", "v" and "cc").
+ */
+ private final static Map<String, String> WELL_KNOWN_ELEMENT_PREFIXES = new HashMap<String, String>();
+
+ static {
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.w3.org/1999/XSL/Transform",
+ "xsl");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://purl.org/dc/elements/1.1/",
+ "dc");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://www.w3.org/2001/XMLSchema-instance", "xsi");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.ascc.net/xml/schematron",
+ "sch");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://purl.oclc.org/dsdl/schematron",
+ "sch");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://www.inkscape.org/namespaces/inkscape", "inkscape");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd",
+ "sodipodi");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/", "a");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://ns.adobe.com/AdobeIllustrator/10.0/", "i");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("adobe:ns:meta/", "x");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/", "xap");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/pdf/1.3/", "pdf");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/tiff/1.0/", "tiff");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://creativecommons.org/ns#", "cc");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://inkscape.sourceforge.net/DTD/sodipodi-0.dtd",
+ "sodipodi");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/", "Iptc4xmpCore");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/exif/1.0/", "exif");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://ns.adobe.com/Extensibility/1.0/", "x");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/illustrator/1.0/",
+ "illustrator");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/pdfx/1.3/", "pdfx");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/photoshop/1.0/",
+ "photoshop");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/Variables/1.0/",
+ "v");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/g/",
+ "xapG");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/g/img/",
+ "xapGImg");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/mm/",
+ "xapMM");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/rights/",
+ "xapRights");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://ns.adobe.com/xap/1.0/sType/Dimensions#", "stDim");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://ns.adobe.com/xap/1.0/sType/Font#", "stFnt");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://ns.adobe.com/xap/1.0/sType/ResourceRef#", "stRef");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/t/pg/",
+ "xapTPg");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://product.corel.com/CGS/11/cddns/", "odm");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://schemas.microsoft.com/visio/2003/SVGExtensions/", "v");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://web.resource.org/cc/", "cc");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://www.freesoftware.fsf.org/bkchem/cdml", "cdml");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.opengis.net/gml", "gml");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.svgmaker.com/svgns",
+ "svgmaker");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://www.w3.org/2000/01/rdf-schema#", "rdfs");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://xmlns.com/foaf/0.1/", "foaf");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.xml-cml.org/schema/stmml",
+ "stm");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.iupac.org/foo/ichi", "ichi");
+ }
+
+ /**
+  * Wraps a byte stream in a writer that encodes as UTF-8 and substitutes
+  * the encoded form of U+FFFD for malformed or unmappable input instead
+  * of failing.
+  *
+  * @param out the stream to write encoded bytes to
+  * @return a writer on top of <code>out</code>
+  */
+ private final static Writer wrap(OutputStream out) {
+     byte[] replacement;
+     try {
+         replacement = "\uFFFD".getBytes("utf-8");
+     } catch (UnsupportedEncodingException e) {
+         throw new RuntimeException(e);
+     }
+     CharsetEncoder utf8Encoder = Charset.forName("utf-8").newEncoder()
+             .onMalformedInput(CodingErrorAction.REPLACE)
+             .onUnmappableCharacter(CodingErrorAction.REPLACE)
+             .replaceWith(replacement);
+     return new OutputStreamWriter(out, utf8Encoder);
+ }
+
+ // grows from head: push() uses addFirst and pop() uses removeFirst, so
+ // getFirst() is always the most recently pushed entry
+ private final LinkedList<StackNode> stack = new LinkedList<StackNode>();
+
+ // destination of everything this serializer writes
+ private final Writer writer;
+
+ /**
+ * Creates a serializer that writes to the given stream through the
+ * writer returned by <code>wrap(out)</code>.
+ *
+ * @param out the stream to serialize to
+ */
+ public XmlSerializer(OutputStream out) {
+ this(wrap(out));
+ }
+
+ /**
+ * Creates a serializer that writes to the given writer.
+ *
+ * @param out the writer to serialize to
+ */
+ public XmlSerializer(Writer out) {
+ this.writer = out;
+ }
+
+ /**
+ * Hook for validating names before they are written. It is called with
+ * element local names, attribute local names and processing instruction
+ * targets. This implementation is empty, so every name is accepted; a
+ * subclass may override it to throw.
+ *
+ * @param name the name about to be written
+ * @throws SAXException never thrown by this implementation
+ */
+ protected void checkNCName(String name) throws SAXException {
+
+ }
+
+ private final void push(String uri, String local, String prefix) {
+ stack.addFirst(new StackNode(uri, local, prefix));
+ }
+
+ /**
+  * Removes the head entry of the stack and then empties the prefix
+  * mappings of the entry that becomes the new head.
+  *
+  * @return the <code>qName</code> of the removed entry
+  */
+ private final String pop() {
+     StackNode removed = stack.removeFirst();
+     StackNode newHead = stack.getFirst();
+     newHead.mappings.clear();
+     return removed.qName;
+ }
+
+ private final String lookupPrefixAttribute(String ns) {
+ if ("http://www.w3.org/XML/1998/namespace".equals(ns)) {
+ return "xml";
+ }
+ Set<String> hidden = new HashSet<String>();
+ for (StackNode node : stack) {
+ for (PrefixMapping mapping : node.mappings) {
+ if (mapping.prefix.length() != 0 && mapping.uri.equals(ns)
+ && !hidden.contains(mapping.prefix)) {
+ return mapping.prefix;
+ }
+ hidden.add(mapping.prefix);
+ }
+ }
+ return null;
+ }
+
+ /**
+  * Finds the URI of the first mapping with the given prefix, walking the
+  * stack from the head.
+  *
+  * @param prefix the prefix to resolve
+  * @return the bound URI, or <code>null</code> if no mapping has the prefix
+  */
+ private final String lookupUri(String prefix) {
+     for (StackNode frame : stack) {
+         for (PrefixMapping declared : frame.mappings) {
+             if (!declared.prefix.equals(prefix)) {
+                 continue;
+             }
+             return declared.uri;
+         }
+     }
+     return null;
+ }
+
+ /**
+  * Tells whether a qualified name is a namespace declaration, that is
+  * exactly "xmlns" or something starting with "xmlns:".
+  *
+  * @param name the qualified name; may be <code>null</code>
+  * @return <code>true</code> for namespace declarations, <code>false</code>
+  *         otherwise (including for <code>null</code>)
+  */
+ private final boolean xmlNsQname(String name) {
+     return name != null
+             && ("xmlns".equals(name) || name.startsWith("xmlns:"));
+ }
+
+ /**
+  * Writes an attribute value without the surrounding quotes. The
+  * characters &lt;, &gt;, &amp; and the double quote, as well as CR, TAB
+  * and LF, are written as references. A space is written as a character
+  * reference when it is the first or last character or directly follows
+  * a space that was written literally. U+FFFE, U+FFFF and the remaining
+  * characters below U+0020 are replaced with U+FFFD.
+  *
+  * @param val the attribute value
+  * @throws IOException if the writer fails
+  */
+ private final void writeAttributeValue(String val) throws IOException {
+     // starts out true so that a leading space is escaped
+     boolean afterLiteralSpace = true;
+     final int lastIndex = val.length() - 1;
+     for (int pos = 0; pos <= lastIndex; pos++) {
+         final char c = val.charAt(pos);
+         boolean wroteLiteralSpace = false;
+         switch (c) {
+             case '<':
+                 writer.write("&lt;");
+                 break;
+             case '>':
+                 writer.write("&gt;");
+                 break;
+             case '&':
+                 writer.write("&amp;");
+                 break;
+             case '"':
+                 writer.write("&quot;");
+                 break;
+             case '\r':
+                 writer.write("&#xD;");
+                 break;
+             case '\t':
+                 writer.write("&#x9;");
+                 break;
+             case '\n':
+                 writer.write("&#xA;");
+                 break;
+             case ' ':
+                 if (afterLiteralSpace || pos == lastIndex) {
+                     writer.write("&#x20;");
+                 } else {
+                     writer.write(' ');
+                     wroteLiteralSpace = true;
+                 }
+                 break;
+             case '\uFFFE':
+             case '\uFFFF':
+                 writer.write('\uFFFD');
+                 break;
+             default:
+                 writer.write(c < ' ' ? '\uFFFD' : c);
+                 break;
+         }
+         afterLiteralSpace = wroteLiteralSpace;
+     }
+ }
+
+ /**
+  * Declares a prefix for an attribute namespace. The well-known prefix
+  * for the URI is tried first; if there is none, or <code>lookupUri</code>
+  * finds a mapping for it, "p0", "p1", ... are tried in order until one
+  * has no mapping.
+  *
+  * @param uri the namespace URI that needs a prefix
+  * @throws SAXException if <code>startPrefixMappingPrivate</code> throws
+  */
+ private final void generatePrefix(String uri) throws SAXException {
+     String candidate = WELL_KNOWN_ATTRIBUTE_PREFIXES.get(uri);
+     int next = 0;
+     while (candidate == null || lookupUri(candidate) != null) {
+         candidate = "p" + next;
+         next++;
+     }
+     startPrefixMappingPrivate(candidate, uri);
+ }
+
+ /**
+  * Writes character data. The characters &lt;, &gt; and &amp; and CR are
+  * written as references, TAB and LF are written unchanged, and U+FFFE,
+  * U+FFFF and the remaining characters below U+0020 are replaced with
+  * U+FFFD.
+  *
+  * @param ch the buffer holding the characters
+  * @param start index of the first character to write
+  * @param length number of characters to write
+  * @throws SAXException wrapping any <code>IOException</code> from the writer
+  */
+ public final void characters(char[] ch, int start, int length)
+         throws SAXException {
+     final int end = start + length;
+     try {
+         for (int pos = start; pos < end; pos++) {
+             final char c = ch[pos];
+             switch (c) {
+                 case '<':
+                     writer.write("&lt;");
+                     break;
+                 case '>':
+                     writer.write("&gt;");
+                     break;
+                 case '&':
+                     writer.write("&amp;");
+                     break;
+                 case '\r':
+                     writer.write("&#xD;");
+                     break;
+                 case '\t':
+                 case '\n':
+                     writer.write(c);
+                     break;
+                 case '\uFFFE':
+                 case '\uFFFF':
+                     writer.write('\uFFFD');
+                     break;
+                 default:
+                     writer.write(c < ' ' ? '\uFFFD' : c);
+                     break;
+             }
+         }
+     } catch (IOException e) {
+         throw new SAXException(e);
+     }
+ }
+
+ /**
+ * Clears the stack, then flushes and closes the writer.
+ *
+ * @throws SAXException wrapping any <code>IOException</code> from the writer
+ */
+ public final void endDocument() throws SAXException {
+ try {
+ stack.clear();
+ writer.flush();
+ // the writer is closed here, so nothing can be written afterwards
+ writer.close();
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ }
+
+ /**
+  * Writes an end tag. The tag name is the qualified name returned by
+  * <code>pop()</code>; the three arguments are not used.
+  *
+  * @throws SAXException wrapping any <code>IOException</code> from the writer
+  */
+ public final void endElement(String uri, String localName, String qName)
+         throws SAXException {
+     try {
+         writer.write("</");
+         String closedName = pop();
+         writer.write(closedName);
+         writer.write('>');
+     } catch (IOException e) {
+         throw new SAXException(e);
+     }
+ }
+
+ /**
+ * Handles ignorable whitespace exactly like ordinary character data by
+ * delegating to <code>characters</code>.
+ */
+ public final void ignorableWhitespace(char[] ch, int start, int length)
+ throws SAXException {
+ characters(ch, start, length);
+ }
+
+ /**
+  * Writes a processing instruction. The target is passed to
+  * <code>checkNCName</code> first. In the data, a "&gt;" that directly
+  * follows a "?" gets a space in front of it so that the instruction
+  * cannot end early, CR and LF are both written as LF, TAB is written
+  * unchanged, and U+FFFE, U+FFFF and the remaining characters below
+  * U+0020 are replaced with U+FFFD.
+  *
+  * @param target the processing instruction target
+  * @param data the processing instruction data; <code>null</code>, which
+  *        SAX uses when no data was supplied, is treated as empty
+  * @throws SAXException if the name check fails, or wrapping any
+  *         <code>IOException</code> from the writer
+  */
+ public final void processingInstruction(String target, String data)
+         throws SAXException {
+     try {
+         checkNCName(target);
+         writer.write("<?");
+         writer.write(target);
+         writer.write(' ');
+         // SAX passes null when the instruction carries no data
+         final int dataLength = (data == null) ? 0 : data.length();
+         boolean prevWasQuestionmark = false;
+         for (int i = 0; i < dataLength; i++) {
+             char c = data.charAt(i);
+             switch (c) {
+                 case '?':
+                     writer.write('?');
+                     prevWasQuestionmark = true;
+                     break;
+                 case '>':
+                     if (prevWasQuestionmark) {
+                         // break up "?>" inside the data
+                         writer.write(" >");
+                     } else {
+                         writer.write('>');
+                     }
+                     prevWasQuestionmark = false;
+                     break;
+                 case '\t':
+                     writer.write('\t');
+                     prevWasQuestionmark = false;
+                     break;
+                 case '\r':
+                 case '\n':
+                     writer.write('\n');
+                     prevWasQuestionmark = false;
+                     break;
+                 case '\uFFFE':
+                 case '\uFFFF':
+                     writer.write('\uFFFD');
+                     prevWasQuestionmark = false;
+                     break;
+                 default:
+                     if (c < ' ') {
+                         writer.write('\uFFFD');
+                     } else {
+                         writer.write(c);
+                     }
+                     prevWasQuestionmark = false;
+                     break;
+             }
+         }
+         writer.write("?>");
+     } catch (IOException e) {
+         throw new SAXException(e);
+     }
+ }
+
+ /**
+ * Does nothing; the locator is not used by this serializer.
+ */
+ public final void setDocumentLocator(Locator locator) {
+ }
+
+ /**
+ * Writes the XML declaration and resets the stack so that it holds a
+ * single entry whose fields are all <code>null</code>.
+ *
+ * @throws SAXException wrapping any <code>IOException</code> from the writer
+ */
+ public final void startDocument() throws SAXException {
+ try {
+ writer.write("<?xml version='1.0' encoding='utf-8'?>\n");
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ stack.clear();
+ // bottom entry: pop() calls stack.getFirst() after removing the head,
+ // so there must always be one entry below the outermost element
+ push(null, null, null);
+ }
+
+ /**
+ * Writes a start tag: picks a prefix for the element, makes sure every
+ * namespaced attribute has a prefix, writes the namespace declarations
+ * found on the current head of the stack, writes the attributes, and
+ * finally pushes a new stack entry for the element.
+ *
+ * @param uri the element's namespace URI; the empty string means no namespace
+ * @param localName the element's local name, passed to <code>checkNCName</code>
+ * @param q not used; the qualified name is computed here
+ * @param atts the element's attributes
+ * @throws SAXException if a name check fails, or wrapping any
+ * <code>IOException</code> from the writer
+ */
+ public final void startElement(String uri, String localName, String q,
+ Attributes atts) throws SAXException {
+ checkNCName(localName);
+ String prefix;
+ String qName;
+ if (uri.length() == 0) {
+ prefix = "";
+ qName = localName;
+ // generate xmlns
+ // NOTE(review): startPrefixMappingPrivate is defined outside this
+ // view; presumably it records the mapping on the head of the stack,
+ // since that is where the declarations are read from below - confirm
+ startPrefixMappingPrivate(prefix, uri);
+ } else {
+ prefix = WELL_KNOWN_ELEMENT_PREFIXES.get(uri);
+ if (prefix == null) {
+ prefix = "";
+ }
+ // fall back to the default namespace when the preferred prefix is
+ // already mapped to a different URI
+ String lookup = lookupUri(prefix);
+ if (lookup != null && !lookup.equals(uri)) {
+ prefix = "";
+ }
+ startPrefixMappingPrivate(prefix, uri);
+ if (prefix.length() == 0) {
+ qName = localName;
+ } else {
+ qName = prefix + ':' + localName;
+ }
+ }
+
+ // first pass over the attributes: make sure each attribute namespace
+ // has a prefix before any declaration is written
+ int attLen = atts.getLength();
+ for (int i = 0; i < attLen; i++) {
+ String attUri = atts.getURI(i);
+ if (attUri.length() == 0
+ || "http://www.w3.org/XML/1998/namespace".equals(attUri)
+ || "http://www.w3.org/2000/xmlns/".equals(attUri)
+ || atts.getLocalName(i).length() == 0
+ || xmlNsQname(atts.getQName(i))) {
+ continue;
+ }
+ if (lookupPrefixAttribute(attUri) == null) {
+ generatePrefix(attUri);
+ }
+ }
+
+ try {
+ writer.write('<');
+ writer.write(qName);
+ // namespace declarations stored on the current head of the stack
+ for (PrefixMapping mapping : stack.getFirst().mappings) {
+ writer.write(' ');
+ if (mapping.prefix.length() == 0) {
+ writer.write("xmlns");
+ } else {
+ writer.write("xmlns:");
+ writer.write(mapping.prefix);
+ }
+ writer.write('=');
+ writer.write('"');
+ writeAttributeValue(mapping.uri);
+ writer.write('"');
+ }
+
+ // second pass: write the attributes; attributes in the XML or xmlns
+ // namespaces, with an empty local name, or named like a namespace
+ // declaration are skipped
+ for (int i = 0; i < attLen; i++) {
+ String attUri = atts.getURI(i);
+ if ("http://www.w3.org/XML/1998/namespace".equals(attUri)
+ || "http://www.w3.org/2000/xmlns/".equals(attUri)
+ || atts.getLocalName(i).length() == 0
+ || xmlNsQname(atts.getQName(i))) {
+ continue;
+ }
+ writer.write(' ');
+ if (attUri.length() != 0) {
+ writer.write(lookupPrefixAttribute(attUri));
+ writer.write(':');
+ }
+ String attLocal = atts.getLocalName(i);
+ checkNCName(attLocal);
+ writer.write(attLocal);
+ writer.write('=');
+ writer.write('"');
+ writeAttributeValue(atts.getValue(i));
+ writer.write('"');
+ }
+ writer.write('>');
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ // the new entry starts with no mappings of its own
+ push(uri, qName, prefix);
+ }
+
+ /**
+  * Writes a comment, adjusting its content as it goes:
+  * a space is inserted between consecutive hyphens, and before the closing
+  * delimiter when the content ends in a hyphen; CR and LF are both written
+  * as LF; tab is kept; any other character below U+0020, as well as U+FFFE
+  * and U+FFFF, is written as U+FFFD.
+  *
+  * @param ch the character buffer
+  * @param start index of the first comment character
+  * @param length number of comment characters
+  * @throws SAXException wrapping any <code>IOException</code> from the writer
+  */
+ public final void comment(char[] ch, int start, int length) throws SAXException {
+     try {
+         boolean lastWasHyphen = false;
+         writer.write("<!--");
+         for (int pos = start; pos < start + length; pos++) {
+             final char current = ch[pos];
+             if (current == '-') {
+                 // never emit two adjacent hyphens
+                 writer.write(lastWasHyphen ? " -" : "-");
+                 lastWasHyphen = true;
+                 continue;
+             }
+             if (current == '\r' || current == '\n') {
+                 writer.write('\n');
+             } else if (current == '\t'
+                     || (current >= ' ' && current != '\uFFFE' && current != '\uFFFF')) {
+                 writer.write(current);
+             } else {
+                 writer.write('\uFFFD');
+             }
+             lastWasHyphen = false;
+         }
+         if (lastWasHyphen) {
+             writer.write(' ');
+         }
+         writer.write("-->");
+     } catch (IOException ioe) {
+         throw new SAXException(ioe);
+     }
+ }
+
+ public final void endCDATA() throws SAXException {
+ }
+
+ public final void endDTD() throws SAXException {
+ }
+
+ public final void endEntity(String name) throws SAXException {
+ }
+
+ public final void startCDATA() throws SAXException {
+ }
+
+ public final void startDTD(String name, String publicId, String systemId)
+ throws SAXException {
+ }
+
+ public final void startEntity(String name) throws SAXException {
+ }
+
+ /**
+  * Records a namespace declaration for the element about to be started.
+  *
+  * <p>Nothing is recorded for the empty prefix, for a prefix that
+  * <code>lookupUri</code> already resolves to <code>uri</code>, or for the
+  * <code>xml</code> prefix bound to the XML namespace.
+  *
+  * @param prefix the prefix being declared
+  * @param uri the namespace URI the prefix is bound to
+  * @throws SAXException if a reserved namespace URI is declared, if a
+  *         prefix is bound to the empty namespace, or if the top stack
+  *         entry already contains an equal mapping
+  */
+ public final void startPrefixMapping(String prefix, String uri)
+         throws SAXException {
+     if (prefix.length() == 0 || uri.equals(lookupUri(prefix))) {
+         return;
+     }
+     if ("http://www.w3.org/XML/1998/namespace".equals(uri)) {
+         if ("xml".equals(prefix)) {
+             return;
+         }
+         throw new SAXException("Attempt to declare a reserved NS uri.");
+     }
+     if ("http://www.w3.org/2000/xmlns/".equals(uri)) {
+         throw new SAXException("Attempt to declare a reserved NS uri.");
+     }
+     // The prefix is known to be non-empty here (see the first guard).
+     if (uri.length() == 0) {
+         throw new SAXException("Can't bind a prefix to no namespace.");
+     }
+     checkNCName(prefix);
+     Set<PrefixMapping> declarations = stack.getFirst().mappings;
+     PrefixMapping mapping = new PrefixMapping(uri, prefix);
+     if (declarations.contains(mapping)) {
+         throw new SAXException(
+                 "Attempt to map one prefix to two URIs on one element.");
+     }
+     declarations.add(mapping);
+ }
+
+ /**
+  * Adds a prefix-to-URI mapping to the top stack entry unless
+  * <code>lookupUri</code> already resolves the prefix to that URI.
+  * No validation of the prefix or the URI is performed here.
+  *
+  * @param prefix the prefix (may be empty)
+  * @param uri the namespace URI
+  * @throws SAXException declared for callers; not thrown by the visible code
+  */
+ public final void startPrefixMappingPrivate(String prefix, String uri)
+         throws SAXException {
+     final boolean alreadyBound = uri.equals(lookupUri(prefix));
+     if (!alreadyBound) {
+         stack.getFirst().mappings.add(new PrefixMapping(uri, prefix));
+     }
+ }
+
+ public final void endPrefixMapping(String prefix) throws SAXException {
+ }
+
+ public final void skippedEntity(String name) throws SAXException {
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/package.html
new file mode 100644
index 000000000..60532962f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/package.html
@@ -0,0 +1,29 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>This package provides an HTML5 parser that exposes the document through the SAX API.</p>
+</body>
+</html> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPointer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPointer.java
new file mode 100644
index 000000000..6dcff5600
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPointer.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.xom;
+
+import nu.xom.Element;
+
+/**
+ * Interface for elements that have an associated form pointer.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface FormPointer {
+
+ /**
+  * Returns the form element associated with this object.
+  *
+  * @return the form
+  */
+ Element getForm();
+
+ /**
+  * Associates a form element with this object.
+  *
+  * @param form the form to set
+  */
+ void setForm(Element form);
+
+} \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPtrElement.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPtrElement.java
new file mode 100644
index 000000000..2e2e18df7
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPtrElement.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.xom;
+
+import nu.xom.Element;
+
+/**
+ * Element with an associated form.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class FormPtrElement extends Element implements FormPointer {
+
+ private Element form = null;
+
+ /**
+  * Copy constructor (<code>FormPointer</code>-aware). When the source
+  * element is itself a <code>FormPointer</code>, its form reference is
+  * carried over to the copy; otherwise the form stays <code>null</code>.
+  *
+  * @param elt the element to copy
+  */
+ public FormPtrElement(Element elt) {
+     super(elt);
+     if (elt instanceof FormPointer) {
+         // Assign the field directly rather than calling the overridable
+         // setForm(): a subclass override would otherwise run before the
+         // subclass constructor has finished.
+         this.form = ((FormPointer) elt).getForm();
+     }
+ }
+
+ /**
+  * Creates an element with a null form.
+  *
+  * @param name the name passed to the <code>Element</code> constructor
+  * @param uri the URI passed to the <code>Element</code> constructor
+  */
+ public FormPtrElement(String name, String uri) {
+     super(name, uri);
+ }
+
+ /**
+  * Full constructor.
+  *
+  * @param name the name passed to the <code>Element</code> constructor
+  * @param uri the URI passed to the <code>Element</code> constructor
+  * @param form the associated form (may be <code>null</code>)
+  */
+ public FormPtrElement(String name, String uri, Element form) {
+     super(name, uri);
+     this.form = form;
+ }
+
+ /**
+  * Gets the form.
+  *
+  * @return the associated form, or <code>null</code> if none has been set
+  * @see nu.validator.htmlparser.xom.FormPointer#getForm()
+  */
+ public Element getForm() {
+     return form;
+ }
+
+ /**
+  * Sets the form.
+  *
+  * @param form the form to associate with this element
+  * @see nu.validator.htmlparser.xom.FormPointer#setForm(nu.xom.Element)
+  */
+ public void setForm(Element form) {
+     this.form = form;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/HtmlBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/HtmlBuilder.java
new file mode 100644
index 000000000..845ea15cf
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/HtmlBuilder.java
@@ -0,0 +1,773 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007-2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.xom;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringReader;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.LinkedList;
+import java.util.List;
+
+import nu.validator.htmlparser.common.CharacterHandler;
+import nu.validator.htmlparser.common.DoctypeExpectation;
+import nu.validator.htmlparser.common.DocumentModeHandler;
+import nu.validator.htmlparser.common.Heuristics;
+import nu.validator.htmlparser.common.TokenHandler;
+import nu.validator.htmlparser.common.TransitionHandler;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
+import nu.validator.htmlparser.impl.Tokenizer;
+import nu.validator.htmlparser.io.Driver;
+import nu.xom.Builder;
+import nu.xom.Document;
+import nu.xom.Nodes;
+import nu.xom.ParsingException;
+import nu.xom.ValidityException;
+
+import org.xml.sax.EntityResolver;
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+/**
+ * This class implements an HTML5 parser that exposes data through the XOM
+ * interface.
+ *
+ * <p>By default, when using the constructor without arguments, this
+ * parser treats XML 1.0 infoset violations as fatal. This corresponds to
+ * <code>FATAL</code> as the general XML violation policy. It is possible
+ * to coerce XML 1.0-incompatible infosets into XML 1.0-compatible infosets
+ * instead by setting the general XML violation policy to
+ * <code>ALTER_INFOSET</code>.
+ *
+ * <p>The doctype is not represented in the tree.
+ *
+ * <p>The document mode is represented via the <code>Mode</code>
+ * interface on the <code>Document</code> node if the node implements
+ * that interface (depends on the used node factory).
+ *
+ * <p>The form pointer is stored if the node factory supports storing it.
+ *
+ * <p>This package has its own node factory class because the official
+ * XOM node factory may return multiple nodes instead of one confusing
+ * the assumptions of the DOM-oriented HTML5 parsing algorithm.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class HtmlBuilder extends Builder {
+
+ private Driver driver;
+
+ private final XOMTreeBuilder treeBuilder;
+
+ private final SimpleNodeFactory simpleNodeFactory;
+
+ private EntityResolver entityResolver;
+
+ private ErrorHandler errorHandler = null;
+
+ private DocumentModeHandler documentModeHandler = null;
+
+ private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;
+
+ private boolean checkingNormalization = false;
+
+ private boolean scriptingEnabled = false;
+
+ private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>();
+
+ private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW;
+
+ private boolean html4ModeCompatibleWithXhtml1Schemata = false;
+
+ private boolean mappingLangToXmlLang = false;
+
+ private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL;
+
+ private boolean reportingDoctype = true;
+
+ private ErrorHandler treeBuilderErrorHandler = null;
+
+ private Heuristics heuristics = Heuristics.NONE;
+
+ private TransitionHandler transitionHandler = null;
+
+ /**
+ * Constructor with default node factory and fatal XML violation policy.
+ */
+ public HtmlBuilder() {
+ this(new SimpleNodeFactory(), XmlViolationPolicy.FATAL);
+ }
+
+ /**
+ * Constructor with given node factory and fatal XML violation policy.
+ * @param nodeFactory the factory
+ */
+ public HtmlBuilder(SimpleNodeFactory nodeFactory) {
+ this(nodeFactory, XmlViolationPolicy.FATAL);
+ }
+
+ /**
+ * Constructor with default node factory and given XML violation policy.
+ * @param xmlPolicy the policy
+ */
+ public HtmlBuilder(XmlViolationPolicy xmlPolicy) {
+ this(new SimpleNodeFactory(), xmlPolicy);
+ }
+
+ /**
+ * Constructor with given node factory and given XML violation policy.
+ * @param nodeFactory the factory
+ * @param xmlPolicy the policy
+ */
+ public HtmlBuilder(SimpleNodeFactory nodeFactory, XmlViolationPolicy xmlPolicy) {
+ super();
+ this.simpleNodeFactory = nodeFactory;
+ this.treeBuilder = new XOMTreeBuilder(nodeFactory);
+ this.driver = null;
+ setXmlPolicy(xmlPolicy);
+ }
+
+ /**
+  * Creates the tokenizer for a new driver. A plain <code>Tokenizer</code>
+  * is used only when there is no error handler, no transition handler and
+  * the non-XML character policy is <code>ALLOW</code>; in every other
+  * case an <code>ErrorReportingTokenizer</code> is created.
+  *
+  * @param handler the token handler the tokenizer feeds
+  * @param newAttributesEachTime passed through to the tokenizer constructor
+  * @return the new tokenizer
+  */
+ private Tokenizer newTokenizer(TokenHandler handler, boolean newAttributesEachTime) {
+     final boolean needsErrorReporting = errorHandler != null
+             || transitionHandler != null
+             || contentNonXmlCharPolicy != XmlViolationPolicy.ALLOW;
+     if (needsErrorReporting) {
+         return new ErrorReportingTokenizer(handler, newAttributesEachTime);
+     }
+     return new Tokenizer(handler, newAttributesEachTime);
+ }
+
+ /**
+  * Creates and configures the driver if there is none, which is the case
+  * before the first parse and after any setter that resets the
+  * <code>driver</code> field to <code>null</code>. Every stored setting is
+  * copied onto the new driver and onto the tree builder. Does nothing when
+  * a driver already exists.
+  */
+ private void lazyInit() {
+     if (driver == null) {
+         this.driver = new Driver(newTokenizer(treeBuilder, false));
+         this.driver.setErrorHandler(errorHandler);
+         this.driver.setTransitionHandler(transitionHandler);
+         this.treeBuilder.setErrorHandler(treeBuilderErrorHandler);
+         this.driver.setCheckingNormalization(checkingNormalization);
+         this.driver.setCommentPolicy(commentPolicy);
+         this.driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
+         this.driver.setContentSpacePolicy(contentSpacePolicy);
+         this.driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+         this.driver.setMappingLangToXmlLang(mappingLangToXmlLang);
+         this.driver.setXmlnsPolicy(xmlnsPolicy);
+         // setNamePolicy() applies the policy to the driver when one
+         // exists; do the same for a freshly created driver so that a
+         // policy set before the first parse is not silently dropped.
+         this.driver.setNamePolicy(namePolicy);
+         this.driver.setHeuristics(heuristics);
+         for (CharacterHandler characterHandler : characterHandlers) {
+             this.driver.addCharacterHandler(characterHandler);
+         }
+         this.treeBuilder.setDoctypeExpectation(doctypeExpectation);
+         this.treeBuilder.setDocumentModeHandler(documentModeHandler);
+         this.treeBuilder.setScriptingEnabled(scriptingEnabled);
+         this.treeBuilder.setReportingDoctype(reportingDoctype);
+         this.treeBuilder.setNamePolicy(namePolicy);
+     }
+ }
+
+
+ /**
+  * Runs the driver over the given input.
+  *
+  * <p>When the input source carries neither a byte stream nor a character
+  * stream, its system id is resolved: the entity resolver (if any) is
+  * asked first, and if it returns <code>null</code> or a source without
+  * any stream, the system id is opened as a URL.
+  *
+  * @param is the input; must not be <code>null</code>
+  * @throws IllegalArgumentException if <code>is</code> is <code>null</code>
+  *         or has no byte stream, no character stream and no system id
+  * @throws ParsingException wrapping any <code>SAXException</code> raised
+  *         while resolving or tokenizing
+  * @throws IOException if reading or opening the input fails
+  * @throws MalformedURLException if the system id is not a valid URL
+  */
+ private void tokenize(InputSource is) throws ParsingException, IOException,
+         MalformedURLException {
+     try {
+         if (is == null) {
+             throw new IllegalArgumentException("Null input.");
+         }
+         if (is.getByteStream() == null && is.getCharacterStream() == null) {
+             String systemId = is.getSystemId();
+             if (systemId == null) {
+                 throw new IllegalArgumentException(
+                         "No byte stream, no character stream nor URI.");
+             }
+             // A SAX entity resolver may return null to request default
+             // handling, so the result must not be dereferenced blindly.
+             InputSource resolved = null;
+             if (entityResolver != null) {
+                 resolved = entityResolver.resolveEntity(is.getPublicId(),
+                         systemId);
+             }
+             // Fall back to opening the URL only when the resolver gave
+             // us nothing readable; a source with either kind of stream
+             // is usable as it is.
+             if (resolved == null
+                     || (resolved.getByteStream() == null
+                             && resolved.getCharacterStream() == null)) {
+                 resolved = new InputSource();
+                 resolved.setSystemId(systemId);
+                 resolved.setByteStream(new URL(systemId).openStream());
+             }
+             is = resolved;
+         }
+         driver.tokenize(is);
+     } catch (SAXParseException e) {
+         throw new ParsingException(e.getMessage(), e.getSystemId(), e.getLineNumber(),
+                 e.getColumnNumber(), e);
+     } catch (SAXException e) {
+         throw new ParsingException(e.getMessage(), e);
+     }
+ }
+
+ /**
+ * Parse from SAX <code>InputSource</code>.
+ * @param is the <code>InputSource</code>
+ * @return the document
+ * @throws ParsingException in case of an XML violation
+ * @throws IOException if IO goes wrang
+ */
+ public Document build(InputSource is) throws ParsingException, IOException {
+ lazyInit();
+ treeBuilder.setFragmentContext(null);
+ tokenize(is);
+ return treeBuilder.getDocument();
+ }
+
+ /**
+ * Parse a fragment from SAX <code>InputSource</code> assuming an HTML
+ * context.
+ * @param is the <code>InputSource</code>
+ * @param context the name of the context element (HTML namespace assumed)
+ * @return the fragment
+ * @throws ParsingException in case of an XML violation
+ * @throws IOException if IO goes wrang
+ */
+ public Nodes buildFragment(InputSource is, String context)
+ throws IOException, ParsingException {
+ lazyInit();
+ treeBuilder.setFragmentContext(context.intern());
+ tokenize(is);
+ return treeBuilder.getDocumentFragment();
+ }
+
+ /**
+ * Parse a fragment from SAX <code>InputSource</code>.
+ * @param is the <code>InputSource</code>
+ * @param contextLocal the local name of the context element
+ * @param contextNamespace the namespace of the context element
+ * @return the fragment
+ * @throws ParsingException in case of an XML violation
+ * @throws IOException if IO goes wrong
+ */
+ public Nodes buildFragment(InputSource is, String contextLocal, String contextNamespace)
+ throws IOException, ParsingException {
+ lazyInit();
+ treeBuilder.setFragmentContext(contextLocal.intern(), contextNamespace.intern(), null, false);
+ tokenize(is);
+ return treeBuilder.getDocumentFragment();
+ }
+
+ /**
+  * Parse from <code>File</code>. The file is opened by this method and
+  * closed again before it returns, whether or not parsing succeeds.
+  * @param file the file
+  * @return the document
+  * @throws ParsingException in case of an XML violation
+  * @throws IOException if IO goes wrong
+  * @see nu.xom.Builder#build(java.io.File)
+  */
+ @Override
+ public Document build(File file) throws ParsingException,
+         ValidityException, IOException {
+     InputStream stream = new FileInputStream(file);
+     try {
+         return build(stream, file.toURI().toASCIIString());
+     } finally {
+         // This method opened the stream, so it is responsible for
+         // releasing the file handle.
+         stream.close();
+     }
+ }
+
+ /**
+ * Parse from <code>InputStream</code>.
+ * @param stream the stream
+ * @param uri the base URI
+ * @return the document
+ * @throws ParsingException in case of an XML violation
+ * @throws IOException if IO goes wrang
+ * @see nu.xom.Builder#build(java.io.InputStream, java.lang.String)
+ */
+ @Override
+ public Document build(InputStream stream, String uri)
+ throws ParsingException, ValidityException, IOException {
+ InputSource is = new InputSource(stream);
+ is.setSystemId(uri);
+ return build(is);
+ }
+
+ /**
+ * Parse from <code>InputStream</code>.
+ * @param stream the stream
+ * @return the document
+ * @throws ParsingException in case of an XML violation
+ * @throws IOException if IO goes wrang
+ * @see nu.xom.Builder#build(java.io.InputStream)
+ */
+ @Override
+ public Document build(InputStream stream) throws ParsingException,
+ ValidityException, IOException {
+ return build(new InputSource(stream));
+ }
+
+ /**
+ * Parse from <code>Reader</code>.
+ * @param stream the reader
+ * @param uri the base URI
+ * @return the document
+ * @throws ParsingException in case of an XML violation
+ * @throws IOException if IO goes wrang
+ * @see nu.xom.Builder#build(java.io.Reader, java.lang.String)
+ */
+ @Override
+ public Document build(Reader stream, String uri) throws ParsingException,
+ ValidityException, IOException {
+ InputSource is = new InputSource(stream);
+ is.setSystemId(uri);
+ return build(is);
+ }
+
+ /**
+ * Parse from <code>Reader</code>.
+ * @param stream the reader
+ * @return the document
+ * @throws ParsingException in case of an XML violation
+ * @throws IOException if IO goes wrang
+ * @see nu.xom.Builder#build(java.io.Reader)
+ */
+ @Override
+ public Document build(Reader stream) throws ParsingException,
+ ValidityException, IOException {
+ return build(new InputSource(stream));
+ }
+
+ /**
+ * Parse from <code>String</code>.
+ * @param content the HTML source as string
+ * @param uri the base URI
+ * @return the document
+ * @throws ParsingException in case of an XML violation
+ * @throws IOException if IO goes wrang
+ * @see nu.xom.Builder#build(java.lang.String, java.lang.String)
+ */
+ @Override
+ public Document build(String content, String uri) throws ParsingException,
+ ValidityException, IOException {
+ return build(new StringReader(content), uri);
+ }
+
+ /**
+ * Parse from URI.
+ * @param uri the URI of the document
+ * @return the document
+ * @throws ParsingException in case of an XML violation
+ * @throws IOException if IO goes wrang
+ * @see nu.xom.Builder#build(java.lang.String)
+ */
+ @Override
+ public Document build(String uri) throws ParsingException,
+ ValidityException, IOException {
+ return build(new InputSource(uri));
+ }
+
+ /**
+ * Gets the node factory
+ */
+ public SimpleNodeFactory getSimpleNodeFactory() {
+ return simpleNodeFactory;
+ }
+
+ /**
+ * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
+ */
+ public void setEntityResolver(EntityResolver resolver) {
+ entityResolver = resolver;
+ }
+
+ /**
+ * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
+ */
+ public void setErrorHandler(ErrorHandler handler) {
+ errorHandler = handler;
+ treeBuilderErrorHandler = handler;
+ driver = null;
+ }
+
+ /**
+  * Sets the transition handler and discards the current driver, so that a
+  * new one is created on the next parse.
+  *
+  * @param handler the transition handler, or <code>null</code> for none
+  */
+ public void setTransitionHander(TransitionHandler handler) {
+     // The tokenizer type depends on whether a transition handler is
+     // present, so the driver has to be rebuilt.
+     this.driver = null;
+     this.transitionHandler = handler;
+ }
+
+ /**
+ * Indicates whether NFC normalization of source is being checked.
+ * @return <code>true</code> if NFC normalization of source is being checked.
+ * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization()
+ */
+ public boolean isCheckingNormalization() {
+ return checkingNormalization;
+ }
+
+ /**
+ * Toggles the checking of the NFC normalization of source.
+ * @param enable <code>true</code> to check normalization
+ * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean)
+ */
+ public void setCheckingNormalization(boolean enable) {
+ this.checkingNormalization = enable;
+ if (driver != null) {
+ driver.setCheckingNormalization(checkingNormalization);
+ }
+ }
+
+ /**
+ * Sets the policy for consecutive hyphens in comments.
+ * @param commentPolicy the policy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
+ this.commentPolicy = commentPolicy;
+ if (driver != null) {
+ driver.setCommentPolicy(commentPolicy);
+ }
+ }
+
+ /**
+ * Sets the policy for non-XML characters except white space.
+ * @param contentNonXmlCharPolicy the policy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setContentNonXmlCharPolicy(
+ XmlViolationPolicy contentNonXmlCharPolicy) {
+ this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
+ driver = null;
+ }
+
+ /**
+ * Sets the policy for non-XML white space.
+ * @param contentSpacePolicy the policy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
+ this.contentSpacePolicy = contentSpacePolicy;
+ if (driver != null) {
+ driver.setContentSpacePolicy(contentSpacePolicy);
+ }
+ }
+
+ /**
+ * Whether the parser considers scripting to be enabled for noscript treatment.
+ *
+ * @return <code>true</code> if enabled
+ * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled()
+ */
+ public boolean isScriptingEnabled() {
+ return scriptingEnabled;
+ }
+
+ /**
+  * Sets whether the parser considers scripting to be enabled for noscript
+  * treatment. The value is stored and handed to the tree builder at once.
+  * @param scriptingEnabled <code>true</code> to enable
+  * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean)
+  */
+ public void setScriptingEnabled(boolean scriptingEnabled) {
+     this.scriptingEnabled = scriptingEnabled;
+     // treeBuilder is a final field assigned in the constructor, so it is
+     // never null here.
+     treeBuilder.setScriptingEnabled(scriptingEnabled);
+ }
+
+ /**
+ * Returns the doctype expectation.
+ *
+ * @return the doctypeExpectation
+ */
+ public DoctypeExpectation getDoctypeExpectation() {
+ return doctypeExpectation;
+ }
+
+ /**
+  * Sets the doctype expectation. The value is stored and handed to the
+  * tree builder at once.
+  *
+  * @param doctypeExpectation
+  *            the doctypeExpectation to set
+  * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation)
+  */
+ public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
+     this.doctypeExpectation = doctypeExpectation;
+     // treeBuilder is a final field assigned in the constructor, so it is
+     // never null here.
+     treeBuilder.setDoctypeExpectation(doctypeExpectation);
+ }
+
+ /**
+ * Returns the document mode handler.
+ *
+ * @return the documentModeHandler
+ */
+ public DocumentModeHandler getDocumentModeHandler() {
+ return documentModeHandler;
+ }
+
+ /**
+  * Sets the document mode handler. The handler is stored and also handed
+  * to the tree builder immediately, so it takes effect even when the
+  * driver has already been created.
+  *
+  * @param documentModeHandler
+  *            the documentModeHandler to set
+  * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler)
+  */
+ public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
+     this.documentModeHandler = documentModeHandler;
+     // lazyInit() copies this value only while creating a driver; without
+     // this call a handler set after the first parse would never reach
+     // the tree builder.
+     treeBuilder.setDocumentModeHandler(documentModeHandler);
+ }
+
+ /**
+ * Returns the streamabilityViolationPolicy.
+ *
+ * @return the streamabilityViolationPolicy
+ */
+ public XmlViolationPolicy getStreamabilityViolationPolicy() {
+ return streamabilityViolationPolicy;
+ }
+
+ /**
+ * Sets the streamabilityViolationPolicy.
+ *
+ * @param streamabilityViolationPolicy
+ * the streamabilityViolationPolicy to set
+ */
+ public void setStreamabilityViolationPolicy(
+ XmlViolationPolicy streamabilityViolationPolicy) {
+ this.streamabilityViolationPolicy = streamabilityViolationPolicy;
+ driver = null;
+ }
+
+ /**
+ * Whether the HTML 4 mode reports boolean attributes in a way that repeats
+ * the name in the value.
+ * @param html4ModeCompatibleWithXhtml1Schemata
+ */
+ public void setHtml4ModeCompatibleWithXhtml1Schemata(
+ boolean html4ModeCompatibleWithXhtml1Schemata) {
+ this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
+ if (driver != null) {
+ driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+ }
+ }
+
+ /**
+  * Returns the <code>Locator</code> during parse.
+  * @return the <code>Locator</code>, or <code>null</code> when no driver
+  *         exists (before the first parse, or after a setter has
+  *         discarded the driver)
+  */
+ public Locator getDocumentLocator() {
+     // The driver is created lazily and reset by several setters, so it
+     // may legitimately be absent here.
+     return driver == null ? null : driver.getDocumentLocator();
+ }
+
+ /**
+ * Whether the HTML 4 mode reports boolean attributes in a way that repeats
+ * the name in the value.
+ *
+ * @return the html4ModeCompatibleWithXhtml1Schemata
+ */
+ public boolean isHtml4ModeCompatibleWithXhtml1Schemata() {
+ return html4ModeCompatibleWithXhtml1Schemata;
+ }
+
+ /**
+ * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
+ * @param mappingLangToXmlLang
+ * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
+ */
+ public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
+ this.mappingLangToXmlLang = mappingLangToXmlLang;
+ if (driver != null) {
+ driver.setMappingLangToXmlLang(mappingLangToXmlLang);
+ }
+ }
+
+ /**
+ * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
+ *
+ * @return the mappingLangToXmlLang
+ */
+ public boolean isMappingLangToXmlLang() {
+ return mappingLangToXmlLang;
+ }
+
+ /**
+ * Sets the policy for whether the <code>xmlns</code> attribute on the
+ * root element is passed through. <code>FATAL</code> is not allowed.
+ * @param xmlnsPolicy the policy; must not be <code>XmlViolationPolicy.FATAL</code>
+ * @throws IllegalArgumentException if <code>xmlnsPolicy</code> is <code>FATAL</code>
+ * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
+ if (xmlnsPolicy == XmlViolationPolicy.FATAL) {
+ throw new IllegalArgumentException("Can't use FATAL here.");
+ }
+ this.xmlnsPolicy = xmlnsPolicy;
+ if (driver != null) {
+ driver.setXmlnsPolicy(xmlnsPolicy);
+ }
+ }
+
+ /**
+ * Returns the xmlnsPolicy.
+ *
+ * @return the xmlnsPolicy
+ */
+ public XmlViolationPolicy getXmlnsPolicy() {
+ return xmlnsPolicy;
+ }
+
+ /**
+ * Returns the commentPolicy.
+ *
+ * @return the commentPolicy
+ */
+ public XmlViolationPolicy getCommentPolicy() {
+ return commentPolicy;
+ }
+
+ /**
+ * Returns the contentNonXmlCharPolicy.
+ *
+ * @return the contentNonXmlCharPolicy
+ */
+ public XmlViolationPolicy getContentNonXmlCharPolicy() {
+ return contentNonXmlCharPolicy;
+ }
+
+ /**
+ * Returns the contentSpacePolicy.
+ *
+ * @return the contentSpacePolicy
+ */
+ public XmlViolationPolicy getContentSpacePolicy() {
+ return contentSpacePolicy;
+ }
+
+ /**
+  * Sets the doctype-reporting flag. The value is stored and handed to
+  * the tree builder at once.
+  * @param reportingDoctype the flag value to set
+  * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean)
+  */
+ public void setReportingDoctype(boolean reportingDoctype) {
+     this.reportingDoctype = reportingDoctype;
+     // treeBuilder is a final field assigned in the constructor, so it is
+     // never null here.
+     treeBuilder.setReportingDoctype(reportingDoctype);
+ }
+
+ /**
+ * Returns the reportingDoctype.
+ *
+ * @return the reportingDoctype
+ */
+ public boolean isReportingDoctype() {
+ return reportingDoctype;
+ }
+
+ /**
+  * Sets the policy for non-NCName element and attribute names. The
+  * policy is always stored; when a driver currently exists it is also
+  * applied to the driver and to the tree builder right away.
+  * @param namePolicy the policy
+  * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+  */
+ public void setNamePolicy(XmlViolationPolicy namePolicy) {
+     this.namePolicy = namePolicy;
+     if (driver == null) {
+         return;
+     }
+     driver.setNamePolicy(namePolicy);
+     treeBuilder.setNamePolicy(namePolicy);
+ }
+
+ /**
+ * Sets the encoding sniffing heuristics.
+ *
+ * @param heuristics the heuristics to set
+ * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics)
+ */
+ public void setHeuristics(Heuristics heuristics) {
+ this.heuristics = heuristics;
+ if (driver != null) {
+ driver.setHeuristics(heuristics);
+ }
+ }
+
+ public Heuristics getHeuristics() {
+ return this.heuristics;
+ }
+
+ /**
+  * Convenience method that sets the name, xmlns, content space, content
+  * non-XML char and comment policies in one go. The streamability policy
+  * and doctype reporting are not touched. Because the xmlns policy does
+  * not accept <code>FATAL</code>, <code>ALTER_INFOSET</code> is used for
+  * it when <code>FATAL</code> is requested.
+  *
+  * @param xmlPolicy the policy to apply
+  */
+ public void setXmlPolicy(XmlViolationPolicy xmlPolicy) {
+     XmlViolationPolicy effectiveXmlnsPolicy = xmlPolicy;
+     if (xmlPolicy == XmlViolationPolicy.FATAL) {
+         effectiveXmlnsPolicy = XmlViolationPolicy.ALTER_INFOSET;
+     }
+     setNamePolicy(xmlPolicy);
+     setXmlnsPolicy(effectiveXmlnsPolicy);
+     setContentSpacePolicy(xmlPolicy);
+     setContentNonXmlCharPolicy(xmlPolicy);
+     setCommentPolicy(xmlPolicy);
+ }
+
+ /**
+ * The policy for non-NCName element and attribute names.
+ *
+ * @return the namePolicy
+ */
+ public XmlViolationPolicy getNamePolicy() {
+ return namePolicy;
+ }
+
+ /**
+  * Does nothing.
+  * @param bogusXmlnsPolicy ignored
+  * @deprecated this setting has no effect
+  */
+ @Deprecated
+ public void setBogusXmlnsPolicy(
+         XmlViolationPolicy bogusXmlnsPolicy) {
+ }
+
+ /**
+  * Returns <code>XmlViolationPolicy.ALTER_INFOSET</code>.
+  * @deprecated the value is fixed and cannot be changed
+  * @return <code>XmlViolationPolicy.ALTER_INFOSET</code>
+  */
+ @Deprecated
+ public XmlViolationPolicy getBogusXmlnsPolicy() {
+     return XmlViolationPolicy.ALTER_INFOSET;
+ }
+
+ public void addCharacterHandler(CharacterHandler characterHandler) {
+ this.characterHandlers.add(characterHandler);
+ if (driver != null) {
+ driver.addCharacterHandler(characterHandler);
+ }
+ }
+
+
+ /**
+ * Sets whether comment nodes appear in the tree.
+ * @param ignoreComments <code>true</code> to ignore comments
+ * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean)
+ */
+ public void setIgnoringComments(boolean ignoreComments) {
+ treeBuilder.setIgnoringComments(ignoreComments);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/ModalDocument.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/ModalDocument.java
new file mode 100644
index 000000000..3b76b1421
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/ModalDocument.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.xom;
+
+import nu.validator.htmlparser.common.DocumentMode;
+import nu.xom.Document;
+import nu.xom.Element;
+
+/**
+ * An XOM <code>Document</code> that additionally carries a <code>DocumentMode</code> via <code>Mode</code>.
+ * @version $Id$
+ * @author hsivonen
+ */
+public class ModalDocument extends Document implements Mode {
+
+ private DocumentMode mode = null; // stays null until setMode is called
+
+ /**
+ * Copy constructor (<code>Mode</code>-aware): the mode is copied too when <code>doc</code> implements <code>Mode</code>.
+ * @param doc the document to copy
+ */
+ public ModalDocument(Document doc) {
+ super(doc);
+ if (doc instanceof Mode) {
+ Mode modal = (Mode) doc;
+ setMode(modal.getMode());
+ }
+ }
+
+ /**
+ * Creates a document with the given root element; the mode is left <code>null</code>.
+ *
+ * @param elt the root element
+ */
+ public ModalDocument(Element elt) {
+ super(elt);
+ }
+
+ /**
+ * Gets the mode (<code>null</code> if none has been set).
+ * @see nu.validator.htmlparser.xom.Mode#getMode()
+ */
+ public DocumentMode getMode() {
+ return mode;
+ }
+
+ /**
+ * Sets the mode, replacing any previous value.
+ * @see nu.validator.htmlparser.xom.Mode#setMode(nu.validator.htmlparser.common.DocumentMode)
+ */
+ public void setMode(DocumentMode mode) {
+ this.mode = mode;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/Mode.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/Mode.java
new file mode 100644
index 000000000..bd2dcbc26
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/Mode.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.xom;
+
+import nu.validator.htmlparser.common.DocumentMode;
+
+/**
+ * Interface for attaching a <code>DocumentMode</code> to a document.
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface Mode {
+
+ /**
+ * Returns the mode currently attached to the implementing object.
+ *
+ * @return the mode
+ */
+ public abstract DocumentMode getMode();
+
+ /**
+ * Attaches the given mode to the implementing object.
+ *
+ * @param mode the mode to set
+ */
+ public abstract void setMode(DocumentMode mode);
+
+} \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/SimpleNodeFactory.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/SimpleNodeFactory.java
new file mode 100644
index 000000000..147b5d930
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/SimpleNodeFactory.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.xom;
+
+import nu.xom.Attribute;
+import nu.xom.Comment;
+import nu.xom.Document;
+import nu.xom.Element;
+import nu.xom.Text;
+import nu.xom.Attribute.Type;
+
+/**
+ * A simpler node factory that does not use <code>Nodes</code>.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class SimpleNodeFactory {
+
+ /**
+ * <code>return new Attribute(localName, uri, value, type);</code>
+ * @param localName the local name of the attribute
+ * @param uri the namespace URI of the attribute
+ * @param value the attribute value
+ * @param type the XOM attribute type
+ * @return the newly created attribute
+ */
+ public Attribute makeAttribute(String localName, String uri, String value, Type type) {
+ return new Attribute(localName, uri, value, type);
+ }
+
+ /**
+ * <code>return new Text(string);</code>
+ * @param string the text content
+ * @return the newly created text node
+ */
+ public Text makeText(String string) {
+ return new Text(string);
+ }
+
+ /**
+ * <code>return new Comment(string);</code>
+ * @param string the comment content
+ * @return the newly created comment node
+ */
+ public Comment makeComment(String string) {
+ return new Comment(string);
+ }
+
+ /**
+ * <code>return new Element(name, namespace);</code>
+ * @param name the element name
+ * @param namespace the namespace URI of the element
+ * @return the newly created element
+ */
+ public Element makeElement(String name, String namespace) {
+ return new Element(name, namespace);
+ }
+
+ /**
+ * <code>return new FormPtrElement(name, namespace, form);</code>
+ * @param name the element name
+ * @param namespace the namespace URI of the element
+ * @param form the form element handed to the <code>FormPtrElement</code> constructor
+ * @return the newly created element (a <code>FormPtrElement</code>)
+ */
+ public Element makeElement(String name, String namespace, Element form) {
+ return new FormPtrElement(name, namespace, form);
+ }
+
+ /**
+ * <code>return new ModalDocument(new Element("root", "http://www.xom.nu/fakeRoot"));</code>
+ *
+ * <p>Subclasses are advised to return an instance of <code>Mode</code>. (Not required, though.)
+ *
+ * @return a new document whose root is a placeholder element in the fake-root namespace
+ */
+ public Document makeDocument() {
+ return new ModalDocument(new Element("root", "http://www.xom.nu/fakeRoot"));
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/XOMTreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/XOMTreeBuilder.java
new file mode 100644
index 000000000..623f31927
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/XOMTreeBuilder.java
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.xom;
+
+import nu.validator.htmlparser.common.DocumentMode;
+import nu.validator.htmlparser.impl.CoalescingTreeBuilder;
+import nu.validator.htmlparser.impl.HtmlAttributes;
+import nu.xom.Attribute;
+import nu.xom.Document;
+import nu.xom.Element;
+import nu.xom.Node;
+import nu.xom.Nodes;
+import nu.xom.ParentNode;
+import nu.xom.Text;
+import nu.xom.XMLException;
+
+import org.xml.sax.SAXException;
+
+class XOMTreeBuilder extends CoalescingTreeBuilder<Element> {
+
+ private final SimpleNodeFactory nodeFactory;
+
+ private Document document;
+
+ private int cachedTableIndex = -1;
+
+ private Element cachedTable = null;
+
+ protected XOMTreeBuilder(SimpleNodeFactory nodeFactory) { // nodeFactory creates every node this builder makes
+ super();
+ this.nodeFactory = nodeFactory;
+ }
+
+ @Override // Adds each parsed attribute to element unless an attribute with the same local name and URI already exists.
+ protected void addAttributesToElement(Element element, HtmlAttributes attributes)
+ throws SAXException {
+ try {
+ for (int i = 0; i < attributes.getLength(); i++) {
+ String localName = attributes.getLocalNameNoBoundsCheck(i);
+ String uri = attributes.getURINoBoundsCheck(i);
+ if (element.getAttribute(localName, uri) == null) { // existing attributes are never overwritten
+ element.addAttribute(nodeFactory.makeAttribute(
+ localName,
+ uri,
+ attributes.getValueNoBoundsCheck(i),
+ "ID".equals(attributes.getTypeNoBoundsCheck(i)) ? Attribute.Type.ID // compare by value, not by reference
+ : Attribute.Type.CDATA));
+ }
+ }
+ } catch (XMLException e) { // XOM rejected the operation: report it through fatal(e)
+ fatal(e);
+ }
+ }
+
+ @Override protected void appendCharacters(Element parent, String text)
+ throws SAXException {
+ try {
+ int childCount = parent.getChildCount();
+ Node lastChild;
+ if (childCount != 0
+ && ((lastChild = parent.getChild(childCount - 1)) instanceof Text)) {
+ Text lastAsText = (Text) lastChild; // last child is text: extend it instead of adding a sibling
+ lastAsText.setValue(lastAsText.getValue() + text);
+ return;
+ }
+ parent.appendChild(nodeFactory.makeText(text)); // no trailing text node: append a new one
+ } catch (XMLException e) {
+ fatal(e);
+ }
+ }
+
+ @Override // Moves all children of oldParent to the end of newParent.
+ protected void appendChildrenToNewParent(Element oldParent,
+ Element newParent) throws SAXException {
+ try {
+ Nodes children = oldParent.removeChildren(); // detaches every child in one call
+ for (int i = 0; i < children.size(); i++) { // re-attach in the order removeChildren() returned them
+ newParent.appendChild(children.get(i));
+ }
+ } catch (XMLException e) {
+ fatal(e);
+ }
+ }
+
+ @Override // Appends a new comment node, created by the node factory, as the last child of parent.
+ protected void appendComment(Element parent, String comment) throws SAXException {
+ try {
+ parent.appendChild(nodeFactory.makeComment(comment));
+ } catch (XMLException e) { // XOM rejected the comment: report it through fatal(e)
+ fatal(e);
+ }
+ }
+
+ @Override // Adds a comment at document level, placing it relative to the current root element.
+ protected void appendCommentToDocument(String comment)
+ throws SAXException {
+ try {
+ Element root = document.getRootElement();
+ if ("http://www.xom.nu/fakeRoot".equals(root.getNamespaceURI())) { // root is still the placeholder from makeDocument()
+ document.insertChild(nodeFactory.makeComment(comment), document.indexOf(root)); // insert before the placeholder root
+ } else {
+ document.appendChild(nodeFactory.makeComment(comment)); // real root present: append after everything
+ }
+ } catch (XMLException e) {
+ fatal(e);
+ }
+ }
+
+ @Override // Creates an element via the node factory and copies all parsed attributes onto it; intendedParent is not used here.
+ protected Element createElement(String ns, String name,
+ HtmlAttributes attributes, Element intendedParent) throws SAXException {
+ try {
+ Element rv = nodeFactory.makeElement(name, ns);
+ for (int i = 0; i < attributes.getLength(); i++) {
+ rv.addAttribute(nodeFactory.makeAttribute(
+ attributes.getLocalNameNoBoundsCheck(i),
+ attributes.getURINoBoundsCheck(i),
+ attributes.getValueNoBoundsCheck(i),
+ "ID".equals(attributes.getTypeNoBoundsCheck(i)) ? Attribute.Type.ID // compare by value, not by reference
+ : Attribute.Type.CDATA));
+ }
+ return rv;
+ } catch (XMLException e) {
+ fatal(e);
+ throw new RuntimeException("Unreachable"); // satisfies the compiler if fatal(e) returns normally
+ }
+ }
+
+ @Override // Creates the XHTML-namespace "html" element, copies the attributes onto it and installs it as the document root.
+ protected Element createHtmlElementSetAsRoot(
+ HtmlAttributes attributes) throws SAXException {
+ try {
+ Element rv = nodeFactory.makeElement("html",
+ "http://www.w3.org/1999/xhtml");
+ for (int i = 0; i < attributes.getLength(); i++) {
+ rv.addAttribute(nodeFactory.makeAttribute(
+ attributes.getLocalNameNoBoundsCheck(i),
+ attributes.getURINoBoundsCheck(i),
+ attributes.getValueNoBoundsCheck(i),
+ "ID".equals(attributes.getTypeNoBoundsCheck(i)) ? Attribute.Type.ID // compare by value, not by reference
+ : Attribute.Type.CDATA));
+ }
+ document.setRootElement(rv); // replaces the current root element of the document
+ return rv;
+ } catch (XMLException e) {
+ fatal(e);
+ throw new RuntimeException("Unreachable"); // satisfies the compiler if fatal(e) returns normally
+ }
+ }
+
+ @Override // Removes element from its parent by calling XOM's detach().
+ protected void detachFromParent(Element element) throws SAXException {
+ try {
+ element.detach();
+ } catch (XMLException e) { // XOM refused the detach: report it through fatal(e)
+ fatal(e);
+ }
+ }
+
+ @Override // Makes child the last child of newParent.
+ protected void appendElement(Element child,
+ Element newParent) throws SAXException {
+ try {
+ child.detach(); // detach first, then append to the new parent
+ newParent.appendChild(child);
+ } catch (XMLException e) {
+ fatal(e);
+ }
+ }
+
+ @Override // Reports whether element has at least one child node of any kind.
+ protected boolean hasChildren(Element element) throws SAXException {
+ try {
+ return element.getChildCount() != 0;
+ } catch (XMLException e) {
+ fatal(e);
+ throw new RuntimeException("Unreachable"); // satisfies the compiler if fatal(e) returns normally
+ }
+ }
+
+ /**
+ * Returns the document and clears this builder's reference to it.
+ *
+ * @return the document (<code>null</code> if the reference is already cleared)
+ */
+ Document getDocument() {
+ Document rv = document;
+ document = null;
+ return rv;
+ }
+
+ Nodes getDocumentFragment() { // Returns the root element's children as a fragment and drops the document.
+ Element rootElt = document.getRootElement();
+ Nodes rv = rootElt.removeChildren(); // children are detached from the root here
+ document = null; // the builder no longer references the document afterwards
+ return rv;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#createElement(String,
+ * java.lang.String, org.xml.sax.Attributes, java.lang.Object)
+ */
+ @Override // Form-aware variant: the element is created with makeElement(name, ns, form); intendedParent is not used here.
+ protected Element createElement(String ns, String name,
+ HtmlAttributes attributes, Element form, Element intendedParent) throws SAXException {
+ try {
+ Element rv = nodeFactory.makeElement(name,
+ ns, form);
+ for (int i = 0; i < attributes.getLength(); i++) {
+ rv.addAttribute(nodeFactory.makeAttribute(
+ attributes.getLocalNameNoBoundsCheck(i), // same accessor as the other attribute loops; i is always in range
+ attributes.getURINoBoundsCheck(i),
+ attributes.getValueNoBoundsCheck(i),
+ "ID".equals(attributes.getTypeNoBoundsCheck(i)) ? Attribute.Type.ID // compare by value, not by reference
+ : Attribute.Type.CDATA));
+ }
+ return rv;
+ } catch (XMLException e) {
+ fatal(e);
+ throw new RuntimeException("Unreachable"); // satisfies the compiler if fatal(e) returns normally
+ }
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#start()
+ */
+ @Override
+ protected void start(boolean fragment) throws SAXException { // the fragment flag is not consulted here
+ document = nodeFactory.makeDocument(); // fresh document from the factory
+ cachedTableIndex = -1; // reset the table-index cache used for foster parenting
+ cachedTable = null;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#documentMode(nu.validator.htmlparser.common.DocumentMode,
+ * java.lang.String, java.lang.String, boolean)
+ */
+ @Override // Only mode is used; the identifiers and the boolean flag are ignored in this implementation.
+ protected void documentMode(DocumentMode mode, String publicIdentifier,
+ String systemIdentifier, boolean html4SpecificAdditionalErrorChecks)
+ throws SAXException {
+ if (document instanceof Mode) { // documents that do not implement Mode silently skip this
+ Mode modal = (Mode) document;
+ modal.setMode(mode);
+ }
+ }
+
+ @Override // Creates an element and inserts it just before table in table's parent, or appends to stackParent if table is detached.
+ protected Element createAndInsertFosterParentedElement(String ns, String name,
+ HtmlAttributes attributes, Element table, Element stackParent) throws SAXException {
+ try {
+ Node parent = table.getParent();
+ Element child = createElement(ns, name, attributes, parent != null ? (Element) parent : stackParent);
+ if (parent != null) { // always an element if not null
+ ((ParentNode) parent).insertChild(child, indexOfTable(table, (Element) parent)); // index is taken in the node we insert into
+ cachedTableIndex++; // the table moved one position to the right
+ } else {
+ stackParent.appendChild(child);
+ }
+ return child;
+ } catch (XMLException e) {
+ fatal(e);
+ throw new RuntimeException("Unreachable"); // satisfies the compiler if fatal(e) returns normally
+ }
+ }
+
+ @Override protected void insertFosterParentedCharacters(String text,
+ Element table, Element stackParent) throws SAXException {
+ try {
+ Node parent = table.getParent();
+ if (parent != null) { // always an element if not null
+ Element parentAsElt = (Element) parent;
+ int tableIndex = indexOfTable(table, parentAsElt); // position of the table inside its actual parent
+ Node prevSibling;
+ if (tableIndex != 0
+ && ((prevSibling = parentAsElt.getChild(tableIndex - 1)) instanceof Text)) {
+ Text prevAsText = (Text) prevSibling; // text node right before the table: extend it
+ prevAsText.setValue(prevAsText.getValue() + text);
+ return;
+ }
+ parentAsElt.insertChild(nodeFactory.makeText(text), tableIndex); // otherwise insert a new text node before the table
+ cachedTableIndex++; // the table moved one position to the right
+ return;
+ }
+ int childCount = stackParent.getChildCount(); // table has no parent: fall back to the end of stackParent
+ Node lastChild;
+ if (childCount != 0
+ && ((lastChild = stackParent.getChild(childCount - 1)) instanceof Text)) {
+ Text lastAsText = (Text) lastChild; // trailing text node: extend it
+ lastAsText.setValue(lastAsText.getValue() + text);
+ return;
+ }
+ stackParent.appendChild(nodeFactory.makeText(text));
+ } catch (XMLException e) {
+ fatal(e);
+ }
+ }
+
+ @Override protected void insertFosterParentedChild(Element child,
+ Element table, Element stackParent) throws SAXException {
+ try {
+ Node parent = table.getParent(); // insert before the table when it is attached, else append to stackParent
+ if (parent != null) { // always an element if not null
+ ((ParentNode)parent).insertChild(child, indexOfTable(table, (Element) parent)); // index is taken in the node we insert into
+ cachedTableIndex++; // the table moved one position to the right
+ } else {
+ stackParent.appendChild(child);
+ }
+ } catch (XMLException e) {
+ fatal(e);
+ }
+ }
+
+ private int indexOfTable(Element table, Element stackParent) { // Child index of table within the element passed as stackParent.
+ if (table == cachedTable) { // same table as last time: reuse the cached index (callers bump it after inserting)
+ return cachedTableIndex;
+ } else {
+ cachedTable = table; // different table: look it up and remember the result
+ return (cachedTableIndex = stackParent.indexOf(table));
+ }
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#end()
+ */
+ @Override protected void end() throws SAXException {
+ cachedTableIndex = -1; // drop the table-index cache
+ cachedTable = null; // and the reference to the cached table element
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/package.html
new file mode 100644
index 000000000..a936d5e3a
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/package.html
@@ -0,0 +1,29 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>This package provides an HTML5 parser that exposes the document through the XOM API.</p>
+</body>
+</html> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/CDATA.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/CDATA.java
new file mode 100644
index 000000000..f17ce3f89
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/CDATA.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * A CDATA section.
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class CDATA extends ParentNode {
+
+ /**
+ * The constructor.
+ * @param locator the locator
+ */
+ public CDATA(Locator locator) {
+ super(locator);
+ }
+
+ /**
+ * Reports the start of this CDATA section by calling <code>treeParser.startCDATA(this)</code>.
+ * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+ */
+ @Override
+ void visit(TreeParser treeParser) throws SAXException {
+ treeParser.startCDATA(this);
+ }
+
+ /**
+ * Reports the end of this CDATA section by calling <code>treeParser.endCDATA(endLocator)</code>.
+ * @throws SAXException if things go wrong
+ * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser)
+ */
+ @Override
+ void revisit(TreeParser treeParser) throws SAXException {
+ treeParser.endCDATA(endLocator);
+ }
+
+ /**
+ * Returns <code>NodeType.CDATA</code>.
+ * @see nu.validator.saxtree.Node#getNodeType()
+ */
+ @Override
+ public NodeType getNodeType() {
+ return NodeType.CDATA;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/CharBufferNode.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/CharBufferNode.java
new file mode 100644
index 000000000..55c7715f6
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/CharBufferNode.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+
+/**
+ * A common superclass for character buffer node classes.
+ * @version $Id$
+ * @author hsivonen
+ */
+public abstract class CharBufferNode extends Node {
+
+ /**
+ * The buffer; a private copy of the characters handed to the constructor.
+ */
+ protected final char[] buffer;
+
+ /**
+ * The constructor. Copies <code>length</code> chars of <code>buf</code>, starting at <code>start</code>.
+ * @param locator the locator
+ * @param buf the source array to copy from
+ * @param start the offset of the first char to copy
+ * @param length the number of chars to copy
+ */
+ CharBufferNode(Locator locator, char[] buf, int start, int length) {
+ super(locator);
+ this.buffer = new char[length];
+ System.arraycopy(buf, start, buffer, 0, length);
+ }
+
+ /**
+ * Returns the wrapped buffer as a string.
+ *
+ * @see java.lang.Object#toString()
+ */
+ @Override
+ public String toString() {
+ return new String(buffer);
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Characters.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Characters.java
new file mode 100644
index 000000000..b8cc2d6d6
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Characters.java
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * A run of characters.
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class Characters extends CharBufferNode {
+
+ /**
+ * The constructor.
+ * @param locator the locator
+ * @param buf the buffer
+ * @param start the offset in the buffer
+ * @param length the length
+ */
+ public Characters(Locator locator, char[] buf, int start, int length) {
+ super(locator, buf, start, length);
+ }
+
+ /**
+ * Passes the whole buffer to <code>treeParser.characters</code>, with this node as the last argument.
+ * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+ */
+ @Override
+ void visit(TreeParser treeParser) throws SAXException {
+ treeParser.characters(buffer, 0, buffer.length, this);
+ }
+
+ /**
+ * Returns <code>NodeType.CHARACTERS</code>.
+ * @see nu.validator.saxtree.Node#getNodeType()
+ */
+ @Override
+ public NodeType getNodeType() {
+ return NodeType.CHARACTERS;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Comment.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Comment.java
new file mode 100644
index 000000000..f010462fb
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Comment.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * A comment.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class Comment extends CharBufferNode {
+
+ /**
+ * The constructor.
+ * @param locator the locator
+ * @param buf the buffer
+ * @param start the offset in the buffer
+ * @param length the length
+ */
+ public Comment(Locator locator, char[] buf, int start, int length) {
+ super(locator, buf, start, length);
+ }
+
+ /**
+ * Passes the whole buffer to <code>treeParser.comment</code>, with this node as the last argument.
+ * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+ */
+ @Override
+ void visit(TreeParser treeParser) throws SAXException {
+ treeParser.comment(buffer, 0, buffer.length, this);
+ }
+
+ /**
+ * Returns <code>NodeType.COMMENT</code>.
+ * @see nu.validator.saxtree.Node#getNodeType()
+ */
+ @Override
+ public NodeType getNodeType() {
+ return NodeType.COMMENT;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/DTD.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/DTD.java
new file mode 100644
index 000000000..2169e0571
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/DTD.java
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * A doctype.
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class DTD extends ParentNode {
+
+ /**
+ * The name.
+ */
+ private final String name;
+
+ /**
+ * The public id.
+ */
+ private final String publicIdentifier;
+
+ /**
+ * The system id.
+ */
+ private final String systemIdentifier;
+
+ /**
+ * The constructor. The three strings are stored as given.
+ * @param locator the locator
+ * @param name the name
+ * @param publicIdentifier the public id
+ * @param systemIdentifier the system id
+ */
+ public DTD(Locator locator, String name, String publicIdentifier, String systemIdentifier) {
+ super(locator);
+ this.name = name;
+ this.publicIdentifier = publicIdentifier;
+ this.systemIdentifier = systemIdentifier;
+ }
+
+ /**
+ * Reports the start of the DTD by passing the name, both identifiers and this node to <code>treeParser.startDTD</code>.
+ * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+ */
+ @Override
+ void visit(TreeParser treeParser) throws SAXException {
+ treeParser.startDTD(name, publicIdentifier, systemIdentifier, this);
+ }
+
+ /**
+ * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser)
+ */
+ @Override
+ void revisit(TreeParser treeParser) throws SAXException {
+ treeParser.endDTD(endLocator); // reports the end of the DTD
+ }
+
+ /**
+ * Returns the name.
+ *
+ * @return the name
+ */
+ public String getName() {
+ return name;
+ }
+
+ /**
+ * Returns the public identifier.
+ *
+ * @return the public identifier
+ */
+ public String getPublicIdentifier() {
+ return publicIdentifier;
+ }
+
+ /**
+ * Returns the system identifier.
+ *
+ * @return the system identifier
+ */
+ public String getSystemIdentifier() {
+ return systemIdentifier;
+ }
+
+ /**
+ * Returns <code>NodeType.DTD</code>.
+ * @see nu.validator.saxtree.Node#getNodeType()
+ */
+ @Override
+ public NodeType getNodeType() {
+ return NodeType.DTD;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Document.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Document.java
new file mode 100644
index 000000000..3bb6f09c7
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Document.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * A document.
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class Document extends ParentNode {
+
+ /**
+ * The constructor.
+ * @param locator the locator
+ */
+ public Document(Locator locator) {
+ super(locator);
+ }
+
+ /**
+ *
+ * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+ */
+ @Override
+ void visit(TreeParser treeParser) throws SAXException {
+ treeParser.startDocument(this);
+ }
+
+ /**
+ * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser)
+ */
+ @Override
+ void revisit(TreeParser treeParser) throws SAXException {
+ treeParser.endDocument(endLocator);
+ }
+
+ /**
+ *
+ * @see nu.validator.saxtree.Node#getNodeType()
+ */
+ @Override
+ public NodeType getNodeType() {
+ return NodeType.DOCUMENT;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/DocumentFragment.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/DocumentFragment.java
new file mode 100644
index 000000000..06816932f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/DocumentFragment.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.helpers.LocatorImpl;
+
+/**
+ * A document fragment.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class DocumentFragment extends ParentNode {
+
+ /**
+ * The constructor.
+ */
+ public DocumentFragment() {
+ super(new LocatorImpl());
+ }
+
+ /**
+ *
+ * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+ */
+ @Override void visit(TreeParser treeParser) {
+ // nothing
+ }
+
+ /**
+ *
+ * @see nu.validator.saxtree.Node#getNodeType()
+ */
+ @Override public NodeType getNodeType() {
+ return NodeType.DOCUMENT_FRAGMENT;
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Element.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Element.java
new file mode 100644
index 000000000..3d33164e5
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Element.java
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import java.util.List;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+
+/**
+ * An element.
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class Element extends ParentNode {
+
+ /**
+ * The namespace URI.
+ */
+ private final String uri;
+
+ /**
+ * The local name.
+ */
+ private final String localName;
+
+ /**
+ * The qualified name.
+ */
+ private final String qName;
+
+ /**
+ * The attributes.
+ */
+ private final Attributes attributes;
+
+ /**
+ * The namespace prefix mappings.
+ */
+ private final List<PrefixMapping> prefixMappings;
+
+ /**
+ * The constructor.
+ * @param locator the locator.
+ * @param uri the namespace URI
+ * @param localName the local name
+ * @param qName the qualified name
+ * @param atts the attributes
+ * @param retainAttributes <code>true</code> to retain the attributes instead of copying
+ * @param prefixMappings the prefix mappings
+ */
+ public Element(Locator locator, String uri, String localName, String qName,
+ Attributes atts, boolean retainAttributes,
+ List<PrefixMapping> prefixMappings) {
+ super(locator);
+ this.uri = uri;
+ this.localName = localName;
+ this.qName = qName;
+ if (retainAttributes) {
+ this.attributes = atts;
+ } else {
+ this.attributes = new AttributesImpl(atts);
+ }
+ this.prefixMappings = prefixMappings;
+ }
+
+ /**
+ *
+ * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+ */
+ @Override
+ void visit(TreeParser treeParser) throws SAXException {
+ if (prefixMappings != null) {
+ for (PrefixMapping mapping : prefixMappings) {
+ treeParser.startPrefixMapping(mapping.getPrefix(),
+ mapping.getUri(), this);
+ }
+ }
+ treeParser.startElement(uri, localName, qName, attributes, this);
+ }
+
+ /**
+ * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser)
+ */
+ @Override
+ void revisit(TreeParser treeParser) throws SAXException {
+ treeParser.endElement(uri, localName, qName, endLocator);
+ if (prefixMappings != null) {
+ for (PrefixMapping mapping : prefixMappings) {
+ treeParser.endPrefixMapping(mapping.getPrefix(), endLocator);
+ }
+ }
+ }
+
+ /**
+ * Returns the attributes.
+ *
+ * @return the attributes
+ */
+ public Attributes getAttributes() {
+ return attributes;
+ }
+
+ /**
+ * Returns the localName.
+ *
+ * @return the localName
+ */
+ public String getLocalName() {
+ return localName;
+ }
+
+ /**
+ * Returns the prefixMappings.
+ *
+ * @return the prefixMappings
+ */
+ public List<PrefixMapping> getPrefixMappings() {
+ return prefixMappings;
+ }
+
+ /**
+ * Returns the qName.
+ *
+ * @return the qName
+ */
+ public String getQName() {
+ return qName;
+ }
+
+ /**
+ * Returns the uri.
+ *
+ * @return the uri
+ */
+ public String getUri() {
+ return uri;
+ }
+
+ /**
+ *
+ * @see nu.validator.saxtree.Node#getNodeType()
+ */
+ @Override
+ public NodeType getNodeType() {
+ return NodeType.ELEMENT;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Entity.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Entity.java
new file mode 100644
index 000000000..091013736
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Entity.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * An entity.
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class Entity extends ParentNode {
+
+ /**
+ * The name.
+ */
+ private final String name;
+
+ /**
+ * The constructor.
+ * @param locator the locator
+ * @param name the name
+ */
+ public Entity(Locator locator, String name) {
+ super(locator);
+ this.name = name;
+ }
+
+ /**
+ *
+ * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+ */
+ @Override
+ void visit(TreeParser treeParser) throws SAXException {
+ treeParser.startEntity(name, this);
+ }
+
+ /**
+ * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser)
+ */
+ @Override
+ void revisit(TreeParser treeParser) throws SAXException {
+ treeParser.endEntity(name, endLocator);
+ }
+
+ /**
+ *
+ * @see nu.validator.saxtree.Node#getNodeType()
+ */
+ @Override
+ public NodeType getNodeType() {
+ return NodeType.ENTITY;
+ }
+
+ /**
+ * Returns the name.
+ *
+ * @return the name
+ */
+ public String getName() {
+ return name;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/IgnorableWhitespace.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/IgnorableWhitespace.java
new file mode 100644
index 000000000..e5fcf350f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/IgnorableWhitespace.java
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * A run of ignorable whitespace.
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class IgnorableWhitespace extends CharBufferNode {
+
+ /**
+ * The constructor.
+ * @param locator the locator
+ * @param buf the buffer
+ * @param start the offset
+ * @param length the length
+ */
+ public IgnorableWhitespace(Locator locator, char[] buf, int start, int length) {
+ super(locator, buf, start, length);
+ }
+
+ /**
+ *
+ * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+ */
+ @Override
+ void visit(TreeParser treeParser) throws SAXException {
+ treeParser.ignorableWhitespace(buffer, 0, buffer.length, this);
+ }
+
+ /**
+ *
+ * @see nu.validator.saxtree.Node#getNodeType()
+ */
+ @Override
+ public NodeType getNodeType() {
+ return NodeType.IGNORABLE_WHITESPACE;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/LocatorImpl.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/LocatorImpl.java
new file mode 100644
index 000000000..37c0c6325
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/LocatorImpl.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007-2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+
+/**
+ * A locator implementation.
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class LocatorImpl implements Locator {
+
+ /**
+ * The system id.
+ */
+ private final String systemId;
+
+ /**
+ * The public id.
+ */
+ private final String publicId;
+
+ /**
+ * The column.
+ */
+ private final int column;
+
+ /**
+ * The line.
+ */
+ private final int line;
+
+ /**
+ * The constructor.
+ * @param locator the locator
+ */
+ public LocatorImpl(Locator locator) {
+ if (locator == null) {
+ this.systemId = null;
+ this.publicId = null;
+ this.column = -1;
+ this.line = -1;
+ } else {
+ this.systemId = locator.getSystemId();
+ this.publicId = locator.getPublicId();
+ this.column = locator.getColumnNumber();
+ this.line = locator.getLineNumber();
+ }
+ }
+
+ /**
+ *
+ * @see org.xml.sax.Locator#getColumnNumber()
+ */
+ public int getColumnNumber() {
+ return column;
+ }
+
+ /**
+ *
+ * @see org.xml.sax.Locator#getLineNumber()
+ */
+ public int getLineNumber() {
+ return line;
+ }
+
+ /**
+ *
+ * @see org.xml.sax.Locator#getPublicId()
+ */
+ public String getPublicId() {
+ return publicId;
+ }
+
+ /**
+ *
+ * @see org.xml.sax.Locator#getSystemId()
+ */
+ public String getSystemId() {
+ return systemId;
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Node.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Node.java
new file mode 100644
index 000000000..7aed83b75
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Node.java
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007-2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import java.util.List;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * The common node superclass.
+ * @version $Id$
+ * @author hsivonen
+ */
+public abstract class Node implements Locator {
+
+ /**
+ * The system id.
+ */
+ private final String systemId;
+
+ /**
+ * The public id.
+ */
+ private final String publicId;
+
+ /**
+ * The column.
+ */
+ private final int column;
+
+ /**
+ * The line.
+ */
+ private final int line;
+
+ /**
+ * The next sibling.
+ */
+ private Node nextSibling = null;
+
+ /**
+ * The parent.
+ */
+ private ParentNode parentNode = null;
+
+ /**
+ * The constructor.
+ *
+ * @param locator the locator
+ */
+ Node(Locator locator) {
+ if (locator == null) {
+ this.systemId = null;
+ this.publicId = null;
+ this.column = -1;
+ this.line = -1;
+ } else {
+ this.systemId = locator.getSystemId();
+ this.publicId = locator.getPublicId();
+ this.column = locator.getColumnNumber();
+ this.line = locator.getLineNumber();
+ }
+ }
+
+ /**
+ *
+ * @see org.xml.sax.Locator#getColumnNumber()
+ */
+ public int getColumnNumber() {
+ return column;
+ }
+
+ /**
+ *
+ * @see org.xml.sax.Locator#getLineNumber()
+ */
+ public int getLineNumber() {
+ return line;
+ }
+
+ /**
+ *
+ * @see org.xml.sax.Locator#getPublicId()
+ */
+ public String getPublicId() {
+ return publicId;
+ }
+
+ /**
+ *
+ * @see org.xml.sax.Locator#getSystemId()
+ */
+ public String getSystemId() {
+ return systemId;
+ }
+
+ /**
+ * Visit the node.
+ *
+ * @param treeParser the visitor
+ * @throws SAXException if stuff goes wrong
+ */
+ abstract void visit(TreeParser treeParser) throws SAXException;
+
+ /**
+ * Revisit the node.
+ *
+ * @param treeParser the visitor
+ * @throws SAXException if stuff goes wrong
+ */
+ void revisit(TreeParser treeParser) throws SAXException {
+ return;
+ }
+
+ /**
+ * Return the first child.
+ * @return the first child
+ */
+ public Node getFirstChild() {
+ return null;
+ }
+
+ /**
+ * Returns the nextSibling.
+ *
+ * @return the nextSibling
+ */
+ public final Node getNextSibling() {
+ return nextSibling;
+ }
+
+ /**
+ * Returns the previous sibling.
+ * @return the previous sibling, or <code>null</code> if this node is the first child or has no parent
+ */
+ public final Node getPreviousSibling() {
+ if (parentNode == null) {
+ return null; // a detached node has no siblings (previously a NullPointerException)
+ }
+ Node prev = null;
+ // Singly linked list: walk from the parent's first child until this node is reached.
+ for (Node next = parentNode.getFirstChild(); next != this; next = next.nextSibling) {
+ prev = next;
+ }
+ return prev;
+ }
+
+ /**
+ * Sets the nextSibling.
+ *
+ * @param nextSibling the nextSibling to set
+ */
+ void setNextSibling(Node nextSibling) {
+ this.nextSibling = nextSibling;
+ }
+
+
+ /**
+ * Returns the parentNode.
+ *
+ * @return the parentNode
+ */
+ public final ParentNode getParentNode() {
+ return parentNode;
+ }
+
+ /**
+ * Sets the parentNode.
+ *
+ * @param parentNode the parentNode to set
+ */
+ void setParentNode(ParentNode parentNode) {
+ this.parentNode = parentNode;
+ }
+
+ /**
+ * Return the node type.
+ * @return the node type
+ */
+ public abstract NodeType getNodeType();
+
+ // Subclass-specific accessors that are hoisted here to
+ // avoid casting.
+
+ /**
+ * Detach this node from its parent.
+ */
+ public void detach() {
+ if (parentNode != null) {
+ parentNode.removeChild(this);
+ parentNode = null;
+ }
+ }
+
+ /**
+ * Returns the name.
+ *
+ * @return the name
+ */
+ public String getName() {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Returns the publicIdentifier.
+ *
+ * @return the publicIdentifier
+ */
+ public String getPublicIdentifier() {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Returns the systemIdentifier.
+ *
+ * @return the systemIdentifier
+ */
+ public String getSystemIdentifier() {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Returns the attributes.
+ *
+ * @return the attributes
+ */
+ public Attributes getAttributes() {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Returns the localName.
+ *
+ * @return the localName
+ */
+ public String getLocalName() {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Returns the prefixMappings.
+ *
+ * @return the prefixMappings
+ */
+ public List<PrefixMapping> getPrefixMappings() {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Returns the qName.
+ *
+ * @return the qName
+ */
+ public String getQName() {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Returns the uri.
+ *
+ * @return the uri
+ */
+ public String getUri() {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Returns the data.
+ *
+ * @return the data
+ */
+ public String getData() {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Returns the target.
+ *
+ * @return the target
+ */
+ public String getTarget() {
+ throw new UnsupportedOperationException();
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/NodeType.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/NodeType.java
new file mode 100644
index 000000000..c3c927f0d
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/NodeType.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+/**
+ * The node type.
+ * @version $Id$
+ * @author hsivonen
+ */
+public enum NodeType {
+ /**
+ * A CDATA section.
+ */
+ CDATA,
+ /**
+ * A run of characters.
+ */
+ CHARACTERS,
+ /**
+ * A comment.
+ */
+ COMMENT,
+ /**
+ * A document.
+ */
+ DOCUMENT,
+ /**
+ * A document fragment.
+ */
+ DOCUMENT_FRAGMENT,
+ /**
+ * A DTD.
+ */
+ DTD,
+ /**
+ * An element.
+ */
+ ELEMENT,
+ /**
+ * An entity.
+ */
+ ENTITY,
+ /**
+ * A run of ignorable whitespace.
+ */
+ IGNORABLE_WHITESPACE,
+ /**
+ * A processing instruction.
+ */
+ PROCESSING_INSTRUCTION,
+ /**
+ * A skipped entity.
+ */
+ SKIPPED_ENTITY
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/NullLexicalHandler.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/NullLexicalHandler.java
new file mode 100644
index 000000000..de63f3b57
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/NullLexicalHandler.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+ * A lexical handler that does nothing.
+ * @version $Id$
+ * @author hsivonen
+ */
+final class NullLexicalHandler implements LexicalHandler {
+
+ /**
+ *
+ * @see org.xml.sax.ext.LexicalHandler#comment(char[], int, int)
+ */
+ public void comment(char[] arg0, int arg1, int arg2) throws SAXException {
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ext.LexicalHandler#endCDATA()
+ */
+ public void endCDATA() throws SAXException {
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ext.LexicalHandler#endDTD()
+ */
+ public void endDTD() throws SAXException {
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String)
+ */
+ public void endEntity(String arg0) throws SAXException {
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ext.LexicalHandler#startCDATA()
+ */
+ public void startCDATA() throws SAXException {
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ext.LexicalHandler#startDTD(java.lang.String, java.lang.String, java.lang.String)
+ */
+ public void startDTD(String arg0, String arg1, String arg2) throws SAXException {
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String)
+ */
+ public void startEntity(String arg0) throws SAXException {
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/ParentNode.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/ParentNode.java
new file mode 100644
index 000000000..6cc96003f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/ParentNode.java
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+
+/**
+ * Common superclass for parent nodes.
+ * @version $Id$
+ * @author hsivonen
+ */
+public abstract class ParentNode extends Node {
+
+ /**
+ * The end locator.
+ */
+ protected Locator endLocator;
+
+ /**
+ * The first child.
+ */
+ private Node firstChild = null;
+
+ /**
+ * The last child (for efficiency).
+ */
+ private Node lastChild = null;
+
+ /**
+ * The constructor.
+ * @param locator the locator
+ */
+ ParentNode(Locator locator) {
+ super(locator);
+ }
+
+ /**
+ * Sets the endLocator.
+ *
+ * @param endLocator the endLocator to set
+ */
+ public void setEndLocator(Locator endLocator) {
+ this.endLocator = new LocatorImpl(endLocator);
+ }
+
+ /**
+ * Copies the endLocator from another node.
+ *
+ * @param another the other node
+ */
+ public void copyEndLocator(ParentNode another) {
+ this.endLocator = another.endLocator;
+ }
+
+ /**
+ * Returns the firstChild.
+ *
+ * @return the firstChild
+ */
+ public final Node getFirstChild() {
+ return firstChild;
+ }
+
+ /**
+ * Returns the lastChild.
+ *
+ * @return the lastChild
+ */
+ public final Node getLastChild() {
+ return lastChild;
+ }
+
+ /**
+ * Insert a new child before a pre-existing child and return the newly inserted child.
+ * @param child the new child; it is first detached from its current parent, if any
+ * @param sibling the existing child before which to insert (must be a child of this node) or <code>null</code> to append
+ * @return <code>child</code>
+ */
+ public Node insertBefore(Node child, Node sibling) {
+ assert sibling == null || this == sibling.getParentNode();
+ if (sibling == null) {
+ return appendChild(child);
+ }
+ if (child == sibling) {
+ return child; // already in place; detaching it would lose the insertion point
+ }
+ child.detach();
+ child.setParentNode(this);
+ child.setNextSibling(sibling);
+ if (firstChild == sibling) {
+ firstChild = child;
+ return child;
+ }
+ Node before = firstChild; // singly linked list: walk to the node preceding sibling
+ while (before.getNextSibling() != sibling) {
+ before = before.getNextSibling();
+ }
+ before.setNextSibling(child);
+ return child;
+ }
+
+ public Node insertBetween(Node child, Node prev, Node next) { // insert child between adjacent children prev and next
+ assert prev == null || this == prev.getParentNode();
+ assert next == null || this == next.getParentNode();
+ assert prev != null || next == firstChild; // no predecessor: next must be the current first child
+ assert next != null || prev == lastChild; // no successor: prev must be the current last child
+ assert prev == null || next == null || prev.getNextSibling() == next; // prev and next must be adjacent
+ if (next == null) {
+ return appendChild(child); // inserting at the end is a plain append
+ }
+ child.detach();
+ child.setParentNode(this);
+ child.setNextSibling(next);
+ if (prev == null) {
+ firstChild = child; // child becomes the new head of the list
+ } else {
+ prev.setNextSibling(child);
+ }
+ return child;
+ }
+
+ /**
+ * Append a child to this node (detaching it from any previous parent) and return the child.
+ * @param child the child to append.
+ * @return <code>child</code>
+ */
+ public Node appendChild(Node child) {
+ child.detach();
+ child.setNextSibling(null); // the new last child must not carry a sibling link left over from its old position
+ child.setParentNode(this);
+ if (firstChild == null) {
+ firstChild = child;
+ } else {
+ lastChild.setNextSibling(child);
+ }
+ lastChild = child;
+ return child;
+ }
+
+ /**
+ * Append the children of another node to this node, removing them from the other node.
+ * @param parent the other node whose children to append to this one
+ */
+ public void appendChildren(Node parent) {
+ Node child = parent.getFirstChild();
+ if (child == null || parent == this) {
+ return; // nothing to move; re-appending a node's own children would make the list cyclic
+ }
+ ParentNode another = (ParentNode) parent;
+ if (firstChild == null) {
+ firstChild = child;
+ } else {
+ lastChild.setNextSibling(child);
+ }
+ lastChild = another.lastChild;
+ do {
+ child.setParentNode(this);
+ } while ((child = child.getNextSibling()) != null);
+ another.firstChild = null;
+ another.lastChild = null;
+ }
+
+ /**
+ * Remove a child from this node and clear the child's sibling link.
+ * @param node the child to remove (must be a child of this node)
+ */
+ void removeChild(Node node) {
+ assert this == node.getParentNode();
+ Node following = node.getNextSibling();
+ Node prev = null;
+ if (firstChild == node) {
+ firstChild = following;
+ } else {
+ // Singly linked list: find the predecessor and splice the node out.
+ prev = firstChild;
+ while (prev.getNextSibling() != node) {
+ prev = prev.getNextSibling();
+ }
+ prev.setNextSibling(following);
+ }
+ if (lastChild == node) {
+ lastChild = prev; // null when the only child was removed
+ }
+ // A removed node must not keep pointing into its former sibling list.
+ node.setNextSibling(null);
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/PrefixMapping.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/PrefixMapping.java
new file mode 100644
index 000000000..8ffaf4a2c
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/PrefixMapping.java
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+/**
+ * An immutable pairing of a namespace prefix with a namespace URI.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class PrefixMapping {
+
+    /**
+     * The namespace URI.
+     */
+    private final String uri;
+
+    /**
+     * The namespace prefix.
+     */
+    private final String prefix;
+
+    /**
+     * Creates a mapping from the given prefix to the given URI. Neither
+     * argument is validated or copied.
+     *
+     * @param prefix the namespace prefix
+     * @param uri the namespace URI
+     */
+    public PrefixMapping(final String prefix, final String uri) {
+        this.uri = uri;
+        this.prefix = prefix;
+    }
+
+    /**
+     * Returns the namespace URI given at construction.
+     *
+     * @return the uri
+     */
+    public String getUri() {
+        return uri;
+    }
+
+    /**
+     * Returns the namespace prefix given at construction.
+     *
+     * @return the prefix
+     */
+    public String getPrefix() {
+        return prefix;
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/ProcessingInstruction.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/ProcessingInstruction.java
new file mode 100644
index 000000000..014e63821
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/ProcessingInstruction.java
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * A tree node holding the target and data of a processing instruction.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class ProcessingInstruction extends Node {
+
+    /**
+     * PI data.
+     */
+    private final String data;
+
+    /**
+     * PI target.
+     */
+    private final String target;
+
+    /**
+     * Creates a processing instruction node.
+     *
+     * @param locator the locator, passed on to the <code>Node</code>
+     *        constructor
+     * @param target PI target
+     * @param data PI data
+     */
+    public ProcessingInstruction(Locator locator, String target, String data) {
+        super(locator);
+        this.data = data;
+        this.target = target;
+    }
+
+    /**
+     * Returns the target.
+     *
+     * @return the PI target given at construction
+     */
+    public String getTarget() {
+        return target;
+    }
+
+    /**
+     * Returns the data.
+     *
+     * @return the PI data given at construction
+     */
+    public String getData() {
+        return data;
+    }
+
+    /**
+     * Identifies this node as a processing instruction.
+     *
+     * @return <code>NodeType.PROCESSING_INSTRUCTION</code>
+     * @see nu.validator.saxtree.Node#getNodeType()
+     */
+    @Override
+    public NodeType getNodeType() {
+        return NodeType.PROCESSING_INSTRUCTION;
+    }
+
+    /**
+     * Reports this node to the tree parser as a processing instruction,
+     * passing this node as the locator argument.
+     *
+     * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+     */
+    @Override
+    void visit(TreeParser treeParser) throws SAXException {
+        treeParser.processingInstruction(target, data, this);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/SkippedEntity.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/SkippedEntity.java
new file mode 100644
index 000000000..01ca61490
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/SkippedEntity.java
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * A tree node recording the name of a skipped entity.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class SkippedEntity extends Node {
+
+    /**
+     * The name.
+     */
+    private final String name;
+
+    /**
+     * Creates a skipped entity node.
+     *
+     * @param locator the locator, passed on to the <code>Node</code>
+     *        constructor
+     * @param name the name
+     */
+    public SkippedEntity(Locator locator, String name) {
+        super(locator);
+        this.name = name;
+    }
+
+    /**
+     * Returns the name.
+     *
+     * @return the name given at construction
+     */
+    public String getName() {
+        return name;
+    }
+
+    /**
+     * Identifies this node as a skipped entity.
+     *
+     * @return <code>NodeType.SKIPPED_ENTITY</code>
+     * @see nu.validator.saxtree.Node#getNodeType()
+     */
+    @Override
+    public NodeType getNodeType() {
+        return NodeType.SKIPPED_ENTITY;
+    }
+
+    /**
+     * Reports this node to the tree parser as a skipped entity, passing
+     * this node as the locator argument.
+     *
+     * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+     */
+    @Override
+    void visit(TreeParser treeParser) throws SAXException {
+        treeParser.skippedEntity(name, this);
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeBuilder.java
new file mode 100644
index 000000000..39fe236b3
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeBuilder.java
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+ * Builds a SAX Tree representation of a document or a fragment
+ * streamed as <code>ContentHandler</code> and
+ * <code>LexicalHandler</code> events. The start/end event matching
+ * is expected to adhere to the SAX API contract. Things will
+ * simply break if this is not the case. Fragments are expected to
+ * omit <code>startDocument()</code> and <code>endDocument()</code>
+ * calls.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class TreeBuilder implements ContentHandler, LexicalHandler {
+
+    /**
+     * The locator.
+     */
+    private Locator locator;
+
+    /**
+     * The current node, i.e. the node new children are appended to.
+     */
+    private ParentNode current;
+
+    /**
+     * Whether to retain attribute objects.
+     */
+    private final boolean retainAttributes;
+
+    /**
+     * The prefix mappings for the next element to be inserted, or
+     * <code>null</code> when none are pending.
+     */
+    private List<PrefixMapping> prefixMappings;
+
+    /**
+     * Constructs a reusable <code>TreeBuilder</code> that builds
+     * <code>Document</code>s and copies attributes.
+     */
+    public TreeBuilder() {
+        this(false, false);
+    }
+
+    /**
+     * The constructor. The instance will be reusable if building a full
+     * document and not reusable if building a fragment.
+     *
+     * @param fragment whether this <code>TreeBuilder</code> should build
+     * a <code>DocumentFragment</code> instead of a <code>Document</code>.
+     * @param retainAttributes whether instances of the <code>Attributes</code>
+     * interface passed to <code>startElement</code> should be retained
+     * (the alternative is copying).
+     */
+    public TreeBuilder(boolean fragment, boolean retainAttributes) {
+        // For full documents the root is created later, in startDocument().
+        if (fragment) {
+            current = new DocumentFragment();
+        }
+        this.retainAttributes = retainAttributes;
+    }
+
+    /**
+     * Records the current locator as the end locator of the current node
+     * and makes that node's parent the current node.
+     */
+    private void closeCurrent() {
+        current.setEndLocator(locator);
+        current = current.getParentNode();
+    }
+
+    /**
+     * Appends a <code>Characters</code> node to the current node.
+     *
+     * @see org.xml.sax.ContentHandler#characters(char[], int, int)
+     */
+    public void characters(char[] ch, int start, int length) throws SAXException {
+        current.appendChild(new Characters(locator, ch, start, length));
+    }
+
+    /**
+     * Records the end locator on the current node. The current node is not
+     * changed, so it stays available through {@link #getRoot()}.
+     *
+     * @see org.xml.sax.ContentHandler#endDocument()
+     */
+    public void endDocument() throws SAXException {
+        current.setEndLocator(locator);
+    }
+
+    /**
+     * Closes the current node and returns to its parent. The arguments are
+     * not inspected.
+     *
+     * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
+     */
+    public void endElement(String uri, String localName, String qName) throws SAXException {
+        closeCurrent();
+    }
+
+    /**
+     * Does nothing; mappings are collected in
+     * <code>startPrefixMapping</code> only.
+     *
+     * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
+     */
+    public void endPrefixMapping(String prefix) throws SAXException {
+    }
+
+    /**
+     * Appends an <code>IgnorableWhitespace</code> node to the current node.
+     *
+     * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
+     */
+    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
+        current.appendChild(new IgnorableWhitespace(locator, ch, start, length));
+    }
+
+    /**
+     * Appends a <code>ProcessingInstruction</code> node to the current node.
+     *
+     * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String)
+     */
+    public void processingInstruction(String target, String data) throws SAXException {
+        current.appendChild(new ProcessingInstruction(locator, target, data));
+    }
+
+    /**
+     * Stores the locator that is handed to every node created afterwards.
+     *
+     * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
+     */
+    public void setDocumentLocator(Locator locator) {
+        this.locator = locator;
+    }
+
+    /**
+     * Appends a <code>SkippedEntity</code> node to the current node.
+     *
+     * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
+     */
+    public void skippedEntity(String name) throws SAXException {
+        current.appendChild(new SkippedEntity(locator, name));
+    }
+
+    /**
+     * Starts a new tree by making a fresh <code>Document</code> the current
+     * node.
+     *
+     * @see org.xml.sax.ContentHandler#startDocument()
+     */
+    public void startDocument() throws SAXException {
+        current = new Document(locator);
+        // Drop mappings left pending by an earlier, aborted parse so that a
+        // reused builder does not attach them to this document's first element.
+        prefixMappings = null;
+    }
+
+    /**
+     * Appends a new <code>Element</code> carrying the pending prefix
+     * mappings to the current node, makes it the current node and clears
+     * the pending mappings.
+     *
+     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
+     */
+    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
+        current = (ParentNode) current.appendChild(new Element(locator, uri, localName, qName, atts, retainAttributes, prefixMappings));
+        prefixMappings = null;
+    }
+
+    /**
+     * Queues a prefix mapping for the next element to be started. The list
+     * is created lazily on the first mapping.
+     *
+     * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String)
+     */
+    public void startPrefixMapping(String prefix, String uri) throws SAXException {
+        if (prefixMappings == null) {
+            prefixMappings = new LinkedList<PrefixMapping>();
+        }
+        prefixMappings.add(new PrefixMapping(prefix, uri));
+    }
+
+    /**
+     * Appends a <code>Comment</code> node to the current node.
+     *
+     * @see org.xml.sax.ext.LexicalHandler#comment(char[], int, int)
+     */
+    public void comment(char[] ch, int start, int length) throws SAXException {
+        current.appendChild(new Comment(locator, ch, start, length));
+    }
+
+    /**
+     * Closes the current node and returns to its parent.
+     *
+     * @see org.xml.sax.ext.LexicalHandler#endCDATA()
+     */
+    public void endCDATA() throws SAXException {
+        closeCurrent();
+    }
+
+    /**
+     * Closes the current node and returns to its parent.
+     *
+     * @see org.xml.sax.ext.LexicalHandler#endDTD()
+     */
+    public void endDTD() throws SAXException {
+        closeCurrent();
+    }
+
+    /**
+     * Closes the current node and returns to its parent. The argument is
+     * not inspected.
+     *
+     * @see org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String)
+     */
+    public void endEntity(String name) throws SAXException {
+        closeCurrent();
+    }
+
+    /**
+     * Appends a <code>CDATA</code> node to the current node and makes it
+     * the current node.
+     *
+     * @see org.xml.sax.ext.LexicalHandler#startCDATA()
+     */
+    public void startCDATA() throws SAXException {
+        current = (ParentNode) current.appendChild(new CDATA(locator));
+    }
+
+    /**
+     * Appends a <code>DTD</code> node to the current node and makes it the
+     * current node.
+     *
+     * @see org.xml.sax.ext.LexicalHandler#startDTD(java.lang.String, java.lang.String, java.lang.String)
+     */
+    public void startDTD(String name, String publicId, String systemId) throws SAXException {
+        current = (ParentNode) current.appendChild(new DTD(locator, name, publicId, systemId));
+    }
+
+    /**
+     * Appends an <code>Entity</code> node to the current node and makes it
+     * the current node.
+     *
+     * @see org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String)
+     */
+    public void startEntity(String name) throws SAXException {
+        current = (ParentNode) current.appendChild(new Entity(locator, name));
+    }
+
+    /**
+     * Returns the root (<code>Document</code> if building a full document or
+     * <code>DocumentFragment</code> if building a fragment). This is the
+     * current node, so it is the root only once all start events have been
+     * matched by their end events.
+     *
+     * @return the root
+     */
+    public ParentNode getRoot() {
+        return current;
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeParser.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeParser.java
new file mode 100644
index 000000000..a9d92deb0
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeParser.java
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+ * A tree visitor that replays a tree as SAX events. The parser hands itself
+ * to the content handler as the document locator and answers location
+ * queries from the locator supplied with the most recent event.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class TreeParser implements Locator {
+
+    /**
+     * The content handler.
+     */
+    private final ContentHandler contentHandler;
+
+    /**
+     * The lexical handler.
+     */
+    private final LexicalHandler lexicalHandler;
+
+    /**
+     * The locator of the most recent event, or <code>null</code> before
+     * the first event.
+     */
+    private Locator locatorDelegate;
+
+    /**
+     * The constructor.
+     *
+     * @param contentHandler
+     *            must not be <code>null</code>
+     * @param lexicalHandler
+     *            may be <code>null</code>, in which case a
+     *            <code>NullLexicalHandler</code> is used instead
+     * @throws IllegalArgumentException
+     *             if <code>contentHandler</code> is <code>null</code>
+     */
+    public TreeParser(final ContentHandler contentHandler,
+            final LexicalHandler lexicalHandler) {
+        if (contentHandler == null) {
+            throw new IllegalArgumentException("contentHandler was null.");
+        }
+        this.contentHandler = contentHandler;
+        this.lexicalHandler = lexicalHandler == null
+                ? new NullLexicalHandler()
+                : lexicalHandler;
+    }
+
+    /**
+     * Causes SAX events for the tree rooted at the argument to be emitted.
+     * <code>startDocument()</code> and <code>endDocument()</code> are only
+     * emitted for a <code>Document</code> node.
+     *
+     * @param node
+     *            the root
+     * @throws SAXException
+     *             if visiting or revisiting a node throws it
+     */
+    public void parse(Node node) throws SAXException {
+        contentHandler.setDocumentLocator(this);
+        Node cursor = node;
+        while (true) {
+            cursor.visit(this);
+            Node firstChild = cursor.getFirstChild();
+            if (firstChild != null) {
+                // Descend before looking at siblings.
+                cursor = firstChild;
+                continue;
+            }
+            // No children: revisit nodes while climbing until a sibling is
+            // found or the root itself has been revisited.
+            while (true) {
+                cursor.revisit(this);
+                if (cursor == node) {
+                    return;
+                }
+                Node sibling = cursor.getNextSibling();
+                if (sibling != null) {
+                    cursor = sibling;
+                    break;
+                }
+                cursor = cursor.getParentNode();
+            }
+        }
+    }
+
+    /**
+     * Makes <code>locator</code> current, then forwards to the content
+     * handler.
+     *
+     * @see org.xml.sax.ContentHandler#characters(char[], int, int)
+     */
+    void characters(char[] ch, int start, int length, Locator locator)
+            throws SAXException {
+        locatorDelegate = locator;
+        contentHandler.characters(ch, start, length);
+    }
+
+    /**
+     * Makes <code>locator</code> current, then forwards to the content
+     * handler.
+     *
+     * @see org.xml.sax.ContentHandler#endDocument()
+     */
+    void endDocument(Locator locator) throws SAXException {
+        locatorDelegate = locator;
+        contentHandler.endDocument();
+    }
+
+    /**
+     * Makes <code>locator</code> current, then forwards to the content
+     * handler.
+     *
+     * @see org.xml.sax.ContentHandler#endElement(java.lang.String,
+     *      java.lang.String, java.lang.String)
+     */
+    void endElement(String uri, String localName, String qName, Locator locator)
+            throws SAXException {
+        locatorDelegate = locator;
+        contentHandler.endElement(uri, localName, qName);
+    }
+
+    /**
+     * Makes <code>locator</code> current, then forwards to the content
+     * handler.
+     *
+     * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
+     */
+    void endPrefixMapping(String prefix, Locator locator) throws SAXException {
+        locatorDelegate = locator;
+        contentHandler.endPrefixMapping(prefix);
+    }
+
+    /**
+     * Makes <code>locator</code> current, then forwards to the content
+     * handler.
+     *
+     * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
+     */
+    void ignorableWhitespace(char[] ch, int start, int length, Locator locator)
+            throws SAXException {
+        locatorDelegate = locator;
+        contentHandler.ignorableWhitespace(ch, start, length);
+    }
+
+    /**
+     * Makes <code>locator</code> current, then forwards to the content
+     * handler.
+     *
+     * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String,
+     *      java.lang.String)
+     */
+    void processingInstruction(String target, String data, Locator locator)
+            throws SAXException {
+        locatorDelegate = locator;
+        contentHandler.processingInstruction(target, data);
+    }
+
+    /**
+     * Makes <code>locator</code> current, then forwards to the content
+     * handler.
+     *
+     * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
+     */
+    void skippedEntity(String name, Locator locator) throws SAXException {
+        locatorDelegate = locator;
+        contentHandler.skippedEntity(name);
+    }
+
+    /**
+     * Makes <code>locator</code> current, then forwards to the content
+     * handler.
+     *
+     * @see org.xml.sax.ContentHandler#startDocument()
+     */
+    void startDocument(Locator locator) throws SAXException {
+        locatorDelegate = locator;
+        contentHandler.startDocument();
+    }
+
+    /**
+     * Makes <code>locator</code> current, then forwards to the content
+     * handler.
+     *
+     * @see org.xml.sax.ContentHandler#startElement(java.lang.String,
+     *      java.lang.String, java.lang.String, org.xml.sax.Attributes)
+     */
+    void startElement(String uri, String localName, String qName,
+            Attributes atts, Locator locator) throws SAXException {
+        locatorDelegate = locator;
+        contentHandler.startElement(uri, localName, qName, atts);
+    }
+
+    /**
+     * Makes <code>locator</code> current, then forwards to the content
+     * handler.
+     *
+     * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String,
+     *      java.lang.String)
+     */
+    void startPrefixMapping(String prefix, String uri, Locator locator)
+            throws SAXException {
+        locatorDelegate = locator;
+        contentHandler.startPrefixMapping(prefix, uri);
+    }
+
+    /**
+     * Makes <code>locator</code> current, then forwards to the lexical
+     * handler.
+     *
+     * @see org.xml.sax.ext.LexicalHandler#comment(char[], int, int)
+     */
+    void comment(char[] ch, int start, int length, Locator locator)
+            throws SAXException {
+        locatorDelegate = locator;
+        lexicalHandler.comment(ch, start, length);
+    }
+
+    /**
+     * Makes <code>locator</code> current, then forwards to the lexical
+     * handler.
+     *
+     * @see org.xml.sax.ext.LexicalHandler#endCDATA()
+     */
+    void endCDATA(Locator locator) throws SAXException {
+        locatorDelegate = locator;
+        lexicalHandler.endCDATA();
+    }
+
+    /**
+     * Makes <code>locator</code> current, then forwards to the lexical
+     * handler.
+     *
+     * @see org.xml.sax.ext.LexicalHandler#endDTD()
+     */
+    void endDTD(Locator locator) throws SAXException {
+        locatorDelegate = locator;
+        lexicalHandler.endDTD();
+    }
+
+    /**
+     * Makes <code>locator</code> current, then forwards to the lexical
+     * handler.
+     *
+     * @see org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String)
+     */
+    void endEntity(String name, Locator locator) throws SAXException {
+        locatorDelegate = locator;
+        lexicalHandler.endEntity(name);
+    }
+
+    /**
+     * Makes <code>locator</code> current, then forwards to the lexical
+     * handler.
+     *
+     * @see org.xml.sax.ext.LexicalHandler#startCDATA()
+     */
+    void startCDATA(Locator locator) throws SAXException {
+        locatorDelegate = locator;
+        lexicalHandler.startCDATA();
+    }
+
+    /**
+     * Makes <code>locator</code> current, then forwards to the lexical
+     * handler.
+     *
+     * @see org.xml.sax.ext.LexicalHandler#startDTD(java.lang.String,
+     *      java.lang.String, java.lang.String)
+     */
+    void startDTD(String name, String publicId, String systemId, Locator locator)
+            throws SAXException {
+        locatorDelegate = locator;
+        lexicalHandler.startDTD(name, publicId, systemId);
+    }
+
+    /**
+     * Makes <code>locator</code> current, then forwards to the lexical
+     * handler.
+     *
+     * @see org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String)
+     */
+    void startEntity(String name, Locator locator) throws SAXException {
+        locatorDelegate = locator;
+        lexicalHandler.startEntity(name);
+    }
+
+    /**
+     * Returns the column number of the current locator, or <code>-1</code>
+     * when there is none.
+     *
+     * @see org.xml.sax.Locator#getColumnNumber()
+     */
+    public int getColumnNumber() {
+        return locatorDelegate == null ? -1 : locatorDelegate.getColumnNumber();
+    }
+
+    /**
+     * Returns the line number of the current locator, or <code>-1</code>
+     * when there is none.
+     *
+     * @see org.xml.sax.Locator#getLineNumber()
+     */
+    public int getLineNumber() {
+        return locatorDelegate == null ? -1 : locatorDelegate.getLineNumber();
+    }
+
+    /**
+     * Returns the public id of the current locator, or <code>null</code>
+     * when there is none.
+     *
+     * @see org.xml.sax.Locator#getPublicId()
+     */
+    public String getPublicId() {
+        return locatorDelegate == null ? null : locatorDelegate.getPublicId();
+    }
+
+    /**
+     * Returns the system id of the current locator, or <code>null</code>
+     * when there is none.
+     *
+     * @see org.xml.sax.Locator#getSystemId()
+     */
+    public String getSystemId() {
+        return locatorDelegate == null ? null : locatorDelegate.getSystemId();
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/package.html b/parser/html/java/htmlparser/src/nu/validator/saxtree/package.html
new file mode 100644
index 000000000..0c34dad81
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/package.html
@@ -0,0 +1,46 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>This package provides SAX Tree: a tree model optimized for creation from SAX
+events and replay as SAX events.</p>
+<h2>Design Principles</h2>
+<ol>
+<li>Preserve information exposed through <code>ContentHandler</code>,
+<code>LexicalHandler</code> <em>and</em> <code>Locator</code>.</li>
+<li>Creation from SAX events or as part of the parse of a conforming
+HTML5 document should be <em>fast</em>.</li>
+<li>Emitting SAX events based on the tree should be <em>fast</em>.</li>
+<li>Mutations should be <em>possible</em> but should not make the above
+"fast" cases slower.</li>
+<li>Concurrent reads should work without locking when there are no
+concurrent mutations.</li>
+<li>The user of the API has the responsibility of using the API properly:
+for the sake of performance, the model does not check if it is being
+used properly. Improper use may, therefore, put the model in an
+inconsistent state.</li>
+</ol>
+</body>
+</html> \ No newline at end of file