summary refs log tree commit diff stats
path: root/parser/html/java/htmlparser
diff options
context:
space:
mode:
Diffstat (limited to 'parser/html/java/htmlparser')
-rw-r--r--parser/html/java/htmlparser/HtmlParser-compile3
-rw-r--r--parser/html/java/htmlparser/HtmlParser-compile-detailed3
-rw-r--r--parser/html/java/htmlparser/HtmlParser-compile-detailed.launch24
-rw-r--r--parser/html/java/htmlparser/HtmlParser-compile.launch22
-rw-r--r--parser/html/java/htmlparser/HtmlParser-linux3
-rw-r--r--parser/html/java/htmlparser/HtmlParser-shell3
-rw-r--r--parser/html/java/htmlparser/HtmlParser.launch23
-rw-r--r--parser/html/java/htmlparser/LICENSE.txt96
-rw-r--r--parser/html/java/htmlparser/README.txt5
-rw-r--r--parser/html/java/htmlparser/doc/README15
-rw-r--r--parser/html/java/htmlparser/doc/named-character-references.html4
-rw-r--r--parser/html/java/htmlparser/doc/tokenization.txt1147
-rw-r--r--parser/html/java/htmlparser/doc/tree-construction.txt2201
-rw-r--r--parser/html/java/htmlparser/generate-encoding-data.py745
-rw-r--r--parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml12
-rw-r--r--parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java477
-rw-r--r--parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java265
-rw-r--r--parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java87
-rw-r--r--parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java46
-rw-r--r--parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/HtmlParser.html225
-rw-r--r--parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt25
-rw-r--r--parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/blank.html2
-rw-r--r--parser/html/java/htmlparser/mozilla-export-scripts/README.txt25
-rw-r--r--parser/html/java/htmlparser/mozilla-export-scripts/export-all.sh24
-rw-r--r--parser/html/java/htmlparser/mozilla-export-scripts/export-java-srcs.sh25
-rw-r--r--parser/html/java/htmlparser/mozilla-export-scripts/export-translator.sh24
-rw-r--r--parser/html/java/htmlparser/mozilla-export-scripts/make-translator-jar.sh63
-rw-r--r--parser/html/java/htmlparser/mozilla-export-scripts/util.sh23
-rw-r--r--parser/html/java/htmlparser/pom.xml240
-rw-r--r--parser/html/java/htmlparser/ruby-gcj/DomUtils.java36
-rw-r--r--parser/html/java/htmlparser/ruby-gcj/README65
-rw-r--r--parser/html/java/htmlparser/ruby-gcj/Rakefile77
-rw-r--r--parser/html/java/htmlparser/ruby-gcj/extconf.rb45
-rw-r--r--parser/html/java/htmlparser/ruby-gcj/test/domencoding.rb5
-rw-r--r--parser/html/java/htmlparser/ruby-gcj/test/fonts.rb11
-rw-r--r--parser/html/java/htmlparser/ruby-gcj/test/google.html10
-rw-r--r--parser/html/java/htmlparser/ruby-gcj/test/greek.xml2
-rw-r--r--parser/html/java/htmlparser/ruby-gcj/validator.cpp210
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Big5.java59
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Big5Data.java185
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Big5Decoder.java184
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Big5Encoder.java185
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Decoder.java80
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Encoder.java95
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Encoding.java886
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/EucJp.java57
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/EucKr.java64
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/FallibleSingleByteDecoder.java61
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Gb18030.java55
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Gbk.java63
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Ibm866.java184
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/InfallibleSingleByteDecoder.java57
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso10.java187
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso13.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso14.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso15.java186
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso16.java181
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso2.java189
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso2022Jp.java56
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso3.java189
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso4.java189
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso5.java188
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso6.java194
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso7.java192
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso8.java191
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Iso8I.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Koi8R.java185
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Koi8U.java182
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/MacCyrillic.java182
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Macintosh.java184
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Replacement.java59
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/ReplacementDecoder.java75
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/ShiftJis.java62
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/UserDefined.java55
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/UserDefinedDecoder.java56
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Be.java55
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Le.java56
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Utf8.java57
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows1250.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows1251.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows1252.java197
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows1253.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows1254.java192
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows1255.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows1256.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows1257.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows1258.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/encoding/Windows874.java186
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Auto.java27
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/CharacterName.java27
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Const.java34
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/IdType.java34
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Inline.java33
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Literal.java34
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Local.java34
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NoLength.java34
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NsUri.java33
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Prefix.java33
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/QName.java33
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Virtual.java33
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/package.html30
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/ByteReadable.java44
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/CharacterHandler.java59
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DoctypeExpectation.java65
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentMode.java47
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentModeHandler.java46
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java58
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Heuristics.java52
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Interner.java35
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TokenHandler.java183
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TransitionHandler.java53
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/XmlViolationPolicy.java48
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/common/package.html29
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/DOMTreeBuilder.java357
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/Dom2Sax.java259
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java736
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/package.html29
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/ChardetSniffer.java84
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java77
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/NormalizationChecker.java268
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java2473
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java90
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java1609
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java772
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt55
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HtmlAttributes.java618
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/LocatorImpl.java60
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/MetaScanner.java854
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NCName.java495
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharacters.java944
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharactersAccel.java311
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Portability.java150
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/PushedLocation.java136
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java295
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StateSnapshot.java204
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java43
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java7067
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java6558
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilderState.java129
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/UTF16Buffer.java151
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/package.html30
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/io/BomSniffer.java79
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Confidence.java27
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Driver.java597
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Encoding.java395
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java512
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/io/MetaSniffer.java199
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/Rewindable.java42
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/RewindableInputStream.java235
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlParser.java1097
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlSerializer.java269
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java47
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java51
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXStreamer.java196
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXTreeBuilder.java210
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/XmlSerializer.java737
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/package.html29
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPointer.java49
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPtrElement.java87
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/HtmlBuilder.java773
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/ModalDocument.java75
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/Mode.java48
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/SimpleNodeFactory.java102
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/XOMTreeBuilder.java351
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/package.html29
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/CDATA.java70
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/CharBufferNode.java62
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/Characters.java65
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/Comment.java66
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/DTD.java118
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/Document.java70
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/DocumentFragment.java58
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/Element.java172
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/Entity.java86
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/IgnorableWhitespace.java65
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/LocatorImpl.java104
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/Node.java307
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/NodeType.java76
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/NullLexicalHandler.java85
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/ParentNode.java208
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/PrefixMapping.java65
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/ProcessingInstruction.java94
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/SkippedEntity.java77
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/TreeBuilder.java250
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/TreeParser.java301
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/saxtree/package.html46
-rw-r--r--parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/java/io/IOException.java42
-rw-r--r--parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/Attributes.java257
-rw-r--r--parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/ErrorHandler.java139
-rw-r--r--parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/Locator.java136
-rw-r--r--parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXException.java153
-rw-r--r--parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXParseException.java269
-rw-r--r--parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/package.html297
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/encoding/test/Big5Tester.java96
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/encoding/test/EncodingTester.java491
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java115
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomIdTester.java49
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomTest.java40
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/EncodingTester.java123
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java185
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/ListErrorHandler.java66
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/SystemErrErrorHandler.java201
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenPrinter.java210
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenizerTester.java211
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeDumpContentHandler.java239
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreePrinter.java50
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeTester.java246
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/UntilHashInputStream.java97
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XmlSerializerTester.java63
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XomTest.java33
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/package.html29
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2HTML.java87
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2XML.java86
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2HTML.java89
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2XML.java89
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java237
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java162
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XmlnsDropper.java169
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/package.html29
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/PassThruPrinter.java67
-rw-r--r--parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/package.html29
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/AnnotationHelperVisitor.java139
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppOnlyInputStream.java70
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppTypes.java445
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppVisitor.java2421
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/GkAtomParser.java70
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/HVisitor.java306
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LabelVisitor.java84
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LicenseExtractor.java75
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LocalSymbolTable.java89
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/Main.java148
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/NoCppInputStream.java86
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/StringLiteralParser.java70
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/StringPair.java73
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTable.java80
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTableVisitor.java71
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/TranslatorUtils.java81
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/Type.java99
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java104
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharacters.java182
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharactersCpp.java580
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/DuplicatingFallThroughRemover.java79
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/JavaVisitor.java1349
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/LoopBreakAnalyzerVisitor.java183
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/Main.java144
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/ModeFallThroughRemover.java106
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/RustVisitor.java1586
-rw-r--r--parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/SwitchBreakAnalyzerVisitor.java191
248 files changed, 62553 insertions, 0 deletions
diff --git a/parser/html/java/htmlparser/HtmlParser-compile b/parser/html/java/htmlparser/HtmlParser-compile
new file mode 100644
index 000000000..3e867827f
--- /dev/null
+++ b/parser/html/java/htmlparser/HtmlParser-compile
@@ -0,0 +1,3 @@
+#!/bin/sh
+APPDIR=$(dirname "$0"); # directory holding this script; "$0" is quoted so a path containing spaces is not word-split
+java -XstartOnFirstThread -Xmx256M -cp "$APPDIR/src:$APPDIR/gwt-src:$APPDIR/super:/Developer/gwt-mac-1.5.1/gwt-user.jar:/Developer/gwt-mac-1.5.1/gwt-dev-mac.jar" com.google.gwt.dev.GWTCompiler -out "$APPDIR/www" "$@" nu.validator.htmlparser.HtmlParser; # runs GWTCompiler on the HtmlParser module with output in $APPDIR/www; extra arguments are forwarded via "$@"
diff --git a/parser/html/java/htmlparser/HtmlParser-compile-detailed b/parser/html/java/htmlparser/HtmlParser-compile-detailed
new file mode 100644
index 000000000..a4102d642
--- /dev/null
+++ b/parser/html/java/htmlparser/HtmlParser-compile-detailed
@@ -0,0 +1,3 @@
+#!/bin/sh
+APPDIR=$(dirname "$0"); # directory holding this script; "$0" is quoted so a path containing spaces is not word-split
+java -XstartOnFirstThread -Xmx256M -cp "$APPDIR/src:$APPDIR/gwt-src:$APPDIR/super:/Developer/gwt-mac-1.5.1/gwt-user.jar:/Developer/gwt-mac-1.5.1/gwt-dev-mac.jar" com.google.gwt.dev.GWTCompiler -style DETAILED -out "$APPDIR/www" "$@" nu.validator.htmlparser.HtmlParser; # runs GWTCompiler with -style DETAILED on the HtmlParser module, output in $APPDIR/www; extra arguments are forwarded via "$@"
diff --git a/parser/html/java/htmlparser/HtmlParser-compile-detailed.launch b/parser/html/java/htmlparser/HtmlParser-compile-detailed.launch
new file mode 100644
index 000000000..0347fd6cf
--- /dev/null
+++ b/parser/html/java/htmlparser/HtmlParser-compile-detailed.launch
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication">
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
+<listEntry value="/htmlparser"/>
+</listAttribute>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
+<listEntry value="4"/>
+</listAttribute>
+<booleanAttribute key="org.eclipse.debug.core.appendEnvironmentVariables" value="true"/>
+<listAttribute key="org.eclipse.jdt.launching.CLASSPATH">
+<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;&#10;&lt;runtimeClasspathEntry containerPath=&quot;org.eclipse.jdt.launching.JRE_CONTAINER&quot; javaProject=&quot;htmlparser&quot; path=&quot;1&quot; type=&quot;4&quot;/&gt;&#10;"/>
+<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;&#10;&lt;runtimeClasspathEntry internalArchive=&quot;/htmlparser/src&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#10;"/>
+<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;&#10;&lt;runtimeClasspathEntry internalArchive=&quot;/htmlparser/gwt-src&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#10;"/>
+<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;&#10;&lt;runtimeClasspathEntry internalArchive=&quot;/htmlparser/super&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#10;"/>
+<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;&#10;&lt;runtimeClasspathEntry id=&quot;org.eclipse.jdt.launching.classpathentry.defaultClasspath&quot;&gt;&#10;&lt;memento exportedEntriesOnly=&quot;false&quot; project=&quot;htmlparser&quot;/&gt;&#10;&lt;/runtimeClasspathEntry&gt;&#10;"/>
+<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;&#10;&lt;runtimeClasspathEntry externalArchive=&quot;/Developer/gwt-mac-1.5.1/gwt-dev-mac.jar&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#10;"/>
+<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;&#10;&lt;runtimeClasspathEntry externalArchive=&quot;/Developer/gwt-mac-1.5.1/gwt-user.jar&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#10;"/>
+</listAttribute>
+<booleanAttribute key="org.eclipse.jdt.launching.DEFAULT_CLASSPATH" value="false"/>
+<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="com.google.gwt.dev.GWTCompiler"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-style DETAILED -out /Users/hsivonen/Projects/whattf/htmlparser/www nu.validator.htmlparser.HtmlParser"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="htmlparser"/>
+<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-XstartOnFirstThread -Xmx256M"/>
+</launchConfiguration>
diff --git a/parser/html/java/htmlparser/HtmlParser-compile.launch b/parser/html/java/htmlparser/HtmlParser-compile.launch
new file mode 100644
index 000000000..54e7bc337
--- /dev/null
+++ b/parser/html/java/htmlparser/HtmlParser-compile.launch
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication">
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
+<listEntry value="/htmlparser"/>
+</listAttribute>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
+<listEntry value="4"/>
+</listAttribute>
+<booleanAttribute key="org.eclipse.debug.core.appendEnvironmentVariables" value="true"/>
+<listAttribute key="org.eclipse.jdt.launching.CLASSPATH">
+<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;no&quot;?&gt;&#10;&lt;runtimeClasspathEntry containerPath=&quot;org.eclipse.jdt.launching.JRE_CONTAINER&quot; javaProject=&quot;htmlparser&quot; path=&quot;1&quot; type=&quot;4&quot;/&gt;&#10;"/>
+<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;no&quot;?&gt;&#10;&lt;runtimeClasspathEntry internalArchive=&quot;/htmlparser/src&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#10;"/>
+<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;no&quot;?&gt;&#10;&lt;runtimeClasspathEntry internalArchive=&quot;/htmlparser/gwt-src&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#10;"/>
+<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;no&quot;?&gt;&#10;&lt;runtimeClasspathEntry internalArchive=&quot;/htmlparser/super&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#10;"/>
+<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;no&quot;?&gt;&#10;&lt;runtimeClasspathEntry id=&quot;org.eclipse.jdt.launching.classpathentry.defaultClasspath&quot;&gt;&#10;&lt;memento exportedEntriesOnly=&quot;false&quot; project=&quot;htmlparser&quot;/&gt;&#10;&lt;/runtimeClasspathEntry&gt;&#10;"/>
+</listAttribute>
+<booleanAttribute key="org.eclipse.jdt.launching.DEFAULT_CLASSPATH" value="false"/>
+<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="com.google.gwt.dev.GWTCompiler"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-out /home/hsivonen/Projects/whattf/htmlparser/www nu.validator.htmlparser.HtmlParser"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="htmlparser"/>
+<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Xmx256M"/>
+</launchConfiguration>
diff --git a/parser/html/java/htmlparser/HtmlParser-linux b/parser/html/java/htmlparser/HtmlParser-linux
new file mode 100644
index 000000000..0a9e9deff
--- /dev/null
+++ b/parser/html/java/htmlparser/HtmlParser-linux
@@ -0,0 +1,3 @@
+#!/bin/sh
+APPDIR=$(dirname "$0"); # directory holding this script; "$0" is quoted so a path containing spaces is not word-split
+java -Xmx256M -cp "$APPDIR/src:$APPDIR/gwt-src:$APPDIR/super:$APPDIR/bin:/home/hsivonen/gwt-linux-1.5.1/gwt-user.jar:/home/hsivonen/gwt-linux-1.5.1/gwt-dev-linux.jar" com.google.gwt.dev.GWTShell -out "$APPDIR/www" "$@" nu.validator.htmlparser.HtmlParser/HtmlParser.html; # starts GWTShell on HtmlParser.html with output in $APPDIR/www; extra arguments are forwarded via "$@"
diff --git a/parser/html/java/htmlparser/HtmlParser-shell b/parser/html/java/htmlparser/HtmlParser-shell
new file mode 100644
index 000000000..ffcf2e297
--- /dev/null
+++ b/parser/html/java/htmlparser/HtmlParser-shell
@@ -0,0 +1,3 @@
+#!/bin/sh
+APPDIR=$(dirname "$0"); # directory holding this script; "$0" is quoted so a path containing spaces is not word-split
+java -XstartOnFirstThread -Xmx256M -cp "$APPDIR/src:$APPDIR/gwt-src:$APPDIR/super:$APPDIR/bin:/Developer/gwt-mac-1.5.1/gwt-user.jar:/Developer/gwt-mac-1.5.1/gwt-dev-mac.jar" com.google.gwt.dev.GWTShell -out "$APPDIR/www" "$@" nu.validator.htmlparser.HtmlParser/HtmlParser.html; # starts GWTShell on HtmlParser.html with output in $APPDIR/www; extra arguments are forwarded via "$@"
diff --git a/parser/html/java/htmlparser/HtmlParser.launch b/parser/html/java/htmlparser/HtmlParser.launch
new file mode 100644
index 000000000..9335abf60
--- /dev/null
+++ b/parser/html/java/htmlparser/HtmlParser.launch
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication">
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
+<listEntry value="/htmlparser"/>
+</listAttribute>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
+<listEntry value="4"/>
+</listAttribute>
+<booleanAttribute key="org.eclipse.debug.core.appendEnvironmentVariables" value="true"/>
+<listAttribute key="org.eclipse.jdt.launching.CLASSPATH">
+<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;&#13;&#10;&lt;runtimeClasspathEntry containerPath=&quot;org.eclipse.jdt.launching.JRE_CONTAINER&quot; javaProject=&quot;htmlparser&quot; path=&quot;1&quot; type=&quot;4&quot;/&gt;&#13;&#10;"/>
+<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;&#13;&#10;&lt;runtimeClasspathEntry internalArchive=&quot;/htmlparser/src&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#13;&#10;"/>
+<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;&#13;&#10;&lt;runtimeClasspathEntry internalArchive=&quot;/htmlparser/gwt-src&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#13;&#10;"/>
+<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;&#13;&#10;&lt;runtimeClasspathEntry internalArchive=&quot;/htmlparser/super&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#13;&#10;"/>
+<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;&#13;&#10;&lt;runtimeClasspathEntry id=&quot;org.eclipse.jdt.launching.classpathentry.defaultClasspath&quot;&gt;&#13;&#10;&lt;memento project=&quot;htmlparser&quot;/&gt;&#13;&#10;&lt;/runtimeClasspathEntry&gt;&#13;&#10;"/>
+<listEntry value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;&#13;&#10;&lt;runtimeClasspathEntry externalArchive=&quot;/Developer/gwt-mac-1.5.1/gwt-dev-mac.jar&quot; path=&quot;3&quot; type=&quot;2&quot;/&gt;&#13;&#10;"/>
+</listAttribute>
+<booleanAttribute key="org.eclipse.jdt.launching.DEFAULT_CLASSPATH" value="false"/>
+<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="com.google.gwt.dev.GWTShell"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-out www nu.validator.htmlparser.HtmlParser/HtmlParser.html"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="htmlparser"/>
+<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-XstartOnFirstThread -Xmx256M"/>
+</launchConfiguration>
diff --git a/parser/html/java/htmlparser/LICENSE.txt b/parser/html/java/htmlparser/LICENSE.txt
new file mode 100644
index 000000000..4bfe5d331
--- /dev/null
+++ b/parser/html/java/htmlparser/LICENSE.txt
@@ -0,0 +1,96 @@
+The license below applies to the HTML parser as a whole, except the rewindable
+input stream, the named character classes and the Live DOM Viewer.
+For the copyright notices for individual files, please see individual files.
+
+/*
+ * Copyright (c) 2005, 2006, 2007 Henri Sivonen
+ * Copyright (c) 2007-2012 Mozilla Foundation
+ * Portions of comments Copyright 2004-2007 Apple Computer, Inc., Mozilla
+ * Foundation, and Opera Software ASA.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+The following license is for the WHATWG spec from which the named character
+data was extracted.
+
+/*
+ * Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera
+ * Software ASA.
+ *
+ * You are granted a license to use, reproduce and create derivative works of
+ * this document.
+ */
+
+The following license is for the rewindable input stream.
+
+/*
+ * Copyright (c) 2001-2003 Thai Open Source Software Center Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ * * Neither the name of the Thai Open Source Software Center Ltd nor
+ * the names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+The following license applies to the Live DOM Viewer:
+
+Copyright (c) 2000, 2006, 2008 Ian Hickson and various contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/parser/html/java/htmlparser/README.txt b/parser/html/java/htmlparser/README.txt
new file mode 100644
index 000000000..713b404e8
--- /dev/null
+++ b/parser/html/java/htmlparser/README.txt
@@ -0,0 +1,5 @@
+An HTML5 parser.
+
+Please see http://about.validator.nu/htmlparser/
+
+-- Henri Sivonen (hsivonen@iki.fi).
diff --git a/parser/html/java/htmlparser/doc/README b/parser/html/java/htmlparser/doc/README
new file mode 100644
index 000000000..e0132a41e
--- /dev/null
+++ b/parser/html/java/htmlparser/doc/README
@@ -0,0 +1,15 @@
+tokenization.txt represents the state of the spec implemented in Tokenizer.java.
+
+To get a diffable version corresponding to the current spec:
+lynx -display_charset=utf-8 -dump -nolist http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html > current.txt
+
+tree-construction.txt represents the state of the spec implemented in TreeBuilder.java.
+
+To get a diffable version corresponding to the current spec:
+lynx -display_charset=utf-8 -dump -nolist http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html > current.txt
+
+
+The text of the files in this directory comes from the WHATWG HTML 5 spec
+which carries the following notice:
+© Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera Software ASA.
+You are granted a license to use, reproduce and create derivative works of this document.
diff --git a/parser/html/java/htmlparser/doc/named-character-references.html b/parser/html/java/htmlparser/doc/named-character-references.html
new file mode 100644
index 000000000..5f05a991f
--- /dev/null
+++ b/parser/html/java/htmlparser/doc/named-character-references.html
@@ -0,0 +1,4 @@
+<!-- spec rev 5682 -->
+
+ <table><thead><tr><th> Name </th> <th> Character(s) </th> <th> Glyph </th> <tbody><tr id="entity-AElig"><td> <code title="">AElig;</code> </td> <td> U+000C6 </td> <td> <span class="glyph" title="">&AElig;</span> </td> <tr id="entity-AMP"><td> <code title="">AMP;</code> </td> <td> U+00026 </td> <td> <span class="glyph" title="">&amp;</span> </td> <tr id="entity-Aacute"><td> <code title="">Aacute;</code> </td> <td> U+000C1 </td> <td> <span class="glyph" title="">&Aacute;</span> </td> <tr id="entity-Abreve"><td> <code title="">Abreve;</code> </td> <td> U+00102 </td> <td> <span class="glyph" title="">&#258;</span> </td> <tr id="entity-Acirc"><td> <code title="">Acirc;</code> </td> <td> U+000C2 </td> <td> <span class="glyph" title="">&Acirc;</span> </td> <tr id="entity-Acy"><td> <code title="">Acy;</code> </td> <td> U+00410 </td> <td> <span class="glyph" title="">&#1040;</span> </td> <tr id="entity-Afr"><td> <code title="">Afr;</code> </td> <td> U+1D504 </td> <td> <span class="glyph" title="">&#120068;</span> </td> <tr id="entity-Agrave"><td> <code title="">Agrave;</code> </td> <td> U+000C0 </td> <td> <span class="glyph" title="">&Agrave;</span> </td> <tr id="entity-Alpha"><td> <code title="">Alpha;</code> </td> <td> U+00391 </td> <td> <span class="glyph" title="">&Alpha;</span> </td> <tr id="entity-Amacr"><td> <code title="">Amacr;</code> </td> <td> U+00100 </td> <td> <span class="glyph" title="">&#256;</span> </td> <tr id="entity-And"><td> <code title="">And;</code> </td> <td> U+02A53 </td> <td> <span class="glyph" title="">&#10835;</span> </td> <tr id="entity-Aogon"><td> <code title="">Aogon;</code> </td> <td> U+00104 </td> <td> <span class="glyph" title="">&#260;</span> </td> <tr id="entity-Aopf"><td> <code title="">Aopf;</code> </td> <td> U+1D538 </td> <td> <span class="glyph" title="">&#120120;</span> </td> <tr id="entity-ApplyFunction"><td> <code title="">ApplyFunction;</code> </td> <td> U+02061 </td> <td> <span class="glyph" title="">&#8289;</span> </td> <tr 
id="entity-Aring"><td> <code title="">Aring;</code> </td> <td> U+000C5 </td> <td> <span class="glyph" title="">&Aring;</span> </td> <tr id="entity-Ascr"><td> <code title="">Ascr;</code> </td> <td> U+1D49C </td> <td> <span class="glyph" title="">&#119964;</span> </td> <tr id="entity-Assign"><td> <code title="">Assign;</code> </td> <td> U+02254 </td> <td> <span class="glyph" title="">&#8788;</span> </td> <tr id="entity-Atilde"><td> <code title="">Atilde;</code> </td> <td> U+000C3 </td> <td> <span class="glyph" title="">&Atilde;</span> </td> <tr id="entity-Auml"><td> <code title="">Auml;</code> </td> <td> U+000C4 </td> <td> <span class="glyph" title="">&Auml;</span> </td> <tr id="entity-Backslash"><td> <code title="">Backslash;</code> </td> <td> U+02216 </td> <td> <span class="glyph" title="">&#8726;</span> </td> <tr id="entity-Barv"><td> <code title="">Barv;</code> </td> <td> U+02AE7 </td> <td> <span class="glyph" title="">&#10983;</span> </td> <tr id="entity-Barwed"><td> <code title="">Barwed;</code> </td> <td> U+02306 </td> <td> <span class="glyph" title="">&#8966;</span> </td> <tr id="entity-Bcy"><td> <code title="">Bcy;</code> </td> <td> U+00411 </td> <td> <span class="glyph" title="">&#1041;</span> </td> <tr id="entity-Because"><td> <code title="">Because;</code> </td> <td> U+02235 </td> <td> <span class="glyph" title="">&#8757;</span> </td> <tr id="entity-Bernoullis"><td> <code title="">Bernoullis;</code> </td> <td> U+0212C </td> <td> <span class="glyph" title="">&#8492;</span> </td> <tr id="entity-Beta"><td> <code title="">Beta;</code> </td> <td> U+00392 </td> <td> <span class="glyph" title="">&Beta;</span> </td> <tr id="entity-Bfr"><td> <code title="">Bfr;</code> </td> <td> U+1D505 </td> <td> <span class="glyph" title="">&#120069;</span> </td> <tr id="entity-Bopf"><td> <code title="">Bopf;</code> </td> <td> U+1D539 </td> <td> <span class="glyph" title="">&#120121;</span> </td> <tr id="entity-Breve"><td> <code title="">Breve;</code> </td> <td> U+002D8 </td> 
<td> <span class="glyph" title="">&#728;</span> </td> <tr id="entity-Bscr"><td> <code title="">Bscr;</code> </td> <td> U+0212C </td> <td> <span class="glyph" title="">&#8492;</span> </td> <tr id="entity-Bumpeq"><td> <code title="">Bumpeq;</code> </td> <td> U+0224E </td> <td> <span class="glyph" title="">&#8782;</span> </td> <tr id="entity-CHcy"><td> <code title="">CHcy;</code> </td> <td> U+00427 </td> <td> <span class="glyph" title="">&#1063;</span> </td> <tr id="entity-COPY"><td> <code title="">COPY;</code> </td> <td> U+000A9 </td> <td> <span class="glyph" title="">&copy;</span> </td> <tr id="entity-Cacute"><td> <code title="">Cacute;</code> </td> <td> U+00106 </td> <td> <span class="glyph" title="">&#262;</span> </td> <tr id="entity-Cap"><td> <code title="">Cap;</code> </td> <td> U+022D2 </td> <td> <span class="glyph" title="">&#8914;</span> </td> <tr id="entity-CapitalDifferentialD"><td> <code title="">CapitalDifferentialD;</code> </td> <td> U+02145 </td> <td> <span class="glyph" title="">&#8517;</span> </td> <tr id="entity-Cayleys"><td> <code title="">Cayleys;</code> </td> <td> U+0212D </td> <td> <span class="glyph" title="">&#8493;</span> </td> <tr id="entity-Ccaron"><td> <code title="">Ccaron;</code> </td> <td> U+0010C </td> <td> <span class="glyph" title="">&#268;</span> </td> <tr id="entity-Ccedil"><td> <code title="">Ccedil;</code> </td> <td> U+000C7 </td> <td> <span class="glyph" title="">&Ccedil;</span> </td> <tr id="entity-Ccirc"><td> <code title="">Ccirc;</code> </td> <td> U+00108 </td> <td> <span class="glyph" title="">&#264;</span> </td> <tr id="entity-Cconint"><td> <code title="">Cconint;</code> </td> <td> U+02230 </td> <td> <span class="glyph" title="">&#8752;</span> </td> <tr id="entity-Cdot"><td> <code title="">Cdot;</code> </td> <td> U+0010A </td> <td> <span class="glyph" title="">&#266;</span> </td> <tr id="entity-Cedilla"><td> <code title="">Cedilla;</code> </td> <td> U+000B8 </td> <td> <span class="glyph" title="">&cedil;</span> </td> <tr 
id="entity-CenterDot"><td> <code title="">CenterDot;</code> </td> <td> U+000B7 </td> <td> <span class="glyph" title="">&middot;</span> </td> <tr id="entity-Cfr"><td> <code title="">Cfr;</code> </td> <td> U+0212D </td> <td> <span class="glyph" title="">&#8493;</span> </td> <tr id="entity-Chi"><td> <code title="">Chi;</code> </td> <td> U+003A7 </td> <td> <span class="glyph" title="">&Chi;</span> </td> <tr id="entity-CircleDot"><td> <code title="">CircleDot;</code> </td> <td> U+02299 </td> <td> <span class="glyph" title="">&#8857;</span> </td> <tr id="entity-CircleMinus"><td> <code title="">CircleMinus;</code> </td> <td> U+02296 </td> <td> <span class="glyph" title="">&#8854;</span> </td> <tr id="entity-CirclePlus"><td> <code title="">CirclePlus;</code> </td> <td> U+02295 </td> <td> <span class="glyph" title="">&oplus;</span> </td> <tr id="entity-CircleTimes"><td> <code title="">CircleTimes;</code> </td> <td> U+02297 </td> <td> <span class="glyph" title="">&otimes;</span> </td> <tr id="entity-ClockwiseContourIntegral"><td> <code title="">ClockwiseContourIntegral;</code> </td> <td> U+02232 </td> <td> <span class="glyph" title="">&#8754;</span> </td> <tr id="entity-CloseCurlyDoubleQuote"><td> <code title="">CloseCurlyDoubleQuote;</code> </td> <td> U+0201D </td> <td> <span class="glyph" title="">&rdquo;</span> </td> <tr id="entity-CloseCurlyQuote"><td> <code title="">CloseCurlyQuote;</code> </td> <td> U+02019 </td> <td> <span class="glyph" title="">&rsquo;</span> </td> <tr id="entity-Colon"><td> <code title="">Colon;</code> </td> <td> U+02237 </td> <td> <span class="glyph" title="">&#8759;</span> </td> <tr id="entity-Colone"><td> <code title="">Colone;</code> </td> <td> U+02A74 </td> <td> <span class="glyph" title="">&#10868;</span> </td> <tr id="entity-Congruent"><td> <code title="">Congruent;</code> </td> <td> U+02261 </td> <td> <span class="glyph" title="">&equiv;</span> </td> <tr id="entity-Conint"><td> <code title="">Conint;</code> </td> <td> U+0222F </td> <td> 
<span class="glyph" title="">&#8751;</span> </td> <tr id="entity-ContourIntegral"><td> <code title="">ContourIntegral;</code> </td> <td> U+0222E </td> <td> <span class="glyph" title="">&#8750;</span> </td> <tr id="entity-Copf"><td> <code title="">Copf;</code> </td> <td> U+02102 </td> <td> <span class="glyph" title="">&#8450;</span> </td> <tr id="entity-Coproduct"><td> <code title="">Coproduct;</code> </td> <td> U+02210 </td> <td> <span class="glyph" title="">&#8720;</span> </td> <tr id="entity-CounterClockwiseContourIntegral"><td> <code title="">CounterClockwiseContourIntegral;</code> </td> <td> U+02233 </td> <td> <span class="glyph" title="">&#8755;</span> </td> <tr id="entity-Cross"><td> <code title="">Cross;</code> </td> <td> U+02A2F </td> <td> <span class="glyph" title="">&#10799;</span> </td> <tr id="entity-Cscr"><td> <code title="">Cscr;</code> </td> <td> U+1D49E </td> <td> <span class="glyph" title="">&#119966;</span> </td> <tr id="entity-Cup"><td> <code title="">Cup;</code> </td> <td> U+022D3 </td> <td> <span class="glyph" title="">&#8915;</span> </td> <tr id="entity-CupCap"><td> <code title="">CupCap;</code> </td> <td> U+0224D </td> <td> <span class="glyph" title="">&#8781;</span> </td> <tr id="entity-DD"><td> <code title="">DD;</code> </td> <td> U+02145 </td> <td> <span class="glyph" title="">&#8517;</span> </td> <tr id="entity-DDotrahd"><td> <code title="">DDotrahd;</code> </td> <td> U+02911 </td> <td> <span class="glyph" title="">&#10513;</span> </td> <tr id="entity-DJcy"><td> <code title="">DJcy;</code> </td> <td> U+00402 </td> <td> <span class="glyph" title="">&#1026;</span> </td> <tr id="entity-DScy"><td> <code title="">DScy;</code> </td> <td> U+00405 </td> <td> <span class="glyph" title="">&#1029;</span> </td> <tr id="entity-DZcy"><td> <code title="">DZcy;</code> </td> <td> U+0040F </td> <td> <span class="glyph" title="">&#1039;</span> </td> <tr id="entity-Dagger"><td> <code title="">Dagger;</code> </td> <td> U+02021 </td> <td> <span class="glyph" 
title="">&Dagger;</span> </td> <tr id="entity-Darr"><td> <code title="">Darr;</code> </td> <td> U+021A1 </td> <td> <span class="glyph" title="">&#8609;</span> </td> <tr id="entity-Dashv"><td> <code title="">Dashv;</code> </td> <td> U+02AE4 </td> <td> <span class="glyph" title="">&#10980;</span> </td> <tr id="entity-Dcaron"><td> <code title="">Dcaron;</code> </td> <td> U+0010E </td> <td> <span class="glyph" title="">&#270;</span> </td> <tr id="entity-Dcy"><td> <code title="">Dcy;</code> </td> <td> U+00414 </td> <td> <span class="glyph" title="">&#1044;</span> </td> <tr id="entity-Del"><td> <code title="">Del;</code> </td> <td> U+02207 </td> <td> <span class="glyph" title="">&nabla;</span> </td> <tr id="entity-Delta"><td> <code title="">Delta;</code> </td> <td> U+00394 </td> <td> <span class="glyph" title="">&Delta;</span> </td> <tr id="entity-Dfr"><td> <code title="">Dfr;</code> </td> <td> U+1D507 </td> <td> <span class="glyph" title="">&#120071;</span> </td> <tr id="entity-DiacriticalAcute"><td> <code title="">DiacriticalAcute;</code> </td> <td> U+000B4 </td> <td> <span class="glyph" title="">&acute;</span> </td> <tr id="entity-DiacriticalDot"><td> <code title="">DiacriticalDot;</code> </td> <td> U+002D9 </td> <td> <span class="glyph" title="">&#729;</span> </td> <tr id="entity-DiacriticalDoubleAcute"><td> <code title="">DiacriticalDoubleAcute;</code> </td> <td> U+002DD </td> <td> <span class="glyph" title="">&#733;</span> </td> <tr id="entity-DiacriticalGrave"><td> <code title="">DiacriticalGrave;</code> </td> <td> U+00060 </td> <td> <span class="glyph" title="">`</span> </td> <tr id="entity-DiacriticalTilde"><td> <code title="">DiacriticalTilde;</code> </td> <td> U+002DC </td> <td> <span class="glyph" title="">&tilde;</span> </td> <tr id="entity-Diamond"><td> <code title="">Diamond;</code> </td> <td> U+022C4 </td> <td> <span class="glyph" title="">&#8900;</span> </td> <tr id="entity-DifferentialD"><td> <code title="">DifferentialD;</code> </td> <td> U+02146 </td> 
<td> <span class="glyph" title="">&#8518;</span> </td> <tr id="entity-Dopf"><td> <code title="">Dopf;</code> </td> <td> U+1D53B </td> <td> <span class="glyph" title="">&#120123;</span> </td> <tr id="entity-Dot"><td> <code title="">Dot;</code> </td> <td> U+000A8 </td> <td> <span class="glyph" title="">&uml;</span> </td> <tr id="entity-DotDot"><td> <code title="">DotDot;</code> </td> <td> U+020DC </td> <td> <span class="glyph composition" title="">&#9676;&#8412;</span> </td> <tr id="entity-DotEqual"><td> <code title="">DotEqual;</code> </td> <td> U+02250 </td> <td> <span class="glyph" title="">&#8784;</span> </td> <tr id="entity-DoubleContourIntegral"><td> <code title="">DoubleContourIntegral;</code> </td> <td> U+0222F </td> <td> <span class="glyph" title="">&#8751;</span> </td> <tr id="entity-DoubleDot"><td> <code title="">DoubleDot;</code> </td> <td> U+000A8 </td> <td> <span class="glyph" title="">&uml;</span> </td> <tr id="entity-DoubleDownArrow"><td> <code title="">DoubleDownArrow;</code> </td> <td> U+021D3 </td> <td> <span class="glyph" title="">&dArr;</span> </td> <tr id="entity-DoubleLeftArrow"><td> <code title="">DoubleLeftArrow;</code> </td> <td> U+021D0 </td> <td> <span class="glyph" title="">&lArr;</span> </td> <tr id="entity-DoubleLeftRightArrow"><td> <code title="">DoubleLeftRightArrow;</code> </td> <td> U+021D4 </td> <td> <span class="glyph" title="">&hArr;</span> </td> <tr id="entity-DoubleLeftTee"><td> <code title="">DoubleLeftTee;</code> </td> <td> U+02AE4 </td> <td> <span class="glyph" title="">&#10980;</span> </td> <tr id="entity-DoubleLongLeftArrow"><td> <code title="">DoubleLongLeftArrow;</code> </td> <td> U+027F8 </td> <td> <span class="glyph" title="">&#10232;</span> </td> <tr id="entity-DoubleLongLeftRightArrow"><td> <code title="">DoubleLongLeftRightArrow;</code> </td> <td> U+027FA </td> <td> <span class="glyph" title="">&#10234;</span> </td> <tr id="entity-DoubleLongRightArrow"><td> <code title="">DoubleLongRightArrow;</code> </td> <td> 
U+027F9 </td> <td> <span class="glyph" title="">&#10233;</span> </td> <tr id="entity-DoubleRightArrow"><td> <code title="">DoubleRightArrow;</code> </td> <td> U+021D2 </td> <td> <span class="glyph" title="">&rArr;</span> </td> <tr id="entity-DoubleRightTee"><td> <code title="">DoubleRightTee;</code> </td> <td> U+022A8 </td> <td> <span class="glyph" title="">&#8872;</span> </td> <tr id="entity-DoubleUpArrow"><td> <code title="">DoubleUpArrow;</code> </td> <td> U+021D1 </td> <td> <span class="glyph" title="">&uArr;</span> </td> <tr id="entity-DoubleUpDownArrow"><td> <code title="">DoubleUpDownArrow;</code> </td> <td> U+021D5 </td> <td> <span class="glyph" title="">&#8661;</span> </td> <tr id="entity-DoubleVerticalBar"><td> <code title="">DoubleVerticalBar;</code> </td> <td> U+02225 </td> <td> <span class="glyph" title="">&#8741;</span> </td> <tr id="entity-DownArrow"><td> <code title="">DownArrow;</code> </td> <td> U+02193 </td> <td> <span class="glyph" title="">&darr;</span> </td> <tr id="entity-DownArrowBar"><td> <code title="">DownArrowBar;</code> </td> <td> U+02913 </td> <td> <span class="glyph" title="">&#10515;</span> </td> <tr id="entity-DownArrowUpArrow"><td> <code title="">DownArrowUpArrow;</code> </td> <td> U+021F5 </td> <td> <span class="glyph" title="">&#8693;</span> </td> <tr id="entity-DownBreve"><td> <code title="">DownBreve;</code> </td> <td> U+00311 </td> <td> <span class="glyph composition" title="">&#9676;&#785;</span> </td> <tr id="entity-DownLeftRightVector"><td> <code title="">DownLeftRightVector;</code> </td> <td> U+02950 </td> <td> <span class="glyph" title="">&#10576;</span> </td> <tr id="entity-DownLeftTeeVector"><td> <code title="">DownLeftTeeVector;</code> </td> <td> U+0295E </td> <td> <span class="glyph" title="">&#10590;</span> </td> <tr id="entity-DownLeftVector"><td> <code title="">DownLeftVector;</code> </td> <td> U+021BD </td> <td> <span class="glyph" title="">&#8637;</span> </td> <tr id="entity-DownLeftVectorBar"><td> <code 
title="">DownLeftVectorBar;</code> </td> <td> U+02956 </td> <td> <span class="glyph" title="">&#10582;</span> </td> <tr id="entity-DownRightTeeVector"><td> <code title="">DownRightTeeVector;</code> </td> <td> U+0295F </td> <td> <span class="glyph" title="">&#10591;</span> </td> <tr id="entity-DownRightVector"><td> <code title="">DownRightVector;</code> </td> <td> U+021C1 </td> <td> <span class="glyph" title="">&#8641;</span> </td> <tr id="entity-DownRightVectorBar"><td> <code title="">DownRightVectorBar;</code> </td> <td> U+02957 </td> <td> <span class="glyph" title="">&#10583;</span> </td> <tr id="entity-DownTee"><td> <code title="">DownTee;</code> </td> <td> U+022A4 </td> <td> <span class="glyph" title="">&#8868;</span> </td> <tr id="entity-DownTeeArrow"><td> <code title="">DownTeeArrow;</code> </td> <td> U+021A7 </td> <td> <span class="glyph" title="">&#8615;</span> </td> <tr id="entity-Downarrow"><td> <code title="">Downarrow;</code> </td> <td> U+021D3 </td> <td> <span class="glyph" title="">&dArr;</span> </td> <tr id="entity-Dscr"><td> <code title="">Dscr;</code> </td> <td> U+1D49F </td> <td> <span class="glyph" title="">&#119967;</span> </td> <tr id="entity-Dstrok"><td> <code title="">Dstrok;</code> </td> <td> U+00110 </td> <td> <span class="glyph" title="">&#272;</span> </td> <tr id="entity-ENG"><td> <code title="">ENG;</code> </td> <td> U+0014A </td> <td> <span class="glyph" title="">&#330;</span> </td> <tr id="entity-ETH"><td> <code title="">ETH;</code> </td> <td> U+000D0 </td> <td> <span class="glyph" title="">&ETH;</span> </td> <tr id="entity-Eacute"><td> <code title="">Eacute;</code> </td> <td> U+000C9 </td> <td> <span class="glyph" title="">&Eacute;</span> </td> <tr id="entity-Ecaron"><td> <code title="">Ecaron;</code> </td> <td> U+0011A </td> <td> <span class="glyph" title="">&#282;</span> </td> <tr id="entity-Ecirc"><td> <code title="">Ecirc;</code> </td> <td> U+000CA </td> <td> <span class="glyph" title="">&Ecirc;</span> </td> <tr 
id="entity-Ecy"><td> <code title="">Ecy;</code> </td> <td> U+0042D </td> <td> <span class="glyph" title="">&#1069;</span> </td> <tr id="entity-Edot"><td> <code title="">Edot;</code> </td> <td> U+00116 </td> <td> <span class="glyph" title="">&#278;</span> </td> <tr id="entity-Efr"><td> <code title="">Efr;</code> </td> <td> U+1D508 </td> <td> <span class="glyph" title="">&#120072;</span> </td> <tr id="entity-Egrave"><td> <code title="">Egrave;</code> </td> <td> U+000C8 </td> <td> <span class="glyph" title="">&Egrave;</span> </td> <tr id="entity-Element"><td> <code title="">Element;</code> </td> <td> U+02208 </td> <td> <span class="glyph" title="">&isin;</span> </td> <tr id="entity-Emacr"><td> <code title="">Emacr;</code> </td> <td> U+00112 </td> <td> <span class="glyph" title="">&#274;</span> </td> <tr id="entity-EmptySmallSquare"><td> <code title="">EmptySmallSquare;</code> </td> <td> U+025FB </td> <td> <span class="glyph" title="">&#9723;</span> </td> <tr id="entity-EmptyVerySmallSquare"><td> <code title="">EmptyVerySmallSquare;</code> </td> <td> U+025AB </td> <td> <span class="glyph" title="">&#9643;</span> </td> <tr id="entity-Eogon"><td> <code title="">Eogon;</code> </td> <td> U+00118 </td> <td> <span class="glyph" title="">&#280;</span> </td> <tr id="entity-Eopf"><td> <code title="">Eopf;</code> </td> <td> U+1D53C </td> <td> <span class="glyph" title="">&#120124;</span> </td> <tr id="entity-Epsilon"><td> <code title="">Epsilon;</code> </td> <td> U+00395 </td> <td> <span class="glyph" title="">&Epsilon;</span> </td> <tr id="entity-Equal"><td> <code title="">Equal;</code> </td> <td> U+02A75 </td> <td> <span class="glyph" title="">&#10869;</span> </td> <tr id="entity-EqualTilde"><td> <code title="">EqualTilde;</code> </td> <td> U+02242 </td> <td> <span class="glyph" title="">&#8770;</span> </td> <tr id="entity-Equilibrium"><td> <code title="">Equilibrium;</code> </td> <td> U+021CC </td> <td> <span class="glyph" title="">&#8652;</span> </td> <tr 
id="entity-Escr"><td> <code title="">Escr;</code> </td> <td> U+02130 </td> <td> <span class="glyph" title="">&#8496;</span> </td> <tr id="entity-Esim"><td> <code title="">Esim;</code> </td> <td> U+02A73 </td> <td> <span class="glyph" title="">&#10867;</span> </td> <tr id="entity-Eta"><td> <code title="">Eta;</code> </td> <td> U+00397 </td> <td> <span class="glyph" title="">&Eta;</span> </td> <tr id="entity-Euml"><td> <code title="">Euml;</code> </td> <td> U+000CB </td> <td> <span class="glyph" title="">&Euml;</span> </td> <tr id="entity-Exists"><td> <code title="">Exists;</code> </td> <td> U+02203 </td> <td> <span class="glyph" title="">&exist;</span> </td> <tr id="entity-ExponentialE"><td> <code title="">ExponentialE;</code> </td> <td> U+02147 </td> <td> <span class="glyph" title="">&#8519;</span> </td> <tr id="entity-Fcy"><td> <code title="">Fcy;</code> </td> <td> U+00424 </td> <td> <span class="glyph" title="">&#1060;</span> </td> <tr id="entity-Ffr"><td> <code title="">Ffr;</code> </td> <td> U+1D509 </td> <td> <span class="glyph" title="">&#120073;</span> </td> <tr id="entity-FilledSmallSquare"><td> <code title="">FilledSmallSquare;</code> </td> <td> U+025FC </td> <td> <span class="glyph" title="">&#9724;</span> </td> <tr id="entity-FilledVerySmallSquare"><td> <code title="">FilledVerySmallSquare;</code> </td> <td> U+025AA </td> <td> <span class="glyph" title="">&#9642;</span> </td> <tr id="entity-Fopf"><td> <code title="">Fopf;</code> </td> <td> U+1D53D </td> <td> <span class="glyph" title="">&#120125;</span> </td> <tr id="entity-ForAll"><td> <code title="">ForAll;</code> </td> <td> U+02200 </td> <td> <span class="glyph" title="">&forall;</span> </td> <tr id="entity-Fouriertrf"><td> <code title="">Fouriertrf;</code> </td> <td> U+02131 </td> <td> <span class="glyph" title="">&#8497;</span> </td> <tr id="entity-Fscr"><td> <code title="">Fscr;</code> </td> <td> U+02131 </td> <td> <span class="glyph" title="">&#8497;</span> </td> <tr id="entity-GJcy"><td> <code 
title="">GJcy;</code> </td> <td> U+00403 </td> <td> <span class="glyph" title="">&#1027;</span> </td> <tr id="entity-GT"><td> <code title="">GT;</code> </td> <td> U+0003E </td> <td> <span class="glyph" title="">&gt;</span> </td> <tr id="entity-Gamma"><td> <code title="">Gamma;</code> </td> <td> U+00393 </td> <td> <span class="glyph" title="">&Gamma;</span> </td> <tr id="entity-Gammad"><td> <code title="">Gammad;</code> </td> <td> U+003DC </td> <td> <span class="glyph" title="">&#988;</span> </td> <tr id="entity-Gbreve"><td> <code title="">Gbreve;</code> </td> <td> U+0011E </td> <td> <span class="glyph" title="">&#286;</span> </td> <tr id="entity-Gcedil"><td> <code title="">Gcedil;</code> </td> <td> U+00122 </td> <td> <span class="glyph" title="">&#290;</span> </td> <tr id="entity-Gcirc"><td> <code title="">Gcirc;</code> </td> <td> U+0011C </td> <td> <span class="glyph" title="">&#284;</span> </td> <tr id="entity-Gcy"><td> <code title="">Gcy;</code> </td> <td> U+00413 </td> <td> <span class="glyph" title="">&#1043;</span> </td> <tr id="entity-Gdot"><td> <code title="">Gdot;</code> </td> <td> U+00120 </td> <td> <span class="glyph" title="">&#288;</span> </td> <tr id="entity-Gfr"><td> <code title="">Gfr;</code> </td> <td> U+1D50A </td> <td> <span class="glyph" title="">&#120074;</span> </td> <tr id="entity-Gg"><td> <code title="">Gg;</code> </td> <td> U+022D9 </td> <td> <span class="glyph" title="">&#8921;</span> </td> <tr id="entity-Gopf"><td> <code title="">Gopf;</code> </td> <td> U+1D53E </td> <td> <span class="glyph" title="">&#120126;</span> </td> <tr id="entity-GreaterEqual"><td> <code title="">GreaterEqual;</code> </td> <td> U+02265 </td> <td> <span class="glyph" title="">&ge;</span> </td> <tr id="entity-GreaterEqualLess"><td> <code title="">GreaterEqualLess;</code> </td> <td> U+022DB </td> <td> <span class="glyph" title="">&#8923;</span> </td> <tr id="entity-GreaterFullEqual"><td> <code title="">GreaterFullEqual;</code> </td> <td> U+02267 </td> <td> <span 
class="glyph" title="">&#8807;</span> </td> <tr id="entity-GreaterGreater"><td> <code title="">GreaterGreater;</code> </td> <td> U+02AA2 </td> <td> <span class="glyph" title="">&#10914;</span> </td> <tr id="entity-GreaterLess"><td> <code title="">GreaterLess;</code> </td> <td> U+02277 </td> <td> <span class="glyph" title="">&#8823;</span> </td> <tr id="entity-GreaterSlantEqual"><td> <code title="">GreaterSlantEqual;</code> </td> <td> U+02A7E </td> <td> <span class="glyph" title="">&#10878;</span> </td> <tr id="entity-GreaterTilde"><td> <code title="">GreaterTilde;</code> </td> <td> U+02273 </td> <td> <span class="glyph" title="">&#8819;</span> </td> <tr id="entity-Gscr"><td> <code title="">Gscr;</code> </td> <td> U+1D4A2 </td> <td> <span class="glyph" title="">&#119970;</span> </td> <tr id="entity-Gt"><td> <code title="">Gt;</code> </td> <td> U+0226B </td> <td> <span class="glyph" title="">&#8811;</span> </td> <tr id="entity-HARDcy"><td> <code title="">HARDcy;</code> </td> <td> U+0042A </td> <td> <span class="glyph" title="">&#1066;</span> </td> <tr id="entity-Hacek"><td> <code title="">Hacek;</code> </td> <td> U+002C7 </td> <td> <span class="glyph" title="">&#711;</span> </td> <tr id="entity-Hat"><td> <code title="">Hat;</code> </td> <td> U+0005E </td> <td> <span class="glyph" title="">^</span> </td> <tr id="entity-Hcirc"><td> <code title="">Hcirc;</code> </td> <td> U+00124 </td> <td> <span class="glyph" title="">&#292;</span> </td> <tr id="entity-Hfr"><td> <code title="">Hfr;</code> </td> <td> U+0210C </td> <td> <span class="glyph" title="">&#8460;</span> </td> <tr id="entity-HilbertSpace"><td> <code title="">HilbertSpace;</code> </td> <td> U+0210B </td> <td> <span class="glyph" title="">&#8459;</span> </td> <tr id="entity-Hopf"><td> <code title="">Hopf;</code> </td> <td> U+0210D </td> <td> <span class="glyph" title="">&#8461;</span> </td> <tr id="entity-HorizontalLine"><td> <code title="">HorizontalLine;</code> </td> <td> U+02500 </td> <td> <span class="glyph" 
title="">&#9472;</span> </td> <tr id="entity-Hscr"><td> <code title="">Hscr;</code> </td> <td> U+0210B </td> <td> <span class="glyph" title="">&#8459;</span> </td> <tr id="entity-Hstrok"><td> <code title="">Hstrok;</code> </td> <td> U+00126 </td> <td> <span class="glyph" title="">&#294;</span> </td> <tr id="entity-HumpDownHump"><td> <code title="">HumpDownHump;</code> </td> <td> U+0224E </td> <td> <span class="glyph" title="">&#8782;</span> </td> <tr id="entity-HumpEqual"><td> <code title="">HumpEqual;</code> </td> <td> U+0224F </td> <td> <span class="glyph" title="">&#8783;</span> </td> <tr id="entity-IEcy"><td> <code title="">IEcy;</code> </td> <td> U+00415 </td> <td> <span class="glyph" title="">&#1045;</span> </td> <tr id="entity-IJlig"><td> <code title="">IJlig;</code> </td> <td> U+00132 </td> <td> <span class="glyph" title="">&#306;</span> </td> <tr id="entity-IOcy"><td> <code title="">IOcy;</code> </td> <td> U+00401 </td> <td> <span class="glyph" title="">&#1025;</span> </td> <tr id="entity-Iacute"><td> <code title="">Iacute;</code> </td> <td> U+000CD </td> <td> <span class="glyph" title="">&Iacute;</span> </td> <tr id="entity-Icirc"><td> <code title="">Icirc;</code> </td> <td> U+000CE </td> <td> <span class="glyph" title="">&Icirc;</span> </td> <tr id="entity-Icy"><td> <code title="">Icy;</code> </td> <td> U+00418 </td> <td> <span class="glyph" title="">&#1048;</span> </td> <tr id="entity-Idot"><td> <code title="">Idot;</code> </td> <td> U+00130 </td> <td> <span class="glyph" title="">&#304;</span> </td> <tr id="entity-Ifr"><td> <code title="">Ifr;</code> </td> <td> U+02111 </td> <td> <span class="glyph" title="">&image;</span> </td> <tr id="entity-Igrave"><td> <code title="">Igrave;</code> </td> <td> U+000CC </td> <td> <span class="glyph" title="">&Igrave;</span> </td> <tr id="entity-Im"><td> <code title="">Im;</code> </td> <td> U+02111 </td> <td> <span class="glyph" title="">&image;</span> </td> <tr id="entity-Imacr"><td> <code title="">Imacr;</code> 
</td> <td> U+0012A </td> <td> <span class="glyph" title="">&#298;</span> </td> <tr id="entity-ImaginaryI"><td> <code title="">ImaginaryI;</code> </td> <td> U+02148 </td> <td> <span class="glyph" title="">&#8520;</span> </td> <tr id="entity-Implies"><td> <code title="">Implies;</code> </td> <td> U+021D2 </td> <td> <span class="glyph" title="">&rArr;</span> </td> <tr id="entity-Int"><td> <code title="">Int;</code> </td> <td> U+0222C </td> <td> <span class="glyph" title="">&#8748;</span> </td> <tr id="entity-Integral"><td> <code title="">Integral;</code> </td> <td> U+0222B </td> <td> <span class="glyph" title="">&int;</span> </td> <tr id="entity-Intersection"><td> <code title="">Intersection;</code> </td> <td> U+022C2 </td> <td> <span class="glyph" title="">&#8898;</span> </td> <tr id="entity-InvisibleComma"><td> <code title="">InvisibleComma;</code> </td> <td> U+02063 </td> <td> <span class="glyph" title="">&#8291;</span> </td> <tr id="entity-InvisibleTimes"><td> <code title="">InvisibleTimes;</code> </td> <td> U+02062 </td> <td> <span class="glyph" title="">&#8290;</span> </td> <tr id="entity-Iogon"><td> <code title="">Iogon;</code> </td> <td> U+0012E </td> <td> <span class="glyph" title="">&#302;</span> </td> <tr id="entity-Iopf"><td> <code title="">Iopf;</code> </td> <td> U+1D540 </td> <td> <span class="glyph" title="">&#120128;</span> </td> <tr id="entity-Iota"><td> <code title="">Iota;</code> </td> <td> U+00399 </td> <td> <span class="glyph" title="">&Iota;</span> </td> <tr id="entity-Iscr"><td> <code title="">Iscr;</code> </td> <td> U+02110 </td> <td> <span class="glyph" title="">&#8464;</span> </td> <tr id="entity-Itilde"><td> <code title="">Itilde;</code> </td> <td> U+00128 </td> <td> <span class="glyph" title="">&#296;</span> </td> <tr id="entity-Iukcy"><td> <code title="">Iukcy;</code> </td> <td> U+00406 </td> <td> <span class="glyph" title="">&#1030;</span> </td> <tr id="entity-Iuml"><td> <code title="">Iuml;</code> </td> <td> U+000CF </td> <td> <span 
class="glyph" title="">&Iuml;</span> </td> <tr id="entity-Jcirc"><td> <code title="">Jcirc;</code> </td> <td> U+00134 </td> <td> <span class="glyph" title="">&#308;</span> </td> <tr id="entity-Jcy"><td> <code title="">Jcy;</code> </td> <td> U+00419 </td> <td> <span class="glyph" title="">&#1049;</span> </td> <tr id="entity-Jfr"><td> <code title="">Jfr;</code> </td> <td> U+1D50D </td> <td> <span class="glyph" title="">&#120077;</span> </td> <tr id="entity-Jopf"><td> <code title="">Jopf;</code> </td> <td> U+1D541 </td> <td> <span class="glyph" title="">&#120129;</span> </td> <tr id="entity-Jscr"><td> <code title="">Jscr;</code> </td> <td> U+1D4A5 </td> <td> <span class="glyph" title="">&#119973;</span> </td> <tr id="entity-Jsercy"><td> <code title="">Jsercy;</code> </td> <td> U+00408 </td> <td> <span class="glyph" title="">&#1032;</span> </td> <tr id="entity-Jukcy"><td> <code title="">Jukcy;</code> </td> <td> U+00404 </td> <td> <span class="glyph" title="">&#1028;</span> </td> <tr id="entity-KHcy"><td> <code title="">KHcy;</code> </td> <td> U+00425 </td> <td> <span class="glyph" title="">&#1061;</span> </td> <tr id="entity-KJcy"><td> <code title="">KJcy;</code> </td> <td> U+0040C </td> <td> <span class="glyph" title="">&#1036;</span> </td> <tr id="entity-Kappa"><td> <code title="">Kappa;</code> </td> <td> U+0039A </td> <td> <span class="glyph" title="">&Kappa;</span> </td> <tr id="entity-Kcedil"><td> <code title="">Kcedil;</code> </td> <td> U+00136 </td> <td> <span class="glyph" title="">&#310;</span> </td> <tr id="entity-Kcy"><td> <code title="">Kcy;</code> </td> <td> U+0041A </td> <td> <span class="glyph" title="">&#1050;</span> </td> <tr id="entity-Kfr"><td> <code title="">Kfr;</code> </td> <td> U+1D50E </td> <td> <span class="glyph" title="">&#120078;</span> </td> <tr id="entity-Kopf"><td> <code title="">Kopf;</code> </td> <td> U+1D542 </td> <td> <span class="glyph" title="">&#120130;</span> </td> <tr id="entity-Kscr"><td> <code title="">Kscr;</code> </td> <td> 
U+1D4A6 </td> <td> <span class="glyph" title="">&#119974;</span> </td> <tr id="entity-LJcy"><td> <code title="">LJcy;</code> </td> <td> U+00409 </td> <td> <span class="glyph" title="">&#1033;</span> </td> <tr id="entity-LT"><td> <code title="">LT;</code> </td> <td> U+0003C </td> <td> <span class="glyph" title="">&lt;</span> </td> <tr id="entity-Lacute"><td> <code title="">Lacute;</code> </td> <td> U+00139 </td> <td> <span class="glyph" title="">&#313;</span> </td> <tr id="entity-Lambda"><td> <code title="">Lambda;</code> </td> <td> U+0039B </td> <td> <span class="glyph" title="">&Lambda;</span> </td> <tr id="entity-Lang"><td> <code title="">Lang;</code> </td> <td> U+027EA </td> <td> <span class="glyph" title="">&#10218;</span> </td> <tr id="entity-Laplacetrf"><td> <code title="">Laplacetrf;</code> </td> <td> U+02112 </td> <td> <span class="glyph" title="">&#8466;</span> </td> <tr id="entity-Larr"><td> <code title="">Larr;</code> </td> <td> U+0219E </td> <td> <span class="glyph" title="">&#8606;</span> </td> <tr id="entity-Lcaron"><td> <code title="">Lcaron;</code> </td> <td> U+0013D </td> <td> <span class="glyph" title="">&#317;</span> </td> <tr id="entity-Lcedil"><td> <code title="">Lcedil;</code> </td> <td> U+0013B </td> <td> <span class="glyph" title="">&#315;</span> </td> <tr id="entity-Lcy"><td> <code title="">Lcy;</code> </td> <td> U+0041B </td> <td> <span class="glyph" title="">&#1051;</span> </td> <tr id="entity-LeftAngleBracket"><td> <code title="">LeftAngleBracket;</code> </td> <td> U+027E8 </td> <td> <span class="glyph" title="">&#9001;</span> </td> <tr id="entity-LeftArrow"><td> <code title="">LeftArrow;</code> </td> <td> U+02190 </td> <td> <span class="glyph" title="">&larr;</span> </td> <tr id="entity-LeftArrowBar"><td> <code title="">LeftArrowBar;</code> </td> <td> U+021E4 </td> <td> <span class="glyph" title="">&#8676;</span> </td> <tr id="entity-LeftArrowRightArrow"><td> <code title="">LeftArrowRightArrow;</code> </td> <td> U+021C6 </td> <td> <span 
class="glyph" title="">&#8646;</span> </td> <tr id="entity-LeftCeiling"><td> <code title="">LeftCeiling;</code> </td> <td> U+02308 </td> <td> <span class="glyph" title="">&lceil;</span> </td> <tr id="entity-LeftDoubleBracket"><td> <code title="">LeftDoubleBracket;</code> </td> <td> U+027E6 </td> <td> <span class="glyph" title="">&#10214;</span> </td> <tr id="entity-LeftDownTeeVector"><td> <code title="">LeftDownTeeVector;</code> </td> <td> U+02961 </td> <td> <span class="glyph" title="">&#10593;</span> </td> <tr id="entity-LeftDownVector"><td> <code title="">LeftDownVector;</code> </td> <td> U+021C3 </td> <td> <span class="glyph" title="">&#8643;</span> </td> <tr id="entity-LeftDownVectorBar"><td> <code title="">LeftDownVectorBar;</code> </td> <td> U+02959 </td> <td> <span class="glyph" title="">&#10585;</span> </td> <tr id="entity-LeftFloor"><td> <code title="">LeftFloor;</code> </td> <td> U+0230A </td> <td> <span class="glyph" title="">&lfloor;</span> </td> <tr id="entity-LeftRightArrow"><td> <code title="">LeftRightArrow;</code> </td> <td> U+02194 </td> <td> <span class="glyph" title="">&harr;</span> </td> <tr id="entity-LeftRightVector"><td> <code title="">LeftRightVector;</code> </td> <td> U+0294E </td> <td> <span class="glyph" title="">&#10574;</span> </td> <tr id="entity-LeftTee"><td> <code title="">LeftTee;</code> </td> <td> U+022A3 </td> <td> <span class="glyph" title="">&#8867;</span> </td> <tr id="entity-LeftTeeArrow"><td> <code title="">LeftTeeArrow;</code> </td> <td> U+021A4 </td> <td> <span class="glyph" title="">&#8612;</span> </td> <tr id="entity-LeftTeeVector"><td> <code title="">LeftTeeVector;</code> </td> <td> U+0295A </td> <td> <span class="glyph" title="">&#10586;</span> </td> <tr id="entity-LeftTriangle"><td> <code title="">LeftTriangle;</code> </td> <td> U+022B2 </td> <td> <span class="glyph" title="">&#8882;</span> </td> <tr id="entity-LeftTriangleBar"><td> <code title="">LeftTriangleBar;</code> </td> <td> U+029CF </td> <td> <span 
class="glyph" title="">&#10703;</span> </td> <tr id="entity-LeftTriangleEqual"><td> <code title="">LeftTriangleEqual;</code> </td> <td> U+022B4 </td> <td> <span class="glyph" title="">&#8884;</span> </td> <tr id="entity-LeftUpDownVector"><td> <code title="">LeftUpDownVector;</code> </td> <td> U+02951 </td> <td> <span class="glyph" title="">&#10577;</span> </td> <tr id="entity-LeftUpTeeVector"><td> <code title="">LeftUpTeeVector;</code> </td> <td> U+02960 </td> <td> <span class="glyph" title="">&#10592;</span> </td> <tr id="entity-LeftUpVector"><td> <code title="">LeftUpVector;</code> </td> <td> U+021BF </td> <td> <span class="glyph" title="">&#8639;</span> </td> <tr id="entity-LeftUpVectorBar"><td> <code title="">LeftUpVectorBar;</code> </td> <td> U+02958 </td> <td> <span class="glyph" title="">&#10584;</span> </td> <tr id="entity-LeftVector"><td> <code title="">LeftVector;</code> </td> <td> U+021BC </td> <td> <span class="glyph" title="">&#8636;</span> </td> <tr id="entity-LeftVectorBar"><td> <code title="">LeftVectorBar;</code> </td> <td> U+02952 </td> <td> <span class="glyph" title="">&#10578;</span> </td> <tr id="entity-Leftarrow"><td> <code title="">Leftarrow;</code> </td> <td> U+021D0 </td> <td> <span class="glyph" title="">&lArr;</span> </td> <tr id="entity-Leftrightarrow"><td> <code title="">Leftrightarrow;</code> </td> <td> U+021D4 </td> <td> <span class="glyph" title="">&hArr;</span> </td> <tr id="entity-LessEqualGreater"><td> <code title="">LessEqualGreater;</code> </td> <td> U+022DA </td> <td> <span class="glyph" title="">&#8922;</span> </td> <tr id="entity-LessFullEqual"><td> <code title="">LessFullEqual;</code> </td> <td> U+02266 </td> <td> <span class="glyph" title="">&#8806;</span> </td> <tr id="entity-LessGreater"><td> <code title="">LessGreater;</code> </td> <td> U+02276 </td> <td> <span class="glyph" title="">&#8822;</span> </td> <tr id="entity-LessLess"><td> <code title="">LessLess;</code> </td> <td> U+02AA1 </td> <td> <span class="glyph" 
title="">&#10913;</span> </td> <tr id="entity-LessSlantEqual"><td> <code title="">LessSlantEqual;</code> </td> <td> U+02A7D </td> <td> <span class="glyph" title="">&#10877;</span> </td> <tr id="entity-LessTilde"><td> <code title="">LessTilde;</code> </td> <td> U+02272 </td> <td> <span class="glyph" title="">&#8818;</span> </td> <tr id="entity-Lfr"><td> <code title="">Lfr;</code> </td> <td> U+1D50F </td> <td> <span class="glyph" title="">&#120079;</span> </td> <tr id="entity-Ll"><td> <code title="">Ll;</code> </td> <td> U+022D8 </td> <td> <span class="glyph" title="">&#8920;</span> </td> <tr id="entity-Lleftarrow"><td> <code title="">Lleftarrow;</code> </td> <td> U+021DA </td> <td> <span class="glyph" title="">&#8666;</span> </td> <tr id="entity-Lmidot"><td> <code title="">Lmidot;</code> </td> <td> U+0013F </td> <td> <span class="glyph" title="">&#319;</span> </td> <tr id="entity-LongLeftArrow"><td> <code title="">LongLeftArrow;</code> </td> <td> U+027F5 </td> <td> <span class="glyph" title="">&#10229;</span> </td> <tr id="entity-LongLeftRightArrow"><td> <code title="">LongLeftRightArrow;</code> </td> <td> U+027F7 </td> <td> <span class="glyph" title="">&#10231;</span> </td> <tr id="entity-LongRightArrow"><td> <code title="">LongRightArrow;</code> </td> <td> U+027F6 </td> <td> <span class="glyph" title="">&#10230;</span> </td> <tr id="entity-Longleftarrow"><td> <code title="">Longleftarrow;</code> </td> <td> U+027F8 </td> <td> <span class="glyph" title="">&#10232;</span> </td> <tr id="entity-Longleftrightarrow"><td> <code title="">Longleftrightarrow;</code> </td> <td> U+027FA </td> <td> <span class="glyph" title="">&#10234;</span> </td> <tr id="entity-Longrightarrow"><td> <code title="">Longrightarrow;</code> </td> <td> U+027F9 </td> <td> <span class="glyph" title="">&#10233;</span> </td> <tr id="entity-Lopf"><td> <code title="">Lopf;</code> </td> <td> U+1D543 </td> <td> <span class="glyph" title="">&#120131;</span> </td> <tr id="entity-LowerLeftArrow"><td> <code 
title="">LowerLeftArrow;</code> </td> <td> U+02199 </td> <td> <span class="glyph" title="">&#8601;</span> </td> <tr id="entity-LowerRightArrow"><td> <code title="">LowerRightArrow;</code> </td> <td> U+02198 </td> <td> <span class="glyph" title="">&#8600;</span> </td> <tr id="entity-Lscr"><td> <code title="">Lscr;</code> </td> <td> U+02112 </td> <td> <span class="glyph" title="">&#8466;</span> </td> <tr id="entity-Lsh"><td> <code title="">Lsh;</code> </td> <td> U+021B0 </td> <td> <span class="glyph" title="">&#8624;</span> </td> <tr id="entity-Lstrok"><td> <code title="">Lstrok;</code> </td> <td> U+00141 </td> <td> <span class="glyph" title="">&#321;</span> </td> <tr id="entity-Lt"><td> <code title="">Lt;</code> </td> <td> U+0226A </td> <td> <span class="glyph" title="">&#8810;</span> </td> <tr id="entity-Map"><td> <code title="">Map;</code> </td> <td> U+02905 </td> <td> <span class="glyph" title="">&#10501;</span> </td> <tr id="entity-Mcy"><td> <code title="">Mcy;</code> </td> <td> U+0041C </td> <td> <span class="glyph" title="">&#1052;</span> </td> <tr id="entity-MediumSpace"><td> <code title="">MediumSpace;</code> </td> <td> U+0205F </td> <td> <span class="glyph" title="">&#8287;</span> </td> <tr id="entity-Mellintrf"><td> <code title="">Mellintrf;</code> </td> <td> U+02133 </td> <td> <span class="glyph" title="">&#8499;</span> </td> <tr id="entity-Mfr"><td> <code title="">Mfr;</code> </td> <td> U+1D510 </td> <td> <span class="glyph" title="">&#120080;</span> </td> <tr id="entity-MinusPlus"><td> <code title="">MinusPlus;</code> </td> <td> U+02213 </td> <td> <span class="glyph" title="">&#8723;</span> </td> <tr id="entity-Mopf"><td> <code title="">Mopf;</code> </td> <td> U+1D544 </td> <td> <span class="glyph" title="">&#120132;</span> </td> <tr id="entity-Mscr"><td> <code title="">Mscr;</code> </td> <td> U+02133 </td> <td> <span class="glyph" title="">&#8499;</span> </td> <tr id="entity-Mu"><td> <code title="">Mu;</code> </td> <td> U+0039C </td> <td> <span 
class="glyph" title="">&Mu;</span> </td> <tr id="entity-NJcy"><td> <code title="">NJcy;</code> </td> <td> U+0040A </td> <td> <span class="glyph" title="">&#1034;</span> </td> <tr id="entity-Nacute"><td> <code title="">Nacute;</code> </td> <td> U+00143 </td> <td> <span class="glyph" title="">&#323;</span> </td> <tr id="entity-Ncaron"><td> <code title="">Ncaron;</code> </td> <td> U+00147 </td> <td> <span class="glyph" title="">&#327;</span> </td> <tr id="entity-Ncedil"><td> <code title="">Ncedil;</code> </td> <td> U+00145 </td> <td> <span class="glyph" title="">&#325;</span> </td> <tr id="entity-Ncy"><td> <code title="">Ncy;</code> </td> <td> U+0041D </td> <td> <span class="glyph" title="">&#1053;</span> </td> <tr id="entity-NegativeMediumSpace"><td> <code title="">NegativeMediumSpace;</code> </td> <td> U+0200B </td> <td> <span class="glyph" title="">&#8203;</span> </td> <tr id="entity-NegativeThickSpace"><td> <code title="">NegativeThickSpace;</code> </td> <td> U+0200B </td> <td> <span class="glyph" title="">&#8203;</span> </td> <tr id="entity-NegativeThinSpace"><td> <code title="">NegativeThinSpace;</code> </td> <td> U+0200B </td> <td> <span class="glyph" title="">&#8203;</span> </td> <tr id="entity-NegativeVeryThinSpace"><td> <code title="">NegativeVeryThinSpace;</code> </td> <td> U+0200B </td> <td> <span class="glyph" title="">&#8203;</span> </td> <tr id="entity-NestedGreaterGreater"><td> <code title="">NestedGreaterGreater;</code> </td> <td> U+0226B </td> <td> <span class="glyph" title="">&#8811;</span> </td> <tr id="entity-NestedLessLess"><td> <code title="">NestedLessLess;</code> </td> <td> U+0226A </td> <td> <span class="glyph" title="">&#8810;</span> </td> <tr id="entity-NewLine"><td> <code title="">NewLine;</code> </td> <td> U+0000A </td> <td> <span class="glyph control" title="">&#9226;</span> </td> <tr id="entity-Nfr"><td> <code title="">Nfr;</code> </td> <td> U+1D511 </td> <td> <span class="glyph" title="">&#120081;</span> </td> <tr 
id="entity-NoBreak"><td> <code title="">NoBreak;</code> </td> <td> U+02060 </td> <td> <span class="glyph" title="">&#8288;</span> </td> <tr id="entity-NonBreakingSpace"><td> <code title="">NonBreakingSpace;</code> </td> <td> U+000A0 </td> <td> <span class="glyph" title="">&nbsp;</span> </td> <tr id="entity-Nopf"><td> <code title="">Nopf;</code> </td> <td> U+02115 </td> <td> <span class="glyph" title="">&#8469;</span> </td> <tr id="entity-Not"><td> <code title="">Not;</code> </td> <td> U+02AEC </td> <td> <span class="glyph" title="">&#10988;</span> </td> <tr id="entity-NotCongruent"><td> <code title="">NotCongruent;</code> </td> <td> U+02262 </td> <td> <span class="glyph" title="">&#8802;</span> </td> <tr id="entity-NotCupCap"><td> <code title="">NotCupCap;</code> </td> <td> U+0226D </td> <td> <span class="glyph" title="">&#8813;</span> </td> <tr id="entity-NotDoubleVerticalBar"><td> <code title="">NotDoubleVerticalBar;</code> </td> <td> U+02226 </td> <td> <span class="glyph" title="">&#8742;</span> </td> <tr id="entity-NotElement"><td> <code title="">NotElement;</code> </td> <td> U+02209 </td> <td> <span class="glyph" title="">&notin;</span> </td> <tr id="entity-NotEqual"><td> <code title="">NotEqual;</code> </td> <td> U+02260 </td> <td> <span class="glyph" title="">&ne;</span> </td> <tr id="entity-NotEqualTilde"><td> <code title="">NotEqualTilde;</code> </td> <td> U+02242 U+00338 </td> <td> <span class="glyph compound" title="">&#8770;&#824;</span> </td> <tr id="entity-NotExists"><td> <code title="">NotExists;</code> </td> <td> U+02204 </td> <td> <span class="glyph" title="">&#8708;</span> </td> <tr id="entity-NotGreater"><td> <code title="">NotGreater;</code> </td> <td> U+0226F </td> <td> <span class="glyph" title="">&#8815;</span> </td> <tr id="entity-NotGreaterEqual"><td> <code title="">NotGreaterEqual;</code> </td> <td> U+02271 </td> <td> <span class="glyph" title="">&#8817;</span> </td> <tr id="entity-NotGreaterFullEqual"><td> <code 
title="">NotGreaterFullEqual;</code> </td> <td> U+02267 U+00338 </td> <td> <span class="glyph compound" title="">&#8807;&#824;</span> </td> <tr id="entity-NotGreaterGreater"><td> <code title="">NotGreaterGreater;</code> </td> <td> U+0226B U+00338 </td> <td> <span class="glyph compound" title="">&#8811;&#824;</span> </td> <tr id="entity-NotGreaterLess"><td> <code title="">NotGreaterLess;</code> </td> <td> U+02279 </td> <td> <span class="glyph" title="">&#8825;</span> </td> <tr id="entity-NotGreaterSlantEqual"><td> <code title="">NotGreaterSlantEqual;</code> </td> <td> U+02A7E U+00338 </td> <td> <span class="glyph compound" title="">&#10878;&#824;</span> </td> <tr id="entity-NotGreaterTilde"><td> <code title="">NotGreaterTilde;</code> </td> <td> U+02275 </td> <td> <span class="glyph" title="">&#8821;</span> </td> <tr id="entity-NotHumpDownHump"><td> <code title="">NotHumpDownHump;</code> </td> <td> U+0224E U+00338 </td> <td> <span class="glyph compound" title="">&#8782;&#824;</span> </td> <tr id="entity-NotHumpEqual"><td> <code title="">NotHumpEqual;</code> </td> <td> U+0224F U+00338 </td> <td> <span class="glyph compound" title="">&#8783;&#824;</span> </td> <tr id="entity-NotLeftTriangle"><td> <code title="">NotLeftTriangle;</code> </td> <td> U+022EA </td> <td> <span class="glyph" title="">&#8938;</span> </td> <tr id="entity-NotLeftTriangleBar"><td> <code title="">NotLeftTriangleBar;</code> </td> <td> U+029CF U+00338 </td> <td> <span class="glyph compound" title="">&#10703;&#824;</span> </td> <tr id="entity-NotLeftTriangleEqual"><td> <code title="">NotLeftTriangleEqual;</code> </td> <td> U+022EC </td> <td> <span class="glyph" title="">&#8940;</span> </td> <tr id="entity-NotLess"><td> <code title="">NotLess;</code> </td> <td> U+0226E </td> <td> <span class="glyph" title="">&#8814;</span> </td> <tr id="entity-NotLessEqual"><td> <code title="">NotLessEqual;</code> </td> <td> U+02270 </td> <td> <span class="glyph" title="">&#8816;</span> </td> <tr 
id="entity-NotLessGreater"><td> <code title="">NotLessGreater;</code> </td> <td> U+02278 </td> <td> <span class="glyph" title="">&#8824;</span> </td> <tr id="entity-NotLessLess"><td> <code title="">NotLessLess;</code> </td> <td> U+0226A U+00338 </td> <td> <span class="glyph compound" title="">&#8810;&#824;</span> </td> <tr id="entity-NotLessSlantEqual"><td> <code title="">NotLessSlantEqual;</code> </td> <td> U+02A7D U+00338 </td> <td> <span class="glyph compound" title="">&#10877;&#824;</span> </td> <tr id="entity-NotLessTilde"><td> <code title="">NotLessTilde;</code> </td> <td> U+02274 </td> <td> <span class="glyph" title="">&#8820;</span> </td> <tr id="entity-NotNestedGreaterGreater"><td> <code title="">NotNestedGreaterGreater;</code> </td> <td> U+02AA2 U+00338 </td> <td> <span class="glyph compound" title="">&#10914;&#824;</span> </td> <tr id="entity-NotNestedLessLess"><td> <code title="">NotNestedLessLess;</code> </td> <td> U+02AA1 U+00338 </td> <td> <span class="glyph compound" title="">&#10913;&#824;</span> </td> <tr id="entity-NotPrecedes"><td> <code title="">NotPrecedes;</code> </td> <td> U+02280 </td> <td> <span class="glyph" title="">&#8832;</span> </td> <tr id="entity-NotPrecedesEqual"><td> <code title="">NotPrecedesEqual;</code> </td> <td> U+02AAF U+00338 </td> <td> <span class="glyph compound" title="">&#10927;&#824;</span> </td> <tr id="entity-NotPrecedesSlantEqual"><td> <code title="">NotPrecedesSlantEqual;</code> </td> <td> U+022E0 </td> <td> <span class="glyph" title="">&#8928;</span> </td> <tr id="entity-NotReverseElement"><td> <code title="">NotReverseElement;</code> </td> <td> U+0220C </td> <td> <span class="glyph" title="">&#8716;</span> </td> <tr id="entity-NotRightTriangle"><td> <code title="">NotRightTriangle;</code> </td> <td> U+022EB </td> <td> <span class="glyph" title="">&#8939;</span> </td> <tr id="entity-NotRightTriangleBar"><td> <code title="">NotRightTriangleBar;</code> </td> <td> U+029D0 U+00338 </td> <td> <span class="glyph 
compound" title="">&#10704;&#824;</span> </td> <tr id="entity-NotRightTriangleEqual"><td> <code title="">NotRightTriangleEqual;</code> </td> <td> U+022ED </td> <td> <span class="glyph" title="">&#8941;</span> </td> <tr id="entity-NotSquareSubset"><td> <code title="">NotSquareSubset;</code> </td> <td> U+0228F U+00338 </td> <td> <span class="glyph compound" title="">&#8847;&#824;</span> </td> <tr id="entity-NotSquareSubsetEqual"><td> <code title="">NotSquareSubsetEqual;</code> </td> <td> U+022E2 </td> <td> <span class="glyph" title="">&#8930;</span> </td> <tr id="entity-NotSquareSuperset"><td> <code title="">NotSquareSuperset;</code> </td> <td> U+02290 U+00338 </td> <td> <span class="glyph compound" title="">&#8848;&#824;</span> </td> <tr id="entity-NotSquareSupersetEqual"><td> <code title="">NotSquareSupersetEqual;</code> </td> <td> U+022E3 </td> <td> <span class="glyph" title="">&#8931;</span> </td> <tr id="entity-NotSubset"><td> <code title="">NotSubset;</code> </td> <td> U+02282 U+020D2 </td> <td> <span class="glyph compound" title="">&sub;&#8402;</span> </td> <tr id="entity-NotSubsetEqual"><td> <code title="">NotSubsetEqual;</code> </td> <td> U+02288 </td> <td> <span class="glyph" title="">&#8840;</span> </td> <tr id="entity-NotSucceeds"><td> <code title="">NotSucceeds;</code> </td> <td> U+02281 </td> <td> <span class="glyph" title="">&#8833;</span> </td> <tr id="entity-NotSucceedsEqual"><td> <code title="">NotSucceedsEqual;</code> </td> <td> U+02AB0 U+00338 </td> <td> <span class="glyph compound" title="">&#10928;&#824;</span> </td> <tr id="entity-NotSucceedsSlantEqual"><td> <code title="">NotSucceedsSlantEqual;</code> </td> <td> U+022E1 </td> <td> <span class="glyph" title="">&#8929;</span> </td> <tr id="entity-NotSucceedsTilde"><td> <code title="">NotSucceedsTilde;</code> </td> <td> U+0227F U+00338 </td> <td> <span class="glyph compound" title="">&#8831;&#824;</span> </td> <tr id="entity-NotSuperset"><td> <code title="">NotSuperset;</code> </td> <td> U+02283 
U+020D2 </td> <td> <span class="glyph compound" title="">&sup;&#8402;</span> </td> <tr id="entity-NotSupersetEqual"><td> <code title="">NotSupersetEqual;</code> </td> <td> U+02289 </td> <td> <span class="glyph" title="">&#8841;</span> </td> <tr id="entity-NotTilde"><td> <code title="">NotTilde;</code> </td> <td> U+02241 </td> <td> <span class="glyph" title="">&#8769;</span> </td> <tr id="entity-NotTildeEqual"><td> <code title="">NotTildeEqual;</code> </td> <td> U+02244 </td> <td> <span class="glyph" title="">&#8772;</span> </td> <tr id="entity-NotTildeFullEqual"><td> <code title="">NotTildeFullEqual;</code> </td> <td> U+02247 </td> <td> <span class="glyph" title="">&#8775;</span> </td> <tr id="entity-NotTildeTilde"><td> <code title="">NotTildeTilde;</code> </td> <td> U+02249 </td> <td> <span class="glyph" title="">&#8777;</span> </td> <tr id="entity-NotVerticalBar"><td> <code title="">NotVerticalBar;</code> </td> <td> U+02224 </td> <td> <span class="glyph" title="">&#8740;</span> </td> <tr id="entity-Nscr"><td> <code title="">Nscr;</code> </td> <td> U+1D4A9 </td> <td> <span class="glyph" title="">&#119977;</span> </td> <tr id="entity-Ntilde"><td> <code title="">Ntilde;</code> </td> <td> U+000D1 </td> <td> <span class="glyph" title="">&Ntilde;</span> </td> <tr id="entity-Nu"><td> <code title="">Nu;</code> </td> <td> U+0039D </td> <td> <span class="glyph" title="">&Nu;</span> </td> <tr id="entity-OElig"><td> <code title="">OElig;</code> </td> <td> U+00152 </td> <td> <span class="glyph" title="">&OElig;</span> </td> <tr id="entity-Oacute"><td> <code title="">Oacute;</code> </td> <td> U+000D3 </td> <td> <span class="glyph" title="">&Oacute;</span> </td> <tr id="entity-Ocirc"><td> <code title="">Ocirc;</code> </td> <td> U+000D4 </td> <td> <span class="glyph" title="">&Ocirc;</span> </td> <tr id="entity-Ocy"><td> <code title="">Ocy;</code> </td> <td> U+0041E </td> <td> <span class="glyph" title="">&#1054;</span> </td> <tr id="entity-Odblac"><td> <code 
title="">Odblac;</code> </td> <td> U+00150 </td> <td> <span class="glyph" title="">&#336;</span> </td> <tr id="entity-Ofr"><td> <code title="">Ofr;</code> </td> <td> U+1D512 </td> <td> <span class="glyph" title="">&#120082;</span> </td> <tr id="entity-Ograve"><td> <code title="">Ograve;</code> </td> <td> U+000D2 </td> <td> <span class="glyph" title="">&Ograve;</span> </td> <tr id="entity-Omacr"><td> <code title="">Omacr;</code> </td> <td> U+0014C </td> <td> <span class="glyph" title="">&#332;</span> </td> <tr id="entity-Omega"><td> <code title="">Omega;</code> </td> <td> U+003A9 </td> <td> <span class="glyph" title="">&Omega;</span> </td> <tr id="entity-Omicron"><td> <code title="">Omicron;</code> </td> <td> U+0039F </td> <td> <span class="glyph" title="">&Omicron;</span> </td> <tr id="entity-Oopf"><td> <code title="">Oopf;</code> </td> <td> U+1D546 </td> <td> <span class="glyph" title="">&#120134;</span> </td> <tr id="entity-OpenCurlyDoubleQuote"><td> <code title="">OpenCurlyDoubleQuote;</code> </td> <td> U+0201C </td> <td> <span class="glyph" title="">&ldquo;</span> </td> <tr id="entity-OpenCurlyQuote"><td> <code title="">OpenCurlyQuote;</code> </td> <td> U+02018 </td> <td> <span class="glyph" title="">&lsquo;</span> </td> <tr id="entity-Or"><td> <code title="">Or;</code> </td> <td> U+02A54 </td> <td> <span class="glyph" title="">&#10836;</span> </td> <tr id="entity-Oscr"><td> <code title="">Oscr;</code> </td> <td> U+1D4AA </td> <td> <span class="glyph" title="">&#119978;</span> </td> <tr id="entity-Oslash"><td> <code title="">Oslash;</code> </td> <td> U+000D8 </td> <td> <span class="glyph" title="">&Oslash;</span> </td> <tr id="entity-Otilde"><td> <code title="">Otilde;</code> </td> <td> U+000D5 </td> <td> <span class="glyph" title="">&Otilde;</span> </td> <tr id="entity-Otimes"><td> <code title="">Otimes;</code> </td> <td> U+02A37 </td> <td> <span class="glyph" title="">&#10807;</span> </td> <tr id="entity-Ouml"><td> <code title="">Ouml;</code> </td> <td> 
U+000D6 </td> <td> <span class="glyph" title="">&Ouml;</span> </td> <tr id="entity-OverBar"><td> <code title="">OverBar;</code> </td> <td> U+0203E </td> <td> <span class="glyph" title="">&oline;</span> </td> <tr id="entity-OverBrace"><td> <code title="">OverBrace;</code> </td> <td> U+023DE </td> <td> <span class="glyph" title="">&#9182;</span> </td> <tr id="entity-OverBracket"><td> <code title="">OverBracket;</code> </td> <td> U+023B4 </td> <td> <span class="glyph" title="">&#9140;</span> </td> <tr id="entity-OverParenthesis"><td> <code title="">OverParenthesis;</code> </td> <td> U+023DC </td> <td> <span class="glyph" title="">&#9180;</span> </td> <tr id="entity-PartialD"><td> <code title="">PartialD;</code> </td> <td> U+02202 </td> <td> <span class="glyph" title="">&part;</span> </td> <tr id="entity-Pcy"><td> <code title="">Pcy;</code> </td> <td> U+0041F </td> <td> <span class="glyph" title="">&#1055;</span> </td> <tr id="entity-Pfr"><td> <code title="">Pfr;</code> </td> <td> U+1D513 </td> <td> <span class="glyph" title="">&#120083;</span> </td> <tr id="entity-Phi"><td> <code title="">Phi;</code> </td> <td> U+003A6 </td> <td> <span class="glyph" title="">&Phi;</span> </td> <tr id="entity-Pi"><td> <code title="">Pi;</code> </td> <td> U+003A0 </td> <td> <span class="glyph" title="">&Pi;</span> </td> <tr id="entity-PlusMinus"><td> <code title="">PlusMinus;</code> </td> <td> U+000B1 </td> <td> <span class="glyph" title="">&plusmn;</span> </td> <tr id="entity-Poincareplane"><td> <code title="">Poincareplane;</code> </td> <td> U+0210C </td> <td> <span class="glyph" title="">&#8460;</span> </td> <tr id="entity-Popf"><td> <code title="">Popf;</code> </td> <td> U+02119 </td> <td> <span class="glyph" title="">&#8473;</span> </td> <tr id="entity-Pr"><td> <code title="">Pr;</code> </td> <td> U+02ABB </td> <td> <span class="glyph" title="">&#10939;</span> </td> <tr id="entity-Precedes"><td> <code title="">Precedes;</code> </td> <td> U+0227A </td> <td> <span class="glyph" 
title="">&#8826;</span> </td> <tr id="entity-PrecedesEqual"><td> <code title="">PrecedesEqual;</code> </td> <td> U+02AAF </td> <td> <span class="glyph" title="">&#10927;</span> </td> <tr id="entity-PrecedesSlantEqual"><td> <code title="">PrecedesSlantEqual;</code> </td> <td> U+0227C </td> <td> <span class="glyph" title="">&#8828;</span> </td> <tr id="entity-PrecedesTilde"><td> <code title="">PrecedesTilde;</code> </td> <td> U+0227E </td> <td> <span class="glyph" title="">&#8830;</span> </td> <tr id="entity-Prime"><td> <code title="">Prime;</code> </td> <td> U+02033 </td> <td> <span class="glyph" title="">&Prime;</span> </td> <tr id="entity-Product"><td> <code title="">Product;</code> </td> <td> U+0220F </td> <td> <span class="glyph" title="">&prod;</span> </td> <tr id="entity-Proportion"><td> <code title="">Proportion;</code> </td> <td> U+02237 </td> <td> <span class="glyph" title="">&#8759;</span> </td> <tr id="entity-Proportional"><td> <code title="">Proportional;</code> </td> <td> U+0221D </td> <td> <span class="glyph" title="">&prop;</span> </td> <tr id="entity-Pscr"><td> <code title="">Pscr;</code> </td> <td> U+1D4AB </td> <td> <span class="glyph" title="">&#119979;</span> </td> <tr id="entity-Psi"><td> <code title="">Psi;</code> </td> <td> U+003A8 </td> <td> <span class="glyph" title="">&Psi;</span> </td> <tr id="entity-QUOT"><td> <code title="">QUOT;</code> </td> <td> U+00022 </td> <td> <span class="glyph" title="">"</span> </td> <tr id="entity-Qfr"><td> <code title="">Qfr;</code> </td> <td> U+1D514 </td> <td> <span class="glyph" title="">&#120084;</span> </td> <tr id="entity-Qopf"><td> <code title="">Qopf;</code> </td> <td> U+0211A </td> <td> <span class="glyph" title="">&#8474;</span> </td> <tr id="entity-Qscr"><td> <code title="">Qscr;</code> </td> <td> U+1D4AC </td> <td> <span class="glyph" title="">&#119980;</span> </td> <tr id="entity-RBarr"><td> <code title="">RBarr;</code> </td> <td> U+02910 </td> <td> <span class="glyph" title="">&#10512;</span> 
</td> <tr id="entity-REG"><td> <code title="">REG;</code> </td> <td> U+000AE </td> <td> <span class="glyph" title="">&reg;</span> </td> <tr id="entity-Racute"><td> <code title="">Racute;</code> </td> <td> U+00154 </td> <td> <span class="glyph" title="">&#340;</span> </td> <tr id="entity-Rang"><td> <code title="">Rang;</code> </td> <td> U+027EB </td> <td> <span class="glyph" title="">&#10219;</span> </td> <tr id="entity-Rarr"><td> <code title="">Rarr;</code> </td> <td> U+021A0 </td> <td> <span class="glyph" title="">&#8608;</span> </td> <tr id="entity-Rarrtl"><td> <code title="">Rarrtl;</code> </td> <td> U+02916 </td> <td> <span class="glyph" title="">&#10518;</span> </td> <tr id="entity-Rcaron"><td> <code title="">Rcaron;</code> </td> <td> U+00158 </td> <td> <span class="glyph" title="">&#344;</span> </td> <tr id="entity-Rcedil"><td> <code title="">Rcedil;</code> </td> <td> U+00156 </td> <td> <span class="glyph" title="">&#342;</span> </td> <tr id="entity-Rcy"><td> <code title="">Rcy;</code> </td> <td> U+00420 </td> <td> <span class="glyph" title="">&#1056;</span> </td> <tr id="entity-Re"><td> <code title="">Re;</code> </td> <td> U+0211C </td> <td> <span class="glyph" title="">&real;</span> </td> <tr id="entity-ReverseElement"><td> <code title="">ReverseElement;</code> </td> <td> U+0220B </td> <td> <span class="glyph" title="">&ni;</span> </td> <tr id="entity-ReverseEquilibrium"><td> <code title="">ReverseEquilibrium;</code> </td> <td> U+021CB </td> <td> <span class="glyph" title="">&#8651;</span> </td> <tr id="entity-ReverseUpEquilibrium"><td> <code title="">ReverseUpEquilibrium;</code> </td> <td> U+0296F </td> <td> <span class="glyph" title="">&#10607;</span> </td> <tr id="entity-Rfr"><td> <code title="">Rfr;</code> </td> <td> U+0211C </td> <td> <span class="glyph" title="">&real;</span> </td> <tr id="entity-Rho"><td> <code title="">Rho;</code> </td> <td> U+003A1 </td> <td> <span class="glyph" title="">&Rho;</span> </td> <tr id="entity-RightAngleBracket"><td> 
<code title="">RightAngleBracket;</code> </td> <td> U+027E9 </td> <td> <span class="glyph" title="">&#10217;</span> </td> <tr id="entity-RightArrow"><td> <code title="">RightArrow;</code> </td> <td> U+02192 </td> <td> <span class="glyph" title="">&rarr;</span> </td> <tr id="entity-RightArrowBar"><td> <code title="">RightArrowBar;</code> </td> <td> U+021E5 </td> <td> <span class="glyph" title="">&#8677;</span> </td> <tr id="entity-RightArrowLeftArrow"><td> <code title="">RightArrowLeftArrow;</code> </td> <td> U+021C4 </td> <td> <span class="glyph" title="">&#8644;</span> </td> <tr id="entity-RightCeiling"><td> <code title="">RightCeiling;</code> </td> <td> U+02309 </td> <td> <span class="glyph" title="">&rceil;</span> </td> <tr id="entity-RightDoubleBracket"><td> <code title="">RightDoubleBracket;</code> </td> <td> U+027E7 </td> <td> <span class="glyph" title="">&#10215;</span> </td> <tr id="entity-RightDownTeeVector"><td> <code title="">RightDownTeeVector;</code> </td> <td> U+0295D </td> <td> <span class="glyph" title="">&#10589;</span> </td> <tr id="entity-RightDownVector"><td> <code title="">RightDownVector;</code> </td> <td> U+021C2 </td> <td> <span class="glyph" title="">&#8642;</span> </td> <tr id="entity-RightDownVectorBar"><td> <code title="">RightDownVectorBar;</code> </td> <td> U+02955 </td> <td> <span class="glyph" title="">&#10581;</span> </td> <tr id="entity-RightFloor"><td> <code title="">RightFloor;</code> </td> <td> U+0230B </td> <td> <span class="glyph" title="">&rfloor;</span> </td> <tr id="entity-RightTee"><td> <code title="">RightTee;</code> </td> <td> U+022A2 </td> <td> <span class="glyph" title="">&#8866;</span> </td> <tr id="entity-RightTeeArrow"><td> <code title="">RightTeeArrow;</code> </td> <td> U+021A6 </td> <td> <span class="glyph" title="">&#8614;</span> </td> <tr id="entity-RightTeeVector"><td> <code title="">RightTeeVector;</code> </td> <td> U+0295B </td> <td> <span class="glyph" title="">&#10587;</span> </td> <tr 
id="entity-RightTriangle"><td> <code title="">RightTriangle;</code> </td> <td> U+022B3 </td> <td> <span class="glyph" title="">&#8883;</span> </td> <tr id="entity-RightTriangleBar"><td> <code title="">RightTriangleBar;</code> </td> <td> U+029D0 </td> <td> <span class="glyph" title="">&#10704;</span> </td> <tr id="entity-RightTriangleEqual"><td> <code title="">RightTriangleEqual;</code> </td> <td> U+022B5 </td> <td> <span class="glyph" title="">&#8885;</span> </td> <tr id="entity-RightUpDownVector"><td> <code title="">RightUpDownVector;</code> </td> <td> U+0294F </td> <td> <span class="glyph" title="">&#10575;</span> </td> <tr id="entity-RightUpTeeVector"><td> <code title="">RightUpTeeVector;</code> </td> <td> U+0295C </td> <td> <span class="glyph" title="">&#10588;</span> </td> <tr id="entity-RightUpVector"><td> <code title="">RightUpVector;</code> </td> <td> U+021BE </td> <td> <span class="glyph" title="">&#8638;</span> </td> <tr id="entity-RightUpVectorBar"><td> <code title="">RightUpVectorBar;</code> </td> <td> U+02954 </td> <td> <span class="glyph" title="">&#10580;</span> </td> <tr id="entity-RightVector"><td> <code title="">RightVector;</code> </td> <td> U+021C0 </td> <td> <span class="glyph" title="">&#8640;</span> </td> <tr id="entity-RightVectorBar"><td> <code title="">RightVectorBar;</code> </td> <td> U+02953 </td> <td> <span class="glyph" title="">&#10579;</span> </td> <tr id="entity-Rightarrow"><td> <code title="">Rightarrow;</code> </td> <td> U+021D2 </td> <td> <span class="glyph" title="">&rArr;</span> </td> <tr id="entity-Ropf"><td> <code title="">Ropf;</code> </td> <td> U+0211D </td> <td> <span class="glyph" title="">&#8477;</span> </td> <tr id="entity-RoundImplies"><td> <code title="">RoundImplies;</code> </td> <td> U+02970 </td> <td> <span class="glyph" title="">&#10608;</span> </td> <tr id="entity-Rrightarrow"><td> <code title="">Rrightarrow;</code> </td> <td> U+021DB </td> <td> <span class="glyph" title="">&#8667;</span> </td> <tr 
id="entity-Rscr"><td> <code title="">Rscr;</code> </td> <td> U+0211B </td> <td> <span class="glyph" title="">&#8475;</span> </td> <tr id="entity-Rsh"><td> <code title="">Rsh;</code> </td> <td> U+021B1 </td> <td> <span class="glyph" title="">&#8625;</span> </td> <tr id="entity-RuleDelayed"><td> <code title="">RuleDelayed;</code> </td> <td> U+029F4 </td> <td> <span class="glyph" title="">&#10740;</span> </td> <tr id="entity-SHCHcy"><td> <code title="">SHCHcy;</code> </td> <td> U+00429 </td> <td> <span class="glyph" title="">&#1065;</span> </td> <tr id="entity-SHcy"><td> <code title="">SHcy;</code> </td> <td> U+00428 </td> <td> <span class="glyph" title="">&#1064;</span> </td> <tr id="entity-SOFTcy"><td> <code title="">SOFTcy;</code> </td> <td> U+0042C </td> <td> <span class="glyph" title="">&#1068;</span> </td> <tr id="entity-Sacute"><td> <code title="">Sacute;</code> </td> <td> U+0015A </td> <td> <span class="glyph" title="">&#346;</span> </td> <tr id="entity-Sc"><td> <code title="">Sc;</code> </td> <td> U+02ABC </td> <td> <span class="glyph" title="">&#10940;</span> </td> <tr id="entity-Scaron"><td> <code title="">Scaron;</code> </td> <td> U+00160 </td> <td> <span class="glyph" title="">&Scaron;</span> </td> <tr id="entity-Scedil"><td> <code title="">Scedil;</code> </td> <td> U+0015E </td> <td> <span class="glyph" title="">&#350;</span> </td> <tr id="entity-Scirc"><td> <code title="">Scirc;</code> </td> <td> U+0015C </td> <td> <span class="glyph" title="">&#348;</span> </td> <tr id="entity-Scy"><td> <code title="">Scy;</code> </td> <td> U+00421 </td> <td> <span class="glyph" title="">&#1057;</span> </td> <tr id="entity-Sfr"><td> <code title="">Sfr;</code> </td> <td> U+1D516 </td> <td> <span class="glyph" title="">&#120086;</span> </td> <tr id="entity-ShortDownArrow"><td> <code title="">ShortDownArrow;</code> </td> <td> U+02193 </td> <td> <span class="glyph" title="">&darr;</span> </td> <tr id="entity-ShortLeftArrow"><td> <code title="">ShortLeftArrow;</code> </td> 
<td> U+02190 </td> <td> <span class="glyph" title="">&larr;</span> </td> <tr id="entity-ShortRightArrow"><td> <code title="">ShortRightArrow;</code> </td> <td> U+02192 </td> <td> <span class="glyph" title="">&rarr;</span> </td> <tr id="entity-ShortUpArrow"><td> <code title="">ShortUpArrow;</code> </td> <td> U+02191 </td> <td> <span class="glyph" title="">&uarr;</span> </td> <tr id="entity-Sigma"><td> <code title="">Sigma;</code> </td> <td> U+003A3 </td> <td> <span class="glyph" title="">&Sigma;</span> </td> <tr id="entity-SmallCircle"><td> <code title="">SmallCircle;</code> </td> <td> U+02218 </td> <td> <span class="glyph" title="">&#8728;</span> </td> <tr id="entity-Sopf"><td> <code title="">Sopf;</code> </td> <td> U+1D54A </td> <td> <span class="glyph" title="">&#120138;</span> </td> <tr id="entity-Sqrt"><td> <code title="">Sqrt;</code> </td> <td> U+0221A </td> <td> <span class="glyph" title="">&radic;</span> </td> <tr id="entity-Square"><td> <code title="">Square;</code> </td> <td> U+025A1 </td> <td> <span class="glyph" title="">&#9633;</span> </td> <tr id="entity-SquareIntersection"><td> <code title="">SquareIntersection;</code> </td> <td> U+02293 </td> <td> <span class="glyph" title="">&#8851;</span> </td> <tr id="entity-SquareSubset"><td> <code title="">SquareSubset;</code> </td> <td> U+0228F </td> <td> <span class="glyph" title="">&#8847;</span> </td> <tr id="entity-SquareSubsetEqual"><td> <code title="">SquareSubsetEqual;</code> </td> <td> U+02291 </td> <td> <span class="glyph" title="">&#8849;</span> </td> <tr id="entity-SquareSuperset"><td> <code title="">SquareSuperset;</code> </td> <td> U+02290 </td> <td> <span class="glyph" title="">&#8848;</span> </td> <tr id="entity-SquareSupersetEqual"><td> <code title="">SquareSupersetEqual;</code> </td> <td> U+02292 </td> <td> <span class="glyph" title="">&#8850;</span> </td> <tr id="entity-SquareUnion"><td> <code title="">SquareUnion;</code> </td> <td> U+02294 </td> <td> <span class="glyph" 
title="">&#8852;</span> </td> <tr id="entity-Sscr"><td> <code title="">Sscr;</code> </td> <td> U+1D4AE </td> <td> <span class="glyph" title="">&#119982;</span> </td> <tr id="entity-Star"><td> <code title="">Star;</code> </td> <td> U+022C6 </td> <td> <span class="glyph" title="">&#8902;</span> </td> <tr id="entity-Sub"><td> <code title="">Sub;</code> </td> <td> U+022D0 </td> <td> <span class="glyph" title="">&#8912;</span> </td> <tr id="entity-Subset"><td> <code title="">Subset;</code> </td> <td> U+022D0 </td> <td> <span class="glyph" title="">&#8912;</span> </td> <tr id="entity-SubsetEqual"><td> <code title="">SubsetEqual;</code> </td> <td> U+02286 </td> <td> <span class="glyph" title="">&sube;</span> </td> <tr id="entity-Succeeds"><td> <code title="">Succeeds;</code> </td> <td> U+0227B </td> <td> <span class="glyph" title="">&#8827;</span> </td> <tr id="entity-SucceedsEqual"><td> <code title="">SucceedsEqual;</code> </td> <td> U+02AB0 </td> <td> <span class="glyph" title="">&#10928;</span> </td> <tr id="entity-SucceedsSlantEqual"><td> <code title="">SucceedsSlantEqual;</code> </td> <td> U+0227D </td> <td> <span class="glyph" title="">&#8829;</span> </td> <tr id="entity-SucceedsTilde"><td> <code title="">SucceedsTilde;</code> </td> <td> U+0227F </td> <td> <span class="glyph" title="">&#8831;</span> </td> <tr id="entity-SuchThat"><td> <code title="">SuchThat;</code> </td> <td> U+0220B </td> <td> <span class="glyph" title="">&ni;</span> </td> <tr id="entity-Sum"><td> <code title="">Sum;</code> </td> <td> U+02211 </td> <td> <span class="glyph" title="">&sum;</span> </td> <tr id="entity-Sup"><td> <code title="">Sup;</code> </td> <td> U+022D1 </td> <td> <span class="glyph" title="">&#8913;</span> </td> <tr id="entity-Superset"><td> <code title="">Superset;</code> </td> <td> U+02283 </td> <td> <span class="glyph" title="">&sup;</span> </td> <tr id="entity-SupersetEqual"><td> <code title="">SupersetEqual;</code> </td> <td> U+02287 </td> <td> <span class="glyph" 
title="">&supe;</span> </td> <tr id="entity-Supset"><td> <code title="">Supset;</code> </td> <td> U+022D1 </td> <td> <span class="glyph" title="">&#8913;</span> </td> <tr id="entity-THORN"><td> <code title="">THORN;</code> </td> <td> U+000DE </td> <td> <span class="glyph" title="">&THORN;</span> </td> <tr id="entity-TRADE"><td> <code title="">TRADE;</code> </td> <td> U+02122 </td> <td> <span class="glyph" title="">&trade;</span> </td> <tr id="entity-TSHcy"><td> <code title="">TSHcy;</code> </td> <td> U+0040B </td> <td> <span class="glyph" title="">&#1035;</span> </td> <tr id="entity-TScy"><td> <code title="">TScy;</code> </td> <td> U+00426 </td> <td> <span class="glyph" title="">&#1062;</span> </td> <tr id="entity-Tab"><td> <code title="">Tab;</code> </td> <td> U+00009 </td> <td> <span class="glyph control" title="">&#9225;</span> </td> <tr id="entity-Tau"><td> <code title="">Tau;</code> </td> <td> U+003A4 </td> <td> <span class="glyph" title="">&Tau;</span> </td> <tr id="entity-Tcaron"><td> <code title="">Tcaron;</code> </td> <td> U+00164 </td> <td> <span class="glyph" title="">&#356;</span> </td> <tr id="entity-Tcedil"><td> <code title="">Tcedil;</code> </td> <td> U+00162 </td> <td> <span class="glyph" title="">&#354;</span> </td> <tr id="entity-Tcy"><td> <code title="">Tcy;</code> </td> <td> U+00422 </td> <td> <span class="glyph" title="">&#1058;</span> </td> <tr id="entity-Tfr"><td> <code title="">Tfr;</code> </td> <td> U+1D517 </td> <td> <span class="glyph" title="">&#120087;</span> </td> <tr id="entity-Therefore"><td> <code title="">Therefore;</code> </td> <td> U+02234 </td> <td> <span class="glyph" title="">&there4;</span> </td> <tr id="entity-Theta"><td> <code title="">Theta;</code> </td> <td> U+00398 </td> <td> <span class="glyph" title="">&Theta;</span> </td> <tr id="entity-ThickSpace"><td> <code title="">ThickSpace;</code> </td> <td> U+0205F U+0200A </td> <td> <span class="glyph compound" title="">&#8287;&#8202;</span> </td> <tr 
id="entity-ThinSpace"><td> <code title="">ThinSpace;</code> </td> <td> U+02009 </td> <td> <span class="glyph" title="">&thinsp;</span> </td> <tr id="entity-Tilde"><td> <code title="">Tilde;</code> </td> <td> U+0223C </td> <td> <span class="glyph" title="">&sim;</span> </td> <tr id="entity-TildeEqual"><td> <code title="">TildeEqual;</code> </td> <td> U+02243 </td> <td> <span class="glyph" title="">&#8771;</span> </td> <tr id="entity-TildeFullEqual"><td> <code title="">TildeFullEqual;</code> </td> <td> U+02245 </td> <td> <span class="glyph" title="">&cong;</span> </td> <tr id="entity-TildeTilde"><td> <code title="">TildeTilde;</code> </td> <td> U+02248 </td> <td> <span class="glyph" title="">&asymp;</span> </td> <tr id="entity-Topf"><td> <code title="">Topf;</code> </td> <td> U+1D54B </td> <td> <span class="glyph" title="">&#120139;</span> </td> <tr id="entity-TripleDot"><td> <code title="">TripleDot;</code> </td> <td> U+020DB </td> <td> <span class="glyph composition" title="">&#9676;&#8411;</span> </td> <tr id="entity-Tscr"><td> <code title="">Tscr;</code> </td> <td> U+1D4AF </td> <td> <span class="glyph" title="">&#119983;</span> </td> <tr id="entity-Tstrok"><td> <code title="">Tstrok;</code> </td> <td> U+00166 </td> <td> <span class="glyph" title="">&#358;</span> </td> <tr id="entity-Uacute"><td> <code title="">Uacute;</code> </td> <td> U+000DA </td> <td> <span class="glyph" title="">&Uacute;</span> </td> <tr id="entity-Uarr"><td> <code title="">Uarr;</code> </td> <td> U+0219F </td> <td> <span class="glyph" title="">&#8607;</span> </td> <tr id="entity-Uarrocir"><td> <code title="">Uarrocir;</code> </td> <td> U+02949 </td> <td> <span class="glyph" title="">&#10569;</span> </td> <tr id="entity-Ubrcy"><td> <code title="">Ubrcy;</code> </td> <td> U+0040E </td> <td> <span class="glyph" title="">&#1038;</span> </td> <tr id="entity-Ubreve"><td> <code title="">Ubreve;</code> </td> <td> U+0016C </td> <td> <span class="glyph" title="">&#364;</span> </td> <tr 
id="entity-Ucirc"><td> <code title="">Ucirc;</code> </td> <td> U+000DB </td> <td> <span class="glyph" title="">&Ucirc;</span> </td> <tr id="entity-Ucy"><td> <code title="">Ucy;</code> </td> <td> U+00423 </td> <td> <span class="glyph" title="">&#1059;</span> </td> <tr id="entity-Udblac"><td> <code title="">Udblac;</code> </td> <td> U+00170 </td> <td> <span class="glyph" title="">&#368;</span> </td> <tr id="entity-Ufr"><td> <code title="">Ufr;</code> </td> <td> U+1D518 </td> <td> <span class="glyph" title="">&#120088;</span> </td> <tr id="entity-Ugrave"><td> <code title="">Ugrave;</code> </td> <td> U+000D9 </td> <td> <span class="glyph" title="">&Ugrave;</span> </td> <tr id="entity-Umacr"><td> <code title="">Umacr;</code> </td> <td> U+0016A </td> <td> <span class="glyph" title="">&#362;</span> </td> <tr id="entity-UnderBar"><td> <code title="">UnderBar;</code> </td> <td> U+0005F </td> <td> <span class="glyph" title="">_</span> </td> <tr id="entity-UnderBrace"><td> <code title="">UnderBrace;</code> </td> <td> U+023DF </td> <td> <span class="glyph" title="">&#9183;</span> </td> <tr id="entity-UnderBracket"><td> <code title="">UnderBracket;</code> </td> <td> U+023B5 </td> <td> <span class="glyph" title="">&#9141;</span> </td> <tr id="entity-UnderParenthesis"><td> <code title="">UnderParenthesis;</code> </td> <td> U+023DD </td> <td> <span class="glyph" title="">&#9181;</span> </td> <tr id="entity-Union"><td> <code title="">Union;</code> </td> <td> U+022C3 </td> <td> <span class="glyph" title="">&#8899;</span> </td> <tr id="entity-UnionPlus"><td> <code title="">UnionPlus;</code> </td> <td> U+0228E </td> <td> <span class="glyph" title="">&#8846;</span> </td> <tr id="entity-Uogon"><td> <code title="">Uogon;</code> </td> <td> U+00172 </td> <td> <span class="glyph" title="">&#370;</span> </td> <tr id="entity-Uopf"><td> <code title="">Uopf;</code> </td> <td> U+1D54C </td> <td> <span class="glyph" title="">&#120140;</span> </td> <tr id="entity-UpArrow"><td> <code 
title="">UpArrow;</code> </td> <td> U+02191 </td> <td> <span class="glyph" title="">&uarr;</span> </td> <tr id="entity-UpArrowBar"><td> <code title="">UpArrowBar;</code> </td> <td> U+02912 </td> <td> <span class="glyph" title="">&#10514;</span> </td> <tr id="entity-UpArrowDownArrow"><td> <code title="">UpArrowDownArrow;</code> </td> <td> U+021C5 </td> <td> <span class="glyph" title="">&#8645;</span> </td> <tr id="entity-UpDownArrow"><td> <code title="">UpDownArrow;</code> </td> <td> U+02195 </td> <td> <span class="glyph" title="">&#8597;</span> </td> <tr id="entity-UpEquilibrium"><td> <code title="">UpEquilibrium;</code> </td> <td> U+0296E </td> <td> <span class="glyph" title="">&#10606;</span> </td> <tr id="entity-UpTee"><td> <code title="">UpTee;</code> </td> <td> U+022A5 </td> <td> <span class="glyph" title="">&perp;</span> </td> <tr id="entity-UpTeeArrow"><td> <code title="">UpTeeArrow;</code> </td> <td> U+021A5 </td> <td> <span class="glyph" title="">&#8613;</span> </td> <tr id="entity-Uparrow"><td> <code title="">Uparrow;</code> </td> <td> U+021D1 </td> <td> <span class="glyph" title="">&uArr;</span> </td> <tr id="entity-Updownarrow"><td> <code title="">Updownarrow;</code> </td> <td> U+021D5 </td> <td> <span class="glyph" title="">&#8661;</span> </td> <tr id="entity-UpperLeftArrow"><td> <code title="">UpperLeftArrow;</code> </td> <td> U+02196 </td> <td> <span class="glyph" title="">&#8598;</span> </td> <tr id="entity-UpperRightArrow"><td> <code title="">UpperRightArrow;</code> </td> <td> U+02197 </td> <td> <span class="glyph" title="">&#8599;</span> </td> <tr id="entity-Upsi"><td> <code title="">Upsi;</code> </td> <td> U+003D2 </td> <td> <span class="glyph" title="">&upsih;</span> </td> <tr id="entity-Upsilon"><td> <code title="">Upsilon;</code> </td> <td> U+003A5 </td> <td> <span class="glyph" title="">&Upsilon;</span> </td> <tr id="entity-Uring"><td> <code title="">Uring;</code> </td> <td> U+0016E </td> <td> <span class="glyph" title="">&#366;</span> </td> 
<tr id="entity-Uscr"><td> <code title="">Uscr;</code> </td> <td> U+1D4B0 </td> <td> <span class="glyph" title="">&#119984;</span> </td> <tr id="entity-Utilde"><td> <code title="">Utilde;</code> </td> <td> U+00168 </td> <td> <span class="glyph" title="">&#360;</span> </td> <tr id="entity-Uuml"><td> <code title="">Uuml;</code> </td> <td> U+000DC </td> <td> <span class="glyph" title="">&Uuml;</span> </td> <tr id="entity-VDash"><td> <code title="">VDash;</code> </td> <td> U+022AB </td> <td> <span class="glyph" title="">&#8875;</span> </td> <tr id="entity-Vbar"><td> <code title="">Vbar;</code> </td> <td> U+02AEB </td> <td> <span class="glyph" title="">&#10987;</span> </td> <tr id="entity-Vcy"><td> <code title="">Vcy;</code> </td> <td> U+00412 </td> <td> <span class="glyph" title="">&#1042;</span> </td> <tr id="entity-Vdash"><td> <code title="">Vdash;</code> </td> <td> U+022A9 </td> <td> <span class="glyph" title="">&#8873;</span> </td> <tr id="entity-Vdashl"><td> <code title="">Vdashl;</code> </td> <td> U+02AE6 </td> <td> <span class="glyph" title="">&#10982;</span> </td> <tr id="entity-Vee"><td> <code title="">Vee;</code> </td> <td> U+022C1 </td> <td> <span class="glyph" title="">&#8897;</span> </td> <tr id="entity-Verbar"><td> <code title="">Verbar;</code> </td> <td> U+02016 </td> <td> <span class="glyph" title="">&#8214;</span> </td> <tr id="entity-Vert"><td> <code title="">Vert;</code> </td> <td> U+02016 </td> <td> <span class="glyph" title="">&#8214;</span> </td> <tr id="entity-VerticalBar"><td> <code title="">VerticalBar;</code> </td> <td> U+02223 </td> <td> <span class="glyph" title="">&#8739;</span> </td> <tr id="entity-VerticalLine"><td> <code title="">VerticalLine;</code> </td> <td> U+0007C </td> <td> <span class="glyph" title="">|</span> </td> <tr id="entity-VerticalSeparator"><td> <code title="">VerticalSeparator;</code> </td> <td> U+02758 </td> <td> <span class="glyph" title="">&#10072;</span> </td> <tr id="entity-VerticalTilde"><td> <code 
title="">VerticalTilde;</code> </td> <td> U+02240 </td> <td> <span class="glyph" title="">&#8768;</span> </td> <tr id="entity-VeryThinSpace"><td> <code title="">VeryThinSpace;</code> </td> <td> U+0200A </td> <td> <span class="glyph" title="">&#8202;</span> </td> <tr id="entity-Vfr"><td> <code title="">Vfr;</code> </td> <td> U+1D519 </td> <td> <span class="glyph" title="">&#120089;</span> </td> <tr id="entity-Vopf"><td> <code title="">Vopf;</code> </td> <td> U+1D54D </td> <td> <span class="glyph" title="">&#120141;</span> </td> <tr id="entity-Vscr"><td> <code title="">Vscr;</code> </td> <td> U+1D4B1 </td> <td> <span class="glyph" title="">&#119985;</span> </td> <tr id="entity-Vvdash"><td> <code title="">Vvdash;</code> </td> <td> U+022AA </td> <td> <span class="glyph" title="">&#8874;</span> </td> <tr id="entity-Wcirc"><td> <code title="">Wcirc;</code> </td> <td> U+00174 </td> <td> <span class="glyph" title="">&#372;</span> </td> <tr id="entity-Wedge"><td> <code title="">Wedge;</code> </td> <td> U+022C0 </td> <td> <span class="glyph" title="">&#8896;</span> </td> <tr id="entity-Wfr"><td> <code title="">Wfr;</code> </td> <td> U+1D51A </td> <td> <span class="glyph" title="">&#120090;</span> </td> <tr id="entity-Wopf"><td> <code title="">Wopf;</code> </td> <td> U+1D54E </td> <td> <span class="glyph" title="">&#120142;</span> </td> <tr id="entity-Wscr"><td> <code title="">Wscr;</code> </td> <td> U+1D4B2 </td> <td> <span class="glyph" title="">&#119986;</span> </td> <tr id="entity-Xfr"><td> <code title="">Xfr;</code> </td> <td> U+1D51B </td> <td> <span class="glyph" title="">&#120091;</span> </td> <tr id="entity-Xi"><td> <code title="">Xi;</code> </td> <td> U+0039E </td> <td> <span class="glyph" title="">&Xi;</span> </td> <tr id="entity-Xopf"><td> <code title="">Xopf;</code> </td> <td> U+1D54F </td> <td> <span class="glyph" title="">&#120143;</span> </td> <tr id="entity-Xscr"><td> <code title="">Xscr;</code> </td> <td> U+1D4B3 </td> <td> <span class="glyph" 
title="">&#119987;</span> </td> <tr id="entity-YAcy"><td> <code title="">YAcy;</code> </td> <td> U+0042F </td> <td> <span class="glyph" title="">&#1071;</span> </td> <tr id="entity-YIcy"><td> <code title="">YIcy;</code> </td> <td> U+00407 </td> <td> <span class="glyph" title="">&#1031;</span> </td> <tr id="entity-YUcy"><td> <code title="">YUcy;</code> </td> <td> U+0042E </td> <td> <span class="glyph" title="">&#1070;</span> </td> <tr id="entity-Yacute"><td> <code title="">Yacute;</code> </td> <td> U+000DD </td> <td> <span class="glyph" title="">&Yacute;</span> </td> <tr id="entity-Ycirc"><td> <code title="">Ycirc;</code> </td> <td> U+00176 </td> <td> <span class="glyph" title="">&#374;</span> </td> <tr id="entity-Ycy"><td> <code title="">Ycy;</code> </td> <td> U+0042B </td> <td> <span class="glyph" title="">&#1067;</span> </td> <tr id="entity-Yfr"><td> <code title="">Yfr;</code> </td> <td> U+1D51C </td> <td> <span class="glyph" title="">&#120092;</span> </td> <tr id="entity-Yopf"><td> <code title="">Yopf;</code> </td> <td> U+1D550 </td> <td> <span class="glyph" title="">&#120144;</span> </td> <tr id="entity-Yscr"><td> <code title="">Yscr;</code> </td> <td> U+1D4B4 </td> <td> <span class="glyph" title="">&#119988;</span> </td> <tr id="entity-Yuml"><td> <code title="">Yuml;</code> </td> <td> U+00178 </td> <td> <span class="glyph" title="">&Yuml;</span> </td> <tr id="entity-ZHcy"><td> <code title="">ZHcy;</code> </td> <td> U+00416 </td> <td> <span class="glyph" title="">&#1046;</span> </td> <tr id="entity-Zacute"><td> <code title="">Zacute;</code> </td> <td> U+00179 </td> <td> <span class="glyph" title="">&#377;</span> </td> <tr id="entity-Zcaron"><td> <code title="">Zcaron;</code> </td> <td> U+0017D </td> <td> <span class="glyph" title="">&#381;</span> </td> <tr id="entity-Zcy"><td> <code title="">Zcy;</code> </td> <td> U+00417 </td> <td> <span class="glyph" title="">&#1047;</span> </td> <tr id="entity-Zdot"><td> <code title="">Zdot;</code> </td> <td> U+0017B </td> 
<td> <span class="glyph" title="">&#379;</span> </td> <tr id="entity-ZeroWidthSpace"><td> <code title="">ZeroWidthSpace;</code> </td> <td> U+0200B </td> <td> <span class="glyph" title="">&#8203;</span> </td> <tr id="entity-Zeta"><td> <code title="">Zeta;</code> </td> <td> U+00396 </td> <td> <span class="glyph" title="">&Zeta;</span> </td> <tr id="entity-Zfr"><td> <code title="">Zfr;</code> </td> <td> U+02128 </td> <td> <span class="glyph" title="">&#8488;</span> </td> <tr id="entity-Zopf"><td> <code title="">Zopf;</code> </td> <td> U+02124 </td> <td> <span class="glyph" title="">&#8484;</span> </td> <tr id="entity-Zscr"><td> <code title="">Zscr;</code> </td> <td> U+1D4B5 </td> <td> <span class="glyph" title="">&#119989;</span> </td> <tr id="entity-aacute"><td> <code title="">aacute;</code> </td> <td> U+000E1 </td> <td> <span class="glyph" title="">&aacute;</span> </td> <tr id="entity-abreve"><td> <code title="">abreve;</code> </td> <td> U+00103 </td> <td> <span class="glyph" title="">&#259;</span> </td> <tr id="entity-ac"><td> <code title="">ac;</code> </td> <td> U+0223E </td> <td> <span class="glyph" title="">&#8766;</span> </td> <tr id="entity-acE"><td> <code title="">acE;</code> </td> <td> U+0223E U+00333 </td> <td> <span class="glyph compound" title="">&#8766;&#819;</span> </td> <tr id="entity-acd"><td> <code title="">acd;</code> </td> <td> U+0223F </td> <td> <span class="glyph" title="">&#8767;</span> </td> <tr id="entity-acirc"><td> <code title="">acirc;</code> </td> <td> U+000E2 </td> <td> <span class="glyph" title="">&acirc;</span> </td> <tr id="entity-acute"><td> <code title="">acute;</code> </td> <td> U+000B4 </td> <td> <span class="glyph" title="">&acute;</span> </td> <tr id="entity-acy"><td> <code title="">acy;</code> </td> <td> U+00430 </td> <td> <span class="glyph" title="">&#1072;</span> </td> <tr id="entity-aelig"><td> <code title="">aelig;</code> </td> <td> U+000E6 </td> <td> <span class="glyph" title="">&aelig;</span> </td> <tr id="entity-af"><td> 
<code title="">af;</code> </td> <td> U+02061 </td> <td> <span class="glyph" title="">&#8289;</span> </td> <tr id="entity-afr"><td> <code title="">afr;</code> </td> <td> U+1D51E </td> <td> <span class="glyph" title="">&#120094;</span> </td> <tr id="entity-agrave"><td> <code title="">agrave;</code> </td> <td> U+000E0 </td> <td> <span class="glyph" title="">&agrave;</span> </td> <tr id="entity-alefsym"><td> <code title="">alefsym;</code> </td> <td> U+02135 </td> <td> <span class="glyph" title="">&alefsym;</span> </td> <tr id="entity-aleph"><td> <code title="">aleph;</code> </td> <td> U+02135 </td> <td> <span class="glyph" title="">&alefsym;</span> </td> <tr id="entity-alpha"><td> <code title="">alpha;</code> </td> <td> U+003B1 </td> <td> <span class="glyph" title="">&alpha;</span> </td> <tr id="entity-amacr"><td> <code title="">amacr;</code> </td> <td> U+00101 </td> <td> <span class="glyph" title="">&#257;</span> </td> <tr id="entity-amalg"><td> <code title="">amalg;</code> </td> <td> U+02A3F </td> <td> <span class="glyph" title="">&#10815;</span> </td> <tr id="entity-amp"><td> <code title="">amp;</code> </td> <td> U+00026 </td> <td> <span class="glyph" title="">&amp;</span> </td> <tr id="entity-and"><td> <code title="">and;</code> </td> <td> U+02227 </td> <td> <span class="glyph" title="">&and;</span> </td> <tr id="entity-andand"><td> <code title="">andand;</code> </td> <td> U+02A55 </td> <td> <span class="glyph" title="">&#10837;</span> </td> <tr id="entity-andd"><td> <code title="">andd;</code> </td> <td> U+02A5C </td> <td> <span class="glyph" title="">&#10844;</span> </td> <tr id="entity-andslope"><td> <code title="">andslope;</code> </td> <td> U+02A58 </td> <td> <span class="glyph" title="">&#10840;</span> </td> <tr id="entity-andv"><td> <code title="">andv;</code> </td> <td> U+02A5A </td> <td> <span class="glyph" title="">&#10842;</span> </td> <tr id="entity-ang"><td> <code title="">ang;</code> </td> <td> U+02220 </td> <td> <span class="glyph" 
title="">&ang;</span> </td> <tr id="entity-ange"><td> <code title="">ange;</code> </td> <td> U+029A4 </td> <td> <span class="glyph" title="">&#10660;</span> </td> <tr id="entity-angle"><td> <code title="">angle;</code> </td> <td> U+02220 </td> <td> <span class="glyph" title="">&ang;</span> </td> <tr id="entity-angmsd"><td> <code title="">angmsd;</code> </td> <td> U+02221 </td> <td> <span class="glyph" title="">&#8737;</span> </td> <tr id="entity-angmsdaa"><td> <code title="">angmsdaa;</code> </td> <td> U+029A8 </td> <td> <span class="glyph" title="">&#10664;</span> </td> <tr id="entity-angmsdab"><td> <code title="">angmsdab;</code> </td> <td> U+029A9 </td> <td> <span class="glyph" title="">&#10665;</span> </td> <tr id="entity-angmsdac"><td> <code title="">angmsdac;</code> </td> <td> U+029AA </td> <td> <span class="glyph" title="">&#10666;</span> </td> <tr id="entity-angmsdad"><td> <code title="">angmsdad;</code> </td> <td> U+029AB </td> <td> <span class="glyph" title="">&#10667;</span> </td> <tr id="entity-angmsdae"><td> <code title="">angmsdae;</code> </td> <td> U+029AC </td> <td> <span class="glyph" title="">&#10668;</span> </td> <tr id="entity-angmsdaf"><td> <code title="">angmsdaf;</code> </td> <td> U+029AD </td> <td> <span class="glyph" title="">&#10669;</span> </td> <tr id="entity-angmsdag"><td> <code title="">angmsdag;</code> </td> <td> U+029AE </td> <td> <span class="glyph" title="">&#10670;</span> </td> <tr id="entity-angmsdah"><td> <code title="">angmsdah;</code> </td> <td> U+029AF </td> <td> <span class="glyph" title="">&#10671;</span> </td> <tr id="entity-angrt"><td> <code title="">angrt;</code> </td> <td> U+0221F </td> <td> <span class="glyph" title="">&#8735;</span> </td> <tr id="entity-angrtvb"><td> <code title="">angrtvb;</code> </td> <td> U+022BE </td> <td> <span class="glyph" title="">&#8894;</span> </td> <tr id="entity-angrtvbd"><td> <code title="">angrtvbd;</code> </td> <td> U+0299D </td> <td> <span class="glyph" title="">&#10653;</span> </td> 
<tr id="entity-angsph"><td> <code title="">angsph;</code> </td> <td> U+02222 </td> <td> <span class="glyph" title="">&#8738;</span> </td> <tr id="entity-angst"><td> <code title="">angst;</code> </td> <td> U+000C5 </td> <td> <span class="glyph" title="">&Aring;</span> </td> <tr id="entity-angzarr"><td> <code title="">angzarr;</code> </td> <td> U+0237C </td> <td> <span class="glyph" title="">&#9084;</span> </td> <tr id="entity-aogon"><td> <code title="">aogon;</code> </td> <td> U+00105 </td> <td> <span class="glyph" title="">&#261;</span> </td> <tr id="entity-aopf"><td> <code title="">aopf;</code> </td> <td> U+1D552 </td> <td> <span class="glyph" title="">&#120146;</span> </td> <tr id="entity-ap"><td> <code title="">ap;</code> </td> <td> U+02248 </td> <td> <span class="glyph" title="">&asymp;</span> </td> <tr id="entity-apE"><td> <code title="">apE;</code> </td> <td> U+02A70 </td> <td> <span class="glyph" title="">&#10864;</span> </td> <tr id="entity-apacir"><td> <code title="">apacir;</code> </td> <td> U+02A6F </td> <td> <span class="glyph" title="">&#10863;</span> </td> <tr id="entity-ape"><td> <code title="">ape;</code> </td> <td> U+0224A </td> <td> <span class="glyph" title="">&#8778;</span> </td> <tr id="entity-apid"><td> <code title="">apid;</code> </td> <td> U+0224B </td> <td> <span class="glyph" title="">&#8779;</span> </td> <tr id="entity-apos"><td> <code title="">apos;</code> </td> <td> U+00027 </td> <td> <span class="glyph" title="">'</span> </td> <tr id="entity-approx"><td> <code title="">approx;</code> </td> <td> U+02248 </td> <td> <span class="glyph" title="">&asymp;</span> </td> <tr id="entity-approxeq"><td> <code title="">approxeq;</code> </td> <td> U+0224A </td> <td> <span class="glyph" title="">&#8778;</span> </td> <tr id="entity-aring"><td> <code title="">aring;</code> </td> <td> U+000E5 </td> <td> <span class="glyph" title="">&aring;</span> </td> <tr id="entity-ascr"><td> <code title="">ascr;</code> </td> <td> U+1D4B6 </td> <td> <span 
class="glyph" title="">&#119990;</span> </td> <tr id="entity-ast"><td> <code title="">ast;</code> </td> <td> U+0002A </td> <td> <span class="glyph" title="">*</span> </td> <tr id="entity-asymp"><td> <code title="">asymp;</code> </td> <td> U+02248 </td> <td> <span class="glyph" title="">&asymp;</span> </td> <tr id="entity-asympeq"><td> <code title="">asympeq;</code> </td> <td> U+0224D </td> <td> <span class="glyph" title="">&#8781;</span> </td> <tr id="entity-atilde"><td> <code title="">atilde;</code> </td> <td> U+000E3 </td> <td> <span class="glyph" title="">&atilde;</span> </td> <tr id="entity-auml"><td> <code title="">auml;</code> </td> <td> U+000E4 </td> <td> <span class="glyph" title="">&auml;</span> </td> <tr id="entity-awconint"><td> <code title="">awconint;</code> </td> <td> U+02233 </td> <td> <span class="glyph" title="">&#8755;</span> </td> <tr id="entity-awint"><td> <code title="">awint;</code> </td> <td> U+02A11 </td> <td> <span class="glyph" title="">&#10769;</span> </td> <tr id="entity-bNot"><td> <code title="">bNot;</code> </td> <td> U+02AED </td> <td> <span class="glyph" title="">&#10989;</span> </td> <tr id="entity-backcong"><td> <code title="">backcong;</code> </td> <td> U+0224C </td> <td> <span class="glyph" title="">&#8780;</span> </td> <tr id="entity-backepsilon"><td> <code title="">backepsilon;</code> </td> <td> U+003F6 </td> <td> <span class="glyph" title="">&#1014;</span> </td> <tr id="entity-backprime"><td> <code title="">backprime;</code> </td> <td> U+02035 </td> <td> <span class="glyph" title="">&#8245;</span> </td> <tr id="entity-backsim"><td> <code title="">backsim;</code> </td> <td> U+0223D </td> <td> <span class="glyph" title="">&#8765;</span> </td> <tr id="entity-backsimeq"><td> <code title="">backsimeq;</code> </td> <td> U+022CD </td> <td> <span class="glyph" title="">&#8909;</span> </td> <tr id="entity-barvee"><td> <code title="">barvee;</code> </td> <td> U+022BD </td> <td> <span class="glyph" title="">&#8893;</span> </td> <tr 
id="entity-barwed"><td> <code title="">barwed;</code> </td> <td> U+02305 </td> <td> <span class="glyph" title="">&#8965;</span> </td> <tr id="entity-barwedge"><td> <code title="">barwedge;</code> </td> <td> U+02305 </td> <td> <span class="glyph" title="">&#8965;</span> </td> <tr id="entity-bbrk"><td> <code title="">bbrk;</code> </td> <td> U+023B5 </td> <td> <span class="glyph" title="">&#9141;</span> </td> <tr id="entity-bbrktbrk"><td> <code title="">bbrktbrk;</code> </td> <td> U+023B6 </td> <td> <span class="glyph" title="">&#9142;</span> </td> <tr id="entity-bcong"><td> <code title="">bcong;</code> </td> <td> U+0224C </td> <td> <span class="glyph" title="">&#8780;</span> </td> <tr id="entity-bcy"><td> <code title="">bcy;</code> </td> <td> U+00431 </td> <td> <span class="glyph" title="">&#1073;</span> </td> <tr id="entity-bdquo"><td> <code title="">bdquo;</code> </td> <td> U+0201E </td> <td> <span class="glyph" title="">&bdquo;</span> </td> <tr id="entity-becaus"><td> <code title="">becaus;</code> </td> <td> U+02235 </td> <td> <span class="glyph" title="">&#8757;</span> </td> <tr id="entity-because"><td> <code title="">because;</code> </td> <td> U+02235 </td> <td> <span class="glyph" title="">&#8757;</span> </td> <tr id="entity-bemptyv"><td> <code title="">bemptyv;</code> </td> <td> U+029B0 </td> <td> <span class="glyph" title="">&#10672;</span> </td> <tr id="entity-bepsi"><td> <code title="">bepsi;</code> </td> <td> U+003F6 </td> <td> <span class="glyph" title="">&#1014;</span> </td> <tr id="entity-bernou"><td> <code title="">bernou;</code> </td> <td> U+0212C </td> <td> <span class="glyph" title="">&#8492;</span> </td> <tr id="entity-beta"><td> <code title="">beta;</code> </td> <td> U+003B2 </td> <td> <span class="glyph" title="">&beta;</span> </td> <tr id="entity-beth"><td> <code title="">beth;</code> </td> <td> U+02136 </td> <td> <span class="glyph" title="">&#8502;</span> </td> <tr id="entity-between"><td> <code title="">between;</code> </td> <td> U+0226C 
</td> <td> <span class="glyph" title="">&#8812;</span> </td> <tr id="entity-bfr"><td> <code title="">bfr;</code> </td> <td> U+1D51F </td> <td> <span class="glyph" title="">&#120095;</span> </td> <tr id="entity-bigcap"><td> <code title="">bigcap;</code> </td> <td> U+022C2 </td> <td> <span class="glyph" title="">&#8898;</span> </td> <tr id="entity-bigcirc"><td> <code title="">bigcirc;</code> </td> <td> U+025EF </td> <td> <span class="glyph" title="">&#9711;</span> </td> <tr id="entity-bigcup"><td> <code title="">bigcup;</code> </td> <td> U+022C3 </td> <td> <span class="glyph" title="">&#8899;</span> </td> <tr id="entity-bigodot"><td> <code title="">bigodot;</code> </td> <td> U+02A00 </td> <td> <span class="glyph" title="">&#10752;</span> </td> <tr id="entity-bigoplus"><td> <code title="">bigoplus;</code> </td> <td> U+02A01 </td> <td> <span class="glyph" title="">&#10753;</span> </td> <tr id="entity-bigotimes"><td> <code title="">bigotimes;</code> </td> <td> U+02A02 </td> <td> <span class="glyph" title="">&#10754;</span> </td> <tr id="entity-bigsqcup"><td> <code title="">bigsqcup;</code> </td> <td> U+02A06 </td> <td> <span class="glyph" title="">&#10758;</span> </td> <tr id="entity-bigstar"><td> <code title="">bigstar;</code> </td> <td> U+02605 </td> <td> <span class="glyph" title="">&#9733;</span> </td> <tr id="entity-bigtriangledown"><td> <code title="">bigtriangledown;</code> </td> <td> U+025BD </td> <td> <span class="glyph" title="">&#9661;</span> </td> <tr id="entity-bigtriangleup"><td> <code title="">bigtriangleup;</code> </td> <td> U+025B3 </td> <td> <span class="glyph" title="">&#9651;</span> </td> <tr id="entity-biguplus"><td> <code title="">biguplus;</code> </td> <td> U+02A04 </td> <td> <span class="glyph" title="">&#10756;</span> </td> <tr id="entity-bigvee"><td> <code title="">bigvee;</code> </td> <td> U+022C1 </td> <td> <span class="glyph" title="">&#8897;</span> </td> <tr id="entity-bigwedge"><td> <code title="">bigwedge;</code> </td> <td> U+022C0 </td> 
<td> <span class="glyph" title="">&#8896;</span> </td> <tr id="entity-bkarow"><td> <code title="">bkarow;</code> </td> <td> U+0290D </td> <td> <span class="glyph" title="">&#10509;</span> </td> <tr id="entity-blacklozenge"><td> <code title="">blacklozenge;</code> </td> <td> U+029EB </td> <td> <span class="glyph" title="">&#10731;</span> </td> <tr id="entity-blacksquare"><td> <code title="">blacksquare;</code> </td> <td> U+025AA </td> <td> <span class="glyph" title="">&#9642;</span> </td> <tr id="entity-blacktriangle"><td> <code title="">blacktriangle;</code> </td> <td> U+025B4 </td> <td> <span class="glyph" title="">&#9652;</span> </td> <tr id="entity-blacktriangledown"><td> <code title="">blacktriangledown;</code> </td> <td> U+025BE </td> <td> <span class="glyph" title="">&#9662;</span> </td> <tr id="entity-blacktriangleleft"><td> <code title="">blacktriangleleft;</code> </td> <td> U+025C2 </td> <td> <span class="glyph" title="">&#9666;</span> </td> <tr id="entity-blacktriangleright"><td> <code title="">blacktriangleright;</code> </td> <td> U+025B8 </td> <td> <span class="glyph" title="">&#9656;</span> </td> <tr id="entity-blank"><td> <code title="">blank;</code> </td> <td> U+02423 </td> <td> <span class="glyph" title="">&#9251;</span> </td> <tr id="entity-blk12"><td> <code title="">blk12;</code> </td> <td> U+02592 </td> <td> <span class="glyph" title="">&#9618;</span> </td> <tr id="entity-blk14"><td> <code title="">blk14;</code> </td> <td> U+02591 </td> <td> <span class="glyph" title="">&#9617;</span> </td> <tr id="entity-blk34"><td> <code title="">blk34;</code> </td> <td> U+02593 </td> <td> <span class="glyph" title="">&#9619;</span> </td> <tr id="entity-block"><td> <code title="">block;</code> </td> <td> U+02588 </td> <td> <span class="glyph" title="">&#9608;</span> </td> <tr id="entity-bne"><td> <code title="">bne;</code> </td> <td> U+0003D U+020E5 </td> <td> <span class="glyph compound" title="">=&#8421;</span> </td> <tr id="entity-bnequiv"><td> <code 
title="">bnequiv;</code> </td> <td> U+02261 U+020E5 </td> <td> <span class="glyph compound" title="">&equiv;&#8421;</span> </td> <tr id="entity-bnot"><td> <code title="">bnot;</code> </td> <td> U+02310 </td> <td> <span class="glyph" title="">&#8976;</span> </td> <tr id="entity-bopf"><td> <code title="">bopf;</code> </td> <td> U+1D553 </td> <td> <span class="glyph" title="">&#120147;</span> </td> <tr id="entity-bot"><td> <code title="">bot;</code> </td> <td> U+022A5 </td> <td> <span class="glyph" title="">&perp;</span> </td> <tr id="entity-bottom"><td> <code title="">bottom;</code> </td> <td> U+022A5 </td> <td> <span class="glyph" title="">&perp;</span> </td> <tr id="entity-bowtie"><td> <code title="">bowtie;</code> </td> <td> U+022C8 </td> <td> <span class="glyph" title="">&#8904;</span> </td> <tr id="entity-boxDL"><td> <code title="">boxDL;</code> </td> <td> U+02557 </td> <td> <span class="glyph" title="">&#9559;</span> </td> <tr id="entity-boxDR"><td> <code title="">boxDR;</code> </td> <td> U+02554 </td> <td> <span class="glyph" title="">&#9556;</span> </td> <tr id="entity-boxDl"><td> <code title="">boxDl;</code> </td> <td> U+02556 </td> <td> <span class="glyph" title="">&#9558;</span> </td> <tr id="entity-boxDr"><td> <code title="">boxDr;</code> </td> <td> U+02553 </td> <td> <span class="glyph" title="">&#9555;</span> </td> <tr id="entity-boxH"><td> <code title="">boxH;</code> </td> <td> U+02550 </td> <td> <span class="glyph" title="">&#9552;</span> </td> <tr id="entity-boxHD"><td> <code title="">boxHD;</code> </td> <td> U+02566 </td> <td> <span class="glyph" title="">&#9574;</span> </td> <tr id="entity-boxHU"><td> <code title="">boxHU;</code> </td> <td> U+02569 </td> <td> <span class="glyph" title="">&#9577;</span> </td> <tr id="entity-boxHd"><td> <code title="">boxHd;</code> </td> <td> U+02564 </td> <td> <span class="glyph" title="">&#9572;</span> </td> <tr id="entity-boxHu"><td> <code title="">boxHu;</code> </td> <td> U+02567 </td> <td> <span class="glyph" 
title="">&#9575;</span> </td> <tr id="entity-boxUL"><td> <code title="">boxUL;</code> </td> <td> U+0255D </td> <td> <span class="glyph" title="">&#9565;</span> </td> <tr id="entity-boxUR"><td> <code title="">boxUR;</code> </td> <td> U+0255A </td> <td> <span class="glyph" title="">&#9562;</span> </td> <tr id="entity-boxUl"><td> <code title="">boxUl;</code> </td> <td> U+0255C </td> <td> <span class="glyph" title="">&#9564;</span> </td> <tr id="entity-boxUr"><td> <code title="">boxUr;</code> </td> <td> U+02559 </td> <td> <span class="glyph" title="">&#9561;</span> </td> <tr id="entity-boxV"><td> <code title="">boxV;</code> </td> <td> U+02551 </td> <td> <span class="glyph" title="">&#9553;</span> </td> <tr id="entity-boxVH"><td> <code title="">boxVH;</code> </td> <td> U+0256C </td> <td> <span class="glyph" title="">&#9580;</span> </td> <tr id="entity-boxVL"><td> <code title="">boxVL;</code> </td> <td> U+02563 </td> <td> <span class="glyph" title="">&#9571;</span> </td> <tr id="entity-boxVR"><td> <code title="">boxVR;</code> </td> <td> U+02560 </td> <td> <span class="glyph" title="">&#9568;</span> </td> <tr id="entity-boxVh"><td> <code title="">boxVh;</code> </td> <td> U+0256B </td> <td> <span class="glyph" title="">&#9579;</span> </td> <tr id="entity-boxVl"><td> <code title="">boxVl;</code> </td> <td> U+02562 </td> <td> <span class="glyph" title="">&#9570;</span> </td> <tr id="entity-boxVr"><td> <code title="">boxVr;</code> </td> <td> U+0255F </td> <td> <span class="glyph" title="">&#9567;</span> </td> <tr id="entity-boxbox"><td> <code title="">boxbox;</code> </td> <td> U+029C9 </td> <td> <span class="glyph" title="">&#10697;</span> </td> <tr id="entity-boxdL"><td> <code title="">boxdL;</code> </td> <td> U+02555 </td> <td> <span class="glyph" title="">&#9557;</span> </td> <tr id="entity-boxdR"><td> <code title="">boxdR;</code> </td> <td> U+02552 </td> <td> <span class="glyph" title="">&#9554;</span> </td> <tr id="entity-boxdl"><td> <code title="">boxdl;</code> </td> 
<td> U+02510 </td> <td> <span class="glyph" title="">&#9488;</span> </td> <tr id="entity-boxdr"><td> <code title="">boxdr;</code> </td> <td> U+0250C </td> <td> <span class="glyph" title="">&#9484;</span> </td> <tr id="entity-boxh"><td> <code title="">boxh;</code> </td> <td> U+02500 </td> <td> <span class="glyph" title="">&#9472;</span> </td> <tr id="entity-boxhD"><td> <code title="">boxhD;</code> </td> <td> U+02565 </td> <td> <span class="glyph" title="">&#9573;</span> </td> <tr id="entity-boxhU"><td> <code title="">boxhU;</code> </td> <td> U+02568 </td> <td> <span class="glyph" title="">&#9576;</span> </td> <tr id="entity-boxhd"><td> <code title="">boxhd;</code> </td> <td> U+0252C </td> <td> <span class="glyph" title="">&#9516;</span> </td> <tr id="entity-boxhu"><td> <code title="">boxhu;</code> </td> <td> U+02534 </td> <td> <span class="glyph" title="">&#9524;</span> </td> <tr id="entity-boxminus"><td> <code title="">boxminus;</code> </td> <td> U+0229F </td> <td> <span class="glyph" title="">&#8863;</span> </td> <tr id="entity-boxplus"><td> <code title="">boxplus;</code> </td> <td> U+0229E </td> <td> <span class="glyph" title="">&#8862;</span> </td> <tr id="entity-boxtimes"><td> <code title="">boxtimes;</code> </td> <td> U+022A0 </td> <td> <span class="glyph" title="">&#8864;</span> </td> <tr id="entity-boxuL"><td> <code title="">boxuL;</code> </td> <td> U+0255B </td> <td> <span class="glyph" title="">&#9563;</span> </td> <tr id="entity-boxuR"><td> <code title="">boxuR;</code> </td> <td> U+02558 </td> <td> <span class="glyph" title="">&#9560;</span> </td> <tr id="entity-boxul"><td> <code title="">boxul;</code> </td> <td> U+02518 </td> <td> <span class="glyph" title="">&#9496;</span> </td> <tr id="entity-boxur"><td> <code title="">boxur;</code> </td> <td> U+02514 </td> <td> <span class="glyph" title="">&#9492;</span> </td> <tr id="entity-boxv"><td> <code title="">boxv;</code> </td> <td> U+02502 </td> <td> <span class="glyph" title="">&#9474;</span> </td> <tr 
id="entity-boxvH"><td> <code title="">boxvH;</code> </td> <td> U+0256A </td> <td> <span class="glyph" title="">&#9578;</span> </td> <tr id="entity-boxvL"><td> <code title="">boxvL;</code> </td> <td> U+02561 </td> <td> <span class="glyph" title="">&#9569;</span> </td> <tr id="entity-boxvR"><td> <code title="">boxvR;</code> </td> <td> U+0255E </td> <td> <span class="glyph" title="">&#9566;</span> </td> <tr id="entity-boxvh"><td> <code title="">boxvh;</code> </td> <td> U+0253C </td> <td> <span class="glyph" title="">&#9532;</span> </td> <tr id="entity-boxvl"><td> <code title="">boxvl;</code> </td> <td> U+02524 </td> <td> <span class="glyph" title="">&#9508;</span> </td> <tr id="entity-boxvr"><td> <code title="">boxvr;</code> </td> <td> U+0251C </td> <td> <span class="glyph" title="">&#9500;</span> </td> <tr id="entity-bprime"><td> <code title="">bprime;</code> </td> <td> U+02035 </td> <td> <span class="glyph" title="">&#8245;</span> </td> <tr id="entity-breve"><td> <code title="">breve;</code> </td> <td> U+002D8 </td> <td> <span class="glyph" title="">&#728;</span> </td> <tr id="entity-brvbar"><td> <code title="">brvbar;</code> </td> <td> U+000A6 </td> <td> <span class="glyph" title="">&brvbar;</span> </td> <tr id="entity-bscr"><td> <code title="">bscr;</code> </td> <td> U+1D4B7 </td> <td> <span class="glyph" title="">&#119991;</span> </td> <tr id="entity-bsemi"><td> <code title="">bsemi;</code> </td> <td> U+0204F </td> <td> <span class="glyph" title="">&#8271;</span> </td> <tr id="entity-bsim"><td> <code title="">bsim;</code> </td> <td> U+0223D </td> <td> <span class="glyph" title="">&#8765;</span> </td> <tr id="entity-bsime"><td> <code title="">bsime;</code> </td> <td> U+022CD </td> <td> <span class="glyph" title="">&#8909;</span> </td> <tr id="entity-bsol"><td> <code title="">bsol;</code> </td> <td> U+0005C </td> <td> <span class="glyph" title="">\</span> </td> <tr id="entity-bsolb"><td> <code title="">bsolb;</code> </td> <td> U+029C5 </td> <td> <span class="glyph" 
title="">&#10693;</span> </td> <tr id="entity-bsolhsub"><td> <code title="">bsolhsub;</code> </td> <td> U+027C8 </td> <td> <span class="glyph" title="">&#10184;</span> </td> <tr id="entity-bull"><td> <code title="">bull;</code> </td> <td> U+02022 </td> <td> <span class="glyph" title="">&bull;</span> </td> <tr id="entity-bullet"><td> <code title="">bullet;</code> </td> <td> U+02022 </td> <td> <span class="glyph" title="">&bull;</span> </td> <tr id="entity-bump"><td> <code title="">bump;</code> </td> <td> U+0224E </td> <td> <span class="glyph" title="">&#8782;</span> </td> <tr id="entity-bumpE"><td> <code title="">bumpE;</code> </td> <td> U+02AAE </td> <td> <span class="glyph" title="">&#10926;</span> </td> <tr id="entity-bumpe"><td> <code title="">bumpe;</code> </td> <td> U+0224F </td> <td> <span class="glyph" title="">&#8783;</span> </td> <tr id="entity-bumpeq"><td> <code title="">bumpeq;</code> </td> <td> U+0224F </td> <td> <span class="glyph" title="">&#8783;</span> </td> <tr id="entity-cacute"><td> <code title="">cacute;</code> </td> <td> U+00107 </td> <td> <span class="glyph" title="">&#263;</span> </td> <tr id="entity-cap"><td> <code title="">cap;</code> </td> <td> U+02229 </td> <td> <span class="glyph" title="">&cap;</span> </td> <tr id="entity-capand"><td> <code title="">capand;</code> </td> <td> U+02A44 </td> <td> <span class="glyph" title="">&#10820;</span> </td> <tr id="entity-capbrcup"><td> <code title="">capbrcup;</code> </td> <td> U+02A49 </td> <td> <span class="glyph" title="">&#10825;</span> </td> <tr id="entity-capcap"><td> <code title="">capcap;</code> </td> <td> U+02A4B </td> <td> <span class="glyph" title="">&#10827;</span> </td> <tr id="entity-capcup"><td> <code title="">capcup;</code> </td> <td> U+02A47 </td> <td> <span class="glyph" title="">&#10823;</span> </td> <tr id="entity-capdot"><td> <code title="">capdot;</code> </td> <td> U+02A40 </td> <td> <span class="glyph" title="">&#10816;</span> </td> <tr id="entity-caps"><td> <code 
title="">caps;</code> </td> <td> U+02229 U+0FE00 </td> <td> <span class="glyph compound" title="">&cap;&#65024;</span> </td> <tr id="entity-caret"><td> <code title="">caret;</code> </td> <td> U+02041 </td> <td> <span class="glyph" title="">&#8257;</span> </td> <tr id="entity-caron"><td> <code title="">caron;</code> </td> <td> U+002C7 </td> <td> <span class="glyph" title="">&#711;</span> </td> <tr id="entity-ccaps"><td> <code title="">ccaps;</code> </td> <td> U+02A4D </td> <td> <span class="glyph" title="">&#10829;</span> </td> <tr id="entity-ccaron"><td> <code title="">ccaron;</code> </td> <td> U+0010D </td> <td> <span class="glyph" title="">&#269;</span> </td> <tr id="entity-ccedil"><td> <code title="">ccedil;</code> </td> <td> U+000E7 </td> <td> <span class="glyph" title="">&ccedil;</span> </td> <tr id="entity-ccirc"><td> <code title="">ccirc;</code> </td> <td> U+00109 </td> <td> <span class="glyph" title="">&#265;</span> </td> <tr id="entity-ccups"><td> <code title="">ccups;</code> </td> <td> U+02A4C </td> <td> <span class="glyph" title="">&#10828;</span> </td> <tr id="entity-ccupssm"><td> <code title="">ccupssm;</code> </td> <td> U+02A50 </td> <td> <span class="glyph" title="">&#10832;</span> </td> <tr id="entity-cdot"><td> <code title="">cdot;</code> </td> <td> U+0010B </td> <td> <span class="glyph" title="">&#267;</span> </td> <tr id="entity-cedil"><td> <code title="">cedil;</code> </td> <td> U+000B8 </td> <td> <span class="glyph" title="">&cedil;</span> </td> <tr id="entity-cemptyv"><td> <code title="">cemptyv;</code> </td> <td> U+029B2 </td> <td> <span class="glyph" title="">&#10674;</span> </td> <tr id="entity-cent"><td> <code title="">cent;</code> </td> <td> U+000A2 </td> <td> <span class="glyph" title="">&cent;</span> </td> <tr id="entity-centerdot"><td> <code title="">centerdot;</code> </td> <td> U+000B7 </td> <td> <span class="glyph" title="">&middot;</span> </td> <tr id="entity-cfr"><td> <code title="">cfr;</code> </td> <td> U+1D520 </td> <td> <span 
class="glyph" title="">&#120096;</span> </td> <tr id="entity-chcy"><td> <code title="">chcy;</code> </td> <td> U+00447 </td> <td> <span class="glyph" title="">&#1095;</span> </td> <tr id="entity-check"><td> <code title="">check;</code> </td> <td> U+02713 </td> <td> <span class="glyph" title="">&#10003;</span> </td> <tr id="entity-checkmark"><td> <code title="">checkmark;</code> </td> <td> U+02713 </td> <td> <span class="glyph" title="">&#10003;</span> </td> <tr id="entity-chi"><td> <code title="">chi;</code> </td> <td> U+003C7 </td> <td> <span class="glyph" title="">&chi;</span> </td> <tr id="entity-cir"><td> <code title="">cir;</code> </td> <td> U+025CB </td> <td> <span class="glyph" title="">&#9675;</span> </td> <tr id="entity-cirE"><td> <code title="">cirE;</code> </td> <td> U+029C3 </td> <td> <span class="glyph" title="">&#10691;</span> </td> <tr id="entity-circ"><td> <code title="">circ;</code> </td> <td> U+002C6 </td> <td> <span class="glyph" title="">&circ;</span> </td> <tr id="entity-circeq"><td> <code title="">circeq;</code> </td> <td> U+02257 </td> <td> <span class="glyph" title="">&#8791;</span> </td> <tr id="entity-circlearrowleft"><td> <code title="">circlearrowleft;</code> </td> <td> U+021BA </td> <td> <span class="glyph" title="">&#8634;</span> </td> <tr id="entity-circlearrowright"><td> <code title="">circlearrowright;</code> </td> <td> U+021BB </td> <td> <span class="glyph" title="">&#8635;</span> </td> <tr id="entity-circledR"><td> <code title="">circledR;</code> </td> <td> U+000AE </td> <td> <span class="glyph" title="">&reg;</span> </td> <tr id="entity-circledS"><td> <code title="">circledS;</code> </td> <td> U+024C8 </td> <td> <span class="glyph" title="">&#9416;</span> </td> <tr id="entity-circledast"><td> <code title="">circledast;</code> </td> <td> U+0229B </td> <td> <span class="glyph" title="">&#8859;</span> </td> <tr id="entity-circledcirc"><td> <code title="">circledcirc;</code> </td> <td> U+0229A </td> <td> <span class="glyph" 
title="">&#8858;</span> </td> <tr id="entity-circleddash"><td> <code title="">circleddash;</code> </td> <td> U+0229D </td> <td> <span class="glyph" title="">&#8861;</span> </td> <tr id="entity-cire"><td> <code title="">cire;</code> </td> <td> U+02257 </td> <td> <span class="glyph" title="">&#8791;</span> </td> <tr id="entity-cirfnint"><td> <code title="">cirfnint;</code> </td> <td> U+02A10 </td> <td> <span class="glyph" title="">&#10768;</span> </td> <tr id="entity-cirmid"><td> <code title="">cirmid;</code> </td> <td> U+02AEF </td> <td> <span class="glyph" title="">&#10991;</span> </td> <tr id="entity-cirscir"><td> <code title="">cirscir;</code> </td> <td> U+029C2 </td> <td> <span class="glyph" title="">&#10690;</span> </td> <tr id="entity-clubs"><td> <code title="">clubs;</code> </td> <td> U+02663 </td> <td> <span class="glyph" title="">&clubs;</span> </td> <tr id="entity-clubsuit"><td> <code title="">clubsuit;</code> </td> <td> U+02663 </td> <td> <span class="glyph" title="">&clubs;</span> </td> <tr id="entity-colon"><td> <code title="">colon;</code> </td> <td> U+0003A </td> <td> <span class="glyph" title="">:</span> </td> <tr id="entity-colone"><td> <code title="">colone;</code> </td> <td> U+02254 </td> <td> <span class="glyph" title="">&#8788;</span> </td> <tr id="entity-coloneq"><td> <code title="">coloneq;</code> </td> <td> U+02254 </td> <td> <span class="glyph" title="">&#8788;</span> </td> <tr id="entity-comma"><td> <code title="">comma;</code> </td> <td> U+0002C </td> <td> <span class="glyph" title="">,</span> </td> <tr id="entity-commat"><td> <code title="">commat;</code> </td> <td> U+00040 </td> <td> <span class="glyph" title="">@</span> </td> <tr id="entity-comp"><td> <code title="">comp;</code> </td> <td> U+02201 </td> <td> <span class="glyph" title="">&#8705;</span> </td> <tr id="entity-compfn"><td> <code title="">compfn;</code> </td> <td> U+02218 </td> <td> <span class="glyph" title="">&#8728;</span> </td> <tr id="entity-complement"><td> <code 
title="">complement;</code> </td> <td> U+02201 </td> <td> <span class="glyph" title="">&#8705;</span> </td> <tr id="entity-complexes"><td> <code title="">complexes;</code> </td> <td> U+02102 </td> <td> <span class="glyph" title="">&#8450;</span> </td> <tr id="entity-cong"><td> <code title="">cong;</code> </td> <td> U+02245 </td> <td> <span class="glyph" title="">&cong;</span> </td> <tr id="entity-congdot"><td> <code title="">congdot;</code> </td> <td> U+02A6D </td> <td> <span class="glyph" title="">&#10861;</span> </td> <tr id="entity-conint"><td> <code title="">conint;</code> </td> <td> U+0222E </td> <td> <span class="glyph" title="">&#8750;</span> </td> <tr id="entity-copf"><td> <code title="">copf;</code> </td> <td> U+1D554 </td> <td> <span class="glyph" title="">&#120148;</span> </td> <tr id="entity-coprod"><td> <code title="">coprod;</code> </td> <td> U+02210 </td> <td> <span class="glyph" title="">&#8720;</span> </td> <tr id="entity-copy"><td> <code title="">copy;</code> </td> <td> U+000A9 </td> <td> <span class="glyph" title="">&copy;</span> </td> <tr id="entity-copysr"><td> <code title="">copysr;</code> </td> <td> U+02117 </td> <td> <span class="glyph" title="">&#8471;</span> </td> <tr id="entity-crarr"><td> <code title="">crarr;</code> </td> <td> U+021B5 </td> <td> <span class="glyph" title="">&crarr;</span> </td> <tr id="entity-cross"><td> <code title="">cross;</code> </td> <td> U+02717 </td> <td> <span class="glyph" title="">&#10007;</span> </td> <tr id="entity-cscr"><td> <code title="">cscr;</code> </td> <td> U+1D4B8 </td> <td> <span class="glyph" title="">&#119992;</span> </td> <tr id="entity-csub"><td> <code title="">csub;</code> </td> <td> U+02ACF </td> <td> <span class="glyph" title="">&#10959;</span> </td> <tr id="entity-csube"><td> <code title="">csube;</code> </td> <td> U+02AD1 </td> <td> <span class="glyph" title="">&#10961;</span> </td> <tr id="entity-csup"><td> <code title="">csup;</code> </td> <td> U+02AD0 </td> <td> <span class="glyph" 
title="">&#10960;</span> </td> <tr id="entity-csupe"><td> <code title="">csupe;</code> </td> <td> U+02AD2 </td> <td> <span class="glyph" title="">&#10962;</span> </td> <tr id="entity-ctdot"><td> <code title="">ctdot;</code> </td> <td> U+022EF </td> <td> <span class="glyph" title="">&#8943;</span> </td> <tr id="entity-cudarrl"><td> <code title="">cudarrl;</code> </td> <td> U+02938 </td> <td> <span class="glyph" title="">&#10552;</span> </td> <tr id="entity-cudarrr"><td> <code title="">cudarrr;</code> </td> <td> U+02935 </td> <td> <span class="glyph" title="">&#10549;</span> </td> <tr id="entity-cuepr"><td> <code title="">cuepr;</code> </td> <td> U+022DE </td> <td> <span class="glyph" title="">&#8926;</span> </td> <tr id="entity-cuesc"><td> <code title="">cuesc;</code> </td> <td> U+022DF </td> <td> <span class="glyph" title="">&#8927;</span> </td> <tr id="entity-cularr"><td> <code title="">cularr;</code> </td> <td> U+021B6 </td> <td> <span class="glyph" title="">&#8630;</span> </td> <tr id="entity-cularrp"><td> <code title="">cularrp;</code> </td> <td> U+0293D </td> <td> <span class="glyph" title="">&#10557;</span> </td> <tr id="entity-cup"><td> <code title="">cup;</code> </td> <td> U+0222A </td> <td> <span class="glyph" title="">&cup;</span> </td> <tr id="entity-cupbrcap"><td> <code title="">cupbrcap;</code> </td> <td> U+02A48 </td> <td> <span class="glyph" title="">&#10824;</span> </td> <tr id="entity-cupcap"><td> <code title="">cupcap;</code> </td> <td> U+02A46 </td> <td> <span class="glyph" title="">&#10822;</span> </td> <tr id="entity-cupcup"><td> <code title="">cupcup;</code> </td> <td> U+02A4A </td> <td> <span class="glyph" title="">&#10826;</span> </td> <tr id="entity-cupdot"><td> <code title="">cupdot;</code> </td> <td> U+0228D </td> <td> <span class="glyph" title="">&#8845;</span> </td> <tr id="entity-cupor"><td> <code title="">cupor;</code> </td> <td> U+02A45 </td> <td> <span class="glyph" title="">&#10821;</span> </td> <tr id="entity-cups"><td> <code 
title="">cups;</code> </td> <td> U+0222A U+0FE00 </td> <td> <span class="glyph compound" title="">&cup;&#65024;</span> </td> <tr id="entity-curarr"><td> <code title="">curarr;</code> </td> <td> U+021B7 </td> <td> <span class="glyph" title="">&#8631;</span> </td> <tr id="entity-curarrm"><td> <code title="">curarrm;</code> </td> <td> U+0293C </td> <td> <span class="glyph" title="">&#10556;</span> </td> <tr id="entity-curlyeqprec"><td> <code title="">curlyeqprec;</code> </td> <td> U+022DE </td> <td> <span class="glyph" title="">&#8926;</span> </td> <tr id="entity-curlyeqsucc"><td> <code title="">curlyeqsucc;</code> </td> <td> U+022DF </td> <td> <span class="glyph" title="">&#8927;</span> </td> <tr id="entity-curlyvee"><td> <code title="">curlyvee;</code> </td> <td> U+022CE </td> <td> <span class="glyph" title="">&#8910;</span> </td> <tr id="entity-curlywedge"><td> <code title="">curlywedge;</code> </td> <td> U+022CF </td> <td> <span class="glyph" title="">&#8911;</span> </td> <tr id="entity-curren"><td> <code title="">curren;</code> </td> <td> U+000A4 </td> <td> <span class="glyph" title="">&curren;</span> </td> <tr id="entity-curvearrowleft"><td> <code title="">curvearrowleft;</code> </td> <td> U+021B6 </td> <td> <span class="glyph" title="">&#8630;</span> </td> <tr id="entity-curvearrowright"><td> <code title="">curvearrowright;</code> </td> <td> U+021B7 </td> <td> <span class="glyph" title="">&#8631;</span> </td> <tr id="entity-cuvee"><td> <code title="">cuvee;</code> </td> <td> U+022CE </td> <td> <span class="glyph" title="">&#8910;</span> </td> <tr id="entity-cuwed"><td> <code title="">cuwed;</code> </td> <td> U+022CF </td> <td> <span class="glyph" title="">&#8911;</span> </td> <tr id="entity-cwconint"><td> <code title="">cwconint;</code> </td> <td> U+02232 </td> <td> <span class="glyph" title="">&#8754;</span> </td> <tr id="entity-cwint"><td> <code title="">cwint;</code> </td> <td> U+02231 </td> <td> <span class="glyph" title="">&#8753;</span> </td> <tr 
id="entity-cylcty"><td> <code title="">cylcty;</code> </td> <td> U+0232D </td> <td> <span class="glyph" title="">&#9005;</span> </td> <tr id="entity-dArr"><td> <code title="">dArr;</code> </td> <td> U+021D3 </td> <td> <span class="glyph" title="">&dArr;</span> </td> <tr id="entity-dHar"><td> <code title="">dHar;</code> </td> <td> U+02965 </td> <td> <span class="glyph" title="">&#10597;</span> </td> <tr id="entity-dagger"><td> <code title="">dagger;</code> </td> <td> U+02020 </td> <td> <span class="glyph" title="">&dagger;</span> </td> <tr id="entity-daleth"><td> <code title="">daleth;</code> </td> <td> U+02138 </td> <td> <span class="glyph" title="">&#8504;</span> </td> <tr id="entity-darr"><td> <code title="">darr;</code> </td> <td> U+02193 </td> <td> <span class="glyph" title="">&darr;</span> </td> <tr id="entity-dash"><td> <code title="">dash;</code> </td> <td> U+02010 </td> <td> <span class="glyph" title="">&#8208;</span> </td> <tr id="entity-dashv"><td> <code title="">dashv;</code> </td> <td> U+022A3 </td> <td> <span class="glyph" title="">&#8867;</span> </td> <tr id="entity-dbkarow"><td> <code title="">dbkarow;</code> </td> <td> U+0290F </td> <td> <span class="glyph" title="">&#10511;</span> </td> <tr id="entity-dblac"><td> <code title="">dblac;</code> </td> <td> U+002DD </td> <td> <span class="glyph" title="">&#733;</span> </td> <tr id="entity-dcaron"><td> <code title="">dcaron;</code> </td> <td> U+0010F </td> <td> <span class="glyph" title="">&#271;</span> </td> <tr id="entity-dcy"><td> <code title="">dcy;</code> </td> <td> U+00434 </td> <td> <span class="glyph" title="">&#1076;</span> </td> <tr id="entity-dd"><td> <code title="">dd;</code> </td> <td> U+02146 </td> <td> <span class="glyph" title="">&#8518;</span> </td> <tr id="entity-ddagger"><td> <code title="">ddagger;</code> </td> <td> U+02021 </td> <td> <span class="glyph" title="">&Dagger;</span> </td> <tr id="entity-ddarr"><td> <code title="">ddarr;</code> </td> <td> U+021CA </td> <td> <span 
class="glyph" title="">&#8650;</span> </td> <tr id="entity-ddotseq"><td> <code title="">ddotseq;</code> </td> <td> U+02A77 </td> <td> <span class="glyph" title="">&#10871;</span> </td> <tr id="entity-deg"><td> <code title="">deg;</code> </td> <td> U+000B0 </td> <td> <span class="glyph" title="">&deg;</span> </td> <tr id="entity-delta"><td> <code title="">delta;</code> </td> <td> U+003B4 </td> <td> <span class="glyph" title="">&delta;</span> </td> <tr id="entity-demptyv"><td> <code title="">demptyv;</code> </td> <td> U+029B1 </td> <td> <span class="glyph" title="">&#10673;</span> </td> <tr id="entity-dfisht"><td> <code title="">dfisht;</code> </td> <td> U+0297F </td> <td> <span class="glyph" title="">&#10623;</span> </td> <tr id="entity-dfr"><td> <code title="">dfr;</code> </td> <td> U+1D521 </td> <td> <span class="glyph" title="">&#120097;</span> </td> <tr id="entity-dharl"><td> <code title="">dharl;</code> </td> <td> U+021C3 </td> <td> <span class="glyph" title="">&#8643;</span> </td> <tr id="entity-dharr"><td> <code title="">dharr;</code> </td> <td> U+021C2 </td> <td> <span class="glyph" title="">&#8642;</span> </td> <tr id="entity-diam"><td> <code title="">diam;</code> </td> <td> U+022C4 </td> <td> <span class="glyph" title="">&#8900;</span> </td> <tr id="entity-diamond"><td> <code title="">diamond;</code> </td> <td> U+022C4 </td> <td> <span class="glyph" title="">&#8900;</span> </td> <tr id="entity-diamondsuit"><td> <code title="">diamondsuit;</code> </td> <td> U+02666 </td> <td> <span class="glyph" title="">&diams;</span> </td> <tr id="entity-diams"><td> <code title="">diams;</code> </td> <td> U+02666 </td> <td> <span class="glyph" title="">&diams;</span> </td> <tr id="entity-die"><td> <code title="">die;</code> </td> <td> U+000A8 </td> <td> <span class="glyph" title="">&uml;</span> </td> <tr id="entity-digamma"><td> <code title="">digamma;</code> </td> <td> U+003DD </td> <td> <span class="glyph" title="">&#989;</span> </td> <tr id="entity-disin"><td> <code 
title="">disin;</code> </td> <td> U+022F2 </td> <td> <span class="glyph" title="">&#8946;</span> </td> <tr id="entity-div"><td> <code title="">div;</code> </td> <td> U+000F7 </td> <td> <span class="glyph" title="">&divide;</span> </td> <tr id="entity-divide"><td> <code title="">divide;</code> </td> <td> U+000F7 </td> <td> <span class="glyph" title="">&divide;</span> </td> <tr id="entity-divideontimes"><td> <code title="">divideontimes;</code> </td> <td> U+022C7 </td> <td> <span class="glyph" title="">&#8903;</span> </td> <tr id="entity-divonx"><td> <code title="">divonx;</code> </td> <td> U+022C7 </td> <td> <span class="glyph" title="">&#8903;</span> </td> <tr id="entity-djcy"><td> <code title="">djcy;</code> </td> <td> U+00452 </td> <td> <span class="glyph" title="">&#1106;</span> </td> <tr id="entity-dlcorn"><td> <code title="">dlcorn;</code> </td> <td> U+0231E </td> <td> <span class="glyph" title="">&#8990;</span> </td> <tr id="entity-dlcrop"><td> <code title="">dlcrop;</code> </td> <td> U+0230D </td> <td> <span class="glyph" title="">&#8973;</span> </td> <tr id="entity-dollar"><td> <code title="">dollar;</code> </td> <td> U+00024 </td> <td> <span class="glyph" title="">$</span> </td> <tr id="entity-dopf"><td> <code title="">dopf;</code> </td> <td> U+1D555 </td> <td> <span class="glyph" title="">&#120149;</span> </td> <tr id="entity-dot"><td> <code title="">dot;</code> </td> <td> U+002D9 </td> <td> <span class="glyph" title="">&#729;</span> </td> <tr id="entity-doteq"><td> <code title="">doteq;</code> </td> <td> U+02250 </td> <td> <span class="glyph" title="">&#8784;</span> </td> <tr id="entity-doteqdot"><td> <code title="">doteqdot;</code> </td> <td> U+02251 </td> <td> <span class="glyph" title="">&#8785;</span> </td> <tr id="entity-dotminus"><td> <code title="">dotminus;</code> </td> <td> U+02238 </td> <td> <span class="glyph" title="">&#8760;</span> </td> <tr id="entity-dotplus"><td> <code title="">dotplus;</code> </td> <td> U+02214 </td> <td> <span 
class="glyph" title="">&#8724;</span> </td> <tr id="entity-dotsquare"><td> <code title="">dotsquare;</code> </td> <td> U+022A1 </td> <td> <span class="glyph" title="">&#8865;</span> </td> <tr id="entity-doublebarwedge"><td> <code title="">doublebarwedge;</code> </td> <td> U+02306 </td> <td> <span class="glyph" title="">&#8966;</span> </td> <tr id="entity-downarrow"><td> <code title="">downarrow;</code> </td> <td> U+02193 </td> <td> <span class="glyph" title="">&darr;</span> </td> <tr id="entity-downdownarrows"><td> <code title="">downdownarrows;</code> </td> <td> U+021CA </td> <td> <span class="glyph" title="">&#8650;</span> </td> <tr id="entity-downharpoonleft"><td> <code title="">downharpoonleft;</code> </td> <td> U+021C3 </td> <td> <span class="glyph" title="">&#8643;</span> </td> <tr id="entity-downharpoonright"><td> <code title="">downharpoonright;</code> </td> <td> U+021C2 </td> <td> <span class="glyph" title="">&#8642;</span> </td> <tr id="entity-drbkarow"><td> <code title="">drbkarow;</code> </td> <td> U+02910 </td> <td> <span class="glyph" title="">&#10512;</span> </td> <tr id="entity-drcorn"><td> <code title="">drcorn;</code> </td> <td> U+0231F </td> <td> <span class="glyph" title="">&#8991;</span> </td> <tr id="entity-drcrop"><td> <code title="">drcrop;</code> </td> <td> U+0230C </td> <td> <span class="glyph" title="">&#8972;</span> </td> <tr id="entity-dscr"><td> <code title="">dscr;</code> </td> <td> U+1D4B9 </td> <td> <span class="glyph" title="">&#119993;</span> </td> <tr id="entity-dscy"><td> <code title="">dscy;</code> </td> <td> U+00455 </td> <td> <span class="glyph" title="">&#1109;</span> </td> <tr id="entity-dsol"><td> <code title="">dsol;</code> </td> <td> U+029F6 </td> <td> <span class="glyph" title="">&#10742;</span> </td> <tr id="entity-dstrok"><td> <code title="">dstrok;</code> </td> <td> U+00111 </td> <td> <span class="glyph" title="">&#273;</span> </td> <tr id="entity-dtdot"><td> <code title="">dtdot;</code> </td> <td> U+022F1 </td> <td> 
<span class="glyph" title="">&#8945;</span> </td> <tr id="entity-dtri"><td> <code title="">dtri;</code> </td> <td> U+025BF </td> <td> <span class="glyph" title="">&#9663;</span> </td> <tr id="entity-dtrif"><td> <code title="">dtrif;</code> </td> <td> U+025BE </td> <td> <span class="glyph" title="">&#9662;</span> </td> <tr id="entity-duarr"><td> <code title="">duarr;</code> </td> <td> U+021F5 </td> <td> <span class="glyph" title="">&#8693;</span> </td> <tr id="entity-duhar"><td> <code title="">duhar;</code> </td> <td> U+0296F </td> <td> <span class="glyph" title="">&#10607;</span> </td> <tr id="entity-dwangle"><td> <code title="">dwangle;</code> </td> <td> U+029A6 </td> <td> <span class="glyph" title="">&#10662;</span> </td> <tr id="entity-dzcy"><td> <code title="">dzcy;</code> </td> <td> U+0045F </td> <td> <span class="glyph" title="">&#1119;</span> </td> <tr id="entity-dzigrarr"><td> <code title="">dzigrarr;</code> </td> <td> U+027FF </td> <td> <span class="glyph" title="">&#10239;</span> </td> <tr id="entity-eDDot"><td> <code title="">eDDot;</code> </td> <td> U+02A77 </td> <td> <span class="glyph" title="">&#10871;</span> </td> <tr id="entity-eDot"><td> <code title="">eDot;</code> </td> <td> U+02251 </td> <td> <span class="glyph" title="">&#8785;</span> </td> <tr id="entity-eacute"><td> <code title="">eacute;</code> </td> <td> U+000E9 </td> <td> <span class="glyph" title="">&eacute;</span> </td> <tr id="entity-easter"><td> <code title="">easter;</code> </td> <td> U+02A6E </td> <td> <span class="glyph" title="">&#10862;</span> </td> <tr id="entity-ecaron"><td> <code title="">ecaron;</code> </td> <td> U+0011B </td> <td> <span class="glyph" title="">&#283;</span> </td> <tr id="entity-ecir"><td> <code title="">ecir;</code> </td> <td> U+02256 </td> <td> <span class="glyph" title="">&#8790;</span> </td> <tr id="entity-ecirc"><td> <code title="">ecirc;</code> </td> <td> U+000EA </td> <td> <span class="glyph" title="">&ecirc;</span> </td> <tr id="entity-ecolon"><td> 
<code title="">ecolon;</code> </td> <td> U+02255 </td> <td> <span class="glyph" title="">&#8789;</span> </td> <tr id="entity-ecy"><td> <code title="">ecy;</code> </td> <td> U+0044D </td> <td> <span class="glyph" title="">&#1101;</span> </td> <tr id="entity-edot"><td> <code title="">edot;</code> </td> <td> U+00117 </td> <td> <span class="glyph" title="">&#279;</span> </td> <tr id="entity-ee"><td> <code title="">ee;</code> </td> <td> U+02147 </td> <td> <span class="glyph" title="">&#8519;</span> </td> <tr id="entity-efDot"><td> <code title="">efDot;</code> </td> <td> U+02252 </td> <td> <span class="glyph" title="">&#8786;</span> </td> <tr id="entity-efr"><td> <code title="">efr;</code> </td> <td> U+1D522 </td> <td> <span class="glyph" title="">&#120098;</span> </td> <tr id="entity-eg"><td> <code title="">eg;</code> </td> <td> U+02A9A </td> <td> <span class="glyph" title="">&#10906;</span> </td> <tr id="entity-egrave"><td> <code title="">egrave;</code> </td> <td> U+000E8 </td> <td> <span class="glyph" title="">&egrave;</span> </td> <tr id="entity-egs"><td> <code title="">egs;</code> </td> <td> U+02A96 </td> <td> <span class="glyph" title="">&#10902;</span> </td> <tr id="entity-egsdot"><td> <code title="">egsdot;</code> </td> <td> U+02A98 </td> <td> <span class="glyph" title="">&#10904;</span> </td> <tr id="entity-el"><td> <code title="">el;</code> </td> <td> U+02A99 </td> <td> <span class="glyph" title="">&#10905;</span> </td> <tr id="entity-elinters"><td> <code title="">elinters;</code> </td> <td> U+023E7 </td> <td> <span class="glyph" title="">&#9191;</span> </td> <tr id="entity-ell"><td> <code title="">ell;</code> </td> <td> U+02113 </td> <td> <span class="glyph" title="">&#8467;</span> </td> <tr id="entity-els"><td> <code title="">els;</code> </td> <td> U+02A95 </td> <td> <span class="glyph" title="">&#10901;</span> </td> <tr id="entity-elsdot"><td> <code title="">elsdot;</code> </td> <td> U+02A97 </td> <td> <span class="glyph" title="">&#10903;</span> </td> <tr 
id="entity-emacr"><td> <code title="">emacr;</code> </td> <td> U+00113 </td> <td> <span class="glyph" title="">&#275;</span> </td> <tr id="entity-empty"><td> <code title="">empty;</code> </td> <td> U+02205 </td> <td> <span class="glyph" title="">&empty;</span> </td> <tr id="entity-emptyset"><td> <code title="">emptyset;</code> </td> <td> U+02205 </td> <td> <span class="glyph" title="">&empty;</span> </td> <tr id="entity-emptyv"><td> <code title="">emptyv;</code> </td> <td> U+02205 </td> <td> <span class="glyph" title="">&empty;</span> </td> <tr id="entity-emsp"><td> <code title="">emsp;</code> </td> <td> U+02003 </td> <td> <span class="glyph" title="">&emsp;</span> </td> <tr id="entity-emsp13"><td> <code title="">emsp13;</code> </td> <td> U+02004 </td> <td> <span class="glyph" title="">&#8196;</span> </td> <tr id="entity-emsp14"><td> <code title="">emsp14;</code> </td> <td> U+02005 </td> <td> <span class="glyph" title="">&#8197;</span> </td> <tr id="entity-eng"><td> <code title="">eng;</code> </td> <td> U+0014B </td> <td> <span class="glyph" title="">&#331;</span> </td> <tr id="entity-ensp"><td> <code title="">ensp;</code> </td> <td> U+02002 </td> <td> <span class="glyph" title="">&ensp;</span> </td> <tr id="entity-eogon"><td> <code title="">eogon;</code> </td> <td> U+00119 </td> <td> <span class="glyph" title="">&#281;</span> </td> <tr id="entity-eopf"><td> <code title="">eopf;</code> </td> <td> U+1D556 </td> <td> <span class="glyph" title="">&#120150;</span> </td> <tr id="entity-epar"><td> <code title="">epar;</code> </td> <td> U+022D5 </td> <td> <span class="glyph" title="">&#8917;</span> </td> <tr id="entity-eparsl"><td> <code title="">eparsl;</code> </td> <td> U+029E3 </td> <td> <span class="glyph" title="">&#10723;</span> </td> <tr id="entity-eplus"><td> <code title="">eplus;</code> </td> <td> U+02A71 </td> <td> <span class="glyph" title="">&#10865;</span> </td> <tr id="entity-epsi"><td> <code title="">epsi;</code> </td> <td> U+003B5 </td> <td> <span 
class="glyph" title="">&epsilon;</span> </td> <tr id="entity-epsilon"><td> <code title="">epsilon;</code> </td> <td> U+003B5 </td> <td> <span class="glyph" title="">&epsilon;</span> </td> <tr id="entity-epsiv"><td> <code title="">epsiv;</code> </td> <td> U+003F5 </td> <td> <span class="glyph" title="">&#1013;</span> </td> <tr id="entity-eqcirc"><td> <code title="">eqcirc;</code> </td> <td> U+02256 </td> <td> <span class="glyph" title="">&#8790;</span> </td> <tr id="entity-eqcolon"><td> <code title="">eqcolon;</code> </td> <td> U+02255 </td> <td> <span class="glyph" title="">&#8789;</span> </td> <tr id="entity-eqsim"><td> <code title="">eqsim;</code> </td> <td> U+02242 </td> <td> <span class="glyph" title="">&#8770;</span> </td> <tr id="entity-eqslantgtr"><td> <code title="">eqslantgtr;</code> </td> <td> U+02A96 </td> <td> <span class="glyph" title="">&#10902;</span> </td> <tr id="entity-eqslantless"><td> <code title="">eqslantless;</code> </td> <td> U+02A95 </td> <td> <span class="glyph" title="">&#10901;</span> </td> <tr id="entity-equals"><td> <code title="">equals;</code> </td> <td> U+0003D </td> <td> <span class="glyph" title="">=</span> </td> <tr id="entity-equest"><td> <code title="">equest;</code> </td> <td> U+0225F </td> <td> <span class="glyph" title="">&#8799;</span> </td> <tr id="entity-equiv"><td> <code title="">equiv;</code> </td> <td> U+02261 </td> <td> <span class="glyph" title="">&equiv;</span> </td> <tr id="entity-equivDD"><td> <code title="">equivDD;</code> </td> <td> U+02A78 </td> <td> <span class="glyph" title="">&#10872;</span> </td> <tr id="entity-eqvparsl"><td> <code title="">eqvparsl;</code> </td> <td> U+029E5 </td> <td> <span class="glyph" title="">&#10725;</span> </td> <tr id="entity-erDot"><td> <code title="">erDot;</code> </td> <td> U+02253 </td> <td> <span class="glyph" title="">&#8787;</span> </td> <tr id="entity-erarr"><td> <code title="">erarr;</code> </td> <td> U+02971 </td> <td> <span class="glyph" title="">&#10609;</span> </td> 
<tr id="entity-escr"><td> <code title="">escr;</code> </td> <td> U+0212F </td> <td> <span class="glyph" title="">&#8495;</span> </td> <tr id="entity-esdot"><td> <code title="">esdot;</code> </td> <td> U+02250 </td> <td> <span class="glyph" title="">&#8784;</span> </td> <tr id="entity-esim"><td> <code title="">esim;</code> </td> <td> U+02242 </td> <td> <span class="glyph" title="">&#8770;</span> </td> <tr id="entity-eta"><td> <code title="">eta;</code> </td> <td> U+003B7 </td> <td> <span class="glyph" title="">&eta;</span> </td> <tr id="entity-eth"><td> <code title="">eth;</code> </td> <td> U+000F0 </td> <td> <span class="glyph" title="">&eth;</span> </td> <tr id="entity-euml"><td> <code title="">euml;</code> </td> <td> U+000EB </td> <td> <span class="glyph" title="">&euml;</span> </td> <tr id="entity-euro"><td> <code title="">euro;</code> </td> <td> U+020AC </td> <td> <span class="glyph" title="">&euro;</span> </td> <tr id="entity-excl"><td> <code title="">excl;</code> </td> <td> U+00021 </td> <td> <span class="glyph" title="">!</span> </td> <tr id="entity-exist"><td> <code title="">exist;</code> </td> <td> U+02203 </td> <td> <span class="glyph" title="">&exist;</span> </td> <tr id="entity-expectation"><td> <code title="">expectation;</code> </td> <td> U+02130 </td> <td> <span class="glyph" title="">&#8496;</span> </td> <tr id="entity-exponentiale"><td> <code title="">exponentiale;</code> </td> <td> U+02147 </td> <td> <span class="glyph" title="">&#8519;</span> </td> <tr id="entity-fallingdotseq"><td> <code title="">fallingdotseq;</code> </td> <td> U+02252 </td> <td> <span class="glyph" title="">&#8786;</span> </td> <tr id="entity-fcy"><td> <code title="">fcy;</code> </td> <td> U+00444 </td> <td> <span class="glyph" title="">&#1092;</span> </td> <tr id="entity-female"><td> <code title="">female;</code> </td> <td> U+02640 </td> <td> <span class="glyph" title="">&#9792;</span> </td> <tr id="entity-ffilig"><td> <code title="">ffilig;</code> </td> <td> U+0FB03 </td> 
<td> <span class="glyph" title="">&#64259;</span> </td> <tr id="entity-fflig"><td> <code title="">fflig;</code> </td> <td> U+0FB00 </td> <td> <span class="glyph" title="">&#64256;</span> </td> <tr id="entity-ffllig"><td> <code title="">ffllig;</code> </td> <td> U+0FB04 </td> <td> <span class="glyph" title="">&#64260;</span> </td> <tr id="entity-ffr"><td> <code title="">ffr;</code> </td> <td> U+1D523 </td> <td> <span class="glyph" title="">&#120099;</span> </td> <tr id="entity-filig"><td> <code title="">filig;</code> </td> <td> U+0FB01 </td> <td> <span class="glyph" title="">&#64257;</span> </td> <tr id="entity-fjlig"><td> <code title="">fjlig;</code> </td> <td> U+00066 U+0006A </td> <td> <span class="glyph compound" title="">fj</span> </td> <tr id="entity-flat"><td> <code title="">flat;</code> </td> <td> U+0266D </td> <td> <span class="glyph" title="">&#9837;</span> </td> <tr id="entity-fllig"><td> <code title="">fllig;</code> </td> <td> U+0FB02 </td> <td> <span class="glyph" title="">&#64258;</span> </td> <tr id="entity-fltns"><td> <code title="">fltns;</code> </td> <td> U+025B1 </td> <td> <span class="glyph" title="">&#9649;</span> </td> <tr id="entity-fnof"><td> <code title="">fnof;</code> </td> <td> U+00192 </td> <td> <span class="glyph" title="">&fnof;</span> </td> <tr id="entity-fopf"><td> <code title="">fopf;</code> </td> <td> U+1D557 </td> <td> <span class="glyph" title="">&#120151;</span> </td> <tr id="entity-forall"><td> <code title="">forall;</code> </td> <td> U+02200 </td> <td> <span class="glyph" title="">&forall;</span> </td> <tr id="entity-fork"><td> <code title="">fork;</code> </td> <td> U+022D4 </td> <td> <span class="glyph" title="">&#8916;</span> </td> <tr id="entity-forkv"><td> <code title="">forkv;</code> </td> <td> U+02AD9 </td> <td> <span class="glyph" title="">&#10969;</span> </td> <tr id="entity-fpartint"><td> <code title="">fpartint;</code> </td> <td> U+02A0D </td> <td> <span class="glyph" title="">&#10765;</span> </td> <tr 
id="entity-frac12"><td> <code title="">frac12;</code> </td> <td> U+000BD </td> <td> <span class="glyph" title="">&frac12;</span> </td> <tr id="entity-frac13"><td> <code title="">frac13;</code> </td> <td> U+02153 </td> <td> <span class="glyph" title="">&#8531;</span> </td> <tr id="entity-frac14"><td> <code title="">frac14;</code> </td> <td> U+000BC </td> <td> <span class="glyph" title="">&frac14;</span> </td> <tr id="entity-frac15"><td> <code title="">frac15;</code> </td> <td> U+02155 </td> <td> <span class="glyph" title="">&#8533;</span> </td> <tr id="entity-frac16"><td> <code title="">frac16;</code> </td> <td> U+02159 </td> <td> <span class="glyph" title="">&#8537;</span> </td> <tr id="entity-frac18"><td> <code title="">frac18;</code> </td> <td> U+0215B </td> <td> <span class="glyph" title="">&#8539;</span> </td> <tr id="entity-frac23"><td> <code title="">frac23;</code> </td> <td> U+02154 </td> <td> <span class="glyph" title="">&#8532;</span> </td> <tr id="entity-frac25"><td> <code title="">frac25;</code> </td> <td> U+02156 </td> <td> <span class="glyph" title="">&#8534;</span> </td> <tr id="entity-frac34"><td> <code title="">frac34;</code> </td> <td> U+000BE </td> <td> <span class="glyph" title="">&frac34;</span> </td> <tr id="entity-frac35"><td> <code title="">frac35;</code> </td> <td> U+02157 </td> <td> <span class="glyph" title="">&#8535;</span> </td> <tr id="entity-frac38"><td> <code title="">frac38;</code> </td> <td> U+0215C </td> <td> <span class="glyph" title="">&#8540;</span> </td> <tr id="entity-frac45"><td> <code title="">frac45;</code> </td> <td> U+02158 </td> <td> <span class="glyph" title="">&#8536;</span> </td> <tr id="entity-frac56"><td> <code title="">frac56;</code> </td> <td> U+0215A </td> <td> <span class="glyph" title="">&#8538;</span> </td> <tr id="entity-frac58"><td> <code title="">frac58;</code> </td> <td> U+0215D </td> <td> <span class="glyph" title="">&#8541;</span> </td> <tr id="entity-frac78"><td> <code title="">frac78;</code> </td> <td> 
U+0215E </td> <td> <span class="glyph" title="">&#8542;</span> </td> <tr id="entity-frasl"><td> <code title="">frasl;</code> </td> <td> U+02044 </td> <td> <span class="glyph" title="">&frasl;</span> </td> <tr id="entity-frown"><td> <code title="">frown;</code> </td> <td> U+02322 </td> <td> <span class="glyph" title="">&#8994;</span> </td> <tr id="entity-fscr"><td> <code title="">fscr;</code> </td> <td> U+1D4BB </td> <td> <span class="glyph" title="">&#119995;</span> </td> <tr id="entity-gE"><td> <code title="">gE;</code> </td> <td> U+02267 </td> <td> <span class="glyph" title="">&#8807;</span> </td> <tr id="entity-gEl"><td> <code title="">gEl;</code> </td> <td> U+02A8C </td> <td> <span class="glyph" title="">&#10892;</span> </td> <tr id="entity-gacute"><td> <code title="">gacute;</code> </td> <td> U+001F5 </td> <td> <span class="glyph" title="">&#501;</span> </td> <tr id="entity-gamma"><td> <code title="">gamma;</code> </td> <td> U+003B3 </td> <td> <span class="glyph" title="">&gamma;</span> </td> <tr id="entity-gammad"><td> <code title="">gammad;</code> </td> <td> U+003DD </td> <td> <span class="glyph" title="">&#989;</span> </td> <tr id="entity-gap"><td> <code title="">gap;</code> </td> <td> U+02A86 </td> <td> <span class="glyph" title="">&#10886;</span> </td> <tr id="entity-gbreve"><td> <code title="">gbreve;</code> </td> <td> U+0011F </td> <td> <span class="glyph" title="">&#287;</span> </td> <tr id="entity-gcirc"><td> <code title="">gcirc;</code> </td> <td> U+0011D </td> <td> <span class="glyph" title="">&#285;</span> </td> <tr id="entity-gcy"><td> <code title="">gcy;</code> </td> <td> U+00433 </td> <td> <span class="glyph" title="">&#1075;</span> </td> <tr id="entity-gdot"><td> <code title="">gdot;</code> </td> <td> U+00121 </td> <td> <span class="glyph" title="">&#289;</span> </td> <tr id="entity-ge"><td> <code title="">ge;</code> </td> <td> U+02265 </td> <td> <span class="glyph" title="">&ge;</span> </td> <tr id="entity-gel"><td> <code title="">gel;</code> 
</td> <td> U+022DB </td> <td> <span class="glyph" title="">&#8923;</span> </td> <tr id="entity-geq"><td> <code title="">geq;</code> </td> <td> U+02265 </td> <td> <span class="glyph" title="">&ge;</span> </td> <tr id="entity-geqq"><td> <code title="">geqq;</code> </td> <td> U+02267 </td> <td> <span class="glyph" title="">&#8807;</span> </td> <tr id="entity-geqslant"><td> <code title="">geqslant;</code> </td> <td> U+02A7E </td> <td> <span class="glyph" title="">&#10878;</span> </td> <tr id="entity-ges"><td> <code title="">ges;</code> </td> <td> U+02A7E </td> <td> <span class="glyph" title="">&#10878;</span> </td> <tr id="entity-gescc"><td> <code title="">gescc;</code> </td> <td> U+02AA9 </td> <td> <span class="glyph" title="">&#10921;</span> </td> <tr id="entity-gesdot"><td> <code title="">gesdot;</code> </td> <td> U+02A80 </td> <td> <span class="glyph" title="">&#10880;</span> </td> <tr id="entity-gesdoto"><td> <code title="">gesdoto;</code> </td> <td> U+02A82 </td> <td> <span class="glyph" title="">&#10882;</span> </td> <tr id="entity-gesdotol"><td> <code title="">gesdotol;</code> </td> <td> U+02A84 </td> <td> <span class="glyph" title="">&#10884;</span> </td> <tr id="entity-gesl"><td> <code title="">gesl;</code> </td> <td> U+022DB U+0FE00 </td> <td> <span class="glyph compound" title="">&#8923;&#65024;</span> </td> <tr id="entity-gesles"><td> <code title="">gesles;</code> </td> <td> U+02A94 </td> <td> <span class="glyph" title="">&#10900;</span> </td> <tr id="entity-gfr"><td> <code title="">gfr;</code> </td> <td> U+1D524 </td> <td> <span class="glyph" title="">&#120100;</span> </td> <tr id="entity-gg"><td> <code title="">gg;</code> </td> <td> U+0226B </td> <td> <span class="glyph" title="">&#8811;</span> </td> <tr id="entity-ggg"><td> <code title="">ggg;</code> </td> <td> U+022D9 </td> <td> <span class="glyph" title="">&#8921;</span> </td> <tr id="entity-gimel"><td> <code title="">gimel;</code> </td> <td> U+02137 </td> <td> <span class="glyph" 
title="">&#8503;</span> </td> <tr id="entity-gjcy"><td> <code title="">gjcy;</code> </td> <td> U+00453 </td> <td> <span class="glyph" title="">&#1107;</span> </td> <tr id="entity-gl"><td> <code title="">gl;</code> </td> <td> U+02277 </td> <td> <span class="glyph" title="">&#8823;</span> </td> <tr id="entity-glE"><td> <code title="">glE;</code> </td> <td> U+02A92 </td> <td> <span class="glyph" title="">&#10898;</span> </td> <tr id="entity-gla"><td> <code title="">gla;</code> </td> <td> U+02AA5 </td> <td> <span class="glyph" title="">&#10917;</span> </td> <tr id="entity-glj"><td> <code title="">glj;</code> </td> <td> U+02AA4 </td> <td> <span class="glyph" title="">&#10916;</span> </td> <tr id="entity-gnE"><td> <code title="">gnE;</code> </td> <td> U+02269 </td> <td> <span class="glyph" title="">&#8809;</span> </td> <tr id="entity-gnap"><td> <code title="">gnap;</code> </td> <td> U+02A8A </td> <td> <span class="glyph" title="">&#10890;</span> </td> <tr id="entity-gnapprox"><td> <code title="">gnapprox;</code> </td> <td> U+02A8A </td> <td> <span class="glyph" title="">&#10890;</span> </td> <tr id="entity-gne"><td> <code title="">gne;</code> </td> <td> U+02A88 </td> <td> <span class="glyph" title="">&#10888;</span> </td> <tr id="entity-gneq"><td> <code title="">gneq;</code> </td> <td> U+02A88 </td> <td> <span class="glyph" title="">&#10888;</span> </td> <tr id="entity-gneqq"><td> <code title="">gneqq;</code> </td> <td> U+02269 </td> <td> <span class="glyph" title="">&#8809;</span> </td> <tr id="entity-gnsim"><td> <code title="">gnsim;</code> </td> <td> U+022E7 </td> <td> <span class="glyph" title="">&#8935;</span> </td> <tr id="entity-gopf"><td> <code title="">gopf;</code> </td> <td> U+1D558 </td> <td> <span class="glyph" title="">&#120152;</span> </td> <tr id="entity-grave"><td> <code title="">grave;</code> </td> <td> U+00060 </td> <td> <span class="glyph" title="">`</span> </td> <tr id="entity-gscr"><td> <code title="">gscr;</code> </td> <td> U+0210A </td> <td> <span 
class="glyph" title="">&#8458;</span> </td> <tr id="entity-gsim"><td> <code title="">gsim;</code> </td> <td> U+02273 </td> <td> <span class="glyph" title="">&#8819;</span> </td> <tr id="entity-gsime"><td> <code title="">gsime;</code> </td> <td> U+02A8E </td> <td> <span class="glyph" title="">&#10894;</span> </td> <tr id="entity-gsiml"><td> <code title="">gsiml;</code> </td> <td> U+02A90 </td> <td> <span class="glyph" title="">&#10896;</span> </td> <tr id="entity-gt"><td> <code title="">gt;</code> </td> <td> U+0003E </td> <td> <span class="glyph" title="">&gt;</span> </td> <tr id="entity-gtcc"><td> <code title="">gtcc;</code> </td> <td> U+02AA7 </td> <td> <span class="glyph" title="">&#10919;</span> </td> <tr id="entity-gtcir"><td> <code title="">gtcir;</code> </td> <td> U+02A7A </td> <td> <span class="glyph" title="">&#10874;</span> </td> <tr id="entity-gtdot"><td> <code title="">gtdot;</code> </td> <td> U+022D7 </td> <td> <span class="glyph" title="">&#8919;</span> </td> <tr id="entity-gtlPar"><td> <code title="">gtlPar;</code> </td> <td> U+02995 </td> <td> <span class="glyph" title="">&#10645;</span> </td> <tr id="entity-gtquest"><td> <code title="">gtquest;</code> </td> <td> U+02A7C </td> <td> <span class="glyph" title="">&#10876;</span> </td> <tr id="entity-gtrapprox"><td> <code title="">gtrapprox;</code> </td> <td> U+02A86 </td> <td> <span class="glyph" title="">&#10886;</span> </td> <tr id="entity-gtrarr"><td> <code title="">gtrarr;</code> </td> <td> U+02978 </td> <td> <span class="glyph" title="">&#10616;</span> </td> <tr id="entity-gtrdot"><td> <code title="">gtrdot;</code> </td> <td> U+022D7 </td> <td> <span class="glyph" title="">&#8919;</span> </td> <tr id="entity-gtreqless"><td> <code title="">gtreqless;</code> </td> <td> U+022DB </td> <td> <span class="glyph" title="">&#8923;</span> </td> <tr id="entity-gtreqqless"><td> <code title="">gtreqqless;</code> </td> <td> U+02A8C </td> <td> <span class="glyph" title="">&#10892;</span> </td> <tr 
id="entity-gtrless"><td> <code title="">gtrless;</code> </td> <td> U+02277 </td> <td> <span class="glyph" title="">&#8823;</span> </td> <tr id="entity-gtrsim"><td> <code title="">gtrsim;</code> </td> <td> U+02273 </td> <td> <span class="glyph" title="">&#8819;</span> </td> <tr id="entity-gvertneqq"><td> <code title="">gvertneqq;</code> </td> <td> U+02269 U+0FE00 </td> <td> <span class="glyph compound" title="">&#8809;&#65024;</span> </td> <tr id="entity-gvnE"><td> <code title="">gvnE;</code> </td> <td> U+02269 U+0FE00 </td> <td> <span class="glyph compound" title="">&#8809;&#65024;</span> </td> <tr id="entity-hArr"><td> <code title="">hArr;</code> </td> <td> U+021D4 </td> <td> <span class="glyph" title="">&hArr;</span> </td> <tr id="entity-hairsp"><td> <code title="">hairsp;</code> </td> <td> U+0200A </td> <td> <span class="glyph" title="">&#8202;</span> </td> <tr id="entity-half"><td> <code title="">half;</code> </td> <td> U+000BD </td> <td> <span class="glyph" title="">&frac12;</span> </td> <tr id="entity-hamilt"><td> <code title="">hamilt;</code> </td> <td> U+0210B </td> <td> <span class="glyph" title="">&#8459;</span> </td> <tr id="entity-hardcy"><td> <code title="">hardcy;</code> </td> <td> U+0044A </td> <td> <span class="glyph" title="">&#1098;</span> </td> <tr id="entity-harr"><td> <code title="">harr;</code> </td> <td> U+02194 </td> <td> <span class="glyph" title="">&harr;</span> </td> <tr id="entity-harrcir"><td> <code title="">harrcir;</code> </td> <td> U+02948 </td> <td> <span class="glyph" title="">&#10568;</span> </td> <tr id="entity-harrw"><td> <code title="">harrw;</code> </td> <td> U+021AD </td> <td> <span class="glyph" title="">&#8621;</span> </td> <tr id="entity-hbar"><td> <code title="">hbar;</code> </td> <td> U+0210F </td> <td> <span class="glyph" title="">&#8463;</span> </td> <tr id="entity-hcirc"><td> <code title="">hcirc;</code> </td> <td> U+00125 </td> <td> <span class="glyph" title="">&#293;</span> </td> <tr id="entity-hearts"><td> <code 
title="">hearts;</code> </td> <td> U+02665 </td> <td> <span class="glyph" title="">&hearts;</span> </td> <tr id="entity-heartsuit"><td> <code title="">heartsuit;</code> </td> <td> U+02665 </td> <td> <span class="glyph" title="">&hearts;</span> </td> <tr id="entity-hellip"><td> <code title="">hellip;</code> </td> <td> U+02026 </td> <td> <span class="glyph" title="">&hellip;</span> </td> <tr id="entity-hercon"><td> <code title="">hercon;</code> </td> <td> U+022B9 </td> <td> <span class="glyph" title="">&#8889;</span> </td> <tr id="entity-hfr"><td> <code title="">hfr;</code> </td> <td> U+1D525 </td> <td> <span class="glyph" title="">&#120101;</span> </td> <tr id="entity-hksearow"><td> <code title="">hksearow;</code> </td> <td> U+02925 </td> <td> <span class="glyph" title="">&#10533;</span> </td> <tr id="entity-hkswarow"><td> <code title="">hkswarow;</code> </td> <td> U+02926 </td> <td> <span class="glyph" title="">&#10534;</span> </td> <tr id="entity-hoarr"><td> <code title="">hoarr;</code> </td> <td> U+021FF </td> <td> <span class="glyph" title="">&#8703;</span> </td> <tr id="entity-homtht"><td> <code title="">homtht;</code> </td> <td> U+0223B </td> <td> <span class="glyph" title="">&#8763;</span> </td> <tr id="entity-hookleftarrow"><td> <code title="">hookleftarrow;</code> </td> <td> U+021A9 </td> <td> <span class="glyph" title="">&#8617;</span> </td> <tr id="entity-hookrightarrow"><td> <code title="">hookrightarrow;</code> </td> <td> U+021AA </td> <td> <span class="glyph" title="">&#8618;</span> </td> <tr id="entity-hopf"><td> <code title="">hopf;</code> </td> <td> U+1D559 </td> <td> <span class="glyph" title="">&#120153;</span> </td> <tr id="entity-horbar"><td> <code title="">horbar;</code> </td> <td> U+02015 </td> <td> <span class="glyph" title="">&#8213;</span> </td> <tr id="entity-hscr"><td> <code title="">hscr;</code> </td> <td> U+1D4BD </td> <td> <span class="glyph" title="">&#119997;</span> </td> <tr id="entity-hslash"><td> <code title="">hslash;</code> 
</td> <td> U+0210F </td> <td> <span class="glyph" title="">&#8463;</span> </td> <tr id="entity-hstrok"><td> <code title="">hstrok;</code> </td> <td> U+00127 </td> <td> <span class="glyph" title="">&#295;</span> </td> <tr id="entity-hybull"><td> <code title="">hybull;</code> </td> <td> U+02043 </td> <td> <span class="glyph" title="">&#8259;</span> </td> <tr id="entity-hyphen"><td> <code title="">hyphen;</code> </td> <td> U+02010 </td> <td> <span class="glyph" title="">&#8208;</span> </td> <tr id="entity-iacute"><td> <code title="">iacute;</code> </td> <td> U+000ED </td> <td> <span class="glyph" title="">&iacute;</span> </td> <tr id="entity-ic"><td> <code title="">ic;</code> </td> <td> U+02063 </td> <td> <span class="glyph" title="">&#8291;</span> </td> <tr id="entity-icirc"><td> <code title="">icirc;</code> </td> <td> U+000EE </td> <td> <span class="glyph" title="">&icirc;</span> </td> <tr id="entity-icy"><td> <code title="">icy;</code> </td> <td> U+00438 </td> <td> <span class="glyph" title="">&#1080;</span> </td> <tr id="entity-iecy"><td> <code title="">iecy;</code> </td> <td> U+00435 </td> <td> <span class="glyph" title="">&#1077;</span> </td> <tr id="entity-iexcl"><td> <code title="">iexcl;</code> </td> <td> U+000A1 </td> <td> <span class="glyph" title="">&iexcl;</span> </td> <tr id="entity-iff"><td> <code title="">iff;</code> </td> <td> U+021D4 </td> <td> <span class="glyph" title="">&hArr;</span> </td> <tr id="entity-ifr"><td> <code title="">ifr;</code> </td> <td> U+1D526 </td> <td> <span class="glyph" title="">&#120102;</span> </td> <tr id="entity-igrave"><td> <code title="">igrave;</code> </td> <td> U+000EC </td> <td> <span class="glyph" title="">&igrave;</span> </td> <tr id="entity-ii"><td> <code title="">ii;</code> </td> <td> U+02148 </td> <td> <span class="glyph" title="">&#8520;</span> </td> <tr id="entity-iiiint"><td> <code title="">iiiint;</code> </td> <td> U+02A0C </td> <td> <span class="glyph" title="">&#10764;</span> </td> <tr id="entity-iiint"><td> 
<code title="">iiint;</code> </td> <td> U+0222D </td> <td> <span class="glyph" title="">&#8749;</span> </td> <tr id="entity-iinfin"><td> <code title="">iinfin;</code> </td> <td> U+029DC </td> <td> <span class="glyph" title="">&#10716;</span> </td> <tr id="entity-iiota"><td> <code title="">iiota;</code> </td> <td> U+02129 </td> <td> <span class="glyph" title="">&#8489;</span> </td> <tr id="entity-ijlig"><td> <code title="">ijlig;</code> </td> <td> U+00133 </td> <td> <span class="glyph" title="">&#307;</span> </td> <tr id="entity-imacr"><td> <code title="">imacr;</code> </td> <td> U+0012B </td> <td> <span class="glyph" title="">&#299;</span> </td> <tr id="entity-image"><td> <code title="">image;</code> </td> <td> U+02111 </td> <td> <span class="glyph" title="">&image;</span> </td> <tr id="entity-imagline"><td> <code title="">imagline;</code> </td> <td> U+02110 </td> <td> <span class="glyph" title="">&#8464;</span> </td> <tr id="entity-imagpart"><td> <code title="">imagpart;</code> </td> <td> U+02111 </td> <td> <span class="glyph" title="">&image;</span> </td> <tr id="entity-imath"><td> <code title="">imath;</code> </td> <td> U+00131 </td> <td> <span class="glyph" title="">&#305;</span> </td> <tr id="entity-imof"><td> <code title="">imof;</code> </td> <td> U+022B7 </td> <td> <span class="glyph" title="">&#8887;</span> </td> <tr id="entity-imped"><td> <code title="">imped;</code> </td> <td> U+001B5 </td> <td> <span class="glyph" title="">&#437;</span> </td> <tr id="entity-in"><td> <code title="">in;</code> </td> <td> U+02208 </td> <td> <span class="glyph" title="">&isin;</span> </td> <tr id="entity-incare"><td> <code title="">incare;</code> </td> <td> U+02105 </td> <td> <span class="glyph" title="">&#8453;</span> </td> <tr id="entity-infin"><td> <code title="">infin;</code> </td> <td> U+0221E </td> <td> <span class="glyph" title="">&infin;</span> </td> <tr id="entity-infintie"><td> <code title="">infintie;</code> </td> <td> U+029DD </td> <td> <span class="glyph" 
title="">&#10717;</span> </td> <tr id="entity-inodot"><td> <code title="">inodot;</code> </td> <td> U+00131 </td> <td> <span class="glyph" title="">&#305;</span> </td> <tr id="entity-int"><td> <code title="">int;</code> </td> <td> U+0222B </td> <td> <span class="glyph" title="">&int;</span> </td> <tr id="entity-intcal"><td> <code title="">intcal;</code> </td> <td> U+022BA </td> <td> <span class="glyph" title="">&#8890;</span> </td> <tr id="entity-integers"><td> <code title="">integers;</code> </td> <td> U+02124 </td> <td> <span class="glyph" title="">&#8484;</span> </td> <tr id="entity-intercal"><td> <code title="">intercal;</code> </td> <td> U+022BA </td> <td> <span class="glyph" title="">&#8890;</span> </td> <tr id="entity-intlarhk"><td> <code title="">intlarhk;</code> </td> <td> U+02A17 </td> <td> <span class="glyph" title="">&#10775;</span> </td> <tr id="entity-intprod"><td> <code title="">intprod;</code> </td> <td> U+02A3C </td> <td> <span class="glyph" title="">&#10812;</span> </td> <tr id="entity-iocy"><td> <code title="">iocy;</code> </td> <td> U+00451 </td> <td> <span class="glyph" title="">&#1105;</span> </td> <tr id="entity-iogon"><td> <code title="">iogon;</code> </td> <td> U+0012F </td> <td> <span class="glyph" title="">&#303;</span> </td> <tr id="entity-iopf"><td> <code title="">iopf;</code> </td> <td> U+1D55A </td> <td> <span class="glyph" title="">&#120154;</span> </td> <tr id="entity-iota"><td> <code title="">iota;</code> </td> <td> U+003B9 </td> <td> <span class="glyph" title="">&iota;</span> </td> <tr id="entity-iprod"><td> <code title="">iprod;</code> </td> <td> U+02A3C </td> <td> <span class="glyph" title="">&#10812;</span> </td> <tr id="entity-iquest"><td> <code title="">iquest;</code> </td> <td> U+000BF </td> <td> <span class="glyph" title="">&iquest;</span> </td> <tr id="entity-iscr"><td> <code title="">iscr;</code> </td> <td> U+1D4BE </td> <td> <span class="glyph" title="">&#119998;</span> </td> <tr id="entity-isin"><td> <code 
title="">isin;</code> </td> <td> U+02208 </td> <td> <span class="glyph" title="">&isin;</span> </td> <tr id="entity-isinE"><td> <code title="">isinE;</code> </td> <td> U+022F9 </td> <td> <span class="glyph" title="">&#8953;</span> </td> <tr id="entity-isindot"><td> <code title="">isindot;</code> </td> <td> U+022F5 </td> <td> <span class="glyph" title="">&#8949;</span> </td> <tr id="entity-isins"><td> <code title="">isins;</code> </td> <td> U+022F4 </td> <td> <span class="glyph" title="">&#8948;</span> </td> <tr id="entity-isinsv"><td> <code title="">isinsv;</code> </td> <td> U+022F3 </td> <td> <span class="glyph" title="">&#8947;</span> </td> <tr id="entity-isinv"><td> <code title="">isinv;</code> </td> <td> U+02208 </td> <td> <span class="glyph" title="">&isin;</span> </td> <tr id="entity-it"><td> <code title="">it;</code> </td> <td> U+02062 </td> <td> <span class="glyph" title="">&#8290;</span> </td> <tr id="entity-itilde"><td> <code title="">itilde;</code> </td> <td> U+00129 </td> <td> <span class="glyph" title="">&#297;</span> </td> <tr id="entity-iukcy"><td> <code title="">iukcy;</code> </td> <td> U+00456 </td> <td> <span class="glyph" title="">&#1110;</span> </td> <tr id="entity-iuml"><td> <code title="">iuml;</code> </td> <td> U+000EF </td> <td> <span class="glyph" title="">&iuml;</span> </td> <tr id="entity-jcirc"><td> <code title="">jcirc;</code> </td> <td> U+00135 </td> <td> <span class="glyph" title="">&#309;</span> </td> <tr id="entity-jcy"><td> <code title="">jcy;</code> </td> <td> U+00439 </td> <td> <span class="glyph" title="">&#1081;</span> </td> <tr id="entity-jfr"><td> <code title="">jfr;</code> </td> <td> U+1D527 </td> <td> <span class="glyph" title="">&#120103;</span> </td> <tr id="entity-jmath"><td> <code title="">jmath;</code> </td> <td> U+00237 </td> <td> <span class="glyph" title="">&#567;</span> </td> <tr id="entity-jopf"><td> <code title="">jopf;</code> </td> <td> U+1D55B </td> <td> <span class="glyph" title="">&#120155;</span> </td> <tr 
id="entity-jscr"><td> <code title="">jscr;</code> </td> <td> U+1D4BF </td> <td> <span class="glyph" title="">&#119999;</span> </td> <tr id="entity-jsercy"><td> <code title="">jsercy;</code> </td> <td> U+00458 </td> <td> <span class="glyph" title="">&#1112;</span> </td> <tr id="entity-jukcy"><td> <code title="">jukcy;</code> </td> <td> U+00454 </td> <td> <span class="glyph" title="">&#1108;</span> </td> <tr id="entity-kappa"><td> <code title="">kappa;</code> </td> <td> U+003BA </td> <td> <span class="glyph" title="">&kappa;</span> </td> <tr id="entity-kappav"><td> <code title="">kappav;</code> </td> <td> U+003F0 </td> <td> <span class="glyph" title="">&#1008;</span> </td> <tr id="entity-kcedil"><td> <code title="">kcedil;</code> </td> <td> U+00137 </td> <td> <span class="glyph" title="">&#311;</span> </td> <tr id="entity-kcy"><td> <code title="">kcy;</code> </td> <td> U+0043A </td> <td> <span class="glyph" title="">&#1082;</span> </td> <tr id="entity-kfr"><td> <code title="">kfr;</code> </td> <td> U+1D528 </td> <td> <span class="glyph" title="">&#120104;</span> </td> <tr id="entity-kgreen"><td> <code title="">kgreen;</code> </td> <td> U+00138 </td> <td> <span class="glyph" title="">&#312;</span> </td> <tr id="entity-khcy"><td> <code title="">khcy;</code> </td> <td> U+00445 </td> <td> <span class="glyph" title="">&#1093;</span> </td> <tr id="entity-kjcy"><td> <code title="">kjcy;</code> </td> <td> U+0045C </td> <td> <span class="glyph" title="">&#1116;</span> </td> <tr id="entity-kopf"><td> <code title="">kopf;</code> </td> <td> U+1D55C </td> <td> <span class="glyph" title="">&#120156;</span> </td> <tr id="entity-kscr"><td> <code title="">kscr;</code> </td> <td> U+1D4C0 </td> <td> <span class="glyph" title="">&#120000;</span> </td> <tr id="entity-lAarr"><td> <code title="">lAarr;</code> </td> <td> U+021DA </td> <td> <span class="glyph" title="">&#8666;</span> </td> <tr id="entity-lArr"><td> <code title="">lArr;</code> </td> <td> U+021D0 </td> <td> <span class="glyph" 
title="">&lArr;</span> </td> <tr id="entity-lAtail"><td> <code title="">lAtail;</code> </td> <td> U+0291B </td> <td> <span class="glyph" title="">&#10523;</span> </td> <tr id="entity-lBarr"><td> <code title="">lBarr;</code> </td> <td> U+0290E </td> <td> <span class="glyph" title="">&#10510;</span> </td> <tr id="entity-lE"><td> <code title="">lE;</code> </td> <td> U+02266 </td> <td> <span class="glyph" title="">&#8806;</span> </td> <tr id="entity-lEg"><td> <code title="">lEg;</code> </td> <td> U+02A8B </td> <td> <span class="glyph" title="">&#10891;</span> </td> <tr id="entity-lHar"><td> <code title="">lHar;</code> </td> <td> U+02962 </td> <td> <span class="glyph" title="">&#10594;</span> </td> <tr id="entity-lacute"><td> <code title="">lacute;</code> </td> <td> U+0013A </td> <td> <span class="glyph" title="">&#314;</span> </td> <tr id="entity-laemptyv"><td> <code title="">laemptyv;</code> </td> <td> U+029B4 </td> <td> <span class="glyph" title="">&#10676;</span> </td> <tr id="entity-lagran"><td> <code title="">lagran;</code> </td> <td> U+02112 </td> <td> <span class="glyph" title="">&#8466;</span> </td> <tr id="entity-lambda"><td> <code title="">lambda;</code> </td> <td> U+003BB </td> <td> <span class="glyph" title="">&lambda;</span> </td> <tr id="entity-lang"><td> <code title="">lang;</code> </td> <td> U+027E8 </td> <td> <span class="glyph" title="">&#10216;</span> </td> <tr id="entity-langd"><td> <code title="">langd;</code> </td> <td> U+02991 </td> <td> <span class="glyph" title="">&#10641;</span> </td> <tr id="entity-langle"><td> <code title="">langle;</code> </td> <td> U+027E8 </td> <td> <span class="glyph" title="">&#10216;</span> </td> <tr id="entity-lap"><td> <code title="">lap;</code> </td> <td> U+02A85 </td> <td> <span class="glyph" title="">&#10885;</span> </td> <tr id="entity-laquo"><td> <code title="">laquo;</code> </td> <td> U+000AB </td> <td> <span class="glyph" title="">&laquo;</span> </td> <tr id="entity-larr"><td> <code title="">larr;</code> </td> 
<td> U+02190 </td> <td> <span class="glyph" title="">&larr;</span> </td> <tr id="entity-larrb"><td> <code title="">larrb;</code> </td> <td> U+021E4 </td> <td> <span class="glyph" title="">&#8676;</span> </td> <tr id="entity-larrbfs"><td> <code title="">larrbfs;</code> </td> <td> U+0291F </td> <td> <span class="glyph" title="">&#10527;</span> </td> <tr id="entity-larrfs"><td> <code title="">larrfs;</code> </td> <td> U+0291D </td> <td> <span class="glyph" title="">&#10525;</span> </td> <tr id="entity-larrhk"><td> <code title="">larrhk;</code> </td> <td> U+021A9 </td> <td> <span class="glyph" title="">&#8617;</span> </td> <tr id="entity-larrlp"><td> <code title="">larrlp;</code> </td> <td> U+021AB </td> <td> <span class="glyph" title="">&#8619;</span> </td> <tr id="entity-larrpl"><td> <code title="">larrpl;</code> </td> <td> U+02939 </td> <td> <span class="glyph" title="">&#10553;</span> </td> <tr id="entity-larrsim"><td> <code title="">larrsim;</code> </td> <td> U+02973 </td> <td> <span class="glyph" title="">&#10611;</span> </td> <tr id="entity-larrtl"><td> <code title="">larrtl;</code> </td> <td> U+021A2 </td> <td> <span class="glyph" title="">&#8610;</span> </td> <tr id="entity-lat"><td> <code title="">lat;</code> </td> <td> U+02AAB </td> <td> <span class="glyph" title="">&#10923;</span> </td> <tr id="entity-latail"><td> <code title="">latail;</code> </td> <td> U+02919 </td> <td> <span class="glyph" title="">&#10521;</span> </td> <tr id="entity-late"><td> <code title="">late;</code> </td> <td> U+02AAD </td> <td> <span class="glyph" title="">&#10925;</span> </td> <tr id="entity-lates"><td> <code title="">lates;</code> </td> <td> U+02AAD U+0FE00 </td> <td> <span class="glyph compound" title="">&#10925;&#65024;</span> </td> <tr id="entity-lbarr"><td> <code title="">lbarr;</code> </td> <td> U+0290C </td> <td> <span class="glyph" title="">&#10508;</span> </td> <tr id="entity-lbbrk"><td> <code title="">lbbrk;</code> </td> <td> U+02772 </td> <td> <span class="glyph" 
title="">&#10098;</span> </td> <tr id="entity-lbrace"><td> <code title="">lbrace;</code> </td> <td> U+0007B </td> <td> <span class="glyph" title="">{</span> </td> <tr id="entity-lbrack"><td> <code title="">lbrack;</code> </td> <td> U+0005B </td> <td> <span class="glyph" title="">[</span> </td> <tr id="entity-lbrke"><td> <code title="">lbrke;</code> </td> <td> U+0298B </td> <td> <span class="glyph" title="">&#10635;</span> </td> <tr id="entity-lbrksld"><td> <code title="">lbrksld;</code> </td> <td> U+0298F </td> <td> <span class="glyph" title="">&#10639;</span> </td> <tr id="entity-lbrkslu"><td> <code title="">lbrkslu;</code> </td> <td> U+0298D </td> <td> <span class="glyph" title="">&#10637;</span> </td> <tr id="entity-lcaron"><td> <code title="">lcaron;</code> </td> <td> U+0013E </td> <td> <span class="glyph" title="">&#318;</span> </td> <tr id="entity-lcedil"><td> <code title="">lcedil;</code> </td> <td> U+0013C </td> <td> <span class="glyph" title="">&#316;</span> </td> <tr id="entity-lceil"><td> <code title="">lceil;</code> </td> <td> U+02308 </td> <td> <span class="glyph" title="">&lceil;</span> </td> <tr id="entity-lcub"><td> <code title="">lcub;</code> </td> <td> U+0007B </td> <td> <span class="glyph" title="">{</span> </td> <tr id="entity-lcy"><td> <code title="">lcy;</code> </td> <td> U+0043B </td> <td> <span class="glyph" title="">&#1083;</span> </td> <tr id="entity-ldca"><td> <code title="">ldca;</code> </td> <td> U+02936 </td> <td> <span class="glyph" title="">&#10550;</span> </td> <tr id="entity-ldquo"><td> <code title="">ldquo;</code> </td> <td> U+0201C </td> <td> <span class="glyph" title="">&ldquo;</span> </td> <tr id="entity-ldquor"><td> <code title="">ldquor;</code> </td> <td> U+0201E </td> <td> <span class="glyph" title="">&bdquo;</span> </td> <tr id="entity-ldrdhar"><td> <code title="">ldrdhar;</code> </td> <td> U+02967 </td> <td> <span class="glyph" title="">&#10599;</span> </td> <tr id="entity-ldrushar"><td> <code title="">ldrushar;</code> 
</td> <td> U+0294B </td> <td> <span class="glyph" title="">&#10571;</span> </td> <tr id="entity-ldsh"><td> <code title="">ldsh;</code> </td> <td> U+021B2 </td> <td> <span class="glyph" title="">&#8626;</span> </td> <tr id="entity-le"><td> <code title="">le;</code> </td> <td> U+02264 </td> <td> <span class="glyph" title="">&le;</span> </td> <tr id="entity-leftarrow"><td> <code title="">leftarrow;</code> </td> <td> U+02190 </td> <td> <span class="glyph" title="">&larr;</span> </td> <tr id="entity-leftarrowtail"><td> <code title="">leftarrowtail;</code> </td> <td> U+021A2 </td> <td> <span class="glyph" title="">&#8610;</span> </td> <tr id="entity-leftharpoondown"><td> <code title="">leftharpoondown;</code> </td> <td> U+021BD </td> <td> <span class="glyph" title="">&#8637;</span> </td> <tr id="entity-leftharpoonup"><td> <code title="">leftharpoonup;</code> </td> <td> U+021BC </td> <td> <span class="glyph" title="">&#8636;</span> </td> <tr id="entity-leftleftarrows"><td> <code title="">leftleftarrows;</code> </td> <td> U+021C7 </td> <td> <span class="glyph" title="">&#8647;</span> </td> <tr id="entity-leftrightarrow"><td> <code title="">leftrightarrow;</code> </td> <td> U+02194 </td> <td> <span class="glyph" title="">&harr;</span> </td> <tr id="entity-leftrightarrows"><td> <code title="">leftrightarrows;</code> </td> <td> U+021C6 </td> <td> <span class="glyph" title="">&#8646;</span> </td> <tr id="entity-leftrightharpoons"><td> <code title="">leftrightharpoons;</code> </td> <td> U+021CB </td> <td> <span class="glyph" title="">&#8651;</span> </td> <tr id="entity-leftrightsquigarrow"><td> <code title="">leftrightsquigarrow;</code> </td> <td> U+021AD </td> <td> <span class="glyph" title="">&#8621;</span> </td> <tr id="entity-leftthreetimes"><td> <code title="">leftthreetimes;</code> </td> <td> U+022CB </td> <td> <span class="glyph" title="">&#8907;</span> </td> <tr id="entity-leg"><td> <code title="">leg;</code> </td> <td> U+022DA </td> <td> <span class="glyph" 
title="">&#8922;</span> </td> <tr id="entity-leq"><td> <code title="">leq;</code> </td> <td> U+02264 </td> <td> <span class="glyph" title="">&le;</span> </td> <tr id="entity-leqq"><td> <code title="">leqq;</code> </td> <td> U+02266 </td> <td> <span class="glyph" title="">&#8806;</span> </td> <tr id="entity-leqslant"><td> <code title="">leqslant;</code> </td> <td> U+02A7D </td> <td> <span class="glyph" title="">&#10877;</span> </td> <tr id="entity-les"><td> <code title="">les;</code> </td> <td> U+02A7D </td> <td> <span class="glyph" title="">&#10877;</span> </td> <tr id="entity-lescc"><td> <code title="">lescc;</code> </td> <td> U+02AA8 </td> <td> <span class="glyph" title="">&#10920;</span> </td> <tr id="entity-lesdot"><td> <code title="">lesdot;</code> </td> <td> U+02A7F </td> <td> <span class="glyph" title="">&#10879;</span> </td> <tr id="entity-lesdoto"><td> <code title="">lesdoto;</code> </td> <td> U+02A81 </td> <td> <span class="glyph" title="">&#10881;</span> </td> <tr id="entity-lesdotor"><td> <code title="">lesdotor;</code> </td> <td> U+02A83 </td> <td> <span class="glyph" title="">&#10883;</span> </td> <tr id="entity-lesg"><td> <code title="">lesg;</code> </td> <td> U+022DA U+0FE00 </td> <td> <span class="glyph compound" title="">&#8922;&#65024;</span> </td> <tr id="entity-lesges"><td> <code title="">lesges;</code> </td> <td> U+02A93 </td> <td> <span class="glyph" title="">&#10899;</span> </td> <tr id="entity-lessapprox"><td> <code title="">lessapprox;</code> </td> <td> U+02A85 </td> <td> <span class="glyph" title="">&#10885;</span> </td> <tr id="entity-lessdot"><td> <code title="">lessdot;</code> </td> <td> U+022D6 </td> <td> <span class="glyph" title="">&#8918;</span> </td> <tr id="entity-lesseqgtr"><td> <code title="">lesseqgtr;</code> </td> <td> U+022DA </td> <td> <span class="glyph" title="">&#8922;</span> </td> <tr id="entity-lesseqqgtr"><td> <code title="">lesseqqgtr;</code> </td> <td> U+02A8B </td> <td> <span class="glyph" title="">&#10891;</span> 
</td> <tr id="entity-lessgtr"><td> <code title="">lessgtr;</code> </td> <td> U+02276 </td> <td> <span class="glyph" title="">&#8822;</span> </td> <tr id="entity-lesssim"><td> <code title="">lesssim;</code> </td> <td> U+02272 </td> <td> <span class="glyph" title="">&#8818;</span> </td> <tr id="entity-lfisht"><td> <code title="">lfisht;</code> </td> <td> U+0297C </td> <td> <span class="glyph" title="">&#10620;</span> </td> <tr id="entity-lfloor"><td> <code title="">lfloor;</code> </td> <td> U+0230A </td> <td> <span class="glyph" title="">&lfloor;</span> </td> <tr id="entity-lfr"><td> <code title="">lfr;</code> </td> <td> U+1D529 </td> <td> <span class="glyph" title="">&#120105;</span> </td> <tr id="entity-lg"><td> <code title="">lg;</code> </td> <td> U+02276 </td> <td> <span class="glyph" title="">&#8822;</span> </td> <tr id="entity-lgE"><td> <code title="">lgE;</code> </td> <td> U+02A91 </td> <td> <span class="glyph" title="">&#10897;</span> </td> <tr id="entity-lhard"><td> <code title="">lhard;</code> </td> <td> U+021BD </td> <td> <span class="glyph" title="">&#8637;</span> </td> <tr id="entity-lharu"><td> <code title="">lharu;</code> </td> <td> U+021BC </td> <td> <span class="glyph" title="">&#8636;</span> </td> <tr id="entity-lharul"><td> <code title="">lharul;</code> </td> <td> U+0296A </td> <td> <span class="glyph" title="">&#10602;</span> </td> <tr id="entity-lhblk"><td> <code title="">lhblk;</code> </td> <td> U+02584 </td> <td> <span class="glyph" title="">&#9604;</span> </td> <tr id="entity-ljcy"><td> <code title="">ljcy;</code> </td> <td> U+00459 </td> <td> <span class="glyph" title="">&#1113;</span> </td> <tr id="entity-ll"><td> <code title="">ll;</code> </td> <td> U+0226A </td> <td> <span class="glyph" title="">&#8810;</span> </td> <tr id="entity-llarr"><td> <code title="">llarr;</code> </td> <td> U+021C7 </td> <td> <span class="glyph" title="">&#8647;</span> </td> <tr id="entity-llcorner"><td> <code title="">llcorner;</code> </td> <td> U+0231E </td> <td> 
<span class="glyph" title="">&#8990;</span> </td> <tr id="entity-llhard"><td> <code title="">llhard;</code> </td> <td> U+0296B </td> <td> <span class="glyph" title="">&#10603;</span> </td> <tr id="entity-lltri"><td> <code title="">lltri;</code> </td> <td> U+025FA </td> <td> <span class="glyph" title="">&#9722;</span> </td> <tr id="entity-lmidot"><td> <code title="">lmidot;</code> </td> <td> U+00140 </td> <td> <span class="glyph" title="">&#320;</span> </td> <tr id="entity-lmoust"><td> <code title="">lmoust;</code> </td> <td> U+023B0 </td> <td> <span class="glyph" title="">&#9136;</span> </td> <tr id="entity-lmoustache"><td> <code title="">lmoustache;</code> </td> <td> U+023B0 </td> <td> <span class="glyph" title="">&#9136;</span> </td> <tr id="entity-lnE"><td> <code title="">lnE;</code> </td> <td> U+02268 </td> <td> <span class="glyph" title="">&#8808;</span> </td> <tr id="entity-lnap"><td> <code title="">lnap;</code> </td> <td> U+02A89 </td> <td> <span class="glyph" title="">&#10889;</span> </td> <tr id="entity-lnapprox"><td> <code title="">lnapprox;</code> </td> <td> U+02A89 </td> <td> <span class="glyph" title="">&#10889;</span> </td> <tr id="entity-lne"><td> <code title="">lne;</code> </td> <td> U+02A87 </td> <td> <span class="glyph" title="">&#10887;</span> </td> <tr id="entity-lneq"><td> <code title="">lneq;</code> </td> <td> U+02A87 </td> <td> <span class="glyph" title="">&#10887;</span> </td> <tr id="entity-lneqq"><td> <code title="">lneqq;</code> </td> <td> U+02268 </td> <td> <span class="glyph" title="">&#8808;</span> </td> <tr id="entity-lnsim"><td> <code title="">lnsim;</code> </td> <td> U+022E6 </td> <td> <span class="glyph" title="">&#8934;</span> </td> <tr id="entity-loang"><td> <code title="">loang;</code> </td> <td> U+027EC </td> <td> <span class="glyph" title="">&#10220;</span> </td> <tr id="entity-loarr"><td> <code title="">loarr;</code> </td> <td> U+021FD </td> <td> <span class="glyph" title="">&#8701;</span> </td> <tr id="entity-lobrk"><td> 
<code title="">lobrk;</code> </td> <td> U+027E6 </td> <td> <span class="glyph" title="">&#10214;</span> </td> <tr id="entity-longleftarrow"><td> <code title="">longleftarrow;</code> </td> <td> U+027F5 </td> <td> <span class="glyph" title="">&#10229;</span> </td> <tr id="entity-longleftrightarrow"><td> <code title="">longleftrightarrow;</code> </td> <td> U+027F7 </td> <td> <span class="glyph" title="">&#10231;</span> </td> <tr id="entity-longmapsto"><td> <code title="">longmapsto;</code> </td> <td> U+027FC </td> <td> <span class="glyph" title="">&#10236;</span> </td> <tr id="entity-longrightarrow"><td> <code title="">longrightarrow;</code> </td> <td> U+027F6 </td> <td> <span class="glyph" title="">&#10230;</span> </td> <tr id="entity-looparrowleft"><td> <code title="">looparrowleft;</code> </td> <td> U+021AB </td> <td> <span class="glyph" title="">&#8619;</span> </td> <tr id="entity-looparrowright"><td> <code title="">looparrowright;</code> </td> <td> U+021AC </td> <td> <span class="glyph" title="">&#8620;</span> </td> <tr id="entity-lopar"><td> <code title="">lopar;</code> </td> <td> U+02985 </td> <td> <span class="glyph" title="">&#10629;</span> </td> <tr id="entity-lopf"><td> <code title="">lopf;</code> </td> <td> U+1D55D </td> <td> <span class="glyph" title="">&#120157;</span> </td> <tr id="entity-loplus"><td> <code title="">loplus;</code> </td> <td> U+02A2D </td> <td> <span class="glyph" title="">&#10797;</span> </td> <tr id="entity-lotimes"><td> <code title="">lotimes;</code> </td> <td> U+02A34 </td> <td> <span class="glyph" title="">&#10804;</span> </td> <tr id="entity-lowast"><td> <code title="">lowast;</code> </td> <td> U+02217 </td> <td> <span class="glyph" title="">&lowast;</span> </td> <tr id="entity-lowbar"><td> <code title="">lowbar;</code> </td> <td> U+0005F </td> <td> <span class="glyph" title="">_</span> </td> <tr id="entity-loz"><td> <code title="">loz;</code> </td> <td> U+025CA </td> <td> <span class="glyph" title="">&loz;</span> </td> <tr 
id="entity-lozenge"><td> <code title="">lozenge;</code> </td> <td> U+025CA </td> <td> <span class="glyph" title="">&loz;</span> </td> <tr id="entity-lozf"><td> <code title="">lozf;</code> </td> <td> U+029EB </td> <td> <span class="glyph" title="">&#10731;</span> </td> <tr id="entity-lpar"><td> <code title="">lpar;</code> </td> <td> U+00028 </td> <td> <span class="glyph" title="">(</span> </td> <tr id="entity-lparlt"><td> <code title="">lparlt;</code> </td> <td> U+02993 </td> <td> <span class="glyph" title="">&#10643;</span> </td> <tr id="entity-lrarr"><td> <code title="">lrarr;</code> </td> <td> U+021C6 </td> <td> <span class="glyph" title="">&#8646;</span> </td> <tr id="entity-lrcorner"><td> <code title="">lrcorner;</code> </td> <td> U+0231F </td> <td> <span class="glyph" title="">&#8991;</span> </td> <tr id="entity-lrhar"><td> <code title="">lrhar;</code> </td> <td> U+021CB </td> <td> <span class="glyph" title="">&#8651;</span> </td> <tr id="entity-lrhard"><td> <code title="">lrhard;</code> </td> <td> U+0296D </td> <td> <span class="glyph" title="">&#10605;</span> </td> <tr id="entity-lrm"><td> <code title="">lrm;</code> </td> <td> U+0200E </td> <td> <span class="glyph" title="">&lrm;</span> </td> <tr id="entity-lrtri"><td> <code title="">lrtri;</code> </td> <td> U+022BF </td> <td> <span class="glyph" title="">&#8895;</span> </td> <tr id="entity-lsaquo"><td> <code title="">lsaquo;</code> </td> <td> U+02039 </td> <td> <span class="glyph" title="">&lsaquo;</span> </td> <tr id="entity-lscr"><td> <code title="">lscr;</code> </td> <td> U+1D4C1 </td> <td> <span class="glyph" title="">&#120001;</span> </td> <tr id="entity-lsh"><td> <code title="">lsh;</code> </td> <td> U+021B0 </td> <td> <span class="glyph" title="">&#8624;</span> </td> <tr id="entity-lsim"><td> <code title="">lsim;</code> </td> <td> U+02272 </td> <td> <span class="glyph" title="">&#8818;</span> </td> <tr id="entity-lsime"><td> <code title="">lsime;</code> </td> <td> U+02A8D </td> <td> <span 
class="glyph" title="">&#10893;</span> </td> <tr id="entity-lsimg"><td> <code title="">lsimg;</code> </td> <td> U+02A8F </td> <td> <span class="glyph" title="">&#10895;</span> </td> <tr id="entity-lsqb"><td> <code title="">lsqb;</code> </td> <td> U+0005B </td> <td> <span class="glyph" title="">[</span> </td> <tr id="entity-lsquo"><td> <code title="">lsquo;</code> </td> <td> U+02018 </td> <td> <span class="glyph" title="">&lsquo;</span> </td> <tr id="entity-lsquor"><td> <code title="">lsquor;</code> </td> <td> U+0201A </td> <td> <span class="glyph" title="">&sbquo;</span> </td> <tr id="entity-lstrok"><td> <code title="">lstrok;</code> </td> <td> U+00142 </td> <td> <span class="glyph" title="">&#322;</span> </td> <tr id="entity-lt"><td> <code title="">lt;</code> </td> <td> U+0003C </td> <td> <span class="glyph" title="">&lt;</span> </td> <tr id="entity-ltcc"><td> <code title="">ltcc;</code> </td> <td> U+02AA6 </td> <td> <span class="glyph" title="">&#10918;</span> </td> <tr id="entity-ltcir"><td> <code title="">ltcir;</code> </td> <td> U+02A79 </td> <td> <span class="glyph" title="">&#10873;</span> </td> <tr id="entity-ltdot"><td> <code title="">ltdot;</code> </td> <td> U+022D6 </td> <td> <span class="glyph" title="">&#8918;</span> </td> <tr id="entity-lthree"><td> <code title="">lthree;</code> </td> <td> U+022CB </td> <td> <span class="glyph" title="">&#8907;</span> </td> <tr id="entity-ltimes"><td> <code title="">ltimes;</code> </td> <td> U+022C9 </td> <td> <span class="glyph" title="">&#8905;</span> </td> <tr id="entity-ltlarr"><td> <code title="">ltlarr;</code> </td> <td> U+02976 </td> <td> <span class="glyph" title="">&#10614;</span> </td> <tr id="entity-ltquest"><td> <code title="">ltquest;</code> </td> <td> U+02A7B </td> <td> <span class="glyph" title="">&#10875;</span> </td> <tr id="entity-ltrPar"><td> <code title="">ltrPar;</code> </td> <td> U+02996 </td> <td> <span class="glyph" title="">&#10646;</span> </td> <tr id="entity-ltri"><td> <code 
title="">ltri;</code> </td> <td> U+025C3 </td> <td> <span class="glyph" title="">&#9667;</span> </td> <tr id="entity-ltrie"><td> <code title="">ltrie;</code> </td> <td> U+022B4 </td> <td> <span class="glyph" title="">&#8884;</span> </td> <tr id="entity-ltrif"><td> <code title="">ltrif;</code> </td> <td> U+025C2 </td> <td> <span class="glyph" title="">&#9666;</span> </td> <tr id="entity-lurdshar"><td> <code title="">lurdshar;</code> </td> <td> U+0294A </td> <td> <span class="glyph" title="">&#10570;</span> </td> <tr id="entity-luruhar"><td> <code title="">luruhar;</code> </td> <td> U+02966 </td> <td> <span class="glyph" title="">&#10598;</span> </td> <tr id="entity-lvertneqq"><td> <code title="">lvertneqq;</code> </td> <td> U+02268 U+0FE00 </td> <td> <span class="glyph compound" title="">&#8808;&#65024;</span> </td> <tr id="entity-lvnE"><td> <code title="">lvnE;</code> </td> <td> U+02268 U+0FE00 </td> <td> <span class="glyph compound" title="">&#8808;&#65024;</span> </td> <tr id="entity-mDDot"><td> <code title="">mDDot;</code> </td> <td> U+0223A </td> <td> <span class="glyph" title="">&#8762;</span> </td> <tr id="entity-macr"><td> <code title="">macr;</code> </td> <td> U+000AF </td> <td> <span class="glyph" title="">&macr;</span> </td> <tr id="entity-male"><td> <code title="">male;</code> </td> <td> U+02642 </td> <td> <span class="glyph" title="">&#9794;</span> </td> <tr id="entity-malt"><td> <code title="">malt;</code> </td> <td> U+02720 </td> <td> <span class="glyph" title="">&#10016;</span> </td> <tr id="entity-maltese"><td> <code title="">maltese;</code> </td> <td> U+02720 </td> <td> <span class="glyph" title="">&#10016;</span> </td> <tr id="entity-map"><td> <code title="">map;</code> </td> <td> U+021A6 </td> <td> <span class="glyph" title="">&#8614;</span> </td> <tr id="entity-mapsto"><td> <code title="">mapsto;</code> </td> <td> U+021A6 </td> <td> <span class="glyph" title="">&#8614;</span> </td> <tr id="entity-mapstodown"><td> <code 
title="">mapstodown;</code> </td> <td> U+021A7 </td> <td> <span class="glyph" title="">&#8615;</span> </td> <tr id="entity-mapstoleft"><td> <code title="">mapstoleft;</code> </td> <td> U+021A4 </td> <td> <span class="glyph" title="">&#8612;</span> </td> <tr id="entity-mapstoup"><td> <code title="">mapstoup;</code> </td> <td> U+021A5 </td> <td> <span class="glyph" title="">&#8613;</span> </td> <tr id="entity-marker"><td> <code title="">marker;</code> </td> <td> U+025AE </td> <td> <span class="glyph" title="">&#9646;</span> </td> <tr id="entity-mcomma"><td> <code title="">mcomma;</code> </td> <td> U+02A29 </td> <td> <span class="glyph" title="">&#10793;</span> </td> <tr id="entity-mcy"><td> <code title="">mcy;</code> </td> <td> U+0043C </td> <td> <span class="glyph" title="">&#1084;</span> </td> <tr id="entity-mdash"><td> <code title="">mdash;</code> </td> <td> U+02014 </td> <td> <span class="glyph" title="">&mdash;</span> </td> <tr id="entity-measuredangle"><td> <code title="">measuredangle;</code> </td> <td> U+02221 </td> <td> <span class="glyph" title="">&#8737;</span> </td> <tr id="entity-mfr"><td> <code title="">mfr;</code> </td> <td> U+1D52A </td> <td> <span class="glyph" title="">&#120106;</span> </td> <tr id="entity-mho"><td> <code title="">mho;</code> </td> <td> U+02127 </td> <td> <span class="glyph" title="">&#8487;</span> </td> <tr id="entity-micro"><td> <code title="">micro;</code> </td> <td> U+000B5 </td> <td> <span class="glyph" title="">&micro;</span> </td> <tr id="entity-mid"><td> <code title="">mid;</code> </td> <td> U+02223 </td> <td> <span class="glyph" title="">&#8739;</span> </td> <tr id="entity-midast"><td> <code title="">midast;</code> </td> <td> U+0002A </td> <td> <span class="glyph" title="">*</span> </td> <tr id="entity-midcir"><td> <code title="">midcir;</code> </td> <td> U+02AF0 </td> <td> <span class="glyph" title="">&#10992;</span> </td> <tr id="entity-middot"><td> <code title="">middot;</code> </td> <td> U+000B7 </td> <td> <span 
class="glyph" title="">&middot;</span> </td> <tr id="entity-minus"><td> <code title="">minus;</code> </td> <td> U+02212 </td> <td> <span class="glyph" title="">&minus;</span> </td> <tr id="entity-minusb"><td> <code title="">minusb;</code> </td> <td> U+0229F </td> <td> <span class="glyph" title="">&#8863;</span> </td> <tr id="entity-minusd"><td> <code title="">minusd;</code> </td> <td> U+02238 </td> <td> <span class="glyph" title="">&#8760;</span> </td> <tr id="entity-minusdu"><td> <code title="">minusdu;</code> </td> <td> U+02A2A </td> <td> <span class="glyph" title="">&#10794;</span> </td> <tr id="entity-mlcp"><td> <code title="">mlcp;</code> </td> <td> U+02ADB </td> <td> <span class="glyph" title="">&#10971;</span> </td> <tr id="entity-mldr"><td> <code title="">mldr;</code> </td> <td> U+02026 </td> <td> <span class="glyph" title="">&hellip;</span> </td> <tr id="entity-mnplus"><td> <code title="">mnplus;</code> </td> <td> U+02213 </td> <td> <span class="glyph" title="">&#8723;</span> </td> <tr id="entity-models"><td> <code title="">models;</code> </td> <td> U+022A7 </td> <td> <span class="glyph" title="">&#8871;</span> </td> <tr id="entity-mopf"><td> <code title="">mopf;</code> </td> <td> U+1D55E </td> <td> <span class="glyph" title="">&#120158;</span> </td> <tr id="entity-mp"><td> <code title="">mp;</code> </td> <td> U+02213 </td> <td> <span class="glyph" title="">&#8723;</span> </td> <tr id="entity-mscr"><td> <code title="">mscr;</code> </td> <td> U+1D4C2 </td> <td> <span class="glyph" title="">&#120002;</span> </td> <tr id="entity-mstpos"><td> <code title="">mstpos;</code> </td> <td> U+0223E </td> <td> <span class="glyph" title="">&#8766;</span> </td> <tr id="entity-mu"><td> <code title="">mu;</code> </td> <td> U+003BC </td> <td> <span class="glyph" title="">&mu;</span> </td> <tr id="entity-multimap"><td> <code title="">multimap;</code> </td> <td> U+022B8 </td> <td> <span class="glyph" title="">&#8888;</span> </td> <tr id="entity-mumap"><td> <code 
title="">mumap;</code> </td> <td> U+022B8 </td> <td> <span class="glyph" title="">&#8888;</span> </td> <tr id="entity-nGg"><td> <code title="">nGg;</code> </td> <td> U+022D9 U+00338 </td> <td> <span class="glyph compound" title="">&#8921;&#824;</span> </td> <tr id="entity-nGt"><td> <code title="">nGt;</code> </td> <td> U+0226B U+020D2 </td> <td> <span class="glyph compound" title="">&#8811;&#8402;</span> </td> <tr id="entity-nGtv"><td> <code title="">nGtv;</code> </td> <td> U+0226B U+00338 </td> <td> <span class="glyph compound" title="">&#8811;&#824;</span> </td> <tr id="entity-nLeftarrow"><td> <code title="">nLeftarrow;</code> </td> <td> U+021CD </td> <td> <span class="glyph" title="">&#8653;</span> </td> <tr id="entity-nLeftrightarrow"><td> <code title="">nLeftrightarrow;</code> </td> <td> U+021CE </td> <td> <span class="glyph" title="">&#8654;</span> </td> <tr id="entity-nLl"><td> <code title="">nLl;</code> </td> <td> U+022D8 U+00338 </td> <td> <span class="glyph compound" title="">&#8920;&#824;</span> </td> <tr id="entity-nLt"><td> <code title="">nLt;</code> </td> <td> U+0226A U+020D2 </td> <td> <span class="glyph compound" title="">&#8810;&#8402;</span> </td> <tr id="entity-nLtv"><td> <code title="">nLtv;</code> </td> <td> U+0226A U+00338 </td> <td> <span class="glyph compound" title="">&#8810;&#824;</span> </td> <tr id="entity-nRightarrow"><td> <code title="">nRightarrow;</code> </td> <td> U+021CF </td> <td> <span class="glyph" title="">&#8655;</span> </td> <tr id="entity-nVDash"><td> <code title="">nVDash;</code> </td> <td> U+022AF </td> <td> <span class="glyph" title="">&#8879;</span> </td> <tr id="entity-nVdash"><td> <code title="">nVdash;</code> </td> <td> U+022AE </td> <td> <span class="glyph" title="">&#8878;</span> </td> <tr id="entity-nabla"><td> <code title="">nabla;</code> </td> <td> U+02207 </td> <td> <span class="glyph" title="">&nabla;</span> </td> <tr id="entity-nacute"><td> <code title="">nacute;</code> </td> <td> U+00144 </td> <td> <span 
class="glyph" title="">&#324;</span> </td> <tr id="entity-nang"><td> <code title="">nang;</code> </td> <td> U+02220 U+020D2 </td> <td> <span class="glyph compound" title="">&ang;&#8402;</span> </td> <tr id="entity-nap"><td> <code title="">nap;</code> </td> <td> U+02249 </td> <td> <span class="glyph" title="">&#8777;</span> </td> <tr id="entity-napE"><td> <code title="">napE;</code> </td> <td> U+02A70 U+00338 </td> <td> <span class="glyph compound" title="">&#10864;&#824;</span> </td> <tr id="entity-napid"><td> <code title="">napid;</code> </td> <td> U+0224B U+00338 </td> <td> <span class="glyph compound" title="">&#8779;&#824;</span> </td> <tr id="entity-napos"><td> <code title="">napos;</code> </td> <td> U+00149 </td> <td> <span class="glyph" title="">&#329;</span> </td> <tr id="entity-napprox"><td> <code title="">napprox;</code> </td> <td> U+02249 </td> <td> <span class="glyph" title="">&#8777;</span> </td> <tr id="entity-natur"><td> <code title="">natur;</code> </td> <td> U+0266E </td> <td> <span class="glyph" title="">&#9838;</span> </td> <tr id="entity-natural"><td> <code title="">natural;</code> </td> <td> U+0266E </td> <td> <span class="glyph" title="">&#9838;</span> </td> <tr id="entity-naturals"><td> <code title="">naturals;</code> </td> <td> U+02115 </td> <td> <span class="glyph" title="">&#8469;</span> </td> <tr id="entity-nbsp"><td> <code title="">nbsp;</code> </td> <td> U+000A0 </td> <td> <span class="glyph" title="">&nbsp;</span> </td> <tr id="entity-nbump"><td> <code title="">nbump;</code> </td> <td> U+0224E U+00338 </td> <td> <span class="glyph compound" title="">&#8782;&#824;</span> </td> <tr id="entity-nbumpe"><td> <code title="">nbumpe;</code> </td> <td> U+0224F U+00338 </td> <td> <span class="glyph compound" title="">&#8783;&#824;</span> </td> <tr id="entity-ncap"><td> <code title="">ncap;</code> </td> <td> U+02A43 </td> <td> <span class="glyph" title="">&#10819;</span> </td> <tr id="entity-ncaron"><td> <code title="">ncaron;</code> </td> <td> 
U+00148 </td> <td> <span class="glyph" title="">&#328;</span> </td> <tr id="entity-ncedil"><td> <code title="">ncedil;</code> </td> <td> U+00146 </td> <td> <span class="glyph" title="">&#326;</span> </td> <tr id="entity-ncong"><td> <code title="">ncong;</code> </td> <td> U+02247 </td> <td> <span class="glyph" title="">&#8775;</span> </td> <tr id="entity-ncongdot"><td> <code title="">ncongdot;</code> </td> <td> U+02A6D U+00338 </td> <td> <span class="glyph compound" title="">&#10861;&#824;</span> </td> <tr id="entity-ncup"><td> <code title="">ncup;</code> </td> <td> U+02A42 </td> <td> <span class="glyph" title="">&#10818;</span> </td> <tr id="entity-ncy"><td> <code title="">ncy;</code> </td> <td> U+0043D </td> <td> <span class="glyph" title="">&#1085;</span> </td> <tr id="entity-ndash"><td> <code title="">ndash;</code> </td> <td> U+02013 </td> <td> <span class="glyph" title="">&ndash;</span> </td> <tr id="entity-ne"><td> <code title="">ne;</code> </td> <td> U+02260 </td> <td> <span class="glyph" title="">&ne;</span> </td> <tr id="entity-neArr"><td> <code title="">neArr;</code> </td> <td> U+021D7 </td> <td> <span class="glyph" title="">&#8663;</span> </td> <tr id="entity-nearhk"><td> <code title="">nearhk;</code> </td> <td> U+02924 </td> <td> <span class="glyph" title="">&#10532;</span> </td> <tr id="entity-nearr"><td> <code title="">nearr;</code> </td> <td> U+02197 </td> <td> <span class="glyph" title="">&#8599;</span> </td> <tr id="entity-nearrow"><td> <code title="">nearrow;</code> </td> <td> U+02197 </td> <td> <span class="glyph" title="">&#8599;</span> </td> <tr id="entity-nedot"><td> <code title="">nedot;</code> </td> <td> U+02250 U+00338 </td> <td> <span class="glyph compound" title="">&#8784;&#824;</span> </td> <tr id="entity-nequiv"><td> <code title="">nequiv;</code> </td> <td> U+02262 </td> <td> <span class="glyph" title="">&#8802;</span> </td> <tr id="entity-nesear"><td> <code title="">nesear;</code> </td> <td> U+02928 </td> <td> <span class="glyph" 
title="">&#10536;</span> </td> <tr id="entity-nesim"><td> <code title="">nesim;</code> </td> <td> U+02242 U+00338 </td> <td> <span class="glyph compound" title="">&#8770;&#824;</span> </td> <tr id="entity-nexist"><td> <code title="">nexist;</code> </td> <td> U+02204 </td> <td> <span class="glyph" title="">&#8708;</span> </td> <tr id="entity-nexists"><td> <code title="">nexists;</code> </td> <td> U+02204 </td> <td> <span class="glyph" title="">&#8708;</span> </td> <tr id="entity-nfr"><td> <code title="">nfr;</code> </td> <td> U+1D52B </td> <td> <span class="glyph" title="">&#120107;</span> </td> <tr id="entity-ngE"><td> <code title="">ngE;</code> </td> <td> U+02267 U+00338 </td> <td> <span class="glyph compound" title="">&#8807;&#824;</span> </td> <tr id="entity-nge"><td> <code title="">nge;</code> </td> <td> U+02271 </td> <td> <span class="glyph" title="">&#8817;</span> </td> <tr id="entity-ngeq"><td> <code title="">ngeq;</code> </td> <td> U+02271 </td> <td> <span class="glyph" title="">&#8817;</span> </td> <tr id="entity-ngeqq"><td> <code title="">ngeqq;</code> </td> <td> U+02267 U+00338 </td> <td> <span class="glyph compound" title="">&#8807;&#824;</span> </td> <tr id="entity-ngeqslant"><td> <code title="">ngeqslant;</code> </td> <td> U+02A7E U+00338 </td> <td> <span class="glyph compound" title="">&#10878;&#824;</span> </td> <tr id="entity-nges"><td> <code title="">nges;</code> </td> <td> U+02A7E U+00338 </td> <td> <span class="glyph compound" title="">&#10878;&#824;</span> </td> <tr id="entity-ngsim"><td> <code title="">ngsim;</code> </td> <td> U+02275 </td> <td> <span class="glyph" title="">&#8821;</span> </td> <tr id="entity-ngt"><td> <code title="">ngt;</code> </td> <td> U+0226F </td> <td> <span class="glyph" title="">&#8815;</span> </td> <tr id="entity-ngtr"><td> <code title="">ngtr;</code> </td> <td> U+0226F </td> <td> <span class="glyph" title="">&#8815;</span> </td> <tr id="entity-nhArr"><td> <code title="">nhArr;</code> </td> <td> U+021CE </td> <td> 
<span class="glyph" title="">&#8654;</span> </td> <tr id="entity-nharr"><td> <code title="">nharr;</code> </td> <td> U+021AE </td> <td> <span class="glyph" title="">&#8622;</span> </td> <tr id="entity-nhpar"><td> <code title="">nhpar;</code> </td> <td> U+02AF2 </td> <td> <span class="glyph" title="">&#10994;</span> </td> <tr id="entity-ni"><td> <code title="">ni;</code> </td> <td> U+0220B </td> <td> <span class="glyph" title="">&ni;</span> </td> <tr id="entity-nis"><td> <code title="">nis;</code> </td> <td> U+022FC </td> <td> <span class="glyph" title="">&#8956;</span> </td> <tr id="entity-nisd"><td> <code title="">nisd;</code> </td> <td> U+022FA </td> <td> <span class="glyph" title="">&#8954;</span> </td> <tr id="entity-niv"><td> <code title="">niv;</code> </td> <td> U+0220B </td> <td> <span class="glyph" title="">&ni;</span> </td> <tr id="entity-njcy"><td> <code title="">njcy;</code> </td> <td> U+0045A </td> <td> <span class="glyph" title="">&#1114;</span> </td> <tr id="entity-nlArr"><td> <code title="">nlArr;</code> </td> <td> U+021CD </td> <td> <span class="glyph" title="">&#8653;</span> </td> <tr id="entity-nlE"><td> <code title="">nlE;</code> </td> <td> U+02266 U+00338 </td> <td> <span class="glyph compound" title="">&#8806;&#824;</span> </td> <tr id="entity-nlarr"><td> <code title="">nlarr;</code> </td> <td> U+0219A </td> <td> <span class="glyph" title="">&#8602;</span> </td> <tr id="entity-nldr"><td> <code title="">nldr;</code> </td> <td> U+02025 </td> <td> <span class="glyph" title="">&#8229;</span> </td> <tr id="entity-nle"><td> <code title="">nle;</code> </td> <td> U+02270 </td> <td> <span class="glyph" title="">&#8816;</span> </td> <tr id="entity-nleftarrow"><td> <code title="">nleftarrow;</code> </td> <td> U+0219A </td> <td> <span class="glyph" title="">&#8602;</span> </td> <tr id="entity-nleftrightarrow"><td> <code title="">nleftrightarrow;</code> </td> <td> U+021AE </td> <td> <span class="glyph" title="">&#8622;</span> </td> <tr id="entity-nleq"><td> 
<code title="">nleq;</code> </td> <td> U+02270 </td> <td> <span class="glyph" title="">&#8816;</span> </td> <tr id="entity-nleqq"><td> <code title="">nleqq;</code> </td> <td> U+02266 U+00338 </td> <td> <span class="glyph compound" title="">&#8806;&#824;</span> </td> <tr id="entity-nleqslant"><td> <code title="">nleqslant;</code> </td> <td> U+02A7D U+00338 </td> <td> <span class="glyph compound" title="">&#10877;&#824;</span> </td> <tr id="entity-nles"><td> <code title="">nles;</code> </td> <td> U+02A7D U+00338 </td> <td> <span class="glyph compound" title="">&#10877;&#824;</span> </td> <tr id="entity-nless"><td> <code title="">nless;</code> </td> <td> U+0226E </td> <td> <span class="glyph" title="">&#8814;</span> </td> <tr id="entity-nlsim"><td> <code title="">nlsim;</code> </td> <td> U+02274 </td> <td> <span class="glyph" title="">&#8820;</span> </td> <tr id="entity-nlt"><td> <code title="">nlt;</code> </td> <td> U+0226E </td> <td> <span class="glyph" title="">&#8814;</span> </td> <tr id="entity-nltri"><td> <code title="">nltri;</code> </td> <td> U+022EA </td> <td> <span class="glyph" title="">&#8938;</span> </td> <tr id="entity-nltrie"><td> <code title="">nltrie;</code> </td> <td> U+022EC </td> <td> <span class="glyph" title="">&#8940;</span> </td> <tr id="entity-nmid"><td> <code title="">nmid;</code> </td> <td> U+02224 </td> <td> <span class="glyph" title="">&#8740;</span> </td> <tr id="entity-nopf"><td> <code title="">nopf;</code> </td> <td> U+1D55F </td> <td> <span class="glyph" title="">&#120159;</span> </td> <tr id="entity-not"><td> <code title="">not;</code> </td> <td> U+000AC </td> <td> <span class="glyph" title="">&not;</span> </td> <tr id="entity-notin"><td> <code title="">notin;</code> </td> <td> U+02209 </td> <td> <span class="glyph" title="">&notin;</span> </td> <tr id="entity-notinE"><td> <code title="">notinE;</code> </td> <td> U+022F9 U+00338 </td> <td> <span class="glyph compound" title="">&#8953;&#824;</span> </td> <tr id="entity-notindot"><td> 
<code title="">notindot;</code> </td> <td> U+022F5 U+00338 </td> <td> <span class="glyph compound" title="">&#8949;&#824;</span> </td> <tr id="entity-notinva"><td> <code title="">notinva;</code> </td> <td> U+02209 </td> <td> <span class="glyph" title="">&notin;</span> </td> <tr id="entity-notinvb"><td> <code title="">notinvb;</code> </td> <td> U+022F7 </td> <td> <span class="glyph" title="">&#8951;</span> </td> <tr id="entity-notinvc"><td> <code title="">notinvc;</code> </td> <td> U+022F6 </td> <td> <span class="glyph" title="">&#8950;</span> </td> <tr id="entity-notni"><td> <code title="">notni;</code> </td> <td> U+0220C </td> <td> <span class="glyph" title="">&#8716;</span> </td> <tr id="entity-notniva"><td> <code title="">notniva;</code> </td> <td> U+0220C </td> <td> <span class="glyph" title="">&#8716;</span> </td> <tr id="entity-notnivb"><td> <code title="">notnivb;</code> </td> <td> U+022FE </td> <td> <span class="glyph" title="">&#8958;</span> </td> <tr id="entity-notnivc"><td> <code title="">notnivc;</code> </td> <td> U+022FD </td> <td> <span class="glyph" title="">&#8957;</span> </td> <tr id="entity-npar"><td> <code title="">npar;</code> </td> <td> U+02226 </td> <td> <span class="glyph" title="">&#8742;</span> </td> <tr id="entity-nparallel"><td> <code title="">nparallel;</code> </td> <td> U+02226 </td> <td> <span class="glyph" title="">&#8742;</span> </td> <tr id="entity-nparsl"><td> <code title="">nparsl;</code> </td> <td> U+02AFD U+020E5 </td> <td> <span class="glyph compound" title="">&#11005;&#8421;</span> </td> <tr id="entity-npart"><td> <code title="">npart;</code> </td> <td> U+02202 U+00338 </td> <td> <span class="glyph compound" title="">&part;&#824;</span> </td> <tr id="entity-npolint"><td> <code title="">npolint;</code> </td> <td> U+02A14 </td> <td> <span class="glyph" title="">&#10772;</span> </td> <tr id="entity-npr"><td> <code title="">npr;</code> </td> <td> U+02280 </td> <td> <span class="glyph" title="">&#8832;</span> </td> <tr 
id="entity-nprcue"><td> <code title="">nprcue;</code> </td> <td> U+022E0 </td> <td> <span class="glyph" title="">&#8928;</span> </td> <tr id="entity-npre"><td> <code title="">npre;</code> </td> <td> U+02AAF U+00338 </td> <td> <span class="glyph compound" title="">&#10927;&#824;</span> </td> <tr id="entity-nprec"><td> <code title="">nprec;</code> </td> <td> U+02280 </td> <td> <span class="glyph" title="">&#8832;</span> </td> <tr id="entity-npreceq"><td> <code title="">npreceq;</code> </td> <td> U+02AAF U+00338 </td> <td> <span class="glyph compound" title="">&#10927;&#824;</span> </td> <tr id="entity-nrArr"><td> <code title="">nrArr;</code> </td> <td> U+021CF </td> <td> <span class="glyph" title="">&#8655;</span> </td> <tr id="entity-nrarr"><td> <code title="">nrarr;</code> </td> <td> U+0219B </td> <td> <span class="glyph" title="">&#8603;</span> </td> <tr id="entity-nrarrc"><td> <code title="">nrarrc;</code> </td> <td> U+02933 U+00338 </td> <td> <span class="glyph compound" title="">&#10547;&#824;</span> </td> <tr id="entity-nrarrw"><td> <code title="">nrarrw;</code> </td> <td> U+0219D U+00338 </td> <td> <span class="glyph compound" title="">&#8605;&#824;</span> </td> <tr id="entity-nrightarrow"><td> <code title="">nrightarrow;</code> </td> <td> U+0219B </td> <td> <span class="glyph" title="">&#8603;</span> </td> <tr id="entity-nrtri"><td> <code title="">nrtri;</code> </td> <td> U+022EB </td> <td> <span class="glyph" title="">&#8939;</span> </td> <tr id="entity-nrtrie"><td> <code title="">nrtrie;</code> </td> <td> U+022ED </td> <td> <span class="glyph" title="">&#8941;</span> </td> <tr id="entity-nsc"><td> <code title="">nsc;</code> </td> <td> U+02281 </td> <td> <span class="glyph" title="">&#8833;</span> </td> <tr id="entity-nsccue"><td> <code title="">nsccue;</code> </td> <td> U+022E1 </td> <td> <span class="glyph" title="">&#8929;</span> </td> <tr id="entity-nsce"><td> <code title="">nsce;</code> </td> <td> U+02AB0 U+00338 </td> <td> <span class="glyph compound" 
title="">&#10928;&#824;</span> </td> <tr id="entity-nscr"><td> <code title="">nscr;</code> </td> <td> U+1D4C3 </td> <td> <span class="glyph" title="">&#120003;</span> </td> <tr id="entity-nshortmid"><td> <code title="">nshortmid;</code> </td> <td> U+02224 </td> <td> <span class="glyph" title="">&#8740;</span> </td> <tr id="entity-nshortparallel"><td> <code title="">nshortparallel;</code> </td> <td> U+02226 </td> <td> <span class="glyph" title="">&#8742;</span> </td> <tr id="entity-nsim"><td> <code title="">nsim;</code> </td> <td> U+02241 </td> <td> <span class="glyph" title="">&#8769;</span> </td> <tr id="entity-nsime"><td> <code title="">nsime;</code> </td> <td> U+02244 </td> <td> <span class="glyph" title="">&#8772;</span> </td> <tr id="entity-nsimeq"><td> <code title="">nsimeq;</code> </td> <td> U+02244 </td> <td> <span class="glyph" title="">&#8772;</span> </td> <tr id="entity-nsmid"><td> <code title="">nsmid;</code> </td> <td> U+02224 </td> <td> <span class="glyph" title="">&#8740;</span> </td> <tr id="entity-nspar"><td> <code title="">nspar;</code> </td> <td> U+02226 </td> <td> <span class="glyph" title="">&#8742;</span> </td> <tr id="entity-nsqsube"><td> <code title="">nsqsube;</code> </td> <td> U+022E2 </td> <td> <span class="glyph" title="">&#8930;</span> </td> <tr id="entity-nsqsupe"><td> <code title="">nsqsupe;</code> </td> <td> U+022E3 </td> <td> <span class="glyph" title="">&#8931;</span> </td> <tr id="entity-nsub"><td> <code title="">nsub;</code> </td> <td> U+02284 </td> <td> <span class="glyph" title="">&nsub;</span> </td> <tr id="entity-nsubE"><td> <code title="">nsubE;</code> </td> <td> U+02AC5 U+00338 </td> <td> <span class="glyph compound" title="">&#10949;&#824;</span> </td> <tr id="entity-nsube"><td> <code title="">nsube;</code> </td> <td> U+02288 </td> <td> <span class="glyph" title="">&#8840;</span> </td> <tr id="entity-nsubset"><td> <code title="">nsubset;</code> </td> <td> U+02282 U+020D2 </td> <td> <span class="glyph compound" 
title="">&sub;&#8402;</span> </td> <tr id="entity-nsubseteq"><td> <code title="">nsubseteq;</code> </td> <td> U+02288 </td> <td> <span class="glyph" title="">&#8840;</span> </td> <tr id="entity-nsubseteqq"><td> <code title="">nsubseteqq;</code> </td> <td> U+02AC5 U+00338 </td> <td> <span class="glyph compound" title="">&#10949;&#824;</span> </td> <tr id="entity-nsucc"><td> <code title="">nsucc;</code> </td> <td> U+02281 </td> <td> <span class="glyph" title="">&#8833;</span> </td> <tr id="entity-nsucceq"><td> <code title="">nsucceq;</code> </td> <td> U+02AB0 U+00338 </td> <td> <span class="glyph compound" title="">&#10928;&#824;</span> </td> <tr id="entity-nsup"><td> <code title="">nsup;</code> </td> <td> U+02285 </td> <td> <span class="glyph" title="">&#8837;</span> </td> <tr id="entity-nsupE"><td> <code title="">nsupE;</code> </td> <td> U+02AC6 U+00338 </td> <td> <span class="glyph compound" title="">&#10950;&#824;</span> </td> <tr id="entity-nsupe"><td> <code title="">nsupe;</code> </td> <td> U+02289 </td> <td> <span class="glyph" title="">&#8841;</span> </td> <tr id="entity-nsupset"><td> <code title="">nsupset;</code> </td> <td> U+02283 U+020D2 </td> <td> <span class="glyph compound" title="">&sup;&#8402;</span> </td> <tr id="entity-nsupseteq"><td> <code title="">nsupseteq;</code> </td> <td> U+02289 </td> <td> <span class="glyph" title="">&#8841;</span> </td> <tr id="entity-nsupseteqq"><td> <code title="">nsupseteqq;</code> </td> <td> U+02AC6 U+00338 </td> <td> <span class="glyph compound" title="">&#10950;&#824;</span> </td> <tr id="entity-ntgl"><td> <code title="">ntgl;</code> </td> <td> U+02279 </td> <td> <span class="glyph" title="">&#8825;</span> </td> <tr id="entity-ntilde"><td> <code title="">ntilde;</code> </td> <td> U+000F1 </td> <td> <span class="glyph" title="">&ntilde;</span> </td> <tr id="entity-ntlg"><td> <code title="">ntlg;</code> </td> <td> U+02278 </td> <td> <span class="glyph" title="">&#8824;</span> </td> <tr id="entity-ntriangleleft"><td> 
<code title="">ntriangleleft;</code> </td> <td> U+022EA </td> <td> <span class="glyph" title="">&#8938;</span> </td> <tr id="entity-ntrianglelefteq"><td> <code title="">ntrianglelefteq;</code> </td> <td> U+022EC </td> <td> <span class="glyph" title="">&#8940;</span> </td> <tr id="entity-ntriangleright"><td> <code title="">ntriangleright;</code> </td> <td> U+022EB </td> <td> <span class="glyph" title="">&#8939;</span> </td> <tr id="entity-ntrianglerighteq"><td> <code title="">ntrianglerighteq;</code> </td> <td> U+022ED </td> <td> <span class="glyph" title="">&#8941;</span> </td> <tr id="entity-nu"><td> <code title="">nu;</code> </td> <td> U+003BD </td> <td> <span class="glyph" title="">&nu;</span> </td> <tr id="entity-num"><td> <code title="">num;</code> </td> <td> U+00023 </td> <td> <span class="glyph" title="">#</span> </td> <tr id="entity-numero"><td> <code title="">numero;</code> </td> <td> U+02116 </td> <td> <span class="glyph" title="">&#8470;</span> </td> <tr id="entity-numsp"><td> <code title="">numsp;</code> </td> <td> U+02007 </td> <td> <span class="glyph" title="">&#8199;</span> </td> <tr id="entity-nvDash"><td> <code title="">nvDash;</code> </td> <td> U+022AD </td> <td> <span class="glyph" title="">&#8877;</span> </td> <tr id="entity-nvHarr"><td> <code title="">nvHarr;</code> </td> <td> U+02904 </td> <td> <span class="glyph" title="">&#10500;</span> </td> <tr id="entity-nvap"><td> <code title="">nvap;</code> </td> <td> U+0224D U+020D2 </td> <td> <span class="glyph compound" title="">&#8781;&#8402;</span> </td> <tr id="entity-nvdash"><td> <code title="">nvdash;</code> </td> <td> U+022AC </td> <td> <span class="glyph" title="">&#8876;</span> </td> <tr id="entity-nvge"><td> <code title="">nvge;</code> </td> <td> U+02265 U+020D2 </td> <td> <span class="glyph compound" title="">&ge;&#8402;</span> </td> <tr id="entity-nvgt"><td> <code title="">nvgt;</code> </td> <td> U+0003E U+020D2 </td> <td> <span class="glyph compound" title="">&gt;&#8402;</span> </td> <tr 
id="entity-nvinfin"><td> <code title="">nvinfin;</code> </td> <td> U+029DE </td> <td> <span class="glyph" title="">&#10718;</span> </td> <tr id="entity-nvlArr"><td> <code title="">nvlArr;</code> </td> <td> U+02902 </td> <td> <span class="glyph" title="">&#10498;</span> </td> <tr id="entity-nvle"><td> <code title="">nvle;</code> </td> <td> U+02264 U+020D2 </td> <td> <span class="glyph compound" title="">&le;&#8402;</span> </td> <tr id="entity-nvlt"><td> <code title="">nvlt;</code> </td> <td> U+0003C U+020D2 </td> <td> <span class="glyph compound" title="">&lt;&#8402;</span> </td> <tr id="entity-nvltrie"><td> <code title="">nvltrie;</code> </td> <td> U+022B4 U+020D2 </td> <td> <span class="glyph compound" title="">&#8884;&#8402;</span> </td> <tr id="entity-nvrArr"><td> <code title="">nvrArr;</code> </td> <td> U+02903 </td> <td> <span class="glyph" title="">&#10499;</span> </td> <tr id="entity-nvrtrie"><td> <code title="">nvrtrie;</code> </td> <td> U+022B5 U+020D2 </td> <td> <span class="glyph compound" title="">&#8885;&#8402;</span> </td> <tr id="entity-nvsim"><td> <code title="">nvsim;</code> </td> <td> U+0223C U+020D2 </td> <td> <span class="glyph compound" title="">&sim;&#8402;</span> </td> <tr id="entity-nwArr"><td> <code title="">nwArr;</code> </td> <td> U+021D6 </td> <td> <span class="glyph" title="">&#8662;</span> </td> <tr id="entity-nwarhk"><td> <code title="">nwarhk;</code> </td> <td> U+02923 </td> <td> <span class="glyph" title="">&#10531;</span> </td> <tr id="entity-nwarr"><td> <code title="">nwarr;</code> </td> <td> U+02196 </td> <td> <span class="glyph" title="">&#8598;</span> </td> <tr id="entity-nwarrow"><td> <code title="">nwarrow;</code> </td> <td> U+02196 </td> <td> <span class="glyph" title="">&#8598;</span> </td> <tr id="entity-nwnear"><td> <code title="">nwnear;</code> </td> <td> U+02927 </td> <td> <span class="glyph" title="">&#10535;</span> </td> <tr id="entity-oS"><td> <code title="">oS;</code> </td> <td> U+024C8 </td> <td> <span 
class="glyph" title="">&#9416;</span> </td> <tr id="entity-oacute"><td> <code title="">oacute;</code> </td> <td> U+000F3 </td> <td> <span class="glyph" title="">&oacute;</span> </td> <tr id="entity-oast"><td> <code title="">oast;</code> </td> <td> U+0229B </td> <td> <span class="glyph" title="">&#8859;</span> </td> <tr id="entity-ocir"><td> <code title="">ocir;</code> </td> <td> U+0229A </td> <td> <span class="glyph" title="">&#8858;</span> </td> <tr id="entity-ocirc"><td> <code title="">ocirc;</code> </td> <td> U+000F4 </td> <td> <span class="glyph" title="">&ocirc;</span> </td> <tr id="entity-ocy"><td> <code title="">ocy;</code> </td> <td> U+0043E </td> <td> <span class="glyph" title="">&#1086;</span> </td> <tr id="entity-odash"><td> <code title="">odash;</code> </td> <td> U+0229D </td> <td> <span class="glyph" title="">&#8861;</span> </td> <tr id="entity-odblac"><td> <code title="">odblac;</code> </td> <td> U+00151 </td> <td> <span class="glyph" title="">&#337;</span> </td> <tr id="entity-odiv"><td> <code title="">odiv;</code> </td> <td> U+02A38 </td> <td> <span class="glyph" title="">&#10808;</span> </td> <tr id="entity-odot"><td> <code title="">odot;</code> </td> <td> U+02299 </td> <td> <span class="glyph" title="">&#8857;</span> </td> <tr id="entity-odsold"><td> <code title="">odsold;</code> </td> <td> U+029BC </td> <td> <span class="glyph" title="">&#10684;</span> </td> <tr id="entity-oelig"><td> <code title="">oelig;</code> </td> <td> U+00153 </td> <td> <span class="glyph" title="">&oelig;</span> </td> <tr id="entity-ofcir"><td> <code title="">ofcir;</code> </td> <td> U+029BF </td> <td> <span class="glyph" title="">&#10687;</span> </td> <tr id="entity-ofr"><td> <code title="">ofr;</code> </td> <td> U+1D52C </td> <td> <span class="glyph" title="">&#120108;</span> </td> <tr id="entity-ogon"><td> <code title="">ogon;</code> </td> <td> U+002DB </td> <td> <span class="glyph" title="">&#731;</span> </td> <tr id="entity-ograve"><td> <code title="">ograve;</code> 
</td> <td> U+000F2 </td> <td> <span class="glyph" title="">&ograve;</span> </td> <tr id="entity-ogt"><td> <code title="">ogt;</code> </td> <td> U+029C1 </td> <td> <span class="glyph" title="">&#10689;</span> </td> <tr id="entity-ohbar"><td> <code title="">ohbar;</code> </td> <td> U+029B5 </td> <td> <span class="glyph" title="">&#10677;</span> </td> <tr id="entity-ohm"><td> <code title="">ohm;</code> </td> <td> U+003A9 </td> <td> <span class="glyph" title="">&Omega;</span> </td> <tr id="entity-oint"><td> <code title="">oint;</code> </td> <td> U+0222E </td> <td> <span class="glyph" title="">&#8750;</span> </td> <tr id="entity-olarr"><td> <code title="">olarr;</code> </td> <td> U+021BA </td> <td> <span class="glyph" title="">&#8634;</span> </td> <tr id="entity-olcir"><td> <code title="">olcir;</code> </td> <td> U+029BE </td> <td> <span class="glyph" title="">&#10686;</span> </td> <tr id="entity-olcross"><td> <code title="">olcross;</code> </td> <td> U+029BB </td> <td> <span class="glyph" title="">&#10683;</span> </td> <tr id="entity-oline"><td> <code title="">oline;</code> </td> <td> U+0203E </td> <td> <span class="glyph" title="">&oline;</span> </td> <tr id="entity-olt"><td> <code title="">olt;</code> </td> <td> U+029C0 </td> <td> <span class="glyph" title="">&#10688;</span> </td> <tr id="entity-omacr"><td> <code title="">omacr;</code> </td> <td> U+0014D </td> <td> <span class="glyph" title="">&#333;</span> </td> <tr id="entity-omega"><td> <code title="">omega;</code> </td> <td> U+003C9 </td> <td> <span class="glyph" title="">&omega;</span> </td> <tr id="entity-omicron"><td> <code title="">omicron;</code> </td> <td> U+003BF </td> <td> <span class="glyph" title="">&omicron;</span> </td> <tr id="entity-omid"><td> <code title="">omid;</code> </td> <td> U+029B6 </td> <td> <span class="glyph" title="">&#10678;</span> </td> <tr id="entity-ominus"><td> <code title="">ominus;</code> </td> <td> U+02296 </td> <td> <span class="glyph" title="">&#8854;</span> </td> <tr 
id="entity-oopf"><td> <code title="">oopf;</code> </td> <td> U+1D560 </td> <td> <span class="glyph" title="">&#120160;</span> </td> <tr id="entity-opar"><td> <code title="">opar;</code> </td> <td> U+029B7 </td> <td> <span class="glyph" title="">&#10679;</span> </td> <tr id="entity-operp"><td> <code title="">operp;</code> </td> <td> U+029B9 </td> <td> <span class="glyph" title="">&#10681;</span> </td> <tr id="entity-oplus"><td> <code title="">oplus;</code> </td> <td> U+02295 </td> <td> <span class="glyph" title="">&oplus;</span> </td> <tr id="entity-or"><td> <code title="">or;</code> </td> <td> U+02228 </td> <td> <span class="glyph" title="">&or;</span> </td> <tr id="entity-orarr"><td> <code title="">orarr;</code> </td> <td> U+021BB </td> <td> <span class="glyph" title="">&#8635;</span> </td> <tr id="entity-ord"><td> <code title="">ord;</code> </td> <td> U+02A5D </td> <td> <span class="glyph" title="">&#10845;</span> </td> <tr id="entity-order"><td> <code title="">order;</code> </td> <td> U+02134 </td> <td> <span class="glyph" title="">&#8500;</span> </td> <tr id="entity-orderof"><td> <code title="">orderof;</code> </td> <td> U+02134 </td> <td> <span class="glyph" title="">&#8500;</span> </td> <tr id="entity-ordf"><td> <code title="">ordf;</code> </td> <td> U+000AA </td> <td> <span class="glyph" title="">&ordf;</span> </td> <tr id="entity-ordm"><td> <code title="">ordm;</code> </td> <td> U+000BA </td> <td> <span class="glyph" title="">&ordm;</span> </td> <tr id="entity-origof"><td> <code title="">origof;</code> </td> <td> U+022B6 </td> <td> <span class="glyph" title="">&#8886;</span> </td> <tr id="entity-oror"><td> <code title="">oror;</code> </td> <td> U+02A56 </td> <td> <span class="glyph" title="">&#10838;</span> </td> <tr id="entity-orslope"><td> <code title="">orslope;</code> </td> <td> U+02A57 </td> <td> <span class="glyph" title="">&#10839;</span> </td> <tr id="entity-orv"><td> <code title="">orv;</code> </td> <td> U+02A5B </td> <td> <span class="glyph" 
title="">&#10843;</span> </td> <tr id="entity-oscr"><td> <code title="">oscr;</code> </td> <td> U+02134 </td> <td> <span class="glyph" title="">&#8500;</span> </td> <tr id="entity-oslash"><td> <code title="">oslash;</code> </td> <td> U+000F8 </td> <td> <span class="glyph" title="">&oslash;</span> </td> <tr id="entity-osol"><td> <code title="">osol;</code> </td> <td> U+02298 </td> <td> <span class="glyph" title="">&#8856;</span> </td> <tr id="entity-otilde"><td> <code title="">otilde;</code> </td> <td> U+000F5 </td> <td> <span class="glyph" title="">&otilde;</span> </td> <tr id="entity-otimes"><td> <code title="">otimes;</code> </td> <td> U+02297 </td> <td> <span class="glyph" title="">&otimes;</span> </td> <tr id="entity-otimesas"><td> <code title="">otimesas;</code> </td> <td> U+02A36 </td> <td> <span class="glyph" title="">&#10806;</span> </td> <tr id="entity-ouml"><td> <code title="">ouml;</code> </td> <td> U+000F6 </td> <td> <span class="glyph" title="">&ouml;</span> </td> <tr id="entity-ovbar"><td> <code title="">ovbar;</code> </td> <td> U+0233D </td> <td> <span class="glyph" title="">&#9021;</span> </td> <tr id="entity-par"><td> <code title="">par;</code> </td> <td> U+02225 </td> <td> <span class="glyph" title="">&#8741;</span> </td> <tr id="entity-para"><td> <code title="">para;</code> </td> <td> U+000B6 </td> <td> <span class="glyph" title="">&para;</span> </td> <tr id="entity-parallel"><td> <code title="">parallel;</code> </td> <td> U+02225 </td> <td> <span class="glyph" title="">&#8741;</span> </td> <tr id="entity-parsim"><td> <code title="">parsim;</code> </td> <td> U+02AF3 </td> <td> <span class="glyph" title="">&#10995;</span> </td> <tr id="entity-parsl"><td> <code title="">parsl;</code> </td> <td> U+02AFD </td> <td> <span class="glyph" title="">&#11005;</span> </td> <tr id="entity-part"><td> <code title="">part;</code> </td> <td> U+02202 </td> <td> <span class="glyph" title="">&part;</span> </td> <tr id="entity-pcy"><td> <code title="">pcy;</code> 
</td> <td> U+0043F </td> <td> <span class="glyph" title="">&#1087;</span> </td> <tr id="entity-percnt"><td> <code title="">percnt;</code> </td> <td> U+00025 </td> <td> <span class="glyph" title="">%</span> </td> <tr id="entity-period"><td> <code title="">period;</code> </td> <td> U+0002E </td> <td> <span class="glyph" title="">.</span> </td> <tr id="entity-permil"><td> <code title="">permil;</code> </td> <td> U+02030 </td> <td> <span class="glyph" title="">&permil;</span> </td> <tr id="entity-perp"><td> <code title="">perp;</code> </td> <td> U+022A5 </td> <td> <span class="glyph" title="">&perp;</span> </td> <tr id="entity-pertenk"><td> <code title="">pertenk;</code> </td> <td> U+02031 </td> <td> <span class="glyph" title="">&#8241;</span> </td> <tr id="entity-pfr"><td> <code title="">pfr;</code> </td> <td> U+1D52D </td> <td> <span class="glyph" title="">&#120109;</span> </td> <tr id="entity-phi"><td> <code title="">phi;</code> </td> <td> U+003C6 </td> <td> <span class="glyph" title="">&phi;</span> </td> <tr id="entity-phiv"><td> <code title="">phiv;</code> </td> <td> U+003D5 </td> <td> <span class="glyph" title="">&#981;</span> </td> <tr id="entity-phmmat"><td> <code title="">phmmat;</code> </td> <td> U+02133 </td> <td> <span class="glyph" title="">&#8499;</span> </td> <tr id="entity-phone"><td> <code title="">phone;</code> </td> <td> U+0260E </td> <td> <span class="glyph" title="">&#9742;</span> </td> <tr id="entity-pi"><td> <code title="">pi;</code> </td> <td> U+003C0 </td> <td> <span class="glyph" title="">&pi;</span> </td> <tr id="entity-pitchfork"><td> <code title="">pitchfork;</code> </td> <td> U+022D4 </td> <td> <span class="glyph" title="">&#8916;</span> </td> <tr id="entity-piv"><td> <code title="">piv;</code> </td> <td> U+003D6 </td> <td> <span class="glyph" title="">&piv;</span> </td> <tr id="entity-planck"><td> <code title="">planck;</code> </td> <td> U+0210F </td> <td> <span class="glyph" title="">&#8463;</span> </td> <tr id="entity-planckh"><td> 
<code title="">planckh;</code> </td> <td> U+0210E </td> <td> <span class="glyph" title="">&#8462;</span> </td> <tr id="entity-plankv"><td> <code title="">plankv;</code> </td> <td> U+0210F </td> <td> <span class="glyph" title="">&#8463;</span> </td> <tr id="entity-plus"><td> <code title="">plus;</code> </td> <td> U+0002B </td> <td> <span class="glyph" title="">+</span> </td> <tr id="entity-plusacir"><td> <code title="">plusacir;</code> </td> <td> U+02A23 </td> <td> <span class="glyph" title="">&#10787;</span> </td> <tr id="entity-plusb"><td> <code title="">plusb;</code> </td> <td> U+0229E </td> <td> <span class="glyph" title="">&#8862;</span> </td> <tr id="entity-pluscir"><td> <code title="">pluscir;</code> </td> <td> U+02A22 </td> <td> <span class="glyph" title="">&#10786;</span> </td> <tr id="entity-plusdo"><td> <code title="">plusdo;</code> </td> <td> U+02214 </td> <td> <span class="glyph" title="">&#8724;</span> </td> <tr id="entity-plusdu"><td> <code title="">plusdu;</code> </td> <td> U+02A25 </td> <td> <span class="glyph" title="">&#10789;</span> </td> <tr id="entity-pluse"><td> <code title="">pluse;</code> </td> <td> U+02A72 </td> <td> <span class="glyph" title="">&#10866;</span> </td> <tr id="entity-plusmn"><td> <code title="">plusmn;</code> </td> <td> U+000B1 </td> <td> <span class="glyph" title="">&plusmn;</span> </td> <tr id="entity-plussim"><td> <code title="">plussim;</code> </td> <td> U+02A26 </td> <td> <span class="glyph" title="">&#10790;</span> </td> <tr id="entity-plustwo"><td> <code title="">plustwo;</code> </td> <td> U+02A27 </td> <td> <span class="glyph" title="">&#10791;</span> </td> <tr id="entity-pm"><td> <code title="">pm;</code> </td> <td> U+000B1 </td> <td> <span class="glyph" title="">&plusmn;</span> </td> <tr id="entity-pointint"><td> <code title="">pointint;</code> </td> <td> U+02A15 </td> <td> <span class="glyph" title="">&#10773;</span> </td> <tr id="entity-popf"><td> <code title="">popf;</code> </td> <td> U+1D561 </td> <td> <span 
class="glyph" title="">&#120161;</span> </td> <tr id="entity-pound"><td> <code title="">pound;</code> </td> <td> U+000A3 </td> <td> <span class="glyph" title="">&pound;</span> </td> <tr id="entity-pr"><td> <code title="">pr;</code> </td> <td> U+0227A </td> <td> <span class="glyph" title="">&#8826;</span> </td> <tr id="entity-prE"><td> <code title="">prE;</code> </td> <td> U+02AB3 </td> <td> <span class="glyph" title="">&#10931;</span> </td> <tr id="entity-prap"><td> <code title="">prap;</code> </td> <td> U+02AB7 </td> <td> <span class="glyph" title="">&#10935;</span> </td> <tr id="entity-prcue"><td> <code title="">prcue;</code> </td> <td> U+0227C </td> <td> <span class="glyph" title="">&#8828;</span> </td> <tr id="entity-pre"><td> <code title="">pre;</code> </td> <td> U+02AAF </td> <td> <span class="glyph" title="">&#10927;</span> </td> <tr id="entity-prec"><td> <code title="">prec;</code> </td> <td> U+0227A </td> <td> <span class="glyph" title="">&#8826;</span> </td> <tr id="entity-precapprox"><td> <code title="">precapprox;</code> </td> <td> U+02AB7 </td> <td> <span class="glyph" title="">&#10935;</span> </td> <tr id="entity-preccurlyeq"><td> <code title="">preccurlyeq;</code> </td> <td> U+0227C </td> <td> <span class="glyph" title="">&#8828;</span> </td> <tr id="entity-preceq"><td> <code title="">preceq;</code> </td> <td> U+02AAF </td> <td> <span class="glyph" title="">&#10927;</span> </td> <tr id="entity-precnapprox"><td> <code title="">precnapprox;</code> </td> <td> U+02AB9 </td> <td> <span class="glyph" title="">&#10937;</span> </td> <tr id="entity-precneqq"><td> <code title="">precneqq;</code> </td> <td> U+02AB5 </td> <td> <span class="glyph" title="">&#10933;</span> </td> <tr id="entity-precnsim"><td> <code title="">precnsim;</code> </td> <td> U+022E8 </td> <td> <span class="glyph" title="">&#8936;</span> </td> <tr id="entity-precsim"><td> <code title="">precsim;</code> </td> <td> U+0227E </td> <td> <span class="glyph" title="">&#8830;</span> </td> <tr 
id="entity-prime"><td> <code title="">prime;</code> </td> <td> U+02032 </td> <td> <span class="glyph" title="">&prime;</span> </td> <tr id="entity-primes"><td> <code title="">primes;</code> </td> <td> U+02119 </td> <td> <span class="glyph" title="">&#8473;</span> </td> <tr id="entity-prnE"><td> <code title="">prnE;</code> </td> <td> U+02AB5 </td> <td> <span class="glyph" title="">&#10933;</span> </td> <tr id="entity-prnap"><td> <code title="">prnap;</code> </td> <td> U+02AB9 </td> <td> <span class="glyph" title="">&#10937;</span> </td> <tr id="entity-prnsim"><td> <code title="">prnsim;</code> </td> <td> U+022E8 </td> <td> <span class="glyph" title="">&#8936;</span> </td> <tr id="entity-prod"><td> <code title="">prod;</code> </td> <td> U+0220F </td> <td> <span class="glyph" title="">&prod;</span> </td> <tr id="entity-profalar"><td> <code title="">profalar;</code> </td> <td> U+0232E </td> <td> <span class="glyph" title="">&#9006;</span> </td> <tr id="entity-profline"><td> <code title="">profline;</code> </td> <td> U+02312 </td> <td> <span class="glyph" title="">&#8978;</span> </td> <tr id="entity-profsurf"><td> <code title="">profsurf;</code> </td> <td> U+02313 </td> <td> <span class="glyph" title="">&#8979;</span> </td> <tr id="entity-prop"><td> <code title="">prop;</code> </td> <td> U+0221D </td> <td> <span class="glyph" title="">&prop;</span> </td> <tr id="entity-propto"><td> <code title="">propto;</code> </td> <td> U+0221D </td> <td> <span class="glyph" title="">&prop;</span> </td> <tr id="entity-prsim"><td> <code title="">prsim;</code> </td> <td> U+0227E </td> <td> <span class="glyph" title="">&#8830;</span> </td> <tr id="entity-prurel"><td> <code title="">prurel;</code> </td> <td> U+022B0 </td> <td> <span class="glyph" title="">&#8880;</span> </td> <tr id="entity-pscr"><td> <code title="">pscr;</code> </td> <td> U+1D4C5 </td> <td> <span class="glyph" title="">&#120005;</span> </td> <tr id="entity-psi"><td> <code title="">psi;</code> </td> <td> U+003C8 </td> 
<td> <span class="glyph" title="">&psi;</span> </td> <tr id="entity-puncsp"><td> <code title="">puncsp;</code> </td> <td> U+02008 </td> <td> <span class="glyph" title="">&#8200;</span> </td> <tr id="entity-qfr"><td> <code title="">qfr;</code> </td> <td> U+1D52E </td> <td> <span class="glyph" title="">&#120110;</span> </td> <tr id="entity-qint"><td> <code title="">qint;</code> </td> <td> U+02A0C </td> <td> <span class="glyph" title="">&#10764;</span> </td> <tr id="entity-qopf"><td> <code title="">qopf;</code> </td> <td> U+1D562 </td> <td> <span class="glyph" title="">&#120162;</span> </td> <tr id="entity-qprime"><td> <code title="">qprime;</code> </td> <td> U+02057 </td> <td> <span class="glyph" title="">&#8279;</span> </td> <tr id="entity-qscr"><td> <code title="">qscr;</code> </td> <td> U+1D4C6 </td> <td> <span class="glyph" title="">&#120006;</span> </td> <tr id="entity-quaternions"><td> <code title="">quaternions;</code> </td> <td> U+0210D </td> <td> <span class="glyph" title="">&#8461;</span> </td> <tr id="entity-quatint"><td> <code title="">quatint;</code> </td> <td> U+02A16 </td> <td> <span class="glyph" title="">&#10774;</span> </td> <tr id="entity-quest"><td> <code title="">quest;</code> </td> <td> U+0003F </td> <td> <span class="glyph" title="">?</span> </td> <tr id="entity-questeq"><td> <code title="">questeq;</code> </td> <td> U+0225F </td> <td> <span class="glyph" title="">&#8799;</span> </td> <tr id="entity-quot"><td> <code title="">quot;</code> </td> <td> U+00022 </td> <td> <span class="glyph" title="">"</span> </td> <tr id="entity-rAarr"><td> <code title="">rAarr;</code> </td> <td> U+021DB </td> <td> <span class="glyph" title="">&#8667;</span> </td> <tr id="entity-rArr"><td> <code title="">rArr;</code> </td> <td> U+021D2 </td> <td> <span class="glyph" title="">&rArr;</span> </td> <tr id="entity-rAtail"><td> <code title="">rAtail;</code> </td> <td> U+0291C </td> <td> <span class="glyph" title="">&#10524;</span> </td> <tr id="entity-rBarr"><td> <code 
title="">rBarr;</code> </td> <td> U+0290F </td> <td> <span class="glyph" title="">&#10511;</span> </td> <tr id="entity-rHar"><td> <code title="">rHar;</code> </td> <td> U+02964 </td> <td> <span class="glyph" title="">&#10596;</span> </td> <tr id="entity-race"><td> <code title="">race;</code> </td> <td> U+0223D U+00331 </td> <td> <span class="glyph compound" title="">&#8765;&#817;</span> </td> <tr id="entity-racute"><td> <code title="">racute;</code> </td> <td> U+00155 </td> <td> <span class="glyph" title="">&#341;</span> </td> <tr id="entity-radic"><td> <code title="">radic;</code> </td> <td> U+0221A </td> <td> <span class="glyph" title="">&radic;</span> </td> <tr id="entity-raemptyv"><td> <code title="">raemptyv;</code> </td> <td> U+029B3 </td> <td> <span class="glyph" title="">&#10675;</span> </td> <tr id="entity-rang"><td> <code title="">rang;</code> </td> <td> U+027E9 </td> <td> <span class="glyph" title="">&#10217;</span> </td> <tr id="entity-rangd"><td> <code title="">rangd;</code> </td> <td> U+02992 </td> <td> <span class="glyph" title="">&#10642;</span> </td> <tr id="entity-range"><td> <code title="">range;</code> </td> <td> U+029A5 </td> <td> <span class="glyph" title="">&#10661;</span> </td> <tr id="entity-rangle"><td> <code title="">rangle;</code> </td> <td> U+027E9 </td> <td> <span class="glyph" title="">&#10217;</span> </td> <tr id="entity-raquo"><td> <code title="">raquo;</code> </td> <td> U+000BB </td> <td> <span class="glyph" title="">&raquo;</span> </td> <tr id="entity-rarr"><td> <code title="">rarr;</code> </td> <td> U+02192 </td> <td> <span class="glyph" title="">&rarr;</span> </td> <tr id="entity-rarrap"><td> <code title="">rarrap;</code> </td> <td> U+02975 </td> <td> <span class="glyph" title="">&#10613;</span> </td> <tr id="entity-rarrb"><td> <code title="">rarrb;</code> </td> <td> U+021E5 </td> <td> <span class="glyph" title="">&#8677;</span> </td> <tr id="entity-rarrbfs"><td> <code title="">rarrbfs;</code> </td> <td> U+02920 </td> <td> <span 
class="glyph" title="">&#10528;</span> </td> <tr id="entity-rarrc"><td> <code title="">rarrc;</code> </td> <td> U+02933 </td> <td> <span class="glyph" title="">&#10547;</span> </td> <tr id="entity-rarrfs"><td> <code title="">rarrfs;</code> </td> <td> U+0291E </td> <td> <span class="glyph" title="">&#10526;</span> </td> <tr id="entity-rarrhk"><td> <code title="">rarrhk;</code> </td> <td> U+021AA </td> <td> <span class="glyph" title="">&#8618;</span> </td> <tr id="entity-rarrlp"><td> <code title="">rarrlp;</code> </td> <td> U+021AC </td> <td> <span class="glyph" title="">&#8620;</span> </td> <tr id="entity-rarrpl"><td> <code title="">rarrpl;</code> </td> <td> U+02945 </td> <td> <span class="glyph" title="">&#10565;</span> </td> <tr id="entity-rarrsim"><td> <code title="">rarrsim;</code> </td> <td> U+02974 </td> <td> <span class="glyph" title="">&#10612;</span> </td> <tr id="entity-rarrtl"><td> <code title="">rarrtl;</code> </td> <td> U+021A3 </td> <td> <span class="glyph" title="">&#8611;</span> </td> <tr id="entity-rarrw"><td> <code title="">rarrw;</code> </td> <td> U+0219D </td> <td> <span class="glyph" title="">&#8605;</span> </td> <tr id="entity-ratail"><td> <code title="">ratail;</code> </td> <td> U+0291A </td> <td> <span class="glyph" title="">&#10522;</span> </td> <tr id="entity-ratio"><td> <code title="">ratio;</code> </td> <td> U+02236 </td> <td> <span class="glyph" title="">&#8758;</span> </td> <tr id="entity-rationals"><td> <code title="">rationals;</code> </td> <td> U+0211A </td> <td> <span class="glyph" title="">&#8474;</span> </td> <tr id="entity-rbarr"><td> <code title="">rbarr;</code> </td> <td> U+0290D </td> <td> <span class="glyph" title="">&#10509;</span> </td> <tr id="entity-rbbrk"><td> <code title="">rbbrk;</code> </td> <td> U+02773 </td> <td> <span class="glyph" title="">&#10099;</span> </td> <tr id="entity-rbrace"><td> <code title="">rbrace;</code> </td> <td> U+0007D </td> <td> <span class="glyph" title="">}</span> </td> <tr 
id="entity-rbrack"><td> <code title="">rbrack;</code> </td> <td> U+0005D </td> <td> <span class="glyph" title="">]</span> </td> <tr id="entity-rbrke"><td> <code title="">rbrke;</code> </td> <td> U+0298C </td> <td> <span class="glyph" title="">&#10636;</span> </td> <tr id="entity-rbrksld"><td> <code title="">rbrksld;</code> </td> <td> U+0298E </td> <td> <span class="glyph" title="">&#10638;</span> </td> <tr id="entity-rbrkslu"><td> <code title="">rbrkslu;</code> </td> <td> U+02990 </td> <td> <span class="glyph" title="">&#10640;</span> </td> <tr id="entity-rcaron"><td> <code title="">rcaron;</code> </td> <td> U+00159 </td> <td> <span class="glyph" title="">&#345;</span> </td> <tr id="entity-rcedil"><td> <code title="">rcedil;</code> </td> <td> U+00157 </td> <td> <span class="glyph" title="">&#343;</span> </td> <tr id="entity-rceil"><td> <code title="">rceil;</code> </td> <td> U+02309 </td> <td> <span class="glyph" title="">&rceil;</span> </td> <tr id="entity-rcub"><td> <code title="">rcub;</code> </td> <td> U+0007D </td> <td> <span class="glyph" title="">}</span> </td> <tr id="entity-rcy"><td> <code title="">rcy;</code> </td> <td> U+00440 </td> <td> <span class="glyph" title="">&#1088;</span> </td> <tr id="entity-rdca"><td> <code title="">rdca;</code> </td> <td> U+02937 </td> <td> <span class="glyph" title="">&#10551;</span> </td> <tr id="entity-rdldhar"><td> <code title="">rdldhar;</code> </td> <td> U+02969 </td> <td> <span class="glyph" title="">&#10601;</span> </td> <tr id="entity-rdquo"><td> <code title="">rdquo;</code> </td> <td> U+0201D </td> <td> <span class="glyph" title="">&rdquo;</span> </td> <tr id="entity-rdquor"><td> <code title="">rdquor;</code> </td> <td> U+0201D </td> <td> <span class="glyph" title="">&rdquo;</span> </td> <tr id="entity-rdsh"><td> <code title="">rdsh;</code> </td> <td> U+021B3 </td> <td> <span class="glyph" title="">&#8627;</span> </td> <tr id="entity-real"><td> <code title="">real;</code> </td> <td> U+0211C </td> <td> <span 
class="glyph" title="">&real;</span> </td> <tr id="entity-realine"><td> <code title="">realine;</code> </td> <td> U+0211B </td> <td> <span class="glyph" title="">&#8475;</span> </td> <tr id="entity-realpart"><td> <code title="">realpart;</code> </td> <td> U+0211C </td> <td> <span class="glyph" title="">&real;</span> </td> <tr id="entity-reals"><td> <code title="">reals;</code> </td> <td> U+0211D </td> <td> <span class="glyph" title="">&#8477;</span> </td> <tr id="entity-rect"><td> <code title="">rect;</code> </td> <td> U+025AD </td> <td> <span class="glyph" title="">&#9645;</span> </td> <tr id="entity-reg"><td> <code title="">reg;</code> </td> <td> U+000AE </td> <td> <span class="glyph" title="">&reg;</span> </td> <tr id="entity-rfisht"><td> <code title="">rfisht;</code> </td> <td> U+0297D </td> <td> <span class="glyph" title="">&#10621;</span> </td> <tr id="entity-rfloor"><td> <code title="">rfloor;</code> </td> <td> U+0230B </td> <td> <span class="glyph" title="">&rfloor;</span> </td> <tr id="entity-rfr"><td> <code title="">rfr;</code> </td> <td> U+1D52F </td> <td> <span class="glyph" title="">&#120111;</span> </td> <tr id="entity-rhard"><td> <code title="">rhard;</code> </td> <td> U+021C1 </td> <td> <span class="glyph" title="">&#8641;</span> </td> <tr id="entity-rharu"><td> <code title="">rharu;</code> </td> <td> U+021C0 </td> <td> <span class="glyph" title="">&#8640;</span> </td> <tr id="entity-rharul"><td> <code title="">rharul;</code> </td> <td> U+0296C </td> <td> <span class="glyph" title="">&#10604;</span> </td> <tr id="entity-rho"><td> <code title="">rho;</code> </td> <td> U+003C1 </td> <td> <span class="glyph" title="">&rho;</span> </td> <tr id="entity-rhov"><td> <code title="">rhov;</code> </td> <td> U+003F1 </td> <td> <span class="glyph" title="">&#1009;</span> </td> <tr id="entity-rightarrow"><td> <code title="">rightarrow;</code> </td> <td> U+02192 </td> <td> <span class="glyph" title="">&rarr;</span> </td> <tr id="entity-rightarrowtail"><td> <code 
title="">rightarrowtail;</code> </td> <td> U+021A3 </td> <td> <span class="glyph" title="">&#8611;</span> </td> <tr id="entity-rightharpoondown"><td> <code title="">rightharpoondown;</code> </td> <td> U+021C1 </td> <td> <span class="glyph" title="">&#8641;</span> </td> <tr id="entity-rightharpoonup"><td> <code title="">rightharpoonup;</code> </td> <td> U+021C0 </td> <td> <span class="glyph" title="">&#8640;</span> </td> <tr id="entity-rightleftarrows"><td> <code title="">rightleftarrows;</code> </td> <td> U+021C4 </td> <td> <span class="glyph" title="">&#8644;</span> </td> <tr id="entity-rightleftharpoons"><td> <code title="">rightleftharpoons;</code> </td> <td> U+021CC </td> <td> <span class="glyph" title="">&#8652;</span> </td> <tr id="entity-rightrightarrows"><td> <code title="">rightrightarrows;</code> </td> <td> U+021C9 </td> <td> <span class="glyph" title="">&#8649;</span> </td> <tr id="entity-rightsquigarrow"><td> <code title="">rightsquigarrow;</code> </td> <td> U+0219D </td> <td> <span class="glyph" title="">&#8605;</span> </td> <tr id="entity-rightthreetimes"><td> <code title="">rightthreetimes;</code> </td> <td> U+022CC </td> <td> <span class="glyph" title="">&#8908;</span> </td> <tr id="entity-ring"><td> <code title="">ring;</code> </td> <td> U+002DA </td> <td> <span class="glyph" title="">&#730;</span> </td> <tr id="entity-risingdotseq"><td> <code title="">risingdotseq;</code> </td> <td> U+02253 </td> <td> <span class="glyph" title="">&#8787;</span> </td> <tr id="entity-rlarr"><td> <code title="">rlarr;</code> </td> <td> U+021C4 </td> <td> <span class="glyph" title="">&#8644;</span> </td> <tr id="entity-rlhar"><td> <code title="">rlhar;</code> </td> <td> U+021CC </td> <td> <span class="glyph" title="">&#8652;</span> </td> <tr id="entity-rlm"><td> <code title="">rlm;</code> </td> <td> U+0200F </td> <td> <span class="glyph" title="">&rlm;</span> </td> <tr id="entity-rmoust"><td> <code title="">rmoust;</code> </td> <td> U+023B1 </td> <td> <span 
class="glyph" title="">&#9137;</span> </td> <tr id="entity-rmoustache"><td> <code title="">rmoustache;</code> </td> <td> U+023B1 </td> <td> <span class="glyph" title="">&#9137;</span> </td> <tr id="entity-rnmid"><td> <code title="">rnmid;</code> </td> <td> U+02AEE </td> <td> <span class="glyph" title="">&#10990;</span> </td> <tr id="entity-roang"><td> <code title="">roang;</code> </td> <td> U+027ED </td> <td> <span class="glyph" title="">&#10221;</span> </td> <tr id="entity-roarr"><td> <code title="">roarr;</code> </td> <td> U+021FE </td> <td> <span class="glyph" title="">&#8702;</span> </td> <tr id="entity-robrk"><td> <code title="">robrk;</code> </td> <td> U+027E7 </td> <td> <span class="glyph" title="">&#10215;</span> </td> <tr id="entity-ropar"><td> <code title="">ropar;</code> </td> <td> U+02986 </td> <td> <span class="glyph" title="">&#10630;</span> </td> <tr id="entity-ropf"><td> <code title="">ropf;</code> </td> <td> U+1D563 </td> <td> <span class="glyph" title="">&#120163;</span> </td> <tr id="entity-roplus"><td> <code title="">roplus;</code> </td> <td> U+02A2E </td> <td> <span class="glyph" title="">&#10798;</span> </td> <tr id="entity-rotimes"><td> <code title="">rotimes;</code> </td> <td> U+02A35 </td> <td> <span class="glyph" title="">&#10805;</span> </td> <tr id="entity-rpar"><td> <code title="">rpar;</code> </td> <td> U+00029 </td> <td> <span class="glyph" title="">)</span> </td> <tr id="entity-rpargt"><td> <code title="">rpargt;</code> </td> <td> U+02994 </td> <td> <span class="glyph" title="">&#10644;</span> </td> <tr id="entity-rppolint"><td> <code title="">rppolint;</code> </td> <td> U+02A12 </td> <td> <span class="glyph" title="">&#10770;</span> </td> <tr id="entity-rrarr"><td> <code title="">rrarr;</code> </td> <td> U+021C9 </td> <td> <span class="glyph" title="">&#8649;</span> </td> <tr id="entity-rsaquo"><td> <code title="">rsaquo;</code> </td> <td> U+0203A </td> <td> <span class="glyph" title="">&rsaquo;</span> </td> <tr 
id="entity-rscr"><td> <code title="">rscr;</code> </td> <td> U+1D4C7 </td> <td> <span class="glyph" title="">&#120007;</span> </td> <tr id="entity-rsh"><td> <code title="">rsh;</code> </td> <td> U+021B1 </td> <td> <span class="glyph" title="">&#8625;</span> </td> <tr id="entity-rsqb"><td> <code title="">rsqb;</code> </td> <td> U+0005D </td> <td> <span class="glyph" title="">]</span> </td> <tr id="entity-rsquo"><td> <code title="">rsquo;</code> </td> <td> U+02019 </td> <td> <span class="glyph" title="">&rsquo;</span> </td> <tr id="entity-rsquor"><td> <code title="">rsquor;</code> </td> <td> U+02019 </td> <td> <span class="glyph" title="">&rsquo;</span> </td> <tr id="entity-rthree"><td> <code title="">rthree;</code> </td> <td> U+022CC </td> <td> <span class="glyph" title="">&#8908;</span> </td> <tr id="entity-rtimes"><td> <code title="">rtimes;</code> </td> <td> U+022CA </td> <td> <span class="glyph" title="">&#8906;</span> </td> <tr id="entity-rtri"><td> <code title="">rtri;</code> </td> <td> U+025B9 </td> <td> <span class="glyph" title="">&#9657;</span> </td> <tr id="entity-rtrie"><td> <code title="">rtrie;</code> </td> <td> U+022B5 </td> <td> <span class="glyph" title="">&#8885;</span> </td> <tr id="entity-rtrif"><td> <code title="">rtrif;</code> </td> <td> U+025B8 </td> <td> <span class="glyph" title="">&#9656;</span> </td> <tr id="entity-rtriltri"><td> <code title="">rtriltri;</code> </td> <td> U+029CE </td> <td> <span class="glyph" title="">&#10702;</span> </td> <tr id="entity-ruluhar"><td> <code title="">ruluhar;</code> </td> <td> U+02968 </td> <td> <span class="glyph" title="">&#10600;</span> </td> <tr id="entity-rx"><td> <code title="">rx;</code> </td> <td> U+0211E </td> <td> <span class="glyph" title="">&#8478;</span> </td> <tr id="entity-sacute"><td> <code title="">sacute;</code> </td> <td> U+0015B </td> <td> <span class="glyph" title="">&#347;</span> </td> <tr id="entity-sbquo"><td> <code title="">sbquo;</code> </td> <td> U+0201A </td> <td> <span 
class="glyph" title="">&sbquo;</span> </td> <tr id="entity-sc"><td> <code title="">sc;</code> </td> <td> U+0227B </td> <td> <span class="glyph" title="">&#8827;</span> </td> <tr id="entity-scE"><td> <code title="">scE;</code> </td> <td> U+02AB4 </td> <td> <span class="glyph" title="">&#10932;</span> </td> <tr id="entity-scap"><td> <code title="">scap;</code> </td> <td> U+02AB8 </td> <td> <span class="glyph" title="">&#10936;</span> </td> <tr id="entity-scaron"><td> <code title="">scaron;</code> </td> <td> U+00161 </td> <td> <span class="glyph" title="">&scaron;</span> </td> <tr id="entity-sccue"><td> <code title="">sccue;</code> </td> <td> U+0227D </td> <td> <span class="glyph" title="">&#8829;</span> </td> <tr id="entity-sce"><td> <code title="">sce;</code> </td> <td> U+02AB0 </td> <td> <span class="glyph" title="">&#10928;</span> </td> <tr id="entity-scedil"><td> <code title="">scedil;</code> </td> <td> U+0015F </td> <td> <span class="glyph" title="">&#351;</span> </td> <tr id="entity-scirc"><td> <code title="">scirc;</code> </td> <td> U+0015D </td> <td> <span class="glyph" title="">&#349;</span> </td> <tr id="entity-scnE"><td> <code title="">scnE;</code> </td> <td> U+02AB6 </td> <td> <span class="glyph" title="">&#10934;</span> </td> <tr id="entity-scnap"><td> <code title="">scnap;</code> </td> <td> U+02ABA </td> <td> <span class="glyph" title="">&#10938;</span> </td> <tr id="entity-scnsim"><td> <code title="">scnsim;</code> </td> <td> U+022E9 </td> <td> <span class="glyph" title="">&#8937;</span> </td> <tr id="entity-scpolint"><td> <code title="">scpolint;</code> </td> <td> U+02A13 </td> <td> <span class="glyph" title="">&#10771;</span> </td> <tr id="entity-scsim"><td> <code title="">scsim;</code> </td> <td> U+0227F </td> <td> <span class="glyph" title="">&#8831;</span> </td> <tr id="entity-scy"><td> <code title="">scy;</code> </td> <td> U+00441 </td> <td> <span class="glyph" title="">&#1089;</span> </td> <tr id="entity-sdot"><td> <code title="">sdot;</code> 
</td> <td> U+022C5 </td> <td> <span class="glyph" title="">&sdot;</span> </td> <tr id="entity-sdotb"><td> <code title="">sdotb;</code> </td> <td> U+022A1 </td> <td> <span class="glyph" title="">&#8865;</span> </td> <tr id="entity-sdote"><td> <code title="">sdote;</code> </td> <td> U+02A66 </td> <td> <span class="glyph" title="">&#10854;</span> </td> <tr id="entity-seArr"><td> <code title="">seArr;</code> </td> <td> U+021D8 </td> <td> <span class="glyph" title="">&#8664;</span> </td> <tr id="entity-searhk"><td> <code title="">searhk;</code> </td> <td> U+02925 </td> <td> <span class="glyph" title="">&#10533;</span> </td> <tr id="entity-searr"><td> <code title="">searr;</code> </td> <td> U+02198 </td> <td> <span class="glyph" title="">&#8600;</span> </td> <tr id="entity-searrow"><td> <code title="">searrow;</code> </td> <td> U+02198 </td> <td> <span class="glyph" title="">&#8600;</span> </td> <tr id="entity-sect"><td> <code title="">sect;</code> </td> <td> U+000A7 </td> <td> <span class="glyph" title="">&sect;</span> </td> <tr id="entity-semi"><td> <code title="">semi;</code> </td> <td> U+0003B </td> <td> <span class="glyph" title="">;</span> </td> <tr id="entity-seswar"><td> <code title="">seswar;</code> </td> <td> U+02929 </td> <td> <span class="glyph" title="">&#10537;</span> </td> <tr id="entity-setminus"><td> <code title="">setminus;</code> </td> <td> U+02216 </td> <td> <span class="glyph" title="">&#8726;</span> </td> <tr id="entity-setmn"><td> <code title="">setmn;</code> </td> <td> U+02216 </td> <td> <span class="glyph" title="">&#8726;</span> </td> <tr id="entity-sext"><td> <code title="">sext;</code> </td> <td> U+02736 </td> <td> <span class="glyph" title="">&#10038;</span> </td> <tr id="entity-sfr"><td> <code title="">sfr;</code> </td> <td> U+1D530 </td> <td> <span class="glyph" title="">&#120112;</span> </td> <tr id="entity-sfrown"><td> <code title="">sfrown;</code> </td> <td> U+02322 </td> <td> <span class="glyph" title="">&#8994;</span> </td> <tr 
id="entity-sharp"><td> <code title="">sharp;</code> </td> <td> U+0266F </td> <td> <span class="glyph" title="">&#9839;</span> </td> <tr id="entity-shchcy"><td> <code title="">shchcy;</code> </td> <td> U+00449 </td> <td> <span class="glyph" title="">&#1097;</span> </td> <tr id="entity-shcy"><td> <code title="">shcy;</code> </td> <td> U+00448 </td> <td> <span class="glyph" title="">&#1096;</span> </td> <tr id="entity-shortmid"><td> <code title="">shortmid;</code> </td> <td> U+02223 </td> <td> <span class="glyph" title="">&#8739;</span> </td> <tr id="entity-shortparallel"><td> <code title="">shortparallel;</code> </td> <td> U+02225 </td> <td> <span class="glyph" title="">&#8741;</span> </td> <tr id="entity-shy"><td> <code title="">shy;</code> </td> <td> U+000AD </td> <td> <span class="glyph" title="">&shy;</span> </td> <tr id="entity-sigma"><td> <code title="">sigma;</code> </td> <td> U+003C3 </td> <td> <span class="glyph" title="">&sigma;</span> </td> <tr id="entity-sigmaf"><td> <code title="">sigmaf;</code> </td> <td> U+003C2 </td> <td> <span class="glyph" title="">&sigmaf;</span> </td> <tr id="entity-sigmav"><td> <code title="">sigmav;</code> </td> <td> U+003C2 </td> <td> <span class="glyph" title="">&sigmaf;</span> </td> <tr id="entity-sim"><td> <code title="">sim;</code> </td> <td> U+0223C </td> <td> <span class="glyph" title="">&sim;</span> </td> <tr id="entity-simdot"><td> <code title="">simdot;</code> </td> <td> U+02A6A </td> <td> <span class="glyph" title="">&#10858;</span> </td> <tr id="entity-sime"><td> <code title="">sime;</code> </td> <td> U+02243 </td> <td> <span class="glyph" title="">&#8771;</span> </td> <tr id="entity-simeq"><td> <code title="">simeq;</code> </td> <td> U+02243 </td> <td> <span class="glyph" title="">&#8771;</span> </td> <tr id="entity-simg"><td> <code title="">simg;</code> </td> <td> U+02A9E </td> <td> <span class="glyph" title="">&#10910;</span> </td> <tr id="entity-simgE"><td> <code title="">simgE;</code> </td> <td> U+02AA0 </td> 
<td> <span class="glyph" title="">&#10912;</span> </td> <tr id="entity-siml"><td> <code title="">siml;</code> </td> <td> U+02A9D </td> <td> <span class="glyph" title="">&#10909;</span> </td> <tr id="entity-simlE"><td> <code title="">simlE;</code> </td> <td> U+02A9F </td> <td> <span class="glyph" title="">&#10911;</span> </td> <tr id="entity-simne"><td> <code title="">simne;</code> </td> <td> U+02246 </td> <td> <span class="glyph" title="">&#8774;</span> </td> <tr id="entity-simplus"><td> <code title="">simplus;</code> </td> <td> U+02A24 </td> <td> <span class="glyph" title="">&#10788;</span> </td> <tr id="entity-simrarr"><td> <code title="">simrarr;</code> </td> <td> U+02972 </td> <td> <span class="glyph" title="">&#10610;</span> </td> <tr id="entity-slarr"><td> <code title="">slarr;</code> </td> <td> U+02190 </td> <td> <span class="glyph" title="">&larr;</span> </td> <tr id="entity-smallsetminus"><td> <code title="">smallsetminus;</code> </td> <td> U+02216 </td> <td> <span class="glyph" title="">&#8726;</span> </td> <tr id="entity-smashp"><td> <code title="">smashp;</code> </td> <td> U+02A33 </td> <td> <span class="glyph" title="">&#10803;</span> </td> <tr id="entity-smeparsl"><td> <code title="">smeparsl;</code> </td> <td> U+029E4 </td> <td> <span class="glyph" title="">&#10724;</span> </td> <tr id="entity-smid"><td> <code title="">smid;</code> </td> <td> U+02223 </td> <td> <span class="glyph" title="">&#8739;</span> </td> <tr id="entity-smile"><td> <code title="">smile;</code> </td> <td> U+02323 </td> <td> <span class="glyph" title="">&#8995;</span> </td> <tr id="entity-smt"><td> <code title="">smt;</code> </td> <td> U+02AAA </td> <td> <span class="glyph" title="">&#10922;</span> </td> <tr id="entity-smte"><td> <code title="">smte;</code> </td> <td> U+02AAC </td> <td> <span class="glyph" title="">&#10924;</span> </td> <tr id="entity-smtes"><td> <code title="">smtes;</code> </td> <td> U+02AAC U+0FE00 </td> <td> <span class="glyph compound" 
title="">&#10924;&#65024;</span> </td> <tr id="entity-softcy"><td> <code title="">softcy;</code> </td> <td> U+0044C </td> <td> <span class="glyph" title="">&#1100;</span> </td> <tr id="entity-sol"><td> <code title="">sol;</code> </td> <td> U+0002F </td> <td> <span class="glyph" title="">/</span> </td> <tr id="entity-solb"><td> <code title="">solb;</code> </td> <td> U+029C4 </td> <td> <span class="glyph" title="">&#10692;</span> </td> <tr id="entity-solbar"><td> <code title="">solbar;</code> </td> <td> U+0233F </td> <td> <span class="glyph" title="">&#9023;</span> </td> <tr id="entity-sopf"><td> <code title="">sopf;</code> </td> <td> U+1D564 </td> <td> <span class="glyph" title="">&#120164;</span> </td> <tr id="entity-spades"><td> <code title="">spades;</code> </td> <td> U+02660 </td> <td> <span class="glyph" title="">&spades;</span> </td> <tr id="entity-spadesuit"><td> <code title="">spadesuit;</code> </td> <td> U+02660 </td> <td> <span class="glyph" title="">&spades;</span> </td> <tr id="entity-spar"><td> <code title="">spar;</code> </td> <td> U+02225 </td> <td> <span class="glyph" title="">&#8741;</span> </td> <tr id="entity-sqcap"><td> <code title="">sqcap;</code> </td> <td> U+02293 </td> <td> <span class="glyph" title="">&#8851;</span> </td> <tr id="entity-sqcaps"><td> <code title="">sqcaps;</code> </td> <td> U+02293 U+0FE00 </td> <td> <span class="glyph compound" title="">&#8851;&#65024;</span> </td> <tr id="entity-sqcup"><td> <code title="">sqcup;</code> </td> <td> U+02294 </td> <td> <span class="glyph" title="">&#8852;</span> </td> <tr id="entity-sqcups"><td> <code title="">sqcups;</code> </td> <td> U+02294 U+0FE00 </td> <td> <span class="glyph compound" title="">&#8852;&#65024;</span> </td> <tr id="entity-sqsub"><td> <code title="">sqsub;</code> </td> <td> U+0228F </td> <td> <span class="glyph" title="">&#8847;</span> </td> <tr id="entity-sqsube"><td> <code title="">sqsube;</code> </td> <td> U+02291 </td> <td> <span class="glyph" title="">&#8849;</span> 
</td> <tr id="entity-sqsubset"><td> <code title="">sqsubset;</code> </td> <td> U+0228F </td> <td> <span class="glyph" title="">&#8847;</span> </td> <tr id="entity-sqsubseteq"><td> <code title="">sqsubseteq;</code> </td> <td> U+02291 </td> <td> <span class="glyph" title="">&#8849;</span> </td> <tr id="entity-sqsup"><td> <code title="">sqsup;</code> </td> <td> U+02290 </td> <td> <span class="glyph" title="">&#8848;</span> </td> <tr id="entity-sqsupe"><td> <code title="">sqsupe;</code> </td> <td> U+02292 </td> <td> <span class="glyph" title="">&#8850;</span> </td> <tr id="entity-sqsupset"><td> <code title="">sqsupset;</code> </td> <td> U+02290 </td> <td> <span class="glyph" title="">&#8848;</span> </td> <tr id="entity-sqsupseteq"><td> <code title="">sqsupseteq;</code> </td> <td> U+02292 </td> <td> <span class="glyph" title="">&#8850;</span> </td> <tr id="entity-squ"><td> <code title="">squ;</code> </td> <td> U+025A1 </td> <td> <span class="glyph" title="">&#9633;</span> </td> <tr id="entity-square"><td> <code title="">square;</code> </td> <td> U+025A1 </td> <td> <span class="glyph" title="">&#9633;</span> </td> <tr id="entity-squarf"><td> <code title="">squarf;</code> </td> <td> U+025AA </td> <td> <span class="glyph" title="">&#9642;</span> </td> <tr id="entity-squf"><td> <code title="">squf;</code> </td> <td> U+025AA </td> <td> <span class="glyph" title="">&#9642;</span> </td> <tr id="entity-srarr"><td> <code title="">srarr;</code> </td> <td> U+02192 </td> <td> <span class="glyph" title="">&rarr;</span> </td> <tr id="entity-sscr"><td> <code title="">sscr;</code> </td> <td> U+1D4C8 </td> <td> <span class="glyph" title="">&#120008;</span> </td> <tr id="entity-ssetmn"><td> <code title="">ssetmn;</code> </td> <td> U+02216 </td> <td> <span class="glyph" title="">&#8726;</span> </td> <tr id="entity-ssmile"><td> <code title="">ssmile;</code> </td> <td> U+02323 </td> <td> <span class="glyph" title="">&#8995;</span> </td> <tr id="entity-sstarf"><td> <code 
title="">sstarf;</code> </td> <td> U+022C6 </td> <td> <span class="glyph" title="">&#8902;</span> </td> <tr id="entity-star"><td> <code title="">star;</code> </td> <td> U+02606 </td> <td> <span class="glyph" title="">&#9734;</span> </td> <tr id="entity-starf"><td> <code title="">starf;</code> </td> <td> U+02605 </td> <td> <span class="glyph" title="">&#9733;</span> </td> <tr id="entity-straightepsilon"><td> <code title="">straightepsilon;</code> </td> <td> U+003F5 </td> <td> <span class="glyph" title="">&#1013;</span> </td> <tr id="entity-straightphi"><td> <code title="">straightphi;</code> </td> <td> U+003D5 </td> <td> <span class="glyph" title="">&#981;</span> </td> <tr id="entity-strns"><td> <code title="">strns;</code> </td> <td> U+000AF </td> <td> <span class="glyph" title="">&macr;</span> </td> <tr id="entity-sub"><td> <code title="">sub;</code> </td> <td> U+02282 </td> <td> <span class="glyph" title="">&sub;</span> </td> <tr id="entity-subE"><td> <code title="">subE;</code> </td> <td> U+02AC5 </td> <td> <span class="glyph" title="">&#10949;</span> </td> <tr id="entity-subdot"><td> <code title="">subdot;</code> </td> <td> U+02ABD </td> <td> <span class="glyph" title="">&#10941;</span> </td> <tr id="entity-sube"><td> <code title="">sube;</code> </td> <td> U+02286 </td> <td> <span class="glyph" title="">&sube;</span> </td> <tr id="entity-subedot"><td> <code title="">subedot;</code> </td> <td> U+02AC3 </td> <td> <span class="glyph" title="">&#10947;</span> </td> <tr id="entity-submult"><td> <code title="">submult;</code> </td> <td> U+02AC1 </td> <td> <span class="glyph" title="">&#10945;</span> </td> <tr id="entity-subnE"><td> <code title="">subnE;</code> </td> <td> U+02ACB </td> <td> <span class="glyph" title="">&#10955;</span> </td> <tr id="entity-subne"><td> <code title="">subne;</code> </td> <td> U+0228A </td> <td> <span class="glyph" title="">&#8842;</span> </td> <tr id="entity-subplus"><td> <code title="">subplus;</code> </td> <td> U+02ABF </td> <td> <span 
class="glyph" title="">&#10943;</span> </td> <tr id="entity-subrarr"><td> <code title="">subrarr;</code> </td> <td> U+02979 </td> <td> <span class="glyph" title="">&#10617;</span> </td> <tr id="entity-subset"><td> <code title="">subset;</code> </td> <td> U+02282 </td> <td> <span class="glyph" title="">&sub;</span> </td> <tr id="entity-subseteq"><td> <code title="">subseteq;</code> </td> <td> U+02286 </td> <td> <span class="glyph" title="">&sube;</span> </td> <tr id="entity-subseteqq"><td> <code title="">subseteqq;</code> </td> <td> U+02AC5 </td> <td> <span class="glyph" title="">&#10949;</span> </td> <tr id="entity-subsetneq"><td> <code title="">subsetneq;</code> </td> <td> U+0228A </td> <td> <span class="glyph" title="">&#8842;</span> </td> <tr id="entity-subsetneqq"><td> <code title="">subsetneqq;</code> </td> <td> U+02ACB </td> <td> <span class="glyph" title="">&#10955;</span> </td> <tr id="entity-subsim"><td> <code title="">subsim;</code> </td> <td> U+02AC7 </td> <td> <span class="glyph" title="">&#10951;</span> </td> <tr id="entity-subsub"><td> <code title="">subsub;</code> </td> <td> U+02AD5 </td> <td> <span class="glyph" title="">&#10965;</span> </td> <tr id="entity-subsup"><td> <code title="">subsup;</code> </td> <td> U+02AD3 </td> <td> <span class="glyph" title="">&#10963;</span> </td> <tr id="entity-succ"><td> <code title="">succ;</code> </td> <td> U+0227B </td> <td> <span class="glyph" title="">&#8827;</span> </td> <tr id="entity-succapprox"><td> <code title="">succapprox;</code> </td> <td> U+02AB8 </td> <td> <span class="glyph" title="">&#10936;</span> </td> <tr id="entity-succcurlyeq"><td> <code title="">succcurlyeq;</code> </td> <td> U+0227D </td> <td> <span class="glyph" title="">&#8829;</span> </td> <tr id="entity-succeq"><td> <code title="">succeq;</code> </td> <td> U+02AB0 </td> <td> <span class="glyph" title="">&#10928;</span> </td> <tr id="entity-succnapprox"><td> <code title="">succnapprox;</code> </td> <td> U+02ABA </td> <td> <span 
class="glyph" title="">&#10938;</span> </td> <tr id="entity-succneqq"><td> <code title="">succneqq;</code> </td> <td> U+02AB6 </td> <td> <span class="glyph" title="">&#10934;</span> </td> <tr id="entity-succnsim"><td> <code title="">succnsim;</code> </td> <td> U+022E9 </td> <td> <span class="glyph" title="">&#8937;</span> </td> <tr id="entity-succsim"><td> <code title="">succsim;</code> </td> <td> U+0227F </td> <td> <span class="glyph" title="">&#8831;</span> </td> <tr id="entity-sum"><td> <code title="">sum;</code> </td> <td> U+02211 </td> <td> <span class="glyph" title="">&sum;</span> </td> <tr id="entity-sung"><td> <code title="">sung;</code> </td> <td> U+0266A </td> <td> <span class="glyph" title="">&#9834;</span> </td> <tr id="entity-sup"><td> <code title="">sup;</code> </td> <td> U+02283 </td> <td> <span class="glyph" title="">&sup;</span> </td> <tr id="entity-sup1"><td> <code title="">sup1;</code> </td> <td> U+000B9 </td> <td> <span class="glyph" title="">&sup1;</span> </td> <tr id="entity-sup2"><td> <code title="">sup2;</code> </td> <td> U+000B2 </td> <td> <span class="glyph" title="">&sup2;</span> </td> <tr id="entity-sup3"><td> <code title="">sup3;</code> </td> <td> U+000B3 </td> <td> <span class="glyph" title="">&sup3;</span> </td> <tr id="entity-supE"><td> <code title="">supE;</code> </td> <td> U+02AC6 </td> <td> <span class="glyph" title="">&#10950;</span> </td> <tr id="entity-supdot"><td> <code title="">supdot;</code> </td> <td> U+02ABE </td> <td> <span class="glyph" title="">&#10942;</span> </td> <tr id="entity-supdsub"><td> <code title="">supdsub;</code> </td> <td> U+02AD8 </td> <td> <span class="glyph" title="">&#10968;</span> </td> <tr id="entity-supe"><td> <code title="">supe;</code> </td> <td> U+02287 </td> <td> <span class="glyph" title="">&supe;</span> </td> <tr id="entity-supedot"><td> <code title="">supedot;</code> </td> <td> U+02AC4 </td> <td> <span class="glyph" title="">&#10948;</span> </td> <tr id="entity-suphsol"><td> <code 
title="">suphsol;</code> </td> <td> U+027C9 </td> <td> <span class="glyph" title="">&#10185;</span> </td> <tr id="entity-suphsub"><td> <code title="">suphsub;</code> </td> <td> U+02AD7 </td> <td> <span class="glyph" title="">&#10967;</span> </td> <tr id="entity-suplarr"><td> <code title="">suplarr;</code> </td> <td> U+0297B </td> <td> <span class="glyph" title="">&#10619;</span> </td> <tr id="entity-supmult"><td> <code title="">supmult;</code> </td> <td> U+02AC2 </td> <td> <span class="glyph" title="">&#10946;</span> </td> <tr id="entity-supnE"><td> <code title="">supnE;</code> </td> <td> U+02ACC </td> <td> <span class="glyph" title="">&#10956;</span> </td> <tr id="entity-supne"><td> <code title="">supne;</code> </td> <td> U+0228B </td> <td> <span class="glyph" title="">&#8843;</span> </td> <tr id="entity-supplus"><td> <code title="">supplus;</code> </td> <td> U+02AC0 </td> <td> <span class="glyph" title="">&#10944;</span> </td> <tr id="entity-supset"><td> <code title="">supset;</code> </td> <td> U+02283 </td> <td> <span class="glyph" title="">&sup;</span> </td> <tr id="entity-supseteq"><td> <code title="">supseteq;</code> </td> <td> U+02287 </td> <td> <span class="glyph" title="">&supe;</span> </td> <tr id="entity-supseteqq"><td> <code title="">supseteqq;</code> </td> <td> U+02AC6 </td> <td> <span class="glyph" title="">&#10950;</span> </td> <tr id="entity-supsetneq"><td> <code title="">supsetneq;</code> </td> <td> U+0228B </td> <td> <span class="glyph" title="">&#8843;</span> </td> <tr id="entity-supsetneqq"><td> <code title="">supsetneqq;</code> </td> <td> U+02ACC </td> <td> <span class="glyph" title="">&#10956;</span> </td> <tr id="entity-supsim"><td> <code title="">supsim;</code> </td> <td> U+02AC8 </td> <td> <span class="glyph" title="">&#10952;</span> </td> <tr id="entity-supsub"><td> <code title="">supsub;</code> </td> <td> U+02AD4 </td> <td> <span class="glyph" title="">&#10964;</span> </td> <tr id="entity-supsup"><td> <code title="">supsup;</code> </td> 
<td> U+02AD6 </td> <td> <span class="glyph" title="">&#10966;</span> </td> <tr id="entity-swArr"><td> <code title="">swArr;</code> </td> <td> U+021D9 </td> <td> <span class="glyph" title="">&#8665;</span> </td> <tr id="entity-swarhk"><td> <code title="">swarhk;</code> </td> <td> U+02926 </td> <td> <span class="glyph" title="">&#10534;</span> </td> <tr id="entity-swarr"><td> <code title="">swarr;</code> </td> <td> U+02199 </td> <td> <span class="glyph" title="">&#8601;</span> </td> <tr id="entity-swarrow"><td> <code title="">swarrow;</code> </td> <td> U+02199 </td> <td> <span class="glyph" title="">&#8601;</span> </td> <tr id="entity-swnwar"><td> <code title="">swnwar;</code> </td> <td> U+0292A </td> <td> <span class="glyph" title="">&#10538;</span> </td> <tr id="entity-szlig"><td> <code title="">szlig;</code> </td> <td> U+000DF </td> <td> <span class="glyph" title="">&szlig;</span> </td> <tr id="entity-target"><td> <code title="">target;</code> </td> <td> U+02316 </td> <td> <span class="glyph" title="">&#8982;</span> </td> <tr id="entity-tau"><td> <code title="">tau;</code> </td> <td> U+003C4 </td> <td> <span class="glyph" title="">&tau;</span> </td> <tr id="entity-tbrk"><td> <code title="">tbrk;</code> </td> <td> U+023B4 </td> <td> <span class="glyph" title="">&#9140;</span> </td> <tr id="entity-tcaron"><td> <code title="">tcaron;</code> </td> <td> U+00165 </td> <td> <span class="glyph" title="">&#357;</span> </td> <tr id="entity-tcedil"><td> <code title="">tcedil;</code> </td> <td> U+00163 </td> <td> <span class="glyph" title="">&#355;</span> </td> <tr id="entity-tcy"><td> <code title="">tcy;</code> </td> <td> U+00442 </td> <td> <span class="glyph" title="">&#1090;</span> </td> <tr id="entity-tdot"><td> <code title="">tdot;</code> </td> <td> U+020DB </td> <td> <span class="glyph composition" title="">&#9676;&#8411;</span> </td> <tr id="entity-telrec"><td> <code title="">telrec;</code> </td> <td> U+02315 </td> <td> <span class="glyph" title="">&#8981;</span> </td> 
<tr id="entity-tfr"><td> <code title="">tfr;</code> </td> <td> U+1D531 </td> <td> <span class="glyph" title="">&#120113;</span> </td> <tr id="entity-there4"><td> <code title="">there4;</code> </td> <td> U+02234 </td> <td> <span class="glyph" title="">&there4;</span> </td> <tr id="entity-therefore"><td> <code title="">therefore;</code> </td> <td> U+02234 </td> <td> <span class="glyph" title="">&there4;</span> </td> <tr id="entity-theta"><td> <code title="">theta;</code> </td> <td> U+003B8 </td> <td> <span class="glyph" title="">&theta;</span> </td> <tr id="entity-thetasym"><td> <code title="">thetasym;</code> </td> <td> U+003D1 </td> <td> <span class="glyph" title="">&thetasym;</span> </td> <tr id="entity-thetav"><td> <code title="">thetav;</code> </td> <td> U+003D1 </td> <td> <span class="glyph" title="">&thetasym;</span> </td> <tr id="entity-thickapprox"><td> <code title="">thickapprox;</code> </td> <td> U+02248 </td> <td> <span class="glyph" title="">&asymp;</span> </td> <tr id="entity-thicksim"><td> <code title="">thicksim;</code> </td> <td> U+0223C </td> <td> <span class="glyph" title="">&sim;</span> </td> <tr id="entity-thinsp"><td> <code title="">thinsp;</code> </td> <td> U+02009 </td> <td> <span class="glyph" title="">&thinsp;</span> </td> <tr id="entity-thkap"><td> <code title="">thkap;</code> </td> <td> U+02248 </td> <td> <span class="glyph" title="">&asymp;</span> </td> <tr id="entity-thksim"><td> <code title="">thksim;</code> </td> <td> U+0223C </td> <td> <span class="glyph" title="">&sim;</span> </td> <tr id="entity-thorn"><td> <code title="">thorn;</code> </td> <td> U+000FE </td> <td> <span class="glyph" title="">&thorn;</span> </td> <tr id="entity-tilde"><td> <code title="">tilde;</code> </td> <td> U+002DC </td> <td> <span class="glyph" title="">&tilde;</span> </td> <tr id="entity-times"><td> <code title="">times;</code> </td> <td> U+000D7 </td> <td> <span class="glyph" title="">&times;</span> </td> <tr id="entity-timesb"><td> <code 
title="">timesb;</code> </td> <td> U+022A0 </td> <td> <span class="glyph" title="">&#8864;</span> </td> <tr id="entity-timesbar"><td> <code title="">timesbar;</code> </td> <td> U+02A31 </td> <td> <span class="glyph" title="">&#10801;</span> </td> <tr id="entity-timesd"><td> <code title="">timesd;</code> </td> <td> U+02A30 </td> <td> <span class="glyph" title="">&#10800;</span> </td> <tr id="entity-tint"><td> <code title="">tint;</code> </td> <td> U+0222D </td> <td> <span class="glyph" title="">&#8749;</span> </td> <tr id="entity-toea"><td> <code title="">toea;</code> </td> <td> U+02928 </td> <td> <span class="glyph" title="">&#10536;</span> </td> <tr id="entity-top"><td> <code title="">top;</code> </td> <td> U+022A4 </td> <td> <span class="glyph" title="">&#8868;</span> </td> <tr id="entity-topbot"><td> <code title="">topbot;</code> </td> <td> U+02336 </td> <td> <span class="glyph" title="">&#9014;</span> </td> <tr id="entity-topcir"><td> <code title="">topcir;</code> </td> <td> U+02AF1 </td> <td> <span class="glyph" title="">&#10993;</span> </td> <tr id="entity-topf"><td> <code title="">topf;</code> </td> <td> U+1D565 </td> <td> <span class="glyph" title="">&#120165;</span> </td> <tr id="entity-topfork"><td> <code title="">topfork;</code> </td> <td> U+02ADA </td> <td> <span class="glyph" title="">&#10970;</span> </td> <tr id="entity-tosa"><td> <code title="">tosa;</code> </td> <td> U+02929 </td> <td> <span class="glyph" title="">&#10537;</span> </td> <tr id="entity-tprime"><td> <code title="">tprime;</code> </td> <td> U+02034 </td> <td> <span class="glyph" title="">&#8244;</span> </td> <tr id="entity-trade"><td> <code title="">trade;</code> </td> <td> U+02122 </td> <td> <span class="glyph" title="">&trade;</span> </td> <tr id="entity-triangle"><td> <code title="">triangle;</code> </td> <td> U+025B5 </td> <td> <span class="glyph" title="">&#9653;</span> </td> <tr id="entity-triangledown"><td> <code title="">triangledown;</code> </td> <td> U+025BF </td> <td> <span 
class="glyph" title="">&#9663;</span> </td> <tr id="entity-triangleleft"><td> <code title="">triangleleft;</code> </td> <td> U+025C3 </td> <td> <span class="glyph" title="">&#9667;</span> </td> <tr id="entity-trianglelefteq"><td> <code title="">trianglelefteq;</code> </td> <td> U+022B4 </td> <td> <span class="glyph" title="">&#8884;</span> </td> <tr id="entity-triangleq"><td> <code title="">triangleq;</code> </td> <td> U+0225C </td> <td> <span class="glyph" title="">&#8796;</span> </td> <tr id="entity-triangleright"><td> <code title="">triangleright;</code> </td> <td> U+025B9 </td> <td> <span class="glyph" title="">&#9657;</span> </td> <tr id="entity-trianglerighteq"><td> <code title="">trianglerighteq;</code> </td> <td> U+022B5 </td> <td> <span class="glyph" title="">&#8885;</span> </td> <tr id="entity-tridot"><td> <code title="">tridot;</code> </td> <td> U+025EC </td> <td> <span class="glyph" title="">&#9708;</span> </td> <tr id="entity-trie"><td> <code title="">trie;</code> </td> <td> U+0225C </td> <td> <span class="glyph" title="">&#8796;</span> </td> <tr id="entity-triminus"><td> <code title="">triminus;</code> </td> <td> U+02A3A </td> <td> <span class="glyph" title="">&#10810;</span> </td> <tr id="entity-triplus"><td> <code title="">triplus;</code> </td> <td> U+02A39 </td> <td> <span class="glyph" title="">&#10809;</span> </td> <tr id="entity-trisb"><td> <code title="">trisb;</code> </td> <td> U+029CD </td> <td> <span class="glyph" title="">&#10701;</span> </td> <tr id="entity-tritime"><td> <code title="">tritime;</code> </td> <td> U+02A3B </td> <td> <span class="glyph" title="">&#10811;</span> </td> <tr id="entity-trpezium"><td> <code title="">trpezium;</code> </td> <td> U+023E2 </td> <td> <span class="glyph" title="">&#9186;</span> </td> <tr id="entity-tscr"><td> <code title="">tscr;</code> </td> <td> U+1D4C9 </td> <td> <span class="glyph" title="">&#120009;</span> </td> <tr id="entity-tscy"><td> <code title="">tscy;</code> </td> <td> U+00446 </td> <td> 
<span class="glyph" title="">&#1094;</span> </td> <tr id="entity-tshcy"><td> <code title="">tshcy;</code> </td> <td> U+0045B </td> <td> <span class="glyph" title="">&#1115;</span> </td> <tr id="entity-tstrok"><td> <code title="">tstrok;</code> </td> <td> U+00167 </td> <td> <span class="glyph" title="">&#359;</span> </td> <tr id="entity-twixt"><td> <code title="">twixt;</code> </td> <td> U+0226C </td> <td> <span class="glyph" title="">&#8812;</span> </td> <tr id="entity-twoheadleftarrow"><td> <code title="">twoheadleftarrow;</code> </td> <td> U+0219E </td> <td> <span class="glyph" title="">&#8606;</span> </td> <tr id="entity-twoheadrightarrow"><td> <code title="">twoheadrightarrow;</code> </td> <td> U+021A0 </td> <td> <span class="glyph" title="">&#8608;</span> </td> <tr id="entity-uArr"><td> <code title="">uArr;</code> </td> <td> U+021D1 </td> <td> <span class="glyph" title="">&uArr;</span> </td> <tr id="entity-uHar"><td> <code title="">uHar;</code> </td> <td> U+02963 </td> <td> <span class="glyph" title="">&#10595;</span> </td> <tr id="entity-uacute"><td> <code title="">uacute;</code> </td> <td> U+000FA </td> <td> <span class="glyph" title="">&uacute;</span> </td> <tr id="entity-uarr"><td> <code title="">uarr;</code> </td> <td> U+02191 </td> <td> <span class="glyph" title="">&uarr;</span> </td> <tr id="entity-ubrcy"><td> <code title="">ubrcy;</code> </td> <td> U+0045E </td> <td> <span class="glyph" title="">&#1118;</span> </td> <tr id="entity-ubreve"><td> <code title="">ubreve;</code> </td> <td> U+0016D </td> <td> <span class="glyph" title="">&#365;</span> </td> <tr id="entity-ucirc"><td> <code title="">ucirc;</code> </td> <td> U+000FB </td> <td> <span class="glyph" title="">&ucirc;</span> </td> <tr id="entity-ucy"><td> <code title="">ucy;</code> </td> <td> U+00443 </td> <td> <span class="glyph" title="">&#1091;</span> </td> <tr id="entity-udarr"><td> <code title="">udarr;</code> </td> <td> U+021C5 </td> <td> <span class="glyph" title="">&#8645;</span> </td> <tr 
id="entity-udblac"><td> <code title="">udblac;</code> </td> <td> U+00171 </td> <td> <span class="glyph" title="">&#369;</span> </td> <tr id="entity-udhar"><td> <code title="">udhar;</code> </td> <td> U+0296E </td> <td> <span class="glyph" title="">&#10606;</span> </td> <tr id="entity-ufisht"><td> <code title="">ufisht;</code> </td> <td> U+0297E </td> <td> <span class="glyph" title="">&#10622;</span> </td> <tr id="entity-ufr"><td> <code title="">ufr;</code> </td> <td> U+1D532 </td> <td> <span class="glyph" title="">&#120114;</span> </td> <tr id="entity-ugrave"><td> <code title="">ugrave;</code> </td> <td> U+000F9 </td> <td> <span class="glyph" title="">&ugrave;</span> </td> <tr id="entity-uharl"><td> <code title="">uharl;</code> </td> <td> U+021BF </td> <td> <span class="glyph" title="">&#8639;</span> </td> <tr id="entity-uharr"><td> <code title="">uharr;</code> </td> <td> U+021BE </td> <td> <span class="glyph" title="">&#8638;</span> </td> <tr id="entity-uhblk"><td> <code title="">uhblk;</code> </td> <td> U+02580 </td> <td> <span class="glyph" title="">&#9600;</span> </td> <tr id="entity-ulcorn"><td> <code title="">ulcorn;</code> </td> <td> U+0231C </td> <td> <span class="glyph" title="">&#8988;</span> </td> <tr id="entity-ulcorner"><td> <code title="">ulcorner;</code> </td> <td> U+0231C </td> <td> <span class="glyph" title="">&#8988;</span> </td> <tr id="entity-ulcrop"><td> <code title="">ulcrop;</code> </td> <td> U+0230F </td> <td> <span class="glyph" title="">&#8975;</span> </td> <tr id="entity-ultri"><td> <code title="">ultri;</code> </td> <td> U+025F8 </td> <td> <span class="glyph" title="">&#9720;</span> </td> <tr id="entity-umacr"><td> <code title="">umacr;</code> </td> <td> U+0016B </td> <td> <span class="glyph" title="">&#363;</span> </td> <tr id="entity-uml"><td> <code title="">uml;</code> </td> <td> U+000A8 </td> <td> <span class="glyph" title="">&uml;</span> </td> <tr id="entity-uogon"><td> <code title="">uogon;</code> </td> <td> U+00173 </td> <td> 
<span class="glyph" title="">&#371;</span> </td> <tr id="entity-uopf"><td> <code title="">uopf;</code> </td> <td> U+1D566 </td> <td> <span class="glyph" title="">&#120166;</span> </td> <tr id="entity-uparrow"><td> <code title="">uparrow;</code> </td> <td> U+02191 </td> <td> <span class="glyph" title="">&uarr;</span> </td> <tr id="entity-updownarrow"><td> <code title="">updownarrow;</code> </td> <td> U+02195 </td> <td> <span class="glyph" title="">&#8597;</span> </td> <tr id="entity-upharpoonleft"><td> <code title="">upharpoonleft;</code> </td> <td> U+021BF </td> <td> <span class="glyph" title="">&#8639;</span> </td> <tr id="entity-upharpoonright"><td> <code title="">upharpoonright;</code> </td> <td> U+021BE </td> <td> <span class="glyph" title="">&#8638;</span> </td> <tr id="entity-uplus"><td> <code title="">uplus;</code> </td> <td> U+0228E </td> <td> <span class="glyph" title="">&#8846;</span> </td> <tr id="entity-upsi"><td> <code title="">upsi;</code> </td> <td> U+003C5 </td> <td> <span class="glyph" title="">&upsilon;</span> </td> <tr id="entity-upsih"><td> <code title="">upsih;</code> </td> <td> U+003D2 </td> <td> <span class="glyph" title="">&upsih;</span> </td> <tr id="entity-upsilon"><td> <code title="">upsilon;</code> </td> <td> U+003C5 </td> <td> <span class="glyph" title="">&upsilon;</span> </td> <tr id="entity-upuparrows"><td> <code title="">upuparrows;</code> </td> <td> U+021C8 </td> <td> <span class="glyph" title="">&#8648;</span> </td> <tr id="entity-urcorn"><td> <code title="">urcorn;</code> </td> <td> U+0231D </td> <td> <span class="glyph" title="">&#8989;</span> </td> <tr id="entity-urcorner"><td> <code title="">urcorner;</code> </td> <td> U+0231D </td> <td> <span class="glyph" title="">&#8989;</span> </td> <tr id="entity-urcrop"><td> <code title="">urcrop;</code> </td> <td> U+0230E </td> <td> <span class="glyph" title="">&#8974;</span> </td> <tr id="entity-uring"><td> <code title="">uring;</code> </td> <td> U+0016F </td> <td> <span class="glyph" 
title="">&#367;</span> </td> <tr id="entity-urtri"><td> <code title="">urtri;</code> </td> <td> U+025F9 </td> <td> <span class="glyph" title="">&#9721;</span> </td> <tr id="entity-uscr"><td> <code title="">uscr;</code> </td> <td> U+1D4CA </td> <td> <span class="glyph" title="">&#120010;</span> </td> <tr id="entity-utdot"><td> <code title="">utdot;</code> </td> <td> U+022F0 </td> <td> <span class="glyph" title="">&#8944;</span> </td> <tr id="entity-utilde"><td> <code title="">utilde;</code> </td> <td> U+00169 </td> <td> <span class="glyph" title="">&#361;</span> </td> <tr id="entity-utri"><td> <code title="">utri;</code> </td> <td> U+025B5 </td> <td> <span class="glyph" title="">&#9653;</span> </td> <tr id="entity-utrif"><td> <code title="">utrif;</code> </td> <td> U+025B4 </td> <td> <span class="glyph" title="">&#9652;</span> </td> <tr id="entity-uuarr"><td> <code title="">uuarr;</code> </td> <td> U+021C8 </td> <td> <span class="glyph" title="">&#8648;</span> </td> <tr id="entity-uuml"><td> <code title="">uuml;</code> </td> <td> U+000FC </td> <td> <span class="glyph" title="">&uuml;</span> </td> <tr id="entity-uwangle"><td> <code title="">uwangle;</code> </td> <td> U+029A7 </td> <td> <span class="glyph" title="">&#10663;</span> </td> <tr id="entity-vArr"><td> <code title="">vArr;</code> </td> <td> U+021D5 </td> <td> <span class="glyph" title="">&#8661;</span> </td> <tr id="entity-vBar"><td> <code title="">vBar;</code> </td> <td> U+02AE8 </td> <td> <span class="glyph" title="">&#10984;</span> </td> <tr id="entity-vBarv"><td> <code title="">vBarv;</code> </td> <td> U+02AE9 </td> <td> <span class="glyph" title="">&#10985;</span> </td> <tr id="entity-vDash"><td> <code title="">vDash;</code> </td> <td> U+022A8 </td> <td> <span class="glyph" title="">&#8872;</span> </td> <tr id="entity-vangrt"><td> <code title="">vangrt;</code> </td> <td> U+0299C </td> <td> <span class="glyph" title="">&#10652;</span> </td> <tr id="entity-varepsilon"><td> <code 
title="">varepsilon;</code> </td> <td> U+003F5 </td> <td> <span class="glyph" title="">&#1013;</span> </td> <tr id="entity-varkappa"><td> <code title="">varkappa;</code> </td> <td> U+003F0 </td> <td> <span class="glyph" title="">&#1008;</span> </td> <tr id="entity-varnothing"><td> <code title="">varnothing;</code> </td> <td> U+02205 </td> <td> <span class="glyph" title="">&empty;</span> </td> <tr id="entity-varphi"><td> <code title="">varphi;</code> </td> <td> U+003D5 </td> <td> <span class="glyph" title="">&#981;</span> </td> <tr id="entity-varpi"><td> <code title="">varpi;</code> </td> <td> U+003D6 </td> <td> <span class="glyph" title="">&piv;</span> </td> <tr id="entity-varpropto"><td> <code title="">varpropto;</code> </td> <td> U+0221D </td> <td> <span class="glyph" title="">&prop;</span> </td> <tr id="entity-varr"><td> <code title="">varr;</code> </td> <td> U+02195 </td> <td> <span class="glyph" title="">&#8597;</span> </td> <tr id="entity-varrho"><td> <code title="">varrho;</code> </td> <td> U+003F1 </td> <td> <span class="glyph" title="">&#1009;</span> </td> <tr id="entity-varsigma"><td> <code title="">varsigma;</code> </td> <td> U+003C2 </td> <td> <span class="glyph" title="">&sigmaf;</span> </td> <tr id="entity-varsubsetneq"><td> <code title="">varsubsetneq;</code> </td> <td> U+0228A U+0FE00 </td> <td> <span class="glyph compound" title="">&#8842;&#65024;</span> </td> <tr id="entity-varsubsetneqq"><td> <code title="">varsubsetneqq;</code> </td> <td> U+02ACB U+0FE00 </td> <td> <span class="glyph compound" title="">&#10955;&#65024;</span> </td> <tr id="entity-varsupsetneq"><td> <code title="">varsupsetneq;</code> </td> <td> U+0228B U+0FE00 </td> <td> <span class="glyph compound" title="">&#8843;&#65024;</span> </td> <tr id="entity-varsupsetneqq"><td> <code title="">varsupsetneqq;</code> </td> <td> U+02ACC U+0FE00 </td> <td> <span class="glyph compound" title="">&#10956;&#65024;</span> </td> <tr id="entity-vartheta"><td> <code title="">vartheta;</code> </td> 
<td> U+003D1 </td> <td> <span class="glyph" title="">&thetasym;</span> </td> <tr id="entity-vartriangleleft"><td> <code title="">vartriangleleft;</code> </td> <td> U+022B2 </td> <td> <span class="glyph" title="">&#8882;</span> </td> <tr id="entity-vartriangleright"><td> <code title="">vartriangleright;</code> </td> <td> U+022B3 </td> <td> <span class="glyph" title="">&#8883;</span> </td> <tr id="entity-vcy"><td> <code title="">vcy;</code> </td> <td> U+00432 </td> <td> <span class="glyph" title="">&#1074;</span> </td> <tr id="entity-vdash"><td> <code title="">vdash;</code> </td> <td> U+022A2 </td> <td> <span class="glyph" title="">&#8866;</span> </td> <tr id="entity-vee"><td> <code title="">vee;</code> </td> <td> U+02228 </td> <td> <span class="glyph" title="">&or;</span> </td> <tr id="entity-veebar"><td> <code title="">veebar;</code> </td> <td> U+022BB </td> <td> <span class="glyph" title="">&#8891;</span> </td> <tr id="entity-veeeq"><td> <code title="">veeeq;</code> </td> <td> U+0225A </td> <td> <span class="glyph" title="">&#8794;</span> </td> <tr id="entity-vellip"><td> <code title="">vellip;</code> </td> <td> U+022EE </td> <td> <span class="glyph" title="">&#8942;</span> </td> <tr id="entity-verbar"><td> <code title="">verbar;</code> </td> <td> U+0007C </td> <td> <span class="glyph" title="">|</span> </td> <tr id="entity-vert"><td> <code title="">vert;</code> </td> <td> U+0007C </td> <td> <span class="glyph" title="">|</span> </td> <tr id="entity-vfr"><td> <code title="">vfr;</code> </td> <td> U+1D533 </td> <td> <span class="glyph" title="">&#120115;</span> </td> <tr id="entity-vltri"><td> <code title="">vltri;</code> </td> <td> U+022B2 </td> <td> <span class="glyph" title="">&#8882;</span> </td> <tr id="entity-vnsub"><td> <code title="">vnsub;</code> </td> <td> U+02282 U+020D2 </td> <td> <span class="glyph compound" title="">&sub;&#8402;</span> </td> <tr id="entity-vnsup"><td> <code title="">vnsup;</code> </td> <td> U+02283 U+020D2 </td> <td> <span 
class="glyph compound" title="">&sup;&#8402;</span> </td> <tr id="entity-vopf"><td> <code title="">vopf;</code> </td> <td> U+1D567 </td> <td> <span class="glyph" title="">&#120167;</span> </td> <tr id="entity-vprop"><td> <code title="">vprop;</code> </td> <td> U+0221D </td> <td> <span class="glyph" title="">&prop;</span> </td> <tr id="entity-vrtri"><td> <code title="">vrtri;</code> </td> <td> U+022B3 </td> <td> <span class="glyph" title="">&#8883;</span> </td> <tr id="entity-vscr"><td> <code title="">vscr;</code> </td> <td> U+1D4CB </td> <td> <span class="glyph" title="">&#120011;</span> </td> <tr id="entity-vsubnE"><td> <code title="">vsubnE;</code> </td> <td> U+02ACB U+0FE00 </td> <td> <span class="glyph compound" title="">&#10955;&#65024;</span> </td> <tr id="entity-vsubne"><td> <code title="">vsubne;</code> </td> <td> U+0228A U+0FE00 </td> <td> <span class="glyph compound" title="">&#8842;&#65024;</span> </td> <tr id="entity-vsupnE"><td> <code title="">vsupnE;</code> </td> <td> U+02ACC U+0FE00 </td> <td> <span class="glyph compound" title="">&#10956;&#65024;</span> </td> <tr id="entity-vsupne"><td> <code title="">vsupne;</code> </td> <td> U+0228B U+0FE00 </td> <td> <span class="glyph compound" title="">&#8843;&#65024;</span> </td> <tr id="entity-vzigzag"><td> <code title="">vzigzag;</code> </td> <td> U+0299A </td> <td> <span class="glyph" title="">&#10650;</span> </td> <tr id="entity-wcirc"><td> <code title="">wcirc;</code> </td> <td> U+00175 </td> <td> <span class="glyph" title="">&#373;</span> </td> <tr id="entity-wedbar"><td> <code title="">wedbar;</code> </td> <td> U+02A5F </td> <td> <span class="glyph" title="">&#10847;</span> </td> <tr id="entity-wedge"><td> <code title="">wedge;</code> </td> <td> U+02227 </td> <td> <span class="glyph" title="">&and;</span> </td> <tr id="entity-wedgeq"><td> <code title="">wedgeq;</code> </td> <td> U+02259 </td> <td> <span class="glyph" title="">&#8793;</span> </td> <tr id="entity-weierp"><td> <code title="">weierp;</code> 
</td> <td> U+02118 </td> <td> <span class="glyph" title="">&weierp;</span> </td> <tr id="entity-wfr"><td> <code title="">wfr;</code> </td> <td> U+1D534 </td> <td> <span class="glyph" title="">&#120116;</span> </td> <tr id="entity-wopf"><td> <code title="">wopf;</code> </td> <td> U+1D568 </td> <td> <span class="glyph" title="">&#120168;</span> </td> <tr id="entity-wp"><td> <code title="">wp;</code> </td> <td> U+02118 </td> <td> <span class="glyph" title="">&weierp;</span> </td> <tr id="entity-wr"><td> <code title="">wr;</code> </td> <td> U+02240 </td> <td> <span class="glyph" title="">&#8768;</span> </td> <tr id="entity-wreath"><td> <code title="">wreath;</code> </td> <td> U+02240 </td> <td> <span class="glyph" title="">&#8768;</span> </td> <tr id="entity-wscr"><td> <code title="">wscr;</code> </td> <td> U+1D4CC </td> <td> <span class="glyph" title="">&#120012;</span> </td> <tr id="entity-xcap"><td> <code title="">xcap;</code> </td> <td> U+022C2 </td> <td> <span class="glyph" title="">&#8898;</span> </td> <tr id="entity-xcirc"><td> <code title="">xcirc;</code> </td> <td> U+025EF </td> <td> <span class="glyph" title="">&#9711;</span> </td> <tr id="entity-xcup"><td> <code title="">xcup;</code> </td> <td> U+022C3 </td> <td> <span class="glyph" title="">&#8899;</span> </td> <tr id="entity-xdtri"><td> <code title="">xdtri;</code> </td> <td> U+025BD </td> <td> <span class="glyph" title="">&#9661;</span> </td> <tr id="entity-xfr"><td> <code title="">xfr;</code> </td> <td> U+1D535 </td> <td> <span class="glyph" title="">&#120117;</span> </td> <tr id="entity-xhArr"><td> <code title="">xhArr;</code> </td> <td> U+027FA </td> <td> <span class="glyph" title="">&#10234;</span> </td> <tr id="entity-xharr"><td> <code title="">xharr;</code> </td> <td> U+027F7 </td> <td> <span class="glyph" title="">&#10231;</span> </td> <tr id="entity-xi"><td> <code title="">xi;</code> </td> <td> U+003BE </td> <td> <span class="glyph" title="">&xi;</span> </td> <tr id="entity-xlArr"><td> <code 
title="">xlArr;</code> </td> <td> U+027F8 </td> <td> <span class="glyph" title="">&#10232;</span> </td> <tr id="entity-xlarr"><td> <code title="">xlarr;</code> </td> <td> U+027F5 </td> <td> <span class="glyph" title="">&#10229;</span> </td> <tr id="entity-xmap"><td> <code title="">xmap;</code> </td> <td> U+027FC </td> <td> <span class="glyph" title="">&#10236;</span> </td> <tr id="entity-xnis"><td> <code title="">xnis;</code> </td> <td> U+022FB </td> <td> <span class="glyph" title="">&#8955;</span> </td> <tr id="entity-xodot"><td> <code title="">xodot;</code> </td> <td> U+02A00 </td> <td> <span class="glyph" title="">&#10752;</span> </td> <tr id="entity-xopf"><td> <code title="">xopf;</code> </td> <td> U+1D569 </td> <td> <span class="glyph" title="">&#120169;</span> </td> <tr id="entity-xoplus"><td> <code title="">xoplus;</code> </td> <td> U+02A01 </td> <td> <span class="glyph" title="">&#10753;</span> </td> <tr id="entity-xotime"><td> <code title="">xotime;</code> </td> <td> U+02A02 </td> <td> <span class="glyph" title="">&#10754;</span> </td> <tr id="entity-xrArr"><td> <code title="">xrArr;</code> </td> <td> U+027F9 </td> <td> <span class="glyph" title="">&#10233;</span> </td> <tr id="entity-xrarr"><td> <code title="">xrarr;</code> </td> <td> U+027F6 </td> <td> <span class="glyph" title="">&#10230;</span> </td> <tr id="entity-xscr"><td> <code title="">xscr;</code> </td> <td> U+1D4CD </td> <td> <span class="glyph" title="">&#120013;</span> </td> <tr id="entity-xsqcup"><td> <code title="">xsqcup;</code> </td> <td> U+02A06 </td> <td> <span class="glyph" title="">&#10758;</span> </td> <tr id="entity-xuplus"><td> <code title="">xuplus;</code> </td> <td> U+02A04 </td> <td> <span class="glyph" title="">&#10756;</span> </td> <tr id="entity-xutri"><td> <code title="">xutri;</code> </td> <td> U+025B3 </td> <td> <span class="glyph" title="">&#9651;</span> </td> <tr id="entity-xvee"><td> <code title="">xvee;</code> </td> <td> U+022C1 </td> <td> <span class="glyph" 
title="">&#8897;</span> </td> <tr id="entity-xwedge"><td> <code title="">xwedge;</code> </td> <td> U+022C0 </td> <td> <span class="glyph" title="">&#8896;</span> </td> <tr id="entity-yacute"><td> <code title="">yacute;</code> </td> <td> U+000FD </td> <td> <span class="glyph" title="">&yacute;</span> </td> <tr id="entity-yacy"><td> <code title="">yacy;</code> </td> <td> U+0044F </td> <td> <span class="glyph" title="">&#1103;</span> </td> <tr id="entity-ycirc"><td> <code title="">ycirc;</code> </td> <td> U+00177 </td> <td> <span class="glyph" title="">&#375;</span> </td> <tr id="entity-ycy"><td> <code title="">ycy;</code> </td> <td> U+0044B </td> <td> <span class="glyph" title="">&#1099;</span> </td> <tr id="entity-yen"><td> <code title="">yen;</code> </td> <td> U+000A5 </td> <td> <span class="glyph" title="">&yen;</span> </td> <tr id="entity-yfr"><td> <code title="">yfr;</code> </td> <td> U+1D536 </td> <td> <span class="glyph" title="">&#120118;</span> </td> <tr id="entity-yicy"><td> <code title="">yicy;</code> </td> <td> U+00457 </td> <td> <span class="glyph" title="">&#1111;</span> </td> <tr id="entity-yopf"><td> <code title="">yopf;</code> </td> <td> U+1D56A </td> <td> <span class="glyph" title="">&#120170;</span> </td> <tr id="entity-yscr"><td> <code title="">yscr;</code> </td> <td> U+1D4CE </td> <td> <span class="glyph" title="">&#120014;</span> </td> <tr id="entity-yucy"><td> <code title="">yucy;</code> </td> <td> U+0044E </td> <td> <span class="glyph" title="">&#1102;</span> </td> <tr id="entity-yuml"><td> <code title="">yuml;</code> </td> <td> U+000FF </td> <td> <span class="glyph" title="">&yuml;</span> </td> <tr id="entity-zacute"><td> <code title="">zacute;</code> </td> <td> U+0017A </td> <td> <span class="glyph" title="">&#378;</span> </td> <tr id="entity-zcaron"><td> <code title="">zcaron;</code> </td> <td> U+0017E </td> <td> <span class="glyph" title="">&#382;</span> </td> <tr id="entity-zcy"><td> <code title="">zcy;</code> </td> <td> U+00437 </td> 
<td> <span class="glyph" title="">&#1079;</span> </td> <tr id="entity-zdot"><td> <code title="">zdot;</code> </td> <td> U+0017C </td> <td> <span class="glyph" title="">&#380;</span> </td> <tr id="entity-zeetrf"><td> <code title="">zeetrf;</code> </td> <td> U+02128 </td> <td> <span class="glyph" title="">&#8488;</span> </td> <tr id="entity-zeta"><td> <code title="">zeta;</code> </td> <td> U+003B6 </td> <td> <span class="glyph" title="">&zeta;</span> </td> <tr id="entity-zfr"><td> <code title="">zfr;</code> </td> <td> U+1D537 </td> <td> <span class="glyph" title="">&#120119;</span> </td> <tr id="entity-zhcy"><td> <code title="">zhcy;</code> </td> <td> U+00436 </td> <td> <span class="glyph" title="">&#1078;</span> </td> <tr id="entity-zigrarr"><td> <code title="">zigrarr;</code> </td> <td> U+021DD </td> <td> <span class="glyph" title="">&#8669;</span> </td> <tr id="entity-zopf"><td> <code title="">zopf;</code> </td> <td> U+1D56B </td> <td> <span class="glyph" title="">&#120171;</span> </td> <tr id="entity-zscr"><td> <code title="">zscr;</code> </td> <td> U+1D4CF </td> <td> <span class="glyph" title="">&#120015;</span> </td> <tr id="entity-zwj"><td> <code title="">zwj;</code> </td> <td> U+0200D </td> <td> <span class="glyph" title="">&zwj;</span> </td> <tr id="entity-zwnj"><td> <code title="">zwnj;</code> </td> <td> U+0200C </td> <td> <span class="glyph" title="">&zwnj;</span> </td> <tr class="impl"><td> <code title="">AElig</code> </td> <td> U+000C6 </td> <td> <span title="">&AElig;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">AMP</code> </td> <td> U+00026 </td> <td> <span title="">&amp;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Aacute</code> </td> <td> U+000C1 </td> <td> <span title="">&Aacute;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr 
class="impl"><td> <code title="">Acirc</code> </td> <td> U+000C2 </td> <td> <span title="">&Acirc;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Agrave</code> </td> <td> U+000C0 </td> <td> <span title="">&Agrave;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Aring</code> </td> <td> U+000C5 </td> <td> <span title="">&Aring;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Atilde</code> </td> <td> U+000C3 </td> <td> <span title="">&Atilde;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Auml</code> </td> <td> U+000C4 </td> <td> <span title="">&Auml;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">COPY</code> </td> <td> U+000A9 </td> <td> <span title="">&copy;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Ccedil</code> </td> <td> U+000C7 </td> <td> <span title="">&Ccedil;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">ETH</code> </td> <td> U+000D0 </td> <td> <span title="">&ETH;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Eacute</code> </td> <td> U+000C9 </td> <td> <span title="">&Eacute;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Ecirc</code> </td> <td> U+000CA </td> <td> <span title="">&Ecirc;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Egrave</code> </td> <td> U+000C8 
</td> <td> <span title="">&Egrave;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Euml</code> </td> <td> U+000CB </td> <td> <span title="">&Euml;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">GT</code> </td> <td> U+0003E </td> <td> <span title="">&gt;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Iacute</code> </td> <td> U+000CD </td> <td> <span title="">&Iacute;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Icirc</code> </td> <td> U+000CE </td> <td> <span title="">&Icirc;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Igrave</code> </td> <td> U+000CC </td> <td> <span title="">&Igrave;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Iuml</code> </td> <td> U+000CF </td> <td> <span title="">&Iuml;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">LT</code> </td> <td> U+0003C </td> <td> <span title="">&lt;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Ntilde</code> </td> <td> U+000D1 </td> <td> <span title="">&Ntilde;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Oacute</code> </td> <td> U+000D3 </td> <td> <span title="">&Oacute;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Ocirc</code> </td> <td> U+000D4 </td> <td> <span title="">&Ocirc;</span> </td> </tr><!-- (invalid entity 
with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Ograve</code> </td> <td> U+000D2 </td> <td> <span title="">&Ograve;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Oslash</code> </td> <td> U+000D8 </td> <td> <span title="">&Oslash;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Otilde</code> </td> <td> U+000D5 </td> <td> <span title="">&Otilde;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Ouml</code> </td> <td> U+000D6 </td> <td> <span title="">&Ouml;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">QUOT</code> </td> <td> U+00022 </td> <td> <span title="">"</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">REG</code> </td> <td> U+000AE </td> <td> <span title="">&reg;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">THORN</code> </td> <td> U+000DE </td> <td> <span title="">&THORN;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Uacute</code> </td> <td> U+000DA </td> <td> <span title="">&Uacute;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Ucirc</code> </td> <td> U+000DB </td> <td> <span title="">&Ucirc;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Ugrave</code> </td> <td> U+000D9 </td> <td> <span title="">&Ugrave;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> 
<code title="">Uuml</code> </td> <td> U+000DC </td> <td> <span title="">&Uuml;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">Yacute</code> </td> <td> U+000DD </td> <td> <span title="">&Yacute;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">aacute</code> </td> <td> U+000E1 </td> <td> <span title="">&aacute;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">acirc</code> </td> <td> U+000E2 </td> <td> <span title="">&acirc;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">acute</code> </td> <td> U+000B4 </td> <td> <span title="">&acute;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">aelig</code> </td> <td> U+000E6 </td> <td> <span title="">&aelig;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">agrave</code> </td> <td> U+000E0 </td> <td> <span title="">&agrave;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">amp</code> </td> <td> U+00026 </td> <td> <span title="">&amp;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">aring</code> </td> <td> U+000E5 </td> <td> <span title="">&aring;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">atilde</code> </td> <td> U+000E3 </td> <td> <span title="">&atilde;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">auml</code> </td> <td> U+000E4 </td> <td> <span 
title="">&auml;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">brvbar</code> </td> <td> U+000A6 </td> <td> <span title="">&brvbar;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">ccedil</code> </td> <td> U+000E7 </td> <td> <span title="">&ccedil;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">cedil</code> </td> <td> U+000B8 </td> <td> <span title="">&cedil;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">cent</code> </td> <td> U+000A2 </td> <td> <span title="">&cent;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">copy</code> </td> <td> U+000A9 </td> <td> <span title="">&copy;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">curren</code> </td> <td> U+000A4 </td> <td> <span title="">&curren;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">deg</code> </td> <td> U+000B0 </td> <td> <span title="">&deg;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">divide</code> </td> <td> U+000F7 </td> <td> <span title="">&divide;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">eacute</code> </td> <td> U+000E9 </td> <td> <span title="">&eacute;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">ecirc</code> </td> <td> U+000EA </td> <td> <span title="">&ecirc;</span> </td> </tr><!-- (invalid entity with missing 
semicolon for legacy support only) --><tr class="impl"><td> <code title="">egrave</code> </td> <td> U+000E8 </td> <td> <span title="">&egrave;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">eth</code> </td> <td> U+000F0 </td> <td> <span title="">&eth;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">euml</code> </td> <td> U+000EB </td> <td> <span title="">&euml;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">frac12</code> </td> <td> U+000BD </td> <td> <span title="">&frac12;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">frac14</code> </td> <td> U+000BC </td> <td> <span title="">&frac14;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">frac34</code> </td> <td> U+000BE </td> <td> <span title="">&frac34;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">gt</code> </td> <td> U+0003E </td> <td> <span title="">&gt;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">iacute</code> </td> <td> U+000ED </td> <td> <span title="">&iacute;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">icirc</code> </td> <td> U+000EE </td> <td> <span title="">&icirc;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">iexcl</code> </td> <td> U+000A1 </td> <td> <span title="">&iexcl;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code 
title="">igrave</code> </td> <td> U+000EC </td> <td> <span title="">&igrave;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">iquest</code> </td> <td> U+000BF </td> <td> <span title="">&iquest;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">iuml</code> </td> <td> U+000EF </td> <td> <span title="">&iuml;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">laquo</code> </td> <td> U+000AB </td> <td> <span title="">&laquo;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">lt</code> </td> <td> U+0003C </td> <td> <span title="">&lt;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">macr</code> </td> <td> U+000AF </td> <td> <span title="">&macr;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">micro</code> </td> <td> U+000B5 </td> <td> <span title="">&micro;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">middot</code> </td> <td> U+000B7 </td> <td> <span title="">&middot;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">nbsp</code> </td> <td> U+000A0 </td> <td> <span title="">&nbsp;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">not</code> </td> <td> U+000AC </td> <td> <span title="">&not;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">ntilde</code> </td> <td> U+000F1 </td> <td> <span 
title="">&ntilde;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">oacute</code> </td> <td> U+000F3 </td> <td> <span title="">&oacute;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">ocirc</code> </td> <td> U+000F4 </td> <td> <span title="">&ocirc;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">ograve</code> </td> <td> U+000F2 </td> <td> <span title="">&ograve;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">ordf</code> </td> <td> U+000AA </td> <td> <span title="">&ordf;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">ordm</code> </td> <td> U+000BA </td> <td> <span title="">&ordm;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">oslash</code> </td> <td> U+000F8 </td> <td> <span title="">&oslash;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">otilde</code> </td> <td> U+000F5 </td> <td> <span title="">&otilde;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">ouml</code> </td> <td> U+000F6 </td> <td> <span title="">&ouml;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">para</code> </td> <td> U+000B6 </td> <td> <span title="">&para;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">plusmn</code> </td> <td> U+000B1 </td> <td> <span title="">&plusmn;</span> </td> </tr><!-- (invalid entity with 
missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">pound</code> </td> <td> U+000A3 </td> <td> <span title="">&pound;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">quot</code> </td> <td> U+00022 </td> <td> <span title="">"</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">raquo</code> </td> <td> U+000BB </td> <td> <span title="">&raquo;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">reg</code> </td> <td> U+000AE </td> <td> <span title="">&reg;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">sect</code> </td> <td> U+000A7 </td> <td> <span title="">&sect;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">shy</code> </td> <td> U+000AD </td> <td> <span title="">&shy;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">sup1</code> </td> <td> U+000B9 </td> <td> <span title="">&sup1;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">sup2</code> </td> <td> U+000B2 </td> <td> <span title="">&sup2;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">sup3</code> </td> <td> U+000B3 </td> <td> <span title="">&sup3;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">szlig</code> </td> <td> U+000DF </td> <td> <span title="">&szlig;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code 
title="">thorn</code> </td> <td> U+000FE </td> <td> <span title="">&thorn;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">times</code> </td> <td> U+000D7 </td> <td> <span title="">&times;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">uacute</code> </td> <td> U+000FA </td> <td> <span title="">&uacute;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">ucirc</code> </td> <td> U+000FB </td> <td> <span title="">&ucirc;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">ugrave</code> </td> <td> U+000F9 </td> <td> <span title="">&ugrave;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">uml</code> </td> <td> U+000A8 </td> <td> <span title="">&uml;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">uuml</code> </td> <td> U+000FC </td> <td> <span title="">&uuml;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">yacute</code> </td> <td> U+000FD </td> <td> <span title="">&yacute;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">yen</code> </td> <td> U+000A5 </td> <td> <span title="">&yen;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --><tr class="impl"><td> <code title="">yuml</code> </td> <td> U+000FF </td> <td> <span title="">&yuml;</span> </td> </tr><!-- (invalid entity with missing semicolon for legacy support only) --></table><!--
+
diff --git a/parser/html/java/htmlparser/doc/tokenization.txt b/parser/html/java/htmlparser/doc/tokenization.txt
new file mode 100644
index 000000000..21cd7f6e2
--- /dev/null
+++ b/parser/html/java/htmlparser/doc/tokenization.txt
@@ -0,0 +1,1147 @@
+ #8.2 Parsing HTML documents Table of contents 8.2.5 Tree construction
+
+ WHATWG
+
+HTML 5
+
+Draft Recommendation — 7 February 2009
+
+ ← 8.2 Parsing HTML documents – Table of contents – 8.2.5 Tree
+ construction →
+
+ 8.2.4 Tokenization
+
+ Implementations must act as if they used the following state machine to
+ tokenise HTML. The state machine must start in the data state. Most
+ states consume a single character, which may have various side-effects,
+ and either switch the state machine to a new state to reconsume the
+ same character, or switch it to a new state (to consume the next
+ character), or repeat the same state (to consume the next character).
+ Some states have more complicated behavior and can consume several
+ characters before switching to another state.
+
+ The exact behavior of certain states depends on a content model flag
+ that is set after certain tokens are emitted. The flag has several
+ states: PCDATA, RCDATA, CDATA, and PLAINTEXT. Initially it must be in
+ the PCDATA state. In the RCDATA and CDATA states, a further escape flag
+ is used to control the behavior of the tokeniser. It is either true or
+ false, and initially must be set to the false state. The insertion mode
+ and the stack of open elements also affect tokenization.
+
+ The output of the tokenization step is a series of zero or more of the
+ following tokens: DOCTYPE, start tag, end tag, comment, character,
+ end-of-file. DOCTYPE tokens have a name, a public identifier, a system
+ identifier, and a force-quirks flag. When a DOCTYPE token is created,
+ its name, public identifier, and system identifier must be marked as
+ missing (which is a distinct state from the empty string), and the
+ force-quirks flag must be set to off (its other state is on). Start and
+ end tag tokens have a tag name, a self-closing flag, and a list of
+ attributes, each of which has a name and a value. When a start or end
+ tag token is created, its self-closing flag must be unset (its other
+ state is that it be set), and its attributes list must be empty.
+ Comment and character tokens have data.
+
+ When a token is emitted, it must immediately be handled by the tree
+ construction stage. The tree construction stage can affect the state of
+ the content model flag, and can insert additional characters into the
+ stream. (For example, the script element can result in scripts
+ executing and using the dynamic markup insertion APIs to insert
+ characters into the stream being tokenised.)
+
+ When a start tag token is emitted with its self-closing flag set, if
+ the flag is not acknowledged when it is processed by the tree
+ construction stage, that is a parse error.
+
+ When an end tag token is emitted, the content model flag must be
+ switched to the PCDATA state.
+
+ When an end tag token is emitted with attributes, that is a parse
+ error.
+
+ When an end tag token is emitted with its self-closing flag set, that
+ is a parse error.
+
+ Before each step of the tokeniser, the user agent must first check the
+ parser pause flag. If it is true, then the tokeniser must abort the
+ processing of any nested invocations of the tokeniser, yielding control
+ back to the caller. If it is false, then the user agent may then check
+ to see if either one of the scripts in the list of scripts that will
+ execute as soon as possible or the first script in the list of scripts
+ that will execute asynchronously, has completed loading. If one has,
+ then it must be executed and removed from its list.
+
+ The tokeniser state machine consists of the states defined in the
+ following subsections.
+
+ 8.2.4.1 Data state
+
+ Consume the next input character:
+
+ U+0026 AMPERSAND (&)
+ When the content model flag is set to one of the PCDATA or
+ RCDATA states and the escape flag is false: switch to the
+ character reference data state.
+ Otherwise: treat it as per the "anything else" entry below.
+
+ U+002D HYPHEN-MINUS (-)
+ If the content model flag is set to either the RCDATA state or
+ the CDATA state, and the escape flag is false, and there are at
+ least three characters before this one in the input stream, and
+ the last four characters in the input stream, including this
+ one, are U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D
+ HYPHEN-MINUS, and U+002D HYPHEN-MINUS ("<!--"), then set the
+ escape flag to true.
+
+ In any case, emit the input character as a character token. Stay
+ in the data state.
+
+ U+003C LESS-THAN SIGN (<)
+ When the content model flag is set to the PCDATA state: switch
+ to the tag open state.
+ When the content model flag is set to either the RCDATA state or
+ the CDATA state, and the escape flag is false: switch to the tag
+ open state.
+ Otherwise: treat it as per the "anything else" entry below.
+
+ U+003E GREATER-THAN SIGN (>)
+ If the content model flag is set to either the RCDATA state or
+ the CDATA state, and the escape flag is true, and the last three
+ characters in the input stream including this one are U+002D
+ HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN
+ ("-->"), set the escape flag to false.
+
+ In any case, emit the input character as a character token. Stay
+ in the data state.
+
+ EOF
+ Emit an end-of-file token.
+
+ Anything else
+ Emit the input character as a character token. Stay in the data
+ state.
+
+ 8.2.4.2 Character reference data state
+
+ (This cannot happen if the content model flag is set to the CDATA
+ state.)
+
+ Attempt to consume a character reference, with no additional allowed
+ character.
+
+ If nothing is returned, emit a U+0026 AMPERSAND character token.
+
+ Otherwise, emit the character token that was returned.
+
+ Finally, switch to the data state.
+
+ 8.2.4.3 Tag open state
+
+ The behavior of this state depends on the content model flag.
+
+ If the content model flag is set to the RCDATA or CDATA states
+ Consume the next input character. If it is a U+002F SOLIDUS (/)
+ character, switch to the close tag open state. Otherwise, emit a
+ U+003C LESS-THAN SIGN character token and reconsume the current
+ input character in the data state.
+
+ If the content model flag is set to the PCDATA state
+ Consume the next input character:
+
+ U+0021 EXCLAMATION MARK (!)
+ Switch to the markup declaration open state.
+
+ U+002F SOLIDUS (/)
+ Switch to the close tag open state.
+
+ U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL
+ LETTER Z
+ Create a new start tag token, set its tag name to the
+ lowercase version of the input character (add 0x0020 to
+ the character's code point), then switch to the tag name
+ state. (Don't emit the token yet; further details will be
+ filled in before it is emitted.)
+
+ U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z
+ Create a new start tag token, set its tag name to the
+ input character, then switch to the tag name state. (Don't
+ emit the token yet; further details will be filled in
+ before it is emitted.)
+
+ U+003E GREATER-THAN SIGN (>)
+ Parse error. Emit a U+003C LESS-THAN SIGN character token
+ and a U+003E GREATER-THAN SIGN character token. Switch to
+ the data state.
+
+ U+003F QUESTION MARK (?)
+ Parse error. Switch to the bogus comment state.
+
+ Anything else
+ Parse error. Emit a U+003C LESS-THAN SIGN character token
+ and reconsume the current input character in the data
+ state.
+
+ 8.2.4.4 Close tag open state
+
+ If the content model flag is set to the RCDATA or CDATA states but no
+ start tag token has ever been emitted by this instance of the tokeniser
+ (fragment case), or, if the content model flag is set to the RCDATA or
+ CDATA states and the next few characters do not match the tag name of
+ the last start tag token emitted (compared in an ASCII case-insensitive
+ manner), or if they do but they are not immediately followed by one of
+ the following characters:
+ * U+0009 CHARACTER TABULATION
+ * U+000A LINE FEED (LF)
+ * U+000C FORM FEED (FF)
+ * U+0020 SPACE
+ * U+003E GREATER-THAN SIGN (>)
+ * U+002F SOLIDUS (/)
+ * EOF
+
+ ...then emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS
+ character token, and switch to the data state to process the next input
+ character.
+
+ Otherwise, if the content model flag is set to the PCDATA state, or if
+ the next few characters do match that tag name, consume the next input
+ character:
+
+ U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z
+ Create a new end tag token, set its tag name to the lowercase
+ version of the input character (add 0x0020 to the character's
+ code point), then switch to the tag name state. (Don't emit the
+ token yet; further details will be filled in before it is
+ emitted.)
+
+ U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z
+ Create a new end tag token, set its tag name to the input
+ character, then switch to the tag name state. (Don't emit the
+ token yet; further details will be filled in before it is
+ emitted.)
+
+ U+003E GREATER-THAN SIGN (>)
+ Parse error. Switch to the data state.
+
+ EOF
+ Parse error. Emit a U+003C LESS-THAN SIGN character token and a
+ U+002F SOLIDUS character token. Reconsume the EOF character in
+ the data state.
+
+ Anything else
+ Parse error. Switch to the bogus comment state.
+
+ 8.2.4.5 Tag name state
+
+ Consume the next input character:
+
+ U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Switch to the before attribute name state.
+
+ U+002F SOLIDUS (/)
+ Switch to the self-closing start tag state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Emit the current tag token. Switch to the data state.
+
+ U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z
+ Append the lowercase version of the current input character (add
+ 0x0020 to the character's code point) to the current tag token's
+ tag name. Stay in the tag name state.
+
+ EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state.
+
+ Anything else
+ Append the current input character to the current tag token's
+ tag name. Stay in the tag name state.
+
+ 8.2.4.6 Before attribute name state
+
+ Consume the next input character:
+
+ U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the before attribute name state.
+
+ U+002F SOLIDUS (/)
+ Switch to the self-closing start tag state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Emit the current tag token. Switch to the data state.
+
+ U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z
+ Start a new attribute in the current tag token. Set that
+ attribute's name to the lowercase version of the current input
+ character (add 0x0020 to the character's code point), and its
+ value to the empty string. Switch to the attribute name state.
+
+ U+0022 QUOTATION MARK (")
+ U+0027 APOSTROPHE (')
+ U+003D EQUALS SIGN (=)
+ Parse error. Treat it as per the "anything else" entry below.
+
+ EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state.
+
+ Anything else
+ Start a new attribute in the current tag token. Set that
+ attribute's name to the current input character, and its value
+ to the empty string. Switch to the attribute name state.
+
+ 8.2.4.7 Attribute name state
+
+ Consume the next input character:
+
+ U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Switch to the after attribute name state.
+
+ U+002F SOLIDUS (/)
+ Switch to the self-closing start tag state.
+
+ U+003D EQUALS SIGN (=)
+ Switch to the before attribute value state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Emit the current tag token. Switch to the data state.
+
+ U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z
+ Append the lowercase version of the current input character (add
+ 0x0020 to the character's code point) to the current attribute's
+ name. Stay in the attribute name state.
+
+ U+0022 QUOTATION MARK (")
+ U+0027 APOSTROPHE (')
+ Parse error. Treat it as per the "anything else" entry below.
+
+ EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state.
+
+ Anything else
+ Append the current input character to the current attribute's
+ name. Stay in the attribute name state.
+
+ When the user agent leaves the attribute name state (and before
+ emitting the tag token, if appropriate), the complete attribute's name
+ must be compared to the other attributes on the same token; if there is
+ already an attribute on the token with the exact same name, then this
+ is a parse error and the new attribute must be dropped, along with the
+ value that gets associated with it (if any).
+
+ 8.2.4.8 After attribute name state
+
+ Consume the next input character:
+
+ U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the after attribute name state.
+
+ U+002F SOLIDUS (/)
+ Switch to the self-closing start tag state.
+
+ U+003D EQUALS SIGN (=)
+ Switch to the before attribute value state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Emit the current tag token. Switch to the data state.
+
+ U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z
+ Start a new attribute in the current tag token. Set that
+ attribute's name to the lowercase version of the current input
+ character (add 0x0020 to the character's code point), and its
+ value to the empty string. Switch to the attribute name state.
+
+ U+0022 QUOTATION MARK (")
+ U+0027 APOSTROPHE (')
+ Parse error. Treat it as per the "anything else" entry below.
+
+ EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state.
+
+ Anything else
+ Start a new attribute in the current tag token. Set that
+ attribute's name to the current input character, and its value
+ to the empty string. Switch to the attribute name state.
+
+ 8.2.4.9 Before attribute value state
+
+ Consume the next input character:
+
+ U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the before attribute value state.
+
+ U+0022 QUOTATION MARK (")
+ Switch to the attribute value (double-quoted) state.
+
+ U+0026 AMPERSAND (&)
+ Switch to the attribute value (unquoted) state and reconsume
+ this input character.
+
+ U+0027 APOSTROPHE (')
+ Switch to the attribute value (single-quoted) state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Parse error. Emit the current tag token. Switch to the data
+ state.
+
+ U+003D EQUALS SIGN (=)
+ Parse error. Treat it as per the "anything else" entry below.
+
+ EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state.
+
+ Anything else
+ Append the current input character to the current attribute's
+ value. Switch to the attribute value (unquoted) state.
+
+ 8.2.4.10 Attribute value (double-quoted) state
+
+ Consume the next input character:
+
+ U+0022 QUOTATION MARK (")
+ Switch to the after attribute value (quoted) state.
+
+ U+0026 AMPERSAND (&)
+ Switch to the character reference in attribute value state, with
+ the additional allowed character being U+0022 QUOTATION MARK
+ (").
+
+ EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state.
+
+ Anything else
+ Append the current input character to the current attribute's
+ value. Stay in the attribute value (double-quoted) state.
+
+ 8.2.4.11 Attribute value (single-quoted) state
+
+ Consume the next input character:
+
+ U+0027 APOSTROPHE (')
+ Switch to the after attribute value (quoted) state.
+
+ U+0026 AMPERSAND (&)
+ Switch to the character reference in attribute value state, with
+ the additional allowed character being U+0027 APOSTROPHE (').
+
+ EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state.
+
+ Anything else
+ Append the current input character to the current attribute's
+ value. Stay in the attribute value (single-quoted) state.
+
+ 8.2.4.12 Attribute value (unquoted) state
+
+ Consume the next input character:
+
+ U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Switch to the before attribute name state.
+
+ U+0026 AMPERSAND (&)
+ Switch to the character reference in attribute value state, with
+ no additional allowed character.
+
+ U+003E GREATER-THAN SIGN (>)
+ Emit the current tag token. Switch to the data state.
+
+ U+0022 QUOTATION MARK (")
+ U+0027 APOSTROPHE (')
+ U+003D EQUALS SIGN (=)
+ Parse error. Treat it as per the "anything else" entry below.
+
+ EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state.
+
+ Anything else
+ Append the current input character to the current attribute's
+ value. Stay in the attribute value (unquoted) state.
+
+ 8.2.4.13 Character reference in attribute value state
+
+ Attempt to consume a character reference.
+
+ If nothing is returned, append a U+0026 AMPERSAND character to the
+ current attribute's value.
+
+ Otherwise, append the returned character token to the current
+ attribute's value.
+
+ Finally, switch back to the attribute value state that you were in when
+ you were switched into this state.
+
+ 8.2.4.14 After attribute value (quoted) state
+
+ Consume the next input character:
+
+ U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Switch to the before attribute name state.
+
+ U+002F SOLIDUS (/)
+ Switch to the self-closing start tag state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Emit the current tag token. Switch to the data state.
+
+ EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state.
+
+ Anything else
+ Parse error. Reconsume the character in the before attribute
+ name state.
+
+ 8.2.4.15 Self-closing start tag state
+
+ Consume the next input character:
+
+ U+003E GREATER-THAN SIGN (>)
+ Set the self-closing flag of the current tag token. Emit the
+ current tag token. Switch to the data state.
+
+ EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state.
+
+ Anything else
+ Parse error. Reconsume the character in the before attribute
+ name state.
+
+ 8.2.4.16 Bogus comment state
+
+ (This can only happen if the content model flag is set to the PCDATA
+ state.)
+
+ Consume every character up to and including the first U+003E
+ GREATER-THAN SIGN character (>) or the end of the file (EOF), whichever
+ comes first. Emit a comment token whose data is the concatenation of
+ all the characters starting from and including the character that
+ caused the state machine to switch into the bogus comment state, up to
+ and including the character immediately before the last consumed
+ character (i.e. up to the character just before the U+003E or EOF
+ character). (If the comment was started by the end of the file (EOF),
+ the token is empty.)
+
+ Switch to the data state.
+
+ If the end of the file was reached, reconsume the EOF character.
+
+ 8.2.4.17 Markup declaration open state
+
+ (This can only happen if the content model flag is set to the PCDATA
+ state.)
+
+ If the next two characters are both U+002D HYPHEN-MINUS (-) characters,
+ consume those two characters, create a comment token whose data is the
+ empty string, and switch to the comment start state.
+
+ Otherwise, if the next seven characters are an ASCII case-insensitive
+ match for the word "DOCTYPE", then consume those characters and switch
+ to the DOCTYPE state.
+
+ Otherwise, if the insertion mode is "in foreign content" and the
+ current node is not an element in the HTML namespace and the next seven
+ characters are an ASCII case-sensitive match for the string "[CDATA["
+ (the five uppercase letters "CDATA" with a U+005B LEFT SQUARE BRACKET
+ character before and after), then consume those characters and switch
+ to the CDATA section state (which is unrelated to the content model
+ flag's CDATA state).
+
+ Otherwise, this is a parse error. Switch to the bogus comment state.
+ The next character that is consumed, if any, is the first character
+ that will be in the comment.
+
+ 8.2.4.18 Comment start state
+
+ Consume the next input character:
+
+ U+002D HYPHEN-MINUS (-)
+ Switch to the comment start dash state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Parse error. Emit the comment token. Switch to the data state.
+
+ EOF
+ Parse error. Emit the comment token. Reconsume the EOF character
+ in the data state.
+
+ Anything else
+ Append the input character to the comment token's data. Switch
+ to the comment state.
+
+ 8.2.4.19 Comment start dash state
+
+ Consume the next input character:
+
+ U+002D HYPHEN-MINUS (-)
+ Switch to the comment end state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Parse error. Emit the comment token. Switch to the data state.
+
+ EOF
+ Parse error. Emit the comment token. Reconsume the EOF character
+ in the data state.
+
+ Anything else
+ Append a U+002D HYPHEN-MINUS (-) character and the input
+ character to the comment token's data. Switch to the comment
+ state.
+
+ 8.2.4.20 Comment state
+
+ Consume the next input character:
+
+ U+002D HYPHEN-MINUS (-)
+ Switch to the comment end dash state.
+
+ EOF
+ Parse error. Emit the comment token. Reconsume the EOF character
+ in the data state.
+
+ Anything else
+ Append the input character to the comment token's data. Stay in
+ the comment state.
+
+ 8.2.4.21 Comment end dash state
+
+ Consume the next input character:
+
+ U+002D HYPHEN-MINUS (-)
+ Switch to the comment end state.
+
+ EOF
+ Parse error. Emit the comment token. Reconsume the EOF character
+ in the data state.
+
+ Anything else
+ Append a U+002D HYPHEN-MINUS (-) character and the input
+ character to the comment token's data. Switch to the comment
+ state.
+
+ 8.2.4.22 Comment end state
+
+ Consume the next input character:
+
+ U+003E GREATER-THAN SIGN (>)
+ Emit the comment token. Switch to the data state.
+
+ U+002D HYPHEN-MINUS (-)
+ Parse error. Append a U+002D HYPHEN-MINUS (-) character to the
+ comment token's data. Stay in the comment end state.
+
+ EOF
+ Parse error. Emit the comment token. Reconsume the EOF character
+ in the data state.
+
+ Anything else
+ Parse error. Append two U+002D HYPHEN-MINUS (-) characters and
+ the input character to the comment token's data. Switch to the
+ comment state.
+
+ 8.2.4.23 DOCTYPE state
+
+ Consume the next input character:
+
+ U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Switch to the before DOCTYPE name state.
+
+ Anything else
+ Parse error. Reconsume the current character in the before
+ DOCTYPE name state.
+
+ 8.2.4.24 Before DOCTYPE name state
+
+ Consume the next input character:
+
+ U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the before DOCTYPE name state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Parse error. Create a new DOCTYPE token. Set its force-quirks
+ flag to on. Emit the token. Switch to the data state.
+
+ U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z
+ Create a new DOCTYPE token. Set the token's name to the
+ lowercase version of the input character (add 0x0020 to the
+ character's code point). Switch to the DOCTYPE name state.
+
+ EOF
+ Parse error. Create a new DOCTYPE token. Set its force-quirks
+ flag to on. Emit the token. Reconsume the EOF character in the
+ data state.
+
+ Anything else
+ Create a new DOCTYPE token. Set the token's name to the current
+ input character. Switch to the DOCTYPE name state.
+
+ 8.2.4.25 DOCTYPE name state
+
+ Consume the next input character:
+
+ U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Switch to the after DOCTYPE name state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Emit the current DOCTYPE token. Switch to the data state.
+
+ U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z
+ Append the lowercase version of the input character (add 0x0020
+ to the character's code point) to the current DOCTYPE token's
+ name. Stay in the DOCTYPE name state.
+
+ EOF
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Emit that DOCTYPE token. Reconsume the EOF character in the data
+ state.
+
+ Anything else
+ Append the current input character to the current DOCTYPE
+ token's name. Stay in the DOCTYPE name state.
+
+ 8.2.4.26 After DOCTYPE name state
+
+ Consume the next input character:
+
+ U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the after DOCTYPE name state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Emit the current DOCTYPE token. Switch to the data state.
+
+ EOF
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Emit that DOCTYPE token. Reconsume the EOF character in the data
+ state.
+
+ Anything else
+ If the six characters starting from the current input character
+ are an ASCII case-insensitive match for the word "PUBLIC", then
+ consume those characters and switch to the before DOCTYPE public
+ identifier state.
+
+ Otherwise, if the six characters starting from the current input
+ character are an ASCII case-insensitive match for the word
+ "SYSTEM", then consume those characters and switch to the before
+ DOCTYPE system identifier state.
+
+ Otherwise, this is a parse error. Set the DOCTYPE token's
+ force-quirks flag to on. Switch to the bogus DOCTYPE state.
+
+ 8.2.4.27 Before DOCTYPE public identifier state
+
+ Consume the next input character:
+
+ U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the before DOCTYPE public identifier state.
+
+ U+0022 QUOTATION MARK (")
+ Set the DOCTYPE token's public identifier to the empty string
+ (not missing), then switch to the DOCTYPE public identifier
+ (double-quoted) state.
+
+ U+0027 APOSTROPHE (')
+ Set the DOCTYPE token's public identifier to the empty string
+ (not missing), then switch to the DOCTYPE public identifier
+ (single-quoted) state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Emit that DOCTYPE token. Switch to the data state.
+
+ EOF
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Emit that DOCTYPE token. Reconsume the EOF character in the data
+ state.
+
+ Anything else
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Switch to the bogus DOCTYPE state.
+
+ 8.2.4.28 DOCTYPE public identifier (double-quoted) state
+
+ Consume the next input character:
+
+ U+0022 QUOTATION MARK (")
+ Switch to the after DOCTYPE public identifier state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Emit that DOCTYPE token. Switch to the data state.
+
+ EOF
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Emit that DOCTYPE token. Reconsume the EOF character in the data
+ state.
+
+ Anything else
+ Append the current input character to the current DOCTYPE
+ token's public identifier. Stay in the DOCTYPE public identifier
+ (double-quoted) state.
+
+ 8.2.4.29 DOCTYPE public identifier (single-quoted) state
+
+ Consume the next input character:
+
+ U+0027 APOSTROPHE (')
+ Switch to the after DOCTYPE public identifier state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Emit that DOCTYPE token. Switch to the data state.
+
+ EOF
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Emit that DOCTYPE token. Reconsume the EOF character in the data
+ state.
+
+ Anything else
+ Append the current input character to the current DOCTYPE
+ token's public identifier. Stay in the DOCTYPE public identifier
+ (single-quoted) state.
+
+ 8.2.4.30 After DOCTYPE public identifier state
+
+ Consume the next input character:
+
+ U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the after DOCTYPE public identifier state.
+
+ U+0022 QUOTATION MARK (")
+ Set the DOCTYPE token's system identifier to the empty string
+ (not missing), then switch to the DOCTYPE system identifier
+ (double-quoted) state.
+
+ U+0027 APOSTROPHE (')
+ Set the DOCTYPE token's system identifier to the empty string
+ (not missing), then switch to the DOCTYPE system identifier
+ (single-quoted) state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Emit the current DOCTYPE token. Switch to the data state.
+
+ EOF
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Emit that DOCTYPE token. Reconsume the EOF character in the data
+ state.
+
+ Anything else
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Switch to the bogus DOCTYPE state.
+
+ 8.2.4.31 Before DOCTYPE system identifier state
+
+ Consume the next input character:
+
+ U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the before DOCTYPE system identifier state.
+
+ U+0022 QUOTATION MARK (")
+ Set the DOCTYPE token's system identifier to the empty string
+ (not missing), then switch to the DOCTYPE system identifier
+ (double-quoted) state.
+
+ U+0027 APOSTROPHE (')
+ Set the DOCTYPE token's system identifier to the empty string
+ (not missing), then switch to the DOCTYPE system identifier
+ (single-quoted) state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Emit that DOCTYPE token. Switch to the data state.
+
+ EOF
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Emit that DOCTYPE token. Reconsume the EOF character in the data
+ state.
+
+ Anything else
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Switch to the bogus DOCTYPE state.
+
+ 8.2.4.32 DOCTYPE system identifier (double-quoted) state
+
+ Consume the next input character:
+
+ U+0022 QUOTATION MARK (")
+ Switch to the after DOCTYPE system identifier state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Emit that DOCTYPE token. Switch to the data state.
+
+ EOF
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Emit that DOCTYPE token. Reconsume the EOF character in the data
+ state.
+
+ Anything else
+ Append the current input character to the current DOCTYPE
+ token's system identifier. Stay in the DOCTYPE system identifier
+ (double-quoted) state.
+
+ 8.2.4.33 DOCTYPE system identifier (single-quoted) state
+
+ Consume the next input character:
+
+ U+0027 APOSTROPHE (')
+ Switch to the after DOCTYPE system identifier state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Emit that DOCTYPE token. Switch to the data state.
+
+ EOF
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Emit that DOCTYPE token. Reconsume the EOF character in the data
+ state.
+
+ Anything else
+ Append the current input character to the current DOCTYPE
+ token's system identifier. Stay in the DOCTYPE system identifier
+ (single-quoted) state.
+
+ 8.2.4.34 After DOCTYPE system identifier state
+
+ Consume the next input character:
+
+ U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the after DOCTYPE system identifier state.
+
+ U+003E GREATER-THAN SIGN (>)
+ Emit the current DOCTYPE token. Switch to the data state.
+
+ EOF
+ Parse error. Set the DOCTYPE token's force-quirks flag to on.
+ Emit that DOCTYPE token. Reconsume the EOF character in the data
+ state.
+
+ Anything else
+ Parse error. Switch to the bogus DOCTYPE state. (This does not
+ set the DOCTYPE token's force-quirks flag to on.)
+
+ 8.2.4.35 Bogus DOCTYPE state
+
+ Consume the next input character:
+
+ U+003E GREATER-THAN SIGN (>)
+ Emit the DOCTYPE token. Switch to the data state.
+
+ EOF
+ Emit the DOCTYPE token. Reconsume the EOF character in the data
+ state.
+
+ Anything else
+ Stay in the bogus DOCTYPE state.
+
+ 8.2.4.36 CDATA section state
+
+ (This can only happen if the content model flag is set to the PCDATA
+ state, and is unrelated to the content model flag's CDATA state.)
+
+ Consume every character up to the next occurrence of the three
+ character sequence U+005D RIGHT SQUARE BRACKET U+005D RIGHT SQUARE
+ BRACKET U+003E GREATER-THAN SIGN (]]>), or the end of the file (EOF),
+ whichever comes first. Emit a series of character tokens consisting of
+ all the characters consumed except the matching three character
+ sequence at the end (if one was found before the end of the file).
+
+ Switch to the data state.
+
+ If the end of the file was reached, reconsume the EOF character.
+
+ 8.2.4.37 Tokenizing character references
+
+ This section defines how to consume a character reference. This
+ definition is used when parsing character references in text and in
+ attributes.
+
+ The behavior depends on the identity of the next character (the one
+ immediately after the U+0026 AMPERSAND character):
+
+ U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ U+003C LESS-THAN SIGN
+ U+0026 AMPERSAND
+ EOF
+ The additional allowed character, if there is one
+ Not a character reference. No characters are consumed, and
+ nothing is returned. (This is not an error, either.)
+
+ U+0023 NUMBER SIGN (#)
+ Consume the U+0023 NUMBER SIGN.
+
+ The behavior further depends on the character after the U+0023
+ NUMBER SIGN:
+
+ U+0078 LATIN SMALL LETTER X
+ U+0058 LATIN CAPITAL LETTER X
+ Consume the X.
+
+ Follow the steps below, but using the range of characters
+ U+0030 DIGIT ZERO through to U+0039 DIGIT NINE, U+0061
+ LATIN SMALL LETTER A through to U+0066 LATIN SMALL LETTER
+ F, and U+0041 LATIN CAPITAL LETTER A, through to U+0046
+ LATIN CAPITAL LETTER F (in other words, 0-9, A-F, a-f).
+
+ When it comes to interpreting the number, interpret it as
+ a hexadecimal number.
+
+ Anything else
+ Follow the steps below, but using the range of characters
+ U+0030 DIGIT ZERO through to U+0039 DIGIT NINE (i.e. just
+ 0-9).
+
+ When it comes to interpreting the number, interpret it as
+ a decimal number.
+
+ Consume as many characters as match the range of characters
+ given above.
+
+ If no characters match the range, then don't consume any
+ characters (and unconsume the U+0023 NUMBER SIGN character and,
+ if appropriate, the X character). This is a parse error; nothing
+ is returned.
+
+ Otherwise, if the next character is a U+003B SEMICOLON, consume
+ that too. If it isn't, there is a parse error.
+
+ If one or more characters match the range, then take them all
+ and interpret the string of characters as a number (either
+ hexadecimal or decimal as appropriate).
+
+ If that number is one of the numbers in the first column of the
+ following table, then this is a parse error. Find the row with
+ that number in the first column, and return a character token
+ for the Unicode character given in the second column of that
+ row.
+
+ Number Unicode character
+ 0x0D U+000A LINE FEED (LF)
+ 0x80 U+20AC EURO SIGN ('€')
+ 0x81 U+FFFD REPLACEMENT CHARACTER
+ 0x82 U+201A SINGLE LOW-9 QUOTATION MARK ('‚')
+ 0x83 U+0192 LATIN SMALL LETTER F WITH HOOK ('ƒ')
+ 0x84 U+201E DOUBLE LOW-9 QUOTATION MARK ('„')
+ 0x85 U+2026 HORIZONTAL ELLIPSIS ('…')
+ 0x86 U+2020 DAGGER ('†')
+ 0x87 U+2021 DOUBLE DAGGER ('‡')
+ 0x88 U+02C6 MODIFIER LETTER CIRCUMFLEX ACCENT ('ˆ')
+ 0x89 U+2030 PER MILLE SIGN ('‰')
+ 0x8A U+0160 LATIN CAPITAL LETTER S WITH CARON ('Š')
+ 0x8B U+2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK ('‹')
+ 0x8C U+0152 LATIN CAPITAL LIGATURE OE ('Œ')
+ 0x8D U+FFFD REPLACEMENT CHARACTER
+ 0x8E U+017D LATIN CAPITAL LETTER Z WITH CARON ('Ž')
+ 0x8F U+FFFD REPLACEMENT CHARACTER
+ 0x90 U+FFFD REPLACEMENT CHARACTER
+ 0x91 U+2018 LEFT SINGLE QUOTATION MARK ('‘')
+ 0x92 U+2019 RIGHT SINGLE QUOTATION MARK ('’')
+ 0x93 U+201C LEFT DOUBLE QUOTATION MARK ('“')
+ 0x94 U+201D RIGHT DOUBLE QUOTATION MARK ('”')
+ 0x95 U+2022 BULLET ('•')
+ 0x96 U+2013 EN DASH ('–')
+ 0x97 U+2014 EM DASH ('—')
+ 0x98 U+02DC SMALL TILDE ('˜')
+ 0x99 U+2122 TRADE MARK SIGN ('™')
+ 0x9A U+0161 LATIN SMALL LETTER S WITH CARON ('š')
+ 0x9B U+203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK ('›')
+ 0x9C U+0153 LATIN SMALL LIGATURE OE ('œ')
+ 0x9D U+FFFD REPLACEMENT CHARACTER
+ 0x9E U+017E LATIN SMALL LETTER Z WITH CARON ('ž')
+ 0x9F U+0178 LATIN CAPITAL LETTER Y WITH DIAERESIS ('Ÿ')
+
+ Otherwise, if the number is in the range 0x0000 to 0x0008,
+ 0x000E to 0x001F, 0x007F to 0x009F, 0xD800 to 0xDFFF, 0xFDD0 to
+ 0xFDEF, or is one of 0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF,
+ 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE,
+ 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, 0x8FFFF,
+ 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE,
+ 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
+ 0x10FFFE, or 0x10FFFF, or is higher than 0x10FFFF, then this is
+ a parse error; return a character token for the U+FFFD
+ REPLACEMENT CHARACTER character instead.
+
+ Otherwise, return a character token for the Unicode character
+ whose code point is that number.
+
+ Anything else
+ Consume the maximum number of characters possible, with the
+ consumed characters matching one of the identifiers in the first
+ column of the named character references table (in a
+ case-sensitive manner).
+
+ If no match can be made, then this is a parse error. No
+ characters are consumed, and nothing is returned.
+
+ If the last character matched is not a U+003B SEMICOLON (;),
+ there is a parse error.
+
+ If the character reference is being consumed as part of an
+ attribute, and the last character matched is not a U+003B
+ SEMICOLON (;), and the next character is in the range U+0030
+ DIGIT ZERO to U+0039 DIGIT NINE, U+0041 LATIN CAPITAL LETTER A
+ to U+005A LATIN CAPITAL LETTER Z, or U+0061 LATIN SMALL LETTER A
+ to U+007A LATIN SMALL LETTER Z, then, for historical reasons,
+ all the characters that were matched after the U+0026 AMPERSAND
+ (&) must be unconsumed, and nothing is returned.
+
+ Otherwise, return a character token for the character
+ corresponding to the character reference name (as given by the
+ second column of the named character references table).
+
+ If the markup contains I'm &notit; I tell you, the character
+ reference is parsed as "not", as in, I'm ¬it; I tell you. But if
+ the markup was I'm &notin; I tell you, the character reference
+ would be parsed as "notin;", resulting in I'm ∉ I tell you.
diff --git a/parser/html/java/htmlparser/doc/tree-construction.txt b/parser/html/java/htmlparser/doc/tree-construction.txt
new file mode 100644
index 000000000..0febf147a
--- /dev/null
+++ b/parser/html/java/htmlparser/doc/tree-construction.txt
@@ -0,0 +1,2201 @@
+ #8.2.4 Tokenization Table of contents 8.4 Serializing HTML fragments
+
+ WHATWG
+
+HTML 5
+
+Draft Recommendation — 13 January 2009
+
+ ← 8.2.4 Tokenization – Table of contents – 8.4 Serializing HTML
+ fragments →
+
+ 8.2.5 Tree construction
+
+ The input to the tree construction stage is a sequence of tokens from
+ the tokenization stage. The tree construction stage is associated with
+ a DOM Document object when a parser is created. The "output" of this
+ stage consists of dynamically modifying or extending that document's
+ DOM tree.
+
+ This specification does not define when an interactive user agent has
+ to render the Document so that it is available to the user, or when it
+ has to begin accepting user input.
+
+ As each token is emitted from the tokeniser, the user agent must
+ process the token according to the rules given in the section
+ corresponding to the current insertion mode.
+
+ When the steps below require the UA to insert a character into a node,
+ if that node has a child immediately before where the character is to
+ be inserted, and that child is a Text node, and that Text node was the
+ last node that the parser inserted into the document, then the
+ character must be appended to that Text node; otherwise, a new Text
+ node whose data is just that character must be inserted in the
+ appropriate place.
+
+ DOM mutation events must not fire for changes caused by the UA parsing
+ the document. (Conceptually, the parser is not mutating the DOM, it is
+ constructing it.) This includes the parsing of any content inserted
+ using document.write() and document.writeln() calls. [DOM3EVENTS]
+
+ Not all of the tag names mentioned below are conformant tag names in
+ this specification; many are included to handle legacy content. They
+ still form part of the algorithm that implementations are required to
+ implement to claim conformance.
+
+ The algorithm described below places no limit on the depth of the DOM
+ tree generated, or on the length of tag names, attribute names,
+ attribute values, text nodes, etc. While implementors are encouraged to
+ avoid arbitrary limits, it is recognized that practical concerns will
+ likely force user agents to impose nesting depth limits.
+
+ 8.2.5.1 Creating and inserting elements
+
+ When the steps below require the UA to create an element for a token in
+ a particular namespace, the UA must create a node implementing the
+ interface appropriate for the element type corresponding to the tag
+ name of the token in the given namespace (as given in the specification
+ that defines that element, e.g. for an a element in the HTML namespace,
+ this specification defines it to be the HTMLAnchorElement interface),
+ with the tag name being the name of that element, with the node being
+ in the given namespace, and with the attributes on the node being those
+ given in the given token.
+
+ The interface appropriate for an element in the HTML namespace that is
+ not defined in this specification is HTMLElement. The interface
+ appropriate for an element in another namespace that is not defined by
+ that namespace's specification is Element.
+
+ When a resettable element is created in this manner, its reset
+ algorithm must be invoked once the attributes are set. (This
+ initializes the element's value and checkedness based on the element's
+ attributes.)
+ __________________________________________________________________
+
+ When the steps below require the UA to insert an HTML element for a
+ token, the UA must first create an element for the token in the HTML
+ namespace, and then append this node to the current node, and push it
+ onto the stack of open elements so that it is the new current node.
+
+ The steps below may also require that the UA insert an HTML element in
+ a particular place, in which case the UA must follow the same steps
+ except that it must insert or append the new node in the location
+ specified instead of appending it to the current node. (This happens in
+ particular during the parsing of tables with invalid content.)
+
+ If an element created by the insert an HTML element algorithm is a
+ form-associated element, and the form element pointer is not null, and
+ the newly created element doesn't have a form attribute, the user agent
+ must associate the newly created element with the form element pointed
+ to by the form element pointer before inserting it wherever it is to be
+ inserted.
+ __________________________________________________________________
+
+ When the steps below require the UA to insert a foreign element for a
+ token, the UA must first create an element for the token in the given
+ namespace, and then append this node to the current node, and push it
+ onto the stack of open elements so that it is the new current node. If
+ the newly created element has an xmlns attribute in the XMLNS namespace
+ whose value is not exactly the same as the element's namespace, that is
+ a parse error.
+
+ When the steps below require the user agent to adjust MathML attributes
+ for a token, then, if the token has an attribute named definitionurl,
+ change its name to definitionURL (note the case difference).
+
+ When the steps below require the user agent to adjust foreign
+ attributes for a token, then, if any of the attributes on the token
+ match the strings given in the first column of the following table, let
+ the attribute be a namespaced attribute, with the prefix being the
+ string given in the corresponding cell in the second column, the local
+ name being the string given in the corresponding cell in the third
+ column, and the namespace being the namespace given in the
+ corresponding cell in the fourth column. (This fixes the use of
+ namespaced attributes, in particular xml:lang.)
+
+ Attribute name Prefix Local name Namespace
+ xlink:actuate xlink actuate XLink namespace
+ xlink:arcrole xlink arcrole XLink namespace
+ xlink:href xlink href XLink namespace
+ xlink:role xlink role XLink namespace
+ xlink:show xlink show XLink namespace
+ xlink:title xlink title XLink namespace
+ xlink:type xlink type XLink namespace
+ xml:base xml base XML namespace
+ xml:lang xml lang XML namespace
+ xml:space xml space XML namespace
+ xmlns (none) xmlns XMLNS namespace
+ xmlns:xlink xmlns xlink XMLNS namespace
+ __________________________________________________________________
+
+ The generic CDATA element parsing algorithm and the generic RCDATA
+ element parsing algorithm consist of the following steps. These
+ algorithms are always invoked in response to a start tag token.
+ 1. Insert an HTML element for the token.
+ 2. If the algorithm that was invoked is the generic CDATA element
+ parsing algorithm, switch the tokeniser's content model flag to the
+ CDATA state; otherwise the algorithm invoked was the generic RCDATA
+ element parsing algorithm, switch the tokeniser's content model
+ flag to the RCDATA state.
+ 3. Let the original insertion mode be the current insertion mode.
+ 4. Then, switch the insertion mode to "in CDATA/RCDATA".
+
+ 8.2.5.2 Closing elements that have implied end tags
+
+ When the steps below require the UA to generate implied end tags, then,
+ while the current node is a dd element, a dt element, an li element, an
+ option element, an optgroup element, a p element, an rp element, or an
+ rt element, the UA must pop the current node off the stack of open
+ elements.
+
+ If a step requires the UA to generate implied end tags but lists an
+ element to exclude from the process, then the UA must perform the above
+ steps as if that element was not in the above list.
+
+ 8.2.5.3 Foster parenting
+
+ Foster parenting happens when content is misnested in tables.
+
+ When a node node is to be foster parented, the node node must be
+ inserted into the foster parent element, and the current table must be
+ marked as tainted. (Once the current table has been tainted, whitespace
+ characters are inserted into the foster parent element instead of the
+ current node.)
+
+ The foster parent element is the parent element of the last table
+ element in the stack of open elements, if there is a table element and
+ it has such a parent element. If there is no table element in the stack
+ of open elements (fragment case), then the foster parent element is the
+ first element in the stack of open elements (the html element).
+ Otherwise, if there is a table element in the stack of open elements,
+ but the last table element in the stack of open elements has no parent,
+ or its parent node is not an element, then the foster parent element is
+ the element before the last table element in the stack of open
+ elements.
+
+ If the foster parent element is the parent element of the last table
+ element in the stack of open elements, then node must be inserted
+ immediately before the last table element in the stack of open elements
+ in the foster parent element; otherwise, node must be appended to the
+ foster parent element.
+
+ 8.2.5.4 The "initial" insertion mode
+
+ When the insertion mode is "initial", tokens must be handled as
+ follows:
+
+ A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE
+ Ignore the token.
+
+ A comment token
+ Append a Comment node to the Document object with the data
+ attribute set to the data given in the comment token.
+
+ A DOCTYPE token
+ If the DOCTYPE token's name is not a case-sensitive match for
+ the string "html", or if the token's public identifier is
+ neither missing nor a case-sensitive match for the string
+ "XSLT-compat", or if the token's system identifier is not
+ missing, then there is a parse error (this is the DOCTYPE parse
+ error). Conformance checkers may, instead of reporting this
+ error, switch to a conformance checking mode for another
+ language (e.g. based on the DOCTYPE token a conformance checker
+ could recognize that the document is an HTML4-era document, and
+ defer to an HTML4 conformance checker.)
+
+ Append a DocumentType node to the Document node, with the name
+ attribute set to the name given in the DOCTYPE token; the
+ publicId attribute set to the public identifier given in the
+ DOCTYPE token, or the empty string if the public identifier was
+ missing; the systemId attribute set to the system identifier
+ given in the DOCTYPE token, or the empty string if the system
+ identifier was missing; and the other attributes specific to
+ DocumentType objects set to null and empty lists as appropriate.
+ Associate the DocumentType node with the Document object so that
+ it is returned as the value of the doctype attribute of the
+ Document object.
+
+ Then, if the DOCTYPE token matches one of the conditions in the
+ following list, then set the document to quirks mode:
+
+ + The force-quirks flag is set to on.
+ + The name is set to anything other than "html".
+ + The public identifier starts with: "+//Silmaril//dtd html Pro
+ v0r11 19970101//"
+ + The public identifier starts with: "-//AdvaSoft Ltd//DTD HTML
+ 3.0 asWedit + extensions//"
+ + The public identifier starts with: "-//AS//DTD HTML 3.0
+ asWedit + extensions//"
+ + The public identifier starts with: "-//IETF//DTD HTML 2.0
+ Level 1//"
+ + The public identifier starts with: "-//IETF//DTD HTML 2.0
+ Level 2//"
+ + The public identifier starts with: "-//IETF//DTD HTML 2.0
+ Strict Level 1//"
+ + The public identifier starts with: "-//IETF//DTD HTML 2.0
+ Strict Level 2//"
+ + The public identifier starts with: "-//IETF//DTD HTML 2.0
+ Strict//"
+ + The public identifier starts with: "-//IETF//DTD HTML 2.0//"
+ + The public identifier starts with: "-//IETF//DTD HTML 2.1E//"
+ + The public identifier starts with: "-//IETF//DTD HTML 3.0//"
+ + The public identifier starts with: "-//IETF//DTD HTML 3.2
+ Final//"
+ + The public identifier starts with: "-//IETF//DTD HTML 3.2//"
+ + The public identifier starts with: "-//IETF//DTD HTML 3//"
+ + The public identifier starts with: "-//IETF//DTD HTML Level
+ 0//"
+ + The public identifier starts with: "-//IETF//DTD HTML Level
+ 1//"
+ + The public identifier starts with: "-//IETF//DTD HTML Level
+ 2//"
+ + The public identifier starts with: "-//IETF//DTD HTML Level
+ 3//"
+ + The public identifier starts with: "-//IETF//DTD HTML Strict
+ Level 0//"
+ + The public identifier starts with: "-//IETF//DTD HTML Strict
+ Level 1//"
+ + The public identifier starts with: "-//IETF//DTD HTML Strict
+ Level 2//"
+ + The public identifier starts with: "-//IETF//DTD HTML Strict
+ Level 3//"
+ + The public identifier starts with: "-//IETF//DTD HTML
+ Strict//"
+ + The public identifier starts with: "-//IETF//DTD HTML//"
+ + The public identifier starts with: "-//Metrius//DTD Metrius
+ Presentational//"
+ + The public identifier starts with: "-//Microsoft//DTD Internet
+ Explorer 2.0 HTML Strict//"
+ + The public identifier starts with: "-//Microsoft//DTD Internet
+ Explorer 2.0 HTML//"
+ + The public identifier starts with: "-//Microsoft//DTD Internet
+ Explorer 2.0 Tables//"
+ + The public identifier starts with: "-//Microsoft//DTD Internet
+ Explorer 3.0 HTML Strict//"
+ + The public identifier starts with: "-//Microsoft//DTD Internet
+ Explorer 3.0 HTML//"
+ + The public identifier starts with: "-//Microsoft//DTD Internet
+ Explorer 3.0 Tables//"
+ + The public identifier starts with: "-//Netscape Comm.
+ Corp.//DTD HTML//"
+ + The public identifier starts with: "-//Netscape Comm.
+ Corp.//DTD Strict HTML//"
+ + The public identifier starts with: "-//O'Reilly and
+ Associates//DTD HTML 2.0//"
+ + The public identifier starts with: "-//O'Reilly and
+ Associates//DTD HTML Extended 1.0//"
+ + The public identifier starts with: "-//O'Reilly and
+ Associates//DTD HTML Extended Relaxed 1.0//"
+ + The public identifier starts with: "-//SoftQuad Software//DTD
+ HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//"
+ + The public identifier starts with: "-//SoftQuad//DTD HoTMetaL
+ PRO 4.0::19971010::extensions to HTML 4.0//"
+ + The public identifier starts with: "-//Spyglass//DTD HTML 2.0
+ Extended//"
+ + The public identifier starts with: "-//SQ//DTD HTML 2.0
+ HoTMetaL + extensions//"
+ + The public identifier starts with: "-//Sun Microsystems
+ Corp.//DTD HotJava HTML//"
+ + The public identifier starts with: "-//Sun Microsystems
+ Corp.//DTD HotJava Strict HTML//"
+ + The public identifier starts with: "-//W3C//DTD HTML 3
+ 1995-03-24//"
+ + The public identifier starts with: "-//W3C//DTD HTML 3.2
+ Draft//"
+ + The public identifier starts with: "-//W3C//DTD HTML 3.2
+ Final//"
+ + The public identifier starts with: "-//W3C//DTD HTML 3.2//"
+ + The public identifier starts with: "-//W3C//DTD HTML 3.2S
+ Draft//"
+ + The public identifier starts with: "-//W3C//DTD HTML 4.0
+ Frameset//"
+ + The public identifier starts with: "-//W3C//DTD HTML 4.0
+ Transitional//"
+ + The public identifier starts with: "-//W3C//DTD HTML
+ Experimental 19960712//"
+ + The public identifier starts with: "-//W3C//DTD HTML
+ Experimental 970421//"
+ + The public identifier starts with: "-//W3C//DTD W3 HTML//"
+ + The public identifier starts with: "-//W3O//DTD W3 HTML 3.0//"
+ + The public identifier is set to: "-//W3O//DTD W3 HTML Strict
+ 3.0//EN//"
+ + The public identifier starts with: "-//WebTechs//DTD Mozilla
+ HTML 2.0//"
+ + The public identifier starts with: "-//WebTechs//DTD Mozilla
+ HTML//"
+ + The public identifier is set to: "-/W3C/DTD HTML 4.0
+ Transitional/EN"
+ + The public identifier is set to: "HTML"
+ + The system identifier is set to:
+ "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"
+ + The system identifier is missing and the public identifier
+ starts with: "-//W3C//DTD HTML 4.01 Frameset//"
+ + The system identifier is missing and the public identifier
+ starts with: "-//W3C//DTD HTML 4.01 Transitional//"
+
+ Otherwise, if the DOCTYPE token matches one of the conditions in
+ the following list, then set the document to limited quirks
+ mode:
+
+ + The public identifier starts with: "-//W3C//DTD XHTML 1.0
+ Frameset//"
+ + The public identifier starts with: "-//W3C//DTD XHTML 1.0
+ Transitional//"
+ + The system identifier is not missing and the public identifier
+ starts with: "-//W3C//DTD HTML 4.01 Frameset//"
+ + The system identifier is not missing and the public identifier
+ starts with: "-//W3C//DTD HTML 4.01 Transitional//"
+
+ The name, system identifier, and public identifier strings must
+ be compared to the values given in the lists above in an ASCII
+ case-insensitive manner. A system identifier whose value is the
+ empty string is not considered missing for the purposes of the
+ conditions above.
+
+ Then, switch the insertion mode to "before html".
+
+ Anything else
+ Parse error.
+
+ Set the document to quirks mode.
+
+ Switch the insertion mode to "before html", then reprocess the
+ current token.
+
+ 8.2.5.5 The "before html" insertion mode
+
+ When the insertion mode is "before html", tokens must be handled as
+ follows:
+
+ A DOCTYPE token
+ Parse error. Ignore the token.
+
+ A comment token
+ Append a Comment node to the Document object with the data
+ attribute set to the data given in the comment token.
+
+ A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE
+ Ignore the token.
+
+ A start tag whose tag name is "html"
+ Create an element for the token in the HTML namespace. Append it
+ to the Document object. Put this element in the stack of open
+ elements.
+
+ If the token has an attribute "manifest", then resolve the value
+ of that attribute to an absolute URL, and if that is successful,
+ run the application cache selection algorithm with the resulting
+ absolute URL. Otherwise, if there is no such attribute or
+ resolving it fails, run the application cache selection
+ algorithm with no manifest. The algorithm must be passed the
+ Document object.
+
+ Switch the insertion mode to "before head".
+
+ Anything else
+ Create an HTMLElement node with the tag name html, in the HTML
+ namespace. Append it to the Document object. Put this element in
+ the stack of open elements.
+
+ Run the application cache selection algorithm with no manifest,
+ passing it the Document object.
+
+ Switch the insertion mode to "before head", then reprocess the
+ current token.
+
+ Should probably make end tags be ignored, so that "</head><!--
+ --><html>" puts the comment before the root node (or should we?)
+
+ The root element can end up being removed from the Document object,
+ e.g. by scripts; nothing in particular happens in such cases, content
+ continues being appended to the nodes as described in the next section.
+
+ 8.2.5.6 The "before head" insertion mode
+
+ When the insertion mode is "before head", tokens must be handled as
+ follows:
+
+ A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE
+ Ignore the token.
+
+ A comment token
+ Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token.
+
+ A DOCTYPE token
+ Parse error. Ignore the token.
+
+ A start tag whose tag name is "html"
+ Process the token using the rules for the "in body" insertion
+ mode.
+
+ A start tag whose tag name is "head"
+ Insert an HTML element for the token.
+
+ Set the head element pointer to the newly created head element.
+
+ Switch the insertion mode to "in head".
+
+ An end tag whose tag name is one of: "head", "br"
+ Act as if a start tag token with the tag name "head" and no
+ attributes had been seen, then reprocess the current token.
+
+ Any other end tag
+ Parse error. Ignore the token.
+
+ Anything else
+ Act as if a start tag token with the tag name "head" and no
+ attributes had been seen, then reprocess the current token.
+
+ This will result in an empty head element being generated, with
+ the current token being reprocessed in the "after head"
+ insertion mode.
+
+ 8.2.5.7 The "in head" insertion mode
+
+ When the insertion mode is "in head", tokens must be handled as
+ follows:
+
+ A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE
+ Insert the character into the current node.
+
+ A comment token
+ Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token.
+
+ A DOCTYPE token
+ Parse error. Ignore the token.
+
+ A start tag whose tag name is "html"
+ Process the token using the rules for the "in body" insertion
+ mode.
+
+ A start tag whose tag name is one of: "base", "command", "eventsource",
+ "link"
+ Insert an HTML element for the token. Immediately pop the
+ current node off the stack of open elements.
+
+ Acknowledge the token's self-closing flag, if it is set.
+
+ A start tag whose tag name is "meta"
+ Insert an HTML element for the token. Immediately pop the
+ current node off the stack of open elements.
+
+ Acknowledge the token's self-closing flag, if it is set.
+
+ If the element has a charset attribute, and its value is a
+ supported encoding, and the confidence is currently tentative,
+ then change the encoding to the encoding given by the value of
+ the charset attribute.
+
+ Otherwise, if the element has a content attribute, and applying
+ the algorithm for extracting an encoding from a Content-Type to
+ its value returns a supported encoding, and the confidence is
+ currently tentative, then change the encoding to the encoding
+ returned by that algorithm.
+
+ A start tag whose tag name is "title"
+ Follow the generic RCDATA element parsing algorithm.
+
+ A start tag whose tag name is "noscript", if the scripting flag is
+ enabled
+ A start tag whose tag name is one of: "noframes", "style"
+ Follow the generic CDATA element parsing algorithm.
+
+ A start tag whose tag name is "noscript", if the scripting flag is
+ disabled
+ Insert an HTML element for the token.
+
+ Switch the insertion mode to "in head noscript".
+
+ A start tag whose tag name is "script"
+
+ 1. Create an element for the token in the HTML namespace.
+ 2. Mark the element as being "parser-inserted".
+ This ensures that, if the script is external, any
+ document.write() calls in the script will execute in-line,
+ instead of blowing the document away, as would happen in most
+ other cases. It also prevents the script from executing until
+ the end tag is seen.
+ 3. If the parser was originally created for the HTML fragment
+ parsing algorithm, then mark the script element as "already
+ executed". (fragment case)
+ 4. Append the new element to the current node.
+ 5. Switch the tokeniser's content model flag to the CDATA state.
+ 6. Let the original insertion mode be the current insertion mode.
+ 7. Switch the insertion mode to "in CDATA/RCDATA".
+
+ An end tag whose tag name is "head"
+ Pop the current node (which will be the head element) off the
+ stack of open elements.
+
+ Switch the insertion mode to "after head".
+
+ An end tag whose tag name is "br"
+ Act as described in the "anything else" entry below.
+
+ A start tag whose tag name is "head"
+ Any other end tag
+ Parse error. Ignore the token.
+
+ Anything else
+ Act as if an end tag token with the tag name "head" had been
+ seen, and reprocess the current token.
+
+ In certain UAs, some elements don't trigger the "in body" mode
+ straight away, but instead get put into the head. Do we want to
+ copy that?
+
+ 8.2.5.8 The "in head noscript" insertion mode
+
+ When the insertion mode is "in head noscript", tokens must be handled
+ as follows:
+
+ A DOCTYPE token
+ Parse error. Ignore the token.
+
+ A start tag whose tag name is "html"
+ Process the token using the rules for the "in body" insertion
+ mode.
+
+ An end tag whose tag name is "noscript"
+ Pop the current node (which will be a noscript element) from the
+ stack of open elements; the new current node will be a head
+ element.
+
+ Switch the insertion mode to "in head".
+
+ A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE
+ A comment token
+ A start tag whose tag name is one of: "link", "meta", "noframes",
+ "style"
+ Process the token using the rules for the "in head" insertion
+ mode.
+
+ An end tag whose tag name is "br"
+ Act as described in the "anything else" entry below.
+
+ A start tag whose tag name is one of: "head", "noscript"
+ Any other end tag
+ Parse error. Ignore the token.
+
+ Anything else
+ Parse error. Act as if an end tag with the tag name "noscript"
+ had been seen and reprocess the current token.
+
+ 8.2.5.9 The "after head" insertion mode
+
+ When the insertion mode is "after head", tokens must be handled as
+ follows:
+
+ A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE
+ Insert the character into the current node.
+
+ A comment token
+ Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token.
+
+ A DOCTYPE token
+ Parse error. Ignore the token.
+
+ A start tag whose tag name is "html"
+ Process the token using the rules for the "in body" insertion
+ mode.
+
+ A start tag whose tag name is "body"
+ Insert an HTML element for the token.
+
+ Switch the insertion mode to "in body".
+
+ A start tag whose tag name is "frameset"
+ Insert an HTML element for the token.
+
+ Switch the insertion mode to "in frameset".
+
+ A start tag token whose tag name is one of: "base", "link", "meta",
+ "noframes", "script", "style", "title"
+ Parse error.
+
+ Push the node pointed to by the head element pointer onto the
+ stack of open elements.
+
+ Process the token using the rules for the "in head" insertion
+ mode.
+
+ Remove the node pointed to by the head element pointer from the
+ stack of open elements.
+
+ An end tag whose tag name is "br"
+ Act as described in the "anything else" entry below.
+
+ A start tag whose tag name is "head"
+ Any other end tag
+ Parse error. Ignore the token.
+
+ Anything else
+ Act as if a start tag token with the tag name "body" and no
+ attributes had been seen, and then reprocess the current token.
+
+ 8.2.5.10 The "in body" insertion mode
+
+ When the insertion mode is "in body", tokens must be handled as
+ follows:
+
+ A character token
+ Reconstruct the active formatting elements, if any.
+
+ Insert the token's character into the current node.
+
+ A comment token
+ Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token.
+
+ A DOCTYPE token
+ Parse error. Ignore the token.
+
+ A start tag whose tag name is "html"
+ Parse error. For each attribute on the token, check to see if
+ the attribute is already present on the top element of the stack
+ of open elements. If it is not, add the attribute and its
+ corresponding value to that element.
+
+ A start tag token whose tag name is one of: "base", "command",
+ "eventsource", "link", "meta", "noframes", "script", "style",
+ "title"
+ Process the token using the rules for the "in head" insertion
+ mode.
+
+ A start tag whose tag name is "body"
+ Parse error.
+
+ If the second element on the stack of open elements is not a
+ body element, or, if the stack of open elements has only one
+ node on it, then ignore the token. (fragment case)
+
+ Otherwise, for each attribute on the token, check to see if the
+ attribute is already present on the body element (the second
+ element) on the stack of open elements. If it is not, add the
+ attribute and its corresponding value to that element.
+
+ An end-of-file token
+ If there is a node in the stack of open elements that is not
+ either a dd element, a dt element, an li element, a p element, a
+ tbody element, a td element, a tfoot element, a th element, a
+ thead element, a tr element, the body element, or the html
+ element, then this is a parse error.
+
+ Stop parsing.
+
+ An end tag whose tag name is "body"
+ If the stack of open elements does not have a body element in
+ scope, this is a parse error; ignore the token.
+
+ Otherwise, if there is a node in the stack of open elements that
+ is not either a dd element, a dt element, an li element, a p
+ element, a tbody element, a td element, a tfoot element, a th
+ element, a thead element, a tr element, the body element, or the
+ html element, then this is a parse error.
+
+ Switch the insertion mode to "after body".
+
+ An end tag whose tag name is "html"
+ Act as if an end tag with tag name "body" had been seen, then,
+ if that token wasn't ignored, reprocess the current token.
+
+ The fake end tag token here can only be ignored in the fragment
+ case.
+
+ A start tag whose tag name is one of: "address", "article", "aside",
+ "blockquote", "center", "datagrid", "details", "dialog", "dir",
+ "div", "dl", "fieldset", "figure", "footer", "header", "menu",
+ "nav", "ol", "p", "section", "ul"
+ If the stack of open elements has a p element in scope, then act
+ as if an end tag with the tag name "p" had been seen.
+
+ Insert an HTML element for the token.
+
+ A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5",
+ "h6"
+ If the stack of open elements has a p element in scope, then act
+ as if an end tag with the tag name "p" had been seen.
+
+ If the current node is an element whose tag name is one of "h1",
+ "h2", "h3", "h4", "h5", or "h6", then this is a parse error; pop
+ the current node off the stack of open elements.
+
+ Insert an HTML element for the token.
+
+ A start tag whose tag name is one of: "pre", "listing"
+ If the stack of open elements has a p element in scope, then act
+ as if an end tag with the tag name "p" had been seen.
+
+ Insert an HTML element for the token.
+
+ If the next token is a U+000A LINE FEED (LF) character token,
+ then ignore that token and move on to the next one. (Newlines at
+ the start of pre blocks are ignored as an authoring
+ convenience.)
+
+ A start tag whose tag name is "form"
+ If the form element pointer is not null, then this is a parse
+ error; ignore the token.
+
+ Otherwise:
+
+ If the stack of open elements has a p element in scope, then act
+ as if an end tag with the tag name "p" had been seen.
+
+ Insert an HTML element for the token, and set the form element
+ pointer to point to the element created.
+
+ A start tag whose tag name is "li"
+ Run the following algorithm:
+
+ 1. Initialize node to be the current node (the bottommost node of
+ the stack).
+ 2. If node is an li element, then act as if an end tag with the
+ tag name "li" had been seen, then jump to the last step.
+ 3. If node is not in the formatting category, and is not in the
+ phrasing category, and is not an address, div, or p element,
+ then jump to the last step.
+ 4. Otherwise, set node to the previous entry in the stack of open
+ elements and return to step 2.
+ 5. This is the last step.
+ If the stack of open elements has a p element in scope, then
+ act as if an end tag with the tag name "p" had been seen.
+ Finally, insert an HTML element for the token.
+
+ A start tag whose tag name is one of: "dd", "dt"
+ Run the following algorithm:
+
+ 1. Initialize node to be the current node (the bottommost node of
+ the stack).
+ 2. If node is a dd or dt element, then act as if an end tag with
+ the same tag name as node had been seen, then jump to the last
+ step.
+ 3. If node is not in the formatting category, and is not in the
+ phrasing category, and is not an address, div, or p element,
+ then jump to the last step.
+ 4. Otherwise, set node to the previous entry in the stack of open
+ elements and return to step 2.
+ 5. This is the last step.
+ If the stack of open elements has a p element in scope, then
+ act as if an end tag with the tag name "p" had been seen.
+ Finally, insert an HTML element for the token.
+
+ A start tag whose tag name is "plaintext"
+ If the stack of open elements has a p element in scope, then act
+ as if an end tag with the tag name "p" had been seen.
+
+ Insert an HTML element for the token.
+
+ Switch the content model flag to the PLAINTEXT state.
+
+ Once a start tag with the tag name "plaintext" has been seen,
+ that will be the last token ever seen other than character
+ tokens (and the end-of-file token), because there is no way to
+ switch the content model flag out of the PLAINTEXT state.
+
+ An end tag whose tag name is one of: "address", "article", "aside",
+ "blockquote", "center", "datagrid", "details", "dialog", "dir",
+ "div", "dl", "fieldset", "figure", "footer", "header",
+ "listing", "menu", "nav", "ol", "pre", "section", "ul"
+ If the stack of open elements does not have an element in scope
+ with the same tag name as that of the token, then this is a
+ parse error; ignore the token.
+
+ Otherwise, run these steps:
+
+ 1. Generate implied end tags.
+ 2. If the current node is not an element with the same tag name
+ as that of the token, then this is a parse error.
+ 3. Pop elements from the stack of open elements until an element
+ with the same tag name as the token has been popped from the
+ stack.
+
+ An end tag whose tag name is "form"
+ Let node be the element that the form element pointer is set to.
+
+ Set the form element pointer to null.
+
+ If node is null or the stack of open elements does not have node
+ in scope, then this is a parse error; ignore the token.
+
+ Otherwise, run these steps:
+
+ 1. Generate implied end tags.
+ 2. If the current node is not node, then this is a parse error.
+ 3. Remove node from the stack of open elements.
+
+ An end tag whose tag name is "p"
+ If the stack of open elements does not have an element in scope
+ with the same tag name as that of the token, then this is a
+ parse error; act as if a start tag with the tag name "p" had been
+ seen, then reprocess the current token.
+
+ Otherwise, run these steps:
+
+ 1. Generate implied end tags, except for elements with the same
+ tag name as the token.
+ 2. If the current node is not an element with the same tag name
+ as that of the token, then this is a parse error.
+ 3. Pop elements from the stack of open elements until an element
+ with the same tag name as the token has been popped from the
+ stack.
+
+ An end tag whose tag name is one of: "dd", "dt", "li"
+ If the stack of open elements does not have an element in scope
+ with the same tag name as that of the token, then this is a
+ parse error; ignore the token.
+
+ Otherwise, run these steps:
+
+ 1. Generate implied end tags, except for elements with the same
+ tag name as the token.
+ 2. If the current node is not an element with the same tag name
+ as that of the token, then this is a parse error.
+ 3. Pop elements from the stack of open elements until an element
+ with the same tag name as the token has been popped from the
+ stack.
+
+ An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
+ If the stack of open elements does not have an element in scope
+ whose tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6",
+ then this is a parse error; ignore the token.
+
+ Otherwise, run these steps:
+
+ 1. Generate implied end tags.
+ 2. If the current node is not an element with the same tag name
+ as that of the token, then this is a parse error.
+ 3. Pop elements from the stack of open elements until an element
+ whose tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6"
+ has been popped from the stack.
+
+ An end tag whose tag name is "sarcasm"
+ Take a deep breath, then act as described in the "any other end
+ tag" entry below.
+
+ A start tag whose tag name is "a"
+ If the list of active formatting elements contains an element
+ whose tag name is "a" between the end of the list and the last
+ marker on the list (or the start of the list if there is no
+ marker on the list), then this is a parse error; act as if an
+ end tag with the tag name "a" had been seen, then remove that
+ element from the list of active formatting elements and the
+ stack of open elements if the end tag didn't already remove it
+ (it might not have if the element is not in table scope).
+
+ In the non-conforming stream
+ <a href="a">a<table><a href="b">b</table>x, the first a element
+ would be closed upon seeing the second one, and the "x"
+ character would be inside a link to "b", not to "a". This is
+ despite the fact that the outer a element is not in table scope
+ (meaning that a regular </a> end tag at the start of the table
+ wouldn't close the outer a element).
+
+ Reconstruct the active formatting elements, if any.
+
+ Insert an HTML element for the token. Add that element to the
+ list of active formatting elements.
+
+ A start tag whose tag name is one of: "b", "big", "em", "font", "i",
+ "s", "small", "strike", "strong", "tt", "u"
+ Reconstruct the active formatting elements, if any.
+
+ Insert an HTML element for the token. Add that element to the
+ list of active formatting elements.
+
+ A start tag whose tag name is "nobr"
+ Reconstruct the active formatting elements, if any.
+
+ If the stack of open elements has a nobr element in scope, then
+ this is a parse error; act as if an end tag with the tag name
+ "nobr" had been seen, then once again reconstruct the active
+ formatting elements, if any.
+
+ Insert an HTML element for the token. Add that element to the
+ list of active formatting elements.
+
+ An end tag whose tag name is one of: "a", "b", "big", "em", "font",
+ "i", "nobr", "s", "small", "strike", "strong", "tt", "u"
+ Follow these steps:
+
+ 1. Let the formatting element be the last element in the list of
+ active formatting elements that:
+ o is between the end of the list and the last scope marker
+ in the list, if any, or the start of the list otherwise,
+ and
+ o has the same tag name as the token.
+ If there is no such node, or, if that node is also in the
+ stack of open elements but the element is not in scope, then
+ this is a parse error; ignore the token, and abort these
+ steps.
+ Otherwise, if there is such a node, but that node is not in
+ the stack of open elements, then this is a parse error; remove
+ the element from the list, and abort these steps.
+ Otherwise, there is a formatting element and that element is
+ in the stack and is in scope. If the element is not the
+ current node, this is a parse error. In any case, proceed with
+ the algorithm as written in the following steps.
+ 2. Let the furthest block be the topmost node in the stack of
+ open elements that is lower in the stack than the formatting
+ element, and is not an element in the phrasing or formatting
+ categories. There might not be one.
+ 3. If there is no furthest block, then the UA must skip the
+ subsequent steps and instead just pop all the nodes from the
+ bottom of the stack of open elements, from the current node up
+ to and including the formatting element, and remove the
+ formatting element from the list of active formatting
+ elements.
+ 4. Let the common ancestor be the element immediately above the
+ formatting element in the stack of open elements.
+ 5. If the furthest block has a parent node, then remove the
+ furthest block from its parent node.
+ 6. Let a bookmark note the position of the formatting element in
+ the list of active formatting elements relative to the
+ elements on either side of it in the list.
+ 7. Let node and last node be the furthest block. Follow these
+ steps:
+ 1. Let node be the element immediately above node in the
+ stack of open elements.
+ 2. If node is not in the list of active formatting elements,
+ then remove node from the stack of open elements and then
+ go back to step 1.
+ 3. Otherwise, if node is the formatting element, then go to
+ the next step in the overall algorithm.
+ 4. Otherwise, if last node is the furthest block, then move
+ the aforementioned bookmark to be immediately after the
+ node in the list of active formatting elements.
+ 5. If node has any children, perform a shallow clone of
+ node, replace the entry for node in the list of active
+ formatting elements with an entry for the clone, replace
+ the entry for node in the stack of open elements with an
+ entry for the clone, and let node be the clone.
+ 6. Insert last node into node, first removing it from its
+ previous parent node if any.
+ 7. Let last node be node.
+ 8. Return to step 1 of this inner set of steps.
+ 8. If the common ancestor node is a table, tbody, tfoot, thead,
+ or tr element, then, foster parent whatever last node ended up
+ being in the previous step.
+ Otherwise, append whatever last node ended up being in the
+ previous step to the common ancestor node, first removing it
+ from its previous parent node if any.
+ 9. Perform a shallow clone of the formatting element.
+ 10. Take all of the child nodes of the furthest block and append
+ them to the clone created in the last step.
+ 11. Append that clone to the furthest block.
+ 12. Remove the formatting element from the list of active
+ formatting elements, and insert the clone into the list of
+ active formatting elements at the position of the
+ aforementioned bookmark.
+ 13. Remove the formatting element from the stack of open elements,
+ and insert the clone into the stack of open elements
+ immediately below the position of the furthest block in that
+ stack.
+ 14. Jump back to step 1 in this series of steps.
+
+ The way these steps are defined, only elements in the formatting
+ category ever get cloned by this algorithm.
+
+ Because of the way this algorithm causes elements to change
+ parents, it has been dubbed the "adoption agency algorithm" (in
+ contrast with other possible algorithms for dealing with
+ misnested content, which included the "incest algorithm", the
+ "secret affair algorithm", and the "Heisenberg algorithm").
+
+ A start tag whose tag name is "button"
+ If the stack of open elements has a button element in scope,
+ then this is a parse error; act as if an end tag with the tag
+ name "button" had been seen, then reprocess the token.
+
+ Otherwise:
+
+ Reconstruct the active formatting elements, if any.
+
+ Insert an HTML element for the token.
+
+ Insert a marker at the end of the list of active formatting
+ elements.
+
+ A start tag token whose tag name is one of: "applet", "marquee",
+ "object"
+ Reconstruct the active formatting elements, if any.
+
+ Insert an HTML element for the token.
+
+ Insert a marker at the end of the list of active formatting
+ elements.
+
+ An end tag token whose tag name is one of: "applet", "button",
+ "marquee", "object"
+ If the stack of open elements does not have an element in scope
+ with the same tag name as that of the token, then this is a
+ parse error; ignore the token.
+
+ Otherwise, run these steps:
+
+ 1. Generate implied end tags.
+ 2. If the current node is not an element with the same tag name
+ as that of the token, then this is a parse error.
+ 3. Pop elements from the stack of open elements until an element
+ with the same tag name as the token has been popped from the
+ stack.
+ 4. Clear the list of active formatting elements up to the last
+ marker.
+
+ A start tag whose tag name is "xmp"
+ Reconstruct the active formatting elements, if any.
+
+ Follow the generic CDATA element parsing algorithm.
+
+ A start tag whose tag name is "table"
+ If the stack of open elements has a p element in scope, then act
+ as if an end tag with the tag name "p" had been seen.
+
+ Insert an HTML element for the token.
+
+ Switch the insertion mode to "in table".
+
+ A start tag whose tag name is one of: "area", "basefont", "bgsound",
+ "br", "embed", "img", "input", "spacer", "wbr"
+ Reconstruct the active formatting elements, if any.
+
+ Insert an HTML element for the token. Immediately pop the
+ current node off the stack of open elements.
+
+ Acknowledge the token's self-closing flag, if it is set.
+
+ A start tag whose tag name is one of: "param", "source"
+ Insert an HTML element for the token. Immediately pop the
+ current node off the stack of open elements.
+
+ Acknowledge the token's self-closing flag, if it is set.
+
+ A start tag whose tag name is "hr"
+ If the stack of open elements has a p element in scope, then act
+ as if an end tag with the tag name "p" had been seen.
+
+ Insert an HTML element for the token. Immediately pop the
+ current node off the stack of open elements.
+
+ Acknowledge the token's self-closing flag, if it is set.
+
+ A start tag whose tag name is "image"
+ Parse error. Change the token's tag name to "img" and reprocess
+ it. (Don't ask.)
+
+ A start tag whose tag name is "isindex"
+ Parse error.
+
+ If the form element pointer is not null, then ignore the token.
+
+ Otherwise:
+
+ Acknowledge the token's self-closing flag, if it is set.
+
+ Act as if a start tag token with the tag name "form" had been
+ seen.
+
+ If the token has an attribute called "action", set the action
+ attribute on the resulting form element to the value of the
+ "action" attribute of the token.
+
+ Act as if a start tag token with the tag name "hr" had been
+ seen.
+
+ Act as if a start tag token with the tag name "p" had been seen.
+
+ Act as if a start tag token with the tag name "label" had been
+ seen.
+
+ Act as if a stream of character tokens had been seen (see below
+ for what they should say).
+
+ Act as if a start tag token with the tag name "input" had been
+ seen, with all the attributes from the "isindex" token except
+ "name", "action", and "prompt". Set the name attribute of the
+ resulting input element to the value "isindex".
+
+ Act as if a stream of character tokens had been seen (see below
+ for what they should say).
+
+ Act as if an end tag token with the tag name "label" had been
+ seen.
+
+ Act as if an end tag token with the tag name "p" had been seen.
+
+ Act as if a start tag token with the tag name "hr" had been
+ seen.
+
+ Act as if an end tag token with the tag name "form" had been
+ seen.
+
+ If the token has an attribute with the name "prompt", then the
+ first stream of characters must be the same string as given in
+ that attribute, and the second stream of characters must be
+ empty. Otherwise, the two streams of character tokens together
+ should, together with the input element, express the equivalent
+ of "This is a searchable index. Insert your search keywords
+ here: (input field)" in the user's preferred language.
+
+ A start tag whose tag name is "textarea"
+
+ 1. Insert an HTML element for the token.
+ 2. If the next token is a U+000A LINE FEED (LF) character token,
+ then ignore that token and move on to the next one. (Newlines
+ at the start of textarea elements are ignored as an authoring
+ convenience.)
+ 3. Switch the tokeniser's content model flag to the RCDATA state.
+ 4. Let the original insertion mode be the current insertion mode.
+ 5. Switch the insertion mode to "in CDATA/RCDATA".
+
+ A start tag whose tag name is one of: "iframe", "noembed"
+ A start tag whose tag name is "noscript", if the scripting flag is
+ enabled
+ Follow the generic CDATA element parsing algorithm.
+
+ A start tag whose tag name is "select"
+ Reconstruct the active formatting elements, if any.
+
+ Insert an HTML element for the token.
+
+ If the insertion mode is one of "in table", "in caption", "in
+ column group", "in table body", "in row", or "in cell", then
+ switch the insertion mode to "in select in table". Otherwise,
+ switch the insertion mode to "in select".
+
+ A start tag whose tag name is one of: "optgroup", "option"
+ If the stack of open elements has an option element in scope,
+ then act as if an end tag with the tag name "option" had been
+ seen.
+
+ Reconstruct the active formatting elements, if any.
+
+ Insert an HTML element for the token.
+
+ A start tag whose tag name is one of: "rp", "rt"
+ If the stack of open elements has a ruby element in scope, then
+ generate implied end tags. If the current node is not then a
+ ruby element, this is a parse error; pop all the nodes from the
+ current node up to the node immediately before the bottommost
+ ruby element on the stack of open elements.
+
+ Insert an HTML element for the token.
+
+ An end tag whose tag name is "br"
+ Parse error. Act as if a start tag token with the tag name "br"
+ had been seen. Ignore the end tag token.
+
+ A start tag whose tag name is "math"
+ Reconstruct the active formatting elements, if any.
+
+ Adjust MathML attributes for the token. (This fixes the case of
+ MathML attributes that are not all lowercase.)
+
+ Adjust foreign attributes for the token. (This fixes the use of
+ namespaced attributes, in particular XLink.)
+
+ Insert a foreign element for the token, in the MathML namespace.
+
+ If the token has its self-closing flag set, pop the current node
+ off the stack of open elements and acknowledge the token's
+ self-closing flag.
+
+ Otherwise, let the secondary insertion mode be the current
+ insertion mode, and then switch the insertion mode to "in
+ foreign content".
+
+ A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "frame", "frameset", "head", "tbody", "td", "tfoot", "th",
+ "thead", "tr"
+ Parse error. Ignore the token.
+
+ Any other start tag
+ Reconstruct the active formatting elements, if any.
+
+ Insert an HTML element for the token.
+
+ This element will be a phrasing element.
+
+ Any other end tag
+ Run the following steps:
+
+ 1. Initialize node to be the current node (the bottommost node of
+ the stack).
+ 2. If node has the same tag name as the end tag token, then:
+ 1. Generate implied end tags.
+ 2. If the tag name of the end tag token does not match the
+ tag name of the current node, this is a parse error.
+ 3. Pop all the nodes from the current node up to node,
+ including node, then stop these steps.
+ 3. Otherwise, if node is in neither the formatting category nor
+ the phrasing category, then this is a parse error; ignore the
+ token, and abort these steps.
+ 4. Set node to the previous entry in the stack of open elements.
+ 5. Return to step 2.
+
+ 8.2.5.11 The "in CDATA/RCDATA" insertion mode
+
+ When the insertion mode is "in CDATA/RCDATA", tokens must be handled as
+ follows:
+
+ A character token
+ Insert the token's character into the current node.
+
+ An end-of-file token
+ Parse error.
+
+ If the current node is a script element, mark the script element
+ as "already executed".
+
+ Pop the current node off the stack of open elements.
+
+ Switch the insertion mode to the original insertion mode and
+ reprocess the current token.
+
+ An end tag whose tag name is "script"
+ Let script be the current node (which will be a script element).
+
+ Pop the current node off the stack of open elements.
+
+ Switch the insertion mode to the original insertion mode.
+
+ Let the old insertion point have the same value as the current
+ insertion point. Let the insertion point be just before the next
+ input character.
+
+ Increment the parser's script nesting level by one.
+
+ Run the script. This might cause some script to execute, which
+ might cause new characters to be inserted into the tokeniser,
+ and might cause the tokeniser to output more tokens, resulting
+ in a reentrant invocation of the parser.
+
+ Decrement the parser's script nesting level by one. If the
+ parser's script nesting level is zero, then set the parser pause
+ flag to false.
+
+ Let the insertion point have the value of the old insertion
+ point. (In other words, restore the insertion point to the value
+ it had before the previous paragraph. This value might be the
+ "undefined" value.)
+
+ At this stage, if there is a pending external script, then:
+
+ If the tree construction stage is being called reentrantly, say
+ from a call to document.write():
+ Set the parser pause flag to true, and abort the
+ processing of any nested invocations of the tokeniser,
+ yielding control back to the caller. (Tokenization will
+ resume when the caller returns to the "outer" tree
+ construction stage.)
+
+ Otherwise:
+ Follow these steps:
+
+ 1. Let the script be the pending external script. There is
+ no longer a pending external script.
+ 2. Pause until the script has completed loading.
+ 3. Let the insertion point be just before the next input
+ character.
+ 4. Execute the script.
+ 5. Let the insertion point be undefined again.
+ 6. If there is once again a pending external script, then
+ repeat these steps from step 1.
+
+ Any other end tag
+ Pop the current node off the stack of open elements.
+
+ Switch the insertion mode to the original insertion mode.
+
+ 8.2.5.12 The "in table" insertion mode
+
+ When the insertion mode is "in table", tokens must be handled as
+ follows:
+
+ A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE
+ If the current table is tainted, then act as described in the
+ "anything else" entry below.
+
+ Otherwise, insert the character into the current node.
+
+ A comment token
+ Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token.
+
+ A DOCTYPE token
+ Parse error. Ignore the token.
+
+ A start tag whose tag name is "caption"
+ Clear the stack back to a table context. (See below.)
+
+ Insert a marker at the end of the list of active formatting
+ elements.
+
+ Insert an HTML element for the token, then switch the insertion
+ mode to "in caption".
+
+ A start tag whose tag name is "colgroup"
+ Clear the stack back to a table context. (See below.)
+
+ Insert an HTML element for the token, then switch the insertion
+ mode to "in column group".
+
+ A start tag whose tag name is "col"
+ Act as if a start tag token with the tag name "colgroup" had
+ been seen, then reprocess the current token.
+
+ A start tag whose tag name is one of: "tbody", "tfoot", "thead"
+ Clear the stack back to a table context. (See below.)
+
+ Insert an HTML element for the token, then switch the insertion
+ mode to "in table body".
+
+ A start tag whose tag name is one of: "td", "th", "tr"
+ Act as if a start tag token with the tag name "tbody" had been
+ seen, then reprocess the current token.
+
+ A start tag whose tag name is "table"
+ Parse error. Act as if an end tag token with the tag name
+ "table" had been seen, then, if that token wasn't ignored,
+ reprocess the current token.
+
+ The fake end tag token here can only be ignored in the fragment
+ case.
+
+ An end tag whose tag name is "table"
+ If the stack of open elements does not have an element in table
+ scope with the same tag name as the token, this is a parse
+ error. Ignore the token. (fragment case)
+
+ Otherwise:
+
+ Pop elements from this stack until a table element has been
+ popped from the stack.
+
+ Reset the insertion mode appropriately.
+
+ An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"
+ Parse error. Ignore the token.
+
+ A start tag whose tag name is one of: "style", "script"
+ If the current table is tainted then act as described in the
+ "anything else" entry below.
+
+ Otherwise, process the token using the rules for the "in head"
+ insertion mode.
+
+ A start tag whose tag name is "input"
+ If the token does not have an attribute with the name "type", or
+ if it does, but that attribute's value is not an ASCII
+ case-insensitive match for the string "hidden", or, if the
+ current table is tainted, then: act as described in the
+ "anything else" entry below.
+
+ Otherwise:
+
+ Parse error.
+
+ Insert an HTML element for the token.
+
+ Pop that input element off the stack of open elements.
+
+ An end-of-file token
+ If the current node is not the root html element, then this is a
+ parse error.
+
+ It can only be the current node in the fragment case.
+
+ Stop parsing.
+
+ Anything else
+ Parse error. Process the token using the rules for the "in body"
+ insertion mode, except that if the current node is a table,
+ tbody, tfoot, thead, or tr element, then, whenever a node would
+ be inserted into the current node, it must instead be foster
+ parented.
+
+ When the steps above require the UA to clear the stack back to a table
+ context, it means that the UA must, while the current node is not a
+ table element or an html element, pop elements from the stack of open
+ elements.
+
+ The current node being an html element after this process is a fragment
+ case.
+
+ 8.2.5.13 The "in caption" insertion mode
+
+ When the insertion mode is "in caption", tokens must be handled as
+ follows:
+
+ An end tag whose tag name is "caption"
+ If the stack of open elements does not have an element in table
+ scope with the same tag name as the token, this is a parse
+ error. Ignore the token. (fragment case)
+
+ Otherwise:
+
+ Generate implied end tags.
+
+ Now, if the current node is not a caption element, then this is
+ a parse error.
+
+ Pop elements from this stack until a caption element has been
+ popped from the stack.
+
+ Clear the list of active formatting elements up to the last
+ marker.
+
+ Switch the insertion mode to "in table".
+
+ A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "tbody", "td", "tfoot", "th", "thead", "tr"
+
+ An end tag whose tag name is "table"
+ Parse error. Act as if an end tag with the tag name "caption"
+ had been seen, then, if that token wasn't ignored, reprocess the
+ current token.
+
+ The fake end tag token here can only be ignored in the fragment
+ case.
+
+ An end tag whose tag name is one of: "body", "col", "colgroup", "html",
+ "tbody", "td", "tfoot", "th", "thead", "tr"
+ Parse error. Ignore the token.
+
+ Anything else
+ Process the token using the rules for the "in body" insertion
+ mode.
+
+ 8.2.5.14 The "in column group" insertion mode
+
+ When the insertion mode is "in column group", tokens must be handled as
+ follows:
+
+ A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE
+ Insert the character into the current node.
+
+ A comment token
+ Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token.
+
+ A DOCTYPE token
+ Parse error. Ignore the token.
+
+ A start tag whose tag name is "html"
+ Process the token using the rules for the "in body" insertion
+ mode.
+
+ A start tag whose tag name is "col"
+ Insert an HTML element for the token. Immediately pop the
+ current node off the stack of open elements.
+
+ Acknowledge the token's self-closing flag, if it is set.
+
+ An end tag whose tag name is "colgroup"
+ If the current node is the root html element, then this is a
+ parse error; ignore the token. (fragment case)
+
+ Otherwise, pop the current node (which will be a colgroup
+ element) from the stack of open elements. Switch the insertion
+ mode to "in table".
+
+ An end tag whose tag name is "col"
+ Parse error. Ignore the token.
+
+ An end-of-file token
+ If the current node is the root html element, then stop parsing.
+ (fragment case)
+
+ Otherwise, act as described in the "anything else" entry below.
+
+ Anything else
+ Act as if an end tag with the tag name "colgroup" had been seen,
+ and then, if that token wasn't ignored, reprocess the current
+ token.
+
+ The fake end tag token here can only be ignored in the fragment
+ case.
+
+ 8.2.5.15 The "in table body" insertion mode
+
+ When the insertion mode is "in table body", tokens must be handled as
+ follows:
+
+ A start tag whose tag name is "tr"
+ Clear the stack back to a table body context. (See below.)
+
+ Insert an HTML element for the token, then switch the insertion
+ mode to "in row".
+
+ A start tag whose tag name is one of: "th", "td"
+ Parse error. Act as if a start tag with the tag name "tr" had
+ been seen, then reprocess the current token.
+
+ An end tag whose tag name is one of: "tbody", "tfoot", "thead"
+ If the stack of open elements does not have an element in table
+ scope with the same tag name as the token, this is a parse
+ error. Ignore the token.
+
+ Otherwise:
+
+ Clear the stack back to a table body context. (See below.)
+
+ Pop the current node from the stack of open elements. Switch the
+ insertion mode to "in table".
+
+ A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "tbody", "tfoot", "thead"
+
+ An end tag whose tag name is "table"
+ If the stack of open elements does not have a tbody, thead, or
+ tfoot element in table scope, this is a parse error. Ignore the
+ token. (fragment case)
+
+ Otherwise:
+
+ Clear the stack back to a table body context. (See below.)
+
+ Act as if an end tag with the same tag name as the current node
+ ("tbody", "tfoot", or "thead") had been seen, then reprocess the
+ current token.
+
+ An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html", "td", "th", "tr"
+ Parse error. Ignore the token.
+
+ Anything else
+ Process the token using the rules for the "in table" insertion
+ mode.
+
+ When the steps above require the UA to clear the stack back to a table
+ body context, it means that the UA must, while the current node is not
+ a tbody, tfoot, thead, or html element, pop elements from the stack of
+ open elements.
+
+ The current node being an html element after this process is a fragment
+ case.
+
+ 8.2.5.16 The "in row" insertion mode
+
+ When the insertion mode is "in row", tokens must be handled as follows:
+
+ A start tag whose tag name is one of: "th", "td"
+ Clear the stack back to a table row context. (See below.)
+
+ Insert an HTML element for the token, then switch the insertion
+ mode to "in cell".
+
+ Insert a marker at the end of the list of active formatting
+ elements.
+
+ An end tag whose tag name is "tr"
+ If the stack of open elements does not have an element in table
+ scope with the same tag name as the token, this is a parse
+ error. Ignore the token. (fragment case)
+
+ Otherwise:
+
+ Clear the stack back to a table row context. (See below.)
+
+ Pop the current node (which will be a tr element) from the stack
+ of open elements. Switch the insertion mode to "in table body".
+
+ A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "tbody", "tfoot", "thead", "tr"
+
+ An end tag whose tag name is "table"
+ Act as if an end tag with the tag name "tr" had been seen, then,
+ if that token wasn't ignored, reprocess the current token.
+
+ The fake end tag token here can only be ignored in the fragment
+ case.
+
+ An end tag whose tag name is one of: "tbody", "tfoot", "thead"
+ If the stack of open elements does not have an element in table
+ scope with the same tag name as the token, this is a parse
+ error. Ignore the token.
+
+ Otherwise, act as if an end tag with the tag name "tr" had been
+ seen, then reprocess the current token.
+
+ An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html", "td", "th"
+ Parse error. Ignore the token.
+
+ Anything else
+ Process the token using the rules for the "in table" insertion
+ mode.
+
+ When the steps above require the UA to clear the stack back to a table
+ row context, it means that the UA must, while the current node is not a
+ tr element or an html element, pop elements from the stack of open
+ elements.
+
+ The current node being an html element after this process is a fragment
+ case.
+
+ 8.2.5.17 The "in cell" insertion mode
+
+ When the insertion mode is "in cell", tokens must be handled as
+ follows:
+
+ An end tag whose tag name is one of: "td", "th"
+ If the stack of open elements does not have an element in table
+ scope with the same tag name as that of the token, then this is
+ a parse error and the token must be ignored.
+
+ Otherwise:
+
+ Generate implied end tags.
+
+ Now, if the current node is not an element with the same tag
+ name as the token, then this is a parse error.
+
+ Pop elements from this stack until an element with the same tag
+ name as the token has been popped from the stack.
+
+ Clear the list of active formatting elements up to the last
+ marker.
+
+ Switch the insertion mode to "in row". (The current node will be
+ a tr element at this point.)
+
+ A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "tbody", "td", "tfoot", "th", "thead", "tr"
+ If the stack of open elements does not have a td or th element
+ in table scope, then this is a parse error; ignore the token.
+ (fragment case)
+
+ Otherwise, close the cell (see below) and reprocess the current
+ token.
+
+ An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html"
+ Parse error. Ignore the token.
+
+ An end tag whose tag name is one of: "table", "tbody", "tfoot",
+ "thead", "tr"
+ If the stack of open elements does not have an element in table
+ scope with the same tag name as that of the token (which can
+ only happen for "tbody", "tfoot" and "thead", or, in the
+ fragment case), then this is a parse error and the token must be
+ ignored.
+
+ Otherwise, close the cell (see below) and reprocess the current
+ token.
+
+ Anything else
+ Process the token using the rules for the "in body" insertion
+ mode.
+
+ Where the steps above say to close the cell, they mean to run the
+ following algorithm:
+ 1. If the stack of open elements has a td element in table scope, then
+ act as if an end tag token with the tag name "td" had been seen.
+ 2. Otherwise, the stack of open elements will have a th element in
+ table scope; act as if an end tag token with the tag name "th" had
+ been seen.
+
+ The stack of open elements cannot have both a td and a th element in
+ table scope at the same time, nor can it have neither when the
+ insertion mode is "in cell".
+
+ 8.2.5.18 The "in select" insertion mode
+
+ When the insertion mode is "in select", tokens must be handled as
+ follows:
+
+ A character token
+ Insert the token's character into the current node.
+
+ A comment token
+ Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token.
+
+ A DOCTYPE token
+ Parse error. Ignore the token.
+
+ A start tag whose tag name is "html"
+ Process the token using the rules for the "in body" insertion
+ mode.
+
+ A start tag whose tag name is "option"
+ If the current node is an option element, act as if an end tag
+ with the tag name "option" had been seen.
+
+ Insert an HTML element for the token.
+
+ A start tag whose tag name is "optgroup"
+ If the current node is an option element, act as if an end tag
+ with the tag name "option" had been seen.
+
+ If the current node is an optgroup element, act as if an end tag
+ with the tag name "optgroup" had been seen.
+
+ Insert an HTML element for the token.
+
+ An end tag whose tag name is "optgroup"
+ First, if the current node is an option element, and the node
+ immediately before it in the stack of open elements is an
+ optgroup element, then act as if an end tag with the tag name
+ "option" had been seen.
+
+ If the current node is an optgroup element, then pop that node
+ from the stack of open elements. Otherwise, this is a parse
+ error; ignore the token.
+
+ An end tag whose tag name is "option"
+ If the current node is an option element, then pop that node
+ from the stack of open elements. Otherwise, this is a parse
+ error; ignore the token.
+
+ An end tag whose tag name is "select"
+ If the stack of open elements does not have an element in table
+ scope with the same tag name as the token, this is a parse
+ error. Ignore the token. (fragment case)
+
+ Otherwise:
+
+ Pop elements from the stack of open elements until a select
+ element has been popped from the stack.
+
+ Reset the insertion mode appropriately.
+
+ A start tag whose tag name is "select"
+ Parse error. Act as if the token had been an end tag with the
+ tag name "select" instead.
+
+ A start tag whose tag name is one of: "input", "textarea"
+ Parse error. Act as if an end tag with the tag name "select" had
+ been seen, and reprocess the token.
+
+ A start tag token whose tag name is "script"
+ Process the token using the rules for the "in head" insertion
+ mode.
+
+ An end-of-file token
+ If the current node is not the root html element, then this is a
+ parse error.
+
+ It can only be the current node in the fragment case.
+
+ Stop parsing.
+
+ Anything else
+ Parse error. Ignore the token.
+
+ 8.2.5.19 The "in select in table" insertion mode
+
+ When the insertion mode is "in select in table", tokens must be handled
+ as follows:
+
+ A start tag whose tag name is one of: "caption", "table", "tbody",
+ "tfoot", "thead", "tr", "td", "th"
+ Parse error. Act as if an end tag with the tag name "select" had
+ been seen, and reprocess the token.
+
+ An end tag whose tag name is one of: "caption", "table", "tbody",
+ "tfoot", "thead", "tr", "td", "th"
+ Parse error.
+
+ If the stack of open elements has an element in table scope with
+ the same tag name as that of the token, then act as if an end
+ tag with the tag name "select" had been seen, and reprocess the
+ token. Otherwise, ignore the token.
+
+ Anything else
+ Process the token using the rules for the "in select" insertion
+ mode.
+
+ 8.2.5.20 The "in foreign content" insertion mode
+
+ When the insertion mode is "in foreign content", tokens must be handled
+ as follows:
+
+ A character token
+ Insert the token's character into the current node.
+
+ A comment token
+ Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token.
+
+ A DOCTYPE token
+ Parse error. Ignore the token.
+
+ A start tag whose tag name is neither "mglyph" nor "malignmark", if the
+ current node is an mi element in the MathML namespace.
+
+ A start tag whose tag name is neither "mglyph" nor "malignmark", if the
+ current node is an mo element in the MathML namespace.
+
+ A start tag whose tag name is neither "mglyph" nor "malignmark", if the
+ current node is an mn element in the MathML namespace.
+
+ A start tag whose tag name is neither "mglyph" nor "malignmark", if the
+ current node is an ms element in the MathML namespace.
+
+ A start tag whose tag name is neither "mglyph" nor "malignmark", if the
+ current node is an mtext element in the MathML namespace.
+
+ A start tag, if the current node is an element in the HTML namespace.
+ An end tag
+ Process the token using the rules for the secondary insertion
+ mode.
+
+ If, after doing so, the insertion mode is still "in foreign
+ content", but there is no element in scope that has a namespace
+ other than the HTML namespace, switch the insertion mode to the
+ secondary insertion mode.
+
+ A start tag whose tag name is one of: "b", "big", "blockquote", "body",
+ "br", "center", "code", "dd", "div", "dl", "dt", "em", "embed",
+ "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "i", "img",
+ "li", "listing", "menu", "meta", "nobr", "ol", "p", "pre",
+ "ruby", "s", "small", "span", "strong", "strike", "sub", "sup",
+ "table", "tt", "u", "ul", "var"
+
+ A start tag whose tag name is "font", if the token has any attributes
+ named "color", "face", or "size"
+
+ An end-of-file token
+ Parse error.
+
+ Pop elements from the stack of open elements until the current
+ node is in the HTML namespace.
+
+ Switch the insertion mode to the secondary insertion mode, and
+ reprocess the token.
+
+ Any other start tag
+ If the current node is an element in the MathML namespace,
+ adjust MathML attributes for the token. (This fixes the case of
+ MathML attributes that are not all lowercase.)
+
+ Adjust foreign attributes for the token. (This fixes the use of
+ namespaced attributes, in particular XLink in SVG.)
+
+ Insert a foreign element for the token, in the same namespace as
+ the current node.
+
+ If the token has its self-closing flag set, pop the current node
+ off the stack of open elements and acknowledge the token's
+ self-closing flag.
+
+ 8.2.5.21 The "after body" insertion mode
+
+ When the insertion mode is "after body", tokens must be handled as
+ follows:
+
+ A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE
+ Process the token using the rules for the "in body" insertion
+ mode.
+
+ A comment token
+ Append a Comment node to the first element in the stack of open
+ elements (the html element), with the data attribute set to the
+ data given in the comment token.
+
+ A DOCTYPE token
+ Parse error. Ignore the token.
+
+ A start tag whose tag name is "html"
+ Process the token using the rules for the "in body" insertion
+ mode.
+
+ An end tag whose tag name is "html"
+ If the parser was originally created as part of the HTML
+ fragment parsing algorithm, this is a parse error; ignore the
+ token. (fragment case)
+
+ Otherwise, switch the insertion mode to "after after body".
+
+ An end-of-file token
+ Stop parsing.
+
+ Anything else
+ Parse error. Switch the insertion mode to "in body" and
+ reprocess the token.
+
+ 8.2.5.22 The "in frameset" insertion mode
+
+ When the insertion mode is "in frameset", tokens must be handled as
+ follows:
+
+ A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE
+ Insert the character into the current node.
+
+ A comment token
+ Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token.
+
+ A DOCTYPE token
+ Parse error. Ignore the token.
+
+ A start tag whose tag name is "html"
+ Process the token using the rules for the "in body" insertion
+ mode.
+
+ A start tag whose tag name is "frameset"
+ Insert an HTML element for the token.
+
+ An end tag whose tag name is "frameset"
+ If the current node is the root html element, then this is a
+ parse error; ignore the token. (fragment case)
+
+ Otherwise, pop the current node from the stack of open elements.
+
+ If the parser was not originally created as part of the HTML
+ fragment parsing algorithm (fragment case), and the current node
+ is no longer a frameset element, then switch the insertion mode
+ to "after frameset".
+
+ A start tag whose tag name is "frame"
+ Insert an HTML element for the token. Immediately pop the
+ current node off the stack of open elements.
+
+ Acknowledge the token's self-closing flag, if it is set.
+
+ A start tag whose tag name is "noframes"
+ Process the token using the rules for the "in head" insertion
+ mode.
+
+ An end-of-file token
+ If the current node is not the root html element, then this is a
+ parse error.
+
+ It can only be the current node in the fragment case.
+
+ Stop parsing.
+
+ Anything else
+ Parse error. Ignore the token.
+
+ 8.2.5.23 The "after frameset" insertion mode
+
+ When the insertion mode is "after frameset", tokens must be handled as
+ follows:
+
+ A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE
+ Insert the character into the current node.
+
+ A comment token
+ Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token.
+
+ A DOCTYPE token
+ Parse error. Ignore the token.
+
+ A start tag whose tag name is "html"
+ Process the token using the rules for the "in body" insertion
+ mode.
+
+ An end tag whose tag name is "html"
+ Switch the insertion mode to "after after frameset".
+
+ A start tag whose tag name is "noframes"
+ Process the token using the rules for the "in head" insertion
+ mode.
+
+ An end-of-file token
+ Stop parsing.
+
+ Anything else
+ Parse error. Ignore the token.
+
+ This doesn't handle UAs that don't support frames, or that do support
+ frames but want to show the NOFRAMES content. Supporting the former is
+ easy; supporting the latter is harder.
+
+ 8.2.5.24 The "after after body" insertion mode
+
+ When the insertion mode is "after after body", tokens must be handled
+ as follows:
+
+ A comment token
+ Append a Comment node to the Document object with the data
+ attribute set to the data given in the comment token.
+
+ A DOCTYPE token
+ A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE
+
+ A start tag whose tag name is "html"
+ Process the token using the rules for the "in body" insertion
+ mode.
+
+ An end-of-file token
+ Stop parsing.
+
+ Anything else
+ Parse error. Switch the insertion mode to "in body" and
+ reprocess the token.
+
+ 8.2.5.25 The "after after frameset" insertion mode
+
+ When the insertion mode is "after after frameset", tokens must be
+ handled as follows:
+
+ A comment token
+ Append a Comment node to the Document object with the data
+ attribute set to the data given in the comment token.
+
+ A DOCTYPE token
+ A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE
+
+ A start tag whose tag name is "html"
+ Process the token using the rules for the "in body" insertion
+ mode.
+
+ An end-of-file token
+ Stop parsing.
+
+ A start tag whose tag name is "noframes"
+ Process the token using the rules for the "in head" insertion
+ mode.
+
+ Anything else
+ Parse error. Ignore the token.
+
+ 8.2.6 The end
+
+ Once the user agent stops parsing the document, the user agent must
+ follow the steps in this section.
+
+ First, the current document readiness must be set to "interactive".
+
+ Then, the rules for when a script completes loading start applying
+ (script execution is no longer managed by the parser).
+
+ If any of the scripts in the list of scripts that will execute as soon
+ as possible have completed loading, or if the list of scripts that will
+ execute asynchronously is not empty and the first script in that list
+ has completed loading, then the user agent must act as if those scripts
+ just completed loading, following the rules given for that in the
+ script element definition.
+
+ Then, if the list of scripts that will execute when the document has
+ finished parsing is not empty, and the first item in this list has
+ already completed loading, then the user agent must act as if that
+ script just finished loading.
+
+ By this point, there will be no scripts that have loaded but have not
+ yet been executed.
+
+ The user agent must then fire a simple event called DOMContentLoaded at
+ the Document.
+
+ Once everything that delays the load event has completed, the user
+ agent must set the current document readiness to "complete", and then
+ fire a load event at the body element.
+
+ Delaying the load event for things like image loads allows for intranet
+ port scans (even without JavaScript!). Should we really encode that
+ into the spec?
+
+ 8.2.7 Coercing an HTML DOM into an infoset
+
+ When an application uses an HTML parser in conjunction with an XML
+ pipeline, it is possible that the constructed DOM is not compatible
+ with the XML tool chain in certain subtle ways. For example, an XML
+ toolchain might not be able to represent attributes with the name
+ xmlns, since they conflict with the Namespaces in XML syntax. There is
+ also some data that the HTML parser generates that isn't included in
+ the DOM itself. This section specifies some rules for handling these
+ issues.
+
+ If the XML API being used doesn't support DOCTYPEs, the tool may drop
+ DOCTYPEs altogether.
+
+ If the XML API doesn't support attributes in no namespace that are
+ named "xmlns", attributes whose names start with "xmlns:", or
+ attributes in the XMLNS namespace, then the tool may drop such
+ attributes.
+
+ The tool may annotate the output with any namespace declarations
+ required for proper operation.
+
+ If the XML API being used restricts the allowable characters in the
+ local names of elements and attributes, then the tool may map all
+ element and attribute local names that the API wouldn't support to a
+ set of names that are allowed, by replacing any character that isn't
+ supported with the uppercase letter U and the five digits of the
+ character's Unicode codepoint when expressed in hexadecimal, using
+ digits 0-9 and capital letters A-F as the symbols, in increasing
+ numeric order.
+
+ For example, the element name foo<bar, which can be output by the HTML
+ parser, though it is neither a legal HTML element name nor a
+ well-formed XML element name, would be converted into fooU0003Cbar,
+ which is a well-formed XML element name (though it's still not legal in
+ HTML by any means).
+
+ As another example, consider the attribute xlink:href. Used on a MathML
+ element, it becomes, after being adjusted, an attribute with a prefix
+ "xlink" and a local name "href". However, used on an HTML element, it
+ becomes an attribute with no prefix and the local name "xlink:href",
+ which is not a valid NCName, and thus might not be accepted by an XML
+ API. It could thus get converted, becoming "xlinkU0003Ahref".
+
+ The resulting names from this conversion conveniently can't clash with
+ any attribute generated by the HTML parser, since those are all either
+ lowercase or those listed in the adjust foreign attributes algorithm's
+ table.
+
+ If the XML API restricts comments from having two consecutive U+002D
+ HYPHEN-MINUS characters (--), the tool may insert a single U+0020 SPACE
+ character between any such offending characters.
+
+ If the XML API restricts comments from ending in a U+002D HYPHEN-MINUS
+ character (-), the tool may insert a single U+0020 SPACE character at
+ the end of such comments.
+
+ If the XML API restricts allowed characters in character data, the tool
+ may replace any U+000C FORM FEED (FF) character with a U+0020 SPACE
+ character, and any other literal non-XML character with a U+FFFD
+ REPLACEMENT CHARACTER.
+
+ If the tool has no way to convey out-of-band information, then the tool
+ may drop the following information:
+ * Whether the document is set to no quirks mode, limited quirks mode,
+ or quirks mode
+ * The association between form controls and forms that aren't their
+ nearest form element ancestor (use of the form element pointer in
+ the parser)
+
+ The mutations allowed by this section apply after the HTML parser's
+ rules have been applied. For example, a <a::> start tag will be closed
+ by a </a::> end tag, and never by a </aU0003AU0003A> end tag, even if
+ the user agent is using the rules above to then generate an actual
+ element in the DOM with the name aU0003AU0003A for that start tag.
+
+ 8.3 Namespaces
+
+ The HTML namespace is: http://www.w3.org/1999/xhtml
+
+ The MathML namespace is: http://www.w3.org/1998/Math/MathML
+
+ The SVG namespace is: http://www.w3.org/2000/svg
+
+ The XLink namespace is: http://www.w3.org/1999/xlink
+
+ The XML namespace is: http://www.w3.org/XML/1998/namespace
+
+ The XMLNS namespace is: http://www.w3.org/2000/xmlns/
diff --git a/parser/html/java/htmlparser/generate-encoding-data.py b/parser/html/java/htmlparser/generate-encoding-data.py
new file mode 100644
index 000000000..69b2fdc30
--- /dev/null
+++ b/parser/html/java/htmlparser/generate-encoding-data.py
@@ -0,0 +1,745 @@
+#!/usr/bin/python
+
+# Copyright (c) 2013-2015 Mozilla Foundation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+import json
+
+class Label:  # Pairs one encoding label with the name of the encoding it resolves to.
+ def __init__(self, label, preferred):  # label: the alias text; preferred: the owning encoding's "name" value
+ self.label = label
+ self.preferred = preferred
+ def __cmp__(self, other):  # Python 2 comparison hook: instances order by label text only, so list.sort() sorts by label
+ return cmp(self.label, other.label)  # NOTE(review): __cmp__ and cmp() do not exist in Python 3; this script targets Python 2
+
+# If a multi-byte encoding is on this list, it is assumed to have a
+# non-generated decoder implementation class. Otherwise, the JDK default
+# decoder is used as a placeholder.
+MULTI_BYTE_DECODER_IMPLEMENTED = [  # listed names get "return new <ClassName>Decoder(this);" in the generated newDecoder()
+ u"x-user-defined",
+ u"replacement",
+ u"big5",
+]
+
+MULTI_BYTE_ENCODER_IMPLEMENTED = [  # listed names get "return new <ClassName>Encoder(this);"; others use Charset.forName(NAME).newEncoder()
+ u"big5",
+]
+
+preferred = []  # encoding names (the "name" field of every encoding in encodings.json)
+
+labels = []  # Label objects, one per label of every encoding
+
+data = json.load(open("../encoding/encodings.json", "r"))  # relative path: resolved against the current working directory; the file object is never closed explicitly
+
+indexes = json.load(open("../encoding/indexes.json", "r"))  # looked up by name later, e.g. indexes["big5"]
+
+single_byte = []  # replaced by the encodings of the "Legacy single-byte encodings" group
+
+multi_byte = []  # extended with the encodings of every other group
+
+def to_camel_name(name):  # Derive the Java class name used for an encoding from its encoding name.
+ if name == u"iso-8859-8-i":  # special-cased: the prefix rule below would yield u"Iso8-i"
+ return u"Iso8I"
+ if name.startswith(u"iso-8859-"):  # e.g. u"iso-8859-2" -> u"Iso2"
+ return name.replace(u"iso-8859-", u"Iso")
+ return name.title().replace(u"X-", u"").replace(u"-", u"").replace(u"_", u"")  # e.g. u"x-user-defined" -> u"UserDefined", u"windows-1252" -> u"Windows1252"
+
+def to_constant_name(name):  # Derive the Java constant identifier for an encoding name.
+ return name.replace(u"-", u"_").upper()  # e.g. u"windows-1252" -> u"WINDOWS_1252"
+
+# Encoding.java
+
+for group in data:  # walk every group of encodings.json, splitting encodings into single-byte vs. multi-byte
+ if group["heading"] == "Legacy single-byte encodings":  # the only group treated as single-byte
+ single_byte = group["encodings"]
+ else:
+ multi_byte.extend(group["encodings"])
+ for encoding in group["encodings"]:  # collect the name and every label of each encoding
+ preferred.append(encoding["name"])
+ for label in encoding["labels"]:
+ labels.append(Label(label, encoding["name"]))
+
+preferred.sort()  # plain string sort of encoding names
+labels.sort()  # orders by label text (Label.__cmp__); the generated Java LABELS array is searched with Arrays.binarySearch
+
+label_file = open("src/nu/validator/encoding/Encoding.java", "w")
+
+label_file.write("""/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
+import java.nio.charset.spi.CharsetProvider;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/**
+ * Represents an <a href="https://encoding.spec.whatwg.org/#encoding">encoding</a>
+ * as defined in the <a href="https://encoding.spec.whatwg.org/">Encoding
+ * Standard</a>, provides access to each encoding defined in the Encoding
+ * Standard via a static constant and provides the
+ * "<a href="https://encoding.spec.whatwg.org/#concept-encoding-get">get an
+ * encoding</a>" algorithm defined in the Encoding Standard.
+ *
+ * <p>This class inherits from {@link Charset} to allow the Encoding
+ * Standard-compliant encodings to be used in contexts that support
+ * <code>Charset</code> instances. However, by design, the Encoding
+ * Standard-compliant encodings are not supplied via a {@link CharsetProvider}
+ * and, therefore, are not available via and do not interfere with the static
+ * methods provided by <code>Charset</code>. (This class provides methods of
+ * the same name to hide each static method of <code>Charset</code> to help
+ * avoid accidental calls to the static methods of the superclass when working
+ * with Encoding Standard-compliant encodings.)
+ *
+ * <p>When an application needs to use a particular encoding, such as utf-8
+ * or windows-1252, the corresponding constant, i.e.
+ * {@link #UTF_8 Encoding.UTF_8} and {@link #WINDOWS_1252 Encoding.WINDOWS_1252}
+ * respectively, should be used. However, when the application receives an
+ * encoding label from external input, the method {@link #forName(String)
+ * forName()} should be used to obtain the object representing the encoding
+ * identified by the label. In contexts where labels that map to the
+ * <a href="https://encoding.spec.whatwg.org/#replacement">replacement
+ * encoding</a> should be treated as unknown, the method {@link
+ * #forNameNoReplacement(String) forNameNoReplacement()} should be used instead.
+ *
+ *
+ * @author hsivonen
+ */
+public abstract class Encoding extends Charset {
+
+ private static final String[] LABELS = {
+""")
+
+for label in labels:
+ label_file.write(" \"%s\",\n" % label.label)
+
+label_file.write(""" };
+
+ private static final Encoding[] ENCODINGS_FOR_LABELS = {
+""")
+
+for label in labels:
+ label_file.write(" %s.INSTANCE,\n" % to_camel_name(label.preferred))
+
+label_file.write(""" };
+
+ private static final Encoding[] ENCODINGS = {
+""")
+
+for label in preferred:
+ label_file.write(" %s.INSTANCE,\n" % to_camel_name(label))
+
+label_file.write(""" };
+
+""")
+
+for label in preferred:
+ label_file.write(""" /**
+ * The %s encoding.
+ */
+ public static final Encoding %s = %s.INSTANCE;
+
+""" % (label, to_constant_name(label), to_camel_name(label)))
+
+label_file.write("""
+private static SortedMap<String, Charset> encodings = null;
+
+ protected Encoding(String canonicalName, String[] aliases) {
+ super(canonicalName, aliases);
+ }
+
+ private enum State {
+ HEAD, LABEL, TAIL
+ };
+
+ public static Encoding forName(String label) {
+ if (label == null) {
+ throw new IllegalArgumentException("Label must not be null.");
+ }
+ if (label.length() == 0) {
+ throw new IllegalCharsetNameException(label);
+ }
+ // First try the fast path
+ int index = Arrays.binarySearch(LABELS, label);
+ if (index >= 0) {
+ return ENCODINGS_FOR_LABELS[index];
+ }
+ // Else, slow path
+ StringBuilder sb = new StringBuilder();
+ State state = State.HEAD;
+ for (int i = 0; i < label.length(); i++) {
+ char c = label.charAt(i);
+ if ((c == ' ') || (c == '\\n') || (c == '\\r') || (c == '\\t')
+ || (c == '\\u000C')) {
+ if (state == State.LABEL) {
+ state = State.TAIL;
+ }
+ continue;
+ }
+ if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) {
+ switch (state) {
+ case HEAD:
+ state = State.LABEL;
+ // Fall through
+ case LABEL:
+ sb.append(c);
+ continue;
+ case TAIL:
+ throw new IllegalCharsetNameException(label);
+ }
+ }
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ switch (state) {
+ case HEAD:
+ state = State.LABEL;
+ // Fall through
+ case LABEL:
+ sb.append(c);
+ continue;
+ case TAIL:
+ throw new IllegalCharsetNameException(label);
+ }
+ }
+ if ((c == '-') || (c == '+') || (c == '.') || (c == ':')
+ || (c == '_')) {
+ switch (state) {
+ case LABEL:
+ sb.append(c);
+ continue;
+ case HEAD:
+ case TAIL:
+ throw new IllegalCharsetNameException(label);
+ }
+ }
+ throw new IllegalCharsetNameException(label);
+ }
+ index = Arrays.binarySearch(LABELS, sb.toString());
+ if (index >= 0) {
+ return ENCODINGS_FOR_LABELS[index];
+ }
+ throw new UnsupportedCharsetException(label);
+ }
+
+ public static Encoding forNameNoReplacement(String label) {
+ Encoding encoding = Encoding.forName(label);
+ if (encoding == Encoding.REPLACEMENT) {
+ throw new UnsupportedCharsetException(label);
+ }
+ return encoding;
+ }
+
+ public static boolean isSupported(String label) {
+ try {
+ Encoding.forName(label);
+ } catch (UnsupportedCharsetException e) {
+ return false;
+ }
+ return true;
+ }
+
+ public static boolean isSupportedNoReplacement(String label) {
+ try {
+ Encoding.forNameNoReplacement(label);
+ } catch (UnsupportedCharsetException e) {
+ return false;
+ }
+ return true;
+ }
+
+ public static SortedMap<String, Charset> availableCharsets() {
+ if (encodings == null) {
+ TreeMap<String, Charset> map = new TreeMap<String, Charset>();
+ for (Encoding encoding : ENCODINGS) {
+ map.put(encoding.name(), encoding);
+ }
+ encodings = Collections.unmodifiableSortedMap(map);
+ }
+ return encodings;
+ }
+
+ public static Encoding defaultCharset() {
+ return WINDOWS_1252;
+ }
+
+ @Override public boolean canEncode() {
+ return false;
+ }
+
+ @Override public boolean contains(Charset cs) {
+ return false;
+ }
+
+ @Override public CharsetEncoder newEncoder() {
+ throw new UnsupportedOperationException("Encoder not implemented.");
+ }
+}
+""")
+
+label_file.close()
+
+# Single-byte encodings
+
+for encoding in single_byte:
+ name = encoding["name"]
+ labels = encoding["labels"]
+ labels.sort()
+ class_name = to_camel_name(name)
+ mapping_name = name
+ if mapping_name == u"iso-8859-8-i":
+ mapping_name = u"iso-8859-8"
+ mapping = indexes[mapping_name]
+ class_file = open("src/nu/validator/encoding/%s.java" % class_name, "w")
+ class_file.write('''/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class ''')
+ class_file.write(class_name)
+ class_file.write(''' extends Encoding {
+
+ private static final char[] TABLE = {''')
+ fallible = False
+ comma = False
+ for code_point in mapping:
+ # XXX should we have error reporting?
+ if not code_point:
+ code_point = 0xFFFD
+ fallible = True
+ if comma:
+ class_file.write(",")
+ class_file.write("\n '\u%04x'" % code_point);
+ comma = True
+ class_file.write('''
+ };
+
+ private static final String[] LABELS = {''')
+
+ comma = False
+ for label in labels:
+ if comma:
+ class_file.write(",")
+ class_file.write("\n \"%s\"" % label);
+ comma = True
+ class_file.write('''
+ };
+
+ private static final String NAME = "''')
+ class_file.write(name)
+ class_file.write('''";
+
+ static final Encoding INSTANCE = new ''')
+ class_file.write(class_name)
+ class_file.write('''();
+
+ private ''')
+ class_file.write(class_name)
+ class_file.write('''() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new ''')
+ class_file.write("Fallible" if fallible else "Infallible")
+ class_file.write('''SingleByteDecoder(this, TABLE);
+ }
+
+}
+''')
+ class_file.close()
+
+# Multi-byte encodings
+
+for encoding in multi_byte:
+ name = encoding["name"]
+ labels = encoding["labels"]
+ labels.sort()
+ class_name = to_camel_name(name)
+ class_file = open("src/nu/validator/encoding/%s.java" % class_name, "w")
+ class_file.write('''/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+class ''')
+ class_file.write(class_name)
+ class_file.write(''' extends Encoding {
+
+ private static final String[] LABELS = {''')
+
+ comma = False
+ for label in labels:
+ if comma:
+ class_file.write(",")
+ class_file.write("\n \"%s\"" % label);
+ comma = True
+ class_file.write('''
+ };
+
+ private static final String NAME = "''')
+ class_file.write(name)
+ class_file.write('''";
+
+ static final ''')
+ class_file.write(class_name)
+ class_file.write(''' INSTANCE = new ''')
+ class_file.write(class_name)
+ class_file.write('''();
+
+ private ''')
+ class_file.write(class_name)
+ class_file.write('''() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ ''')
+ if name == "gbk":
+ class_file.write('''return Charset.forName("gb18030").newDecoder();''')
+ elif name in MULTI_BYTE_DECODER_IMPLEMENTED:
+ class_file.write("return new %sDecoder(this);" % class_name)
+ else:
+ class_file.write('''return Charset.forName(NAME).newDecoder();''')
+ class_file.write('''
+ }
+
+ @Override public CharsetEncoder newEncoder() {
+ ''')
+ if name in MULTI_BYTE_ENCODER_IMPLEMENTED:
+ class_file.write("return new %sEncoder(this);" % class_name)
+ else:
+ class_file.write('''return Charset.forName(NAME).newEncoder();''')
+ class_file.write('''
+ }
+}
+''')
+ class_file.close()
+
+# Big5
+
+def null_to_zero(code_point):  # Return 0 for a missing (None/falsy) index entry, otherwise the code point unchanged.
+ if not code_point:  # also true for 0, which maps to 0 anyway
+ code_point = 0
+ return code_point
+
+index = []  # Big5 index with missing entries replaced by 0; list position is the pointer
+
+for code_point in indexes["big5"]:
+ index.append(null_to_zero(code_point))
+
+# There are four major gaps consisting of more than 4 consecutive invalid pointers
+gaps = []  # (start, end) pointer pairs, end exclusive, for each run of more than 4 zero entries
+consecutive = 0  # length of the zero run currently being scanned
+consecutive_start = 0  # pointer at which that run began
+offset = 0  # pointer of the entry being examined
+for code_point in index:
+ if code_point == 0:
+ if consecutive == 0:
+ consecutive_start = offset
+ consecutive +=1
+ else:
+ if consecutive > 4:  # runs of 4 or fewer zeros stay inside a table range
+ gaps.append((consecutive_start, consecutive_start + consecutive))
+ consecutive = 0  # NOTE(review): a run is only recorded when a non-zero entry follows it, so a zero run reaching the end of index is never added - confirm the data cannot end that way
+ offset += 1
+
+def invert_ranges(ranges, cap):  # Return the (start, end) spans of [0, cap) that lie between the given (start, end) ranges; assumes ranges are ascending and non-overlapping, as the gap scans produce them - TODO confirm
+ inverted = []
+ invert_start = 0  # start of the next span to emit: 0 initially, then the end of the previous range
+ for (start, end) in ranges:
+ if start != 0:  # a range beginning at 0 has nothing in front of it, so no span is emitted
+ inverted.append((invert_start, start))
+ invert_start = end
+ inverted.append((invert_start, cap))  # the tail span is always appended, even when invert_start == cap (empty span)
+ return inverted
+
+cap = len(index)  # one past the highest pointer
+ranges = invert_ranges(gaps, cap)  # pointer spans left after removing the invalid-pointer gaps; each becomes one TABLE<n> string
+
+# Now compute a compressed lookup table for astralness
+
+gaps = []  # reused: now runs of more than 40 consecutive entries whose value is <= 0xFFFF (unmapped zeros included)
+consecutive = 0
+consecutive_start = 0
+offset = 0
+for code_point in index:
+ if code_point <= 0xFFFF:  # entry is not above the BMP
+ if consecutive == 0:
+ consecutive_start = offset
+ consecutive +=1
+ else:
+ if consecutive > 40:  # shorter non-astral runs are kept and stored as 0 bits
+ gaps.append((consecutive_start, consecutive_start + consecutive))
+ consecutive = 0
+ offset += 1
+
+astral_ranges = invert_ranges(gaps, cap)  # pointer spans for which astralness bits are stored
+
+class_file = open("src/nu/validator/encoding/Big5Data.java", "w")
+class_file.write('''/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+final class Big5Data {
+
+ private static final String ASTRALNESS = "''')
+
+bits = []  # one entry per pointer in astral_ranges, in order: 1 if the code point is above 0xFFFF, else 0
+for (low, high) in astral_ranges:
+ for i in xrange(low, high):
+ bits.append(1 if index[i] > 0xFFFF else 0)
+# pad length to multiple of 16
+for j in xrange(16 - (len(bits) % 16)):  # NOTE(review): when len(bits) is already a multiple of 16 this appends 16 zero bits, i.e. one extra all-zero char
+ bits.append(0)
+
+i = 0
+while i < len(bits):  # pack 16 bits per Java char; bit j of a char holds bits[i + j], matching readBit() in the emitted Java
+ accu = 0
+ for j in xrange(16):
+ accu |= bits[i + j] << j
+ if accu == 0x22:  # 0x22 is the double quote, emitted as \" instead of a \u escape
+ class_file.write('\\"')
+ else:
+ class_file.write('\\u%04X' % accu)  # NOTE(review): values 0x000A, 0x000D and 0x005C would also be unsafe as \u escapes inside a Java string literal; presumably they do not occur in the current data - confirm
+ i += 16
+
+class_file.write('''";
+
+''')
+
+j = 0
+for (low, high) in ranges:
+ class_file.write(''' private static final String TABLE%d = "''' % j)
+ for i in xrange(low, high):
+ class_file.write('\\u%04X' % (index[i] & 0xFFFF))
+ class_file.write('''";
+
+''')
+ j += 1
+
+class_file.write(''' private static boolean readBit(int i) {
+ return (ASTRALNESS.charAt(i >> 4) & (1 << (i & 0xF))) != 0;
+ }
+
+ static char lowBits(int pointer) {
+''')
+
+j = 0
+for (low, high) in ranges:
+ class_file.write(''' if (pointer < %d) {
+ return '\\u0000';
+ }
+ if (pointer < %d) {
+ return TABLE%d.charAt(pointer - %d);
+ }
+''' % (low, high, j, low))
+ j += 1
+
+class_file.write(''' return '\\u0000';
+ }
+
+ static boolean isAstral(int pointer) {
+''')
+
+base = 0
+for (low, high) in astral_ranges:
+ if high - low == 1:
+ class_file.write(''' if (pointer < %d) {
+ return false;
+ }
+ if (pointer == %d) {
+ return true;
+ }
+''' % (low, low))
+ else:
+ class_file.write(''' if (pointer < %d) {
+ return false;
+ }
+ if (pointer < %d) {
+ return readBit(%d + (pointer - %d));
+ }
+''' % (low, high, base, low))
+ base += (high - low)
+
+class_file.write(''' return false;
+ }
+
+ public static int findPointer(char lowBits, boolean isAstral) {
+ if (!isAstral) {
+ switch (lowBits) {
+''')
+
+hkscs_bound = (0xA1 - 0x81) * 157  # = 5024; pointers below this are skipped by the generated search loops ("Avoid searching HKSCS")
+
+prefer_last = [  # code points for which findPointer() returns the LAST pointer in the index that maps to them
+ 0x2550,
+ 0x255E,
+ 0x2561,
+ 0x256A,
+ 0x5341,
+ 0x5345,
+]
+
+for code_point in prefer_last:  # emit one "case" per code point into the switch opened by the preceding write
+ # Python lists don't have .rindex() :-(
+ for i in xrange(len(index) - 1, -1, -1):  # scan from the highest pointer downwards
+ candidate = index[i]
+ if candidate == code_point:
+ class_file.write(''' case 0x%04X:
+ return %d;
+''' % (code_point, i))  # i is the last pointer whose code point equals code_point
+ break  # only the first hit of the reverse scan is emitted; no case is written if the code point is absent
+
+class_file.write(''' default:
+ break;
+ }
+ }''')
+
+j = 0  # suffix of the TABLE<n> string referenced by the emitted loop
+for (low, high) in ranges:  # emit one linear-search loop per table range that reaches past hkscs_bound
+ if high > hkscs_bound:  # ranges ending at or before hkscs_bound get no search loop
+ start = 0  # index within TABLE<j> at which the emitted loop starts
+ if low <= hkscs_bound and hkscs_bound < high:
+ # This is the first range we don't ignore and the
+ # range that contains the first non-HKSCS pointer.
+ # Avoid searching HKSCS.
+ start = hkscs_bound - low  # table-relative position of pointer hkscs_bound
+ class_file.write('''
+ for (int i = %d; i < TABLE%d.length(); i++) {
+ if (TABLE%d.charAt(i) == lowBits) {
+ int pointer = i + %d;
+ if (isAstral == isAstral(pointer)) {
+ return pointer;
+ }
+ }
+ }''' % (start, j, j, low))  # format arguments: loop start, table number (twice), and the range's first pointer
+ j += 1
+
+class_file.write('''
+ return 0;
+ }
+}
+''')
+class_file.close()
diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml
new file mode 100644
index 000000000..1eab09c21
--- /dev/null
+++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml
@@ -0,0 +1,12 @@
+<module> <!-- GWT module descriptor for the HTML parser -->
+ <inherits name="com.google.gwt.core.Core"/>
+ <inherits name="com.google.gwt.user.User"/>
+ <super-source path="translatable"/>
+ <source path="annotation"/> <!-- the four source paths are relative to this module's package - TODO confirm layout -->
+ <source path="common"/>
+ <source path="impl"/>
+ <source path="gwt"/>
+ <set-property name="user.agent" value="gecko1_8"/> <!-- fixes the user.agent property to the single value gecko1_8 -->
+ <entry-point class="nu.validator.htmlparser.gwt.HtmlParserModule"/> <!-- class run when the module starts -->
+ <add-linker name="sso"/> <!-- NOTE(review): presumably GWT's single-script linker - confirm -->
+</module>
diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java
new file mode 100644
index 000000000..29ef2a43a
--- /dev/null
+++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java
@@ -0,0 +1,477 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.gwt;
+
+import java.util.LinkedList;
+
+import nu.validator.htmlparser.common.DocumentMode;
+import nu.validator.htmlparser.impl.CoalescingTreeBuilder;
+import nu.validator.htmlparser.impl.HtmlAttributes;
+
+import org.xml.sax.SAXException;
+
+import com.google.gwt.core.client.JavaScriptException;
+import com.google.gwt.core.client.JavaScriptObject;
+
+class BrowserTreeBuilder extends CoalescingTreeBuilder<JavaScriptObject> {
+
+ private JavaScriptObject document;
+
+ private JavaScriptObject script;
+
+ private JavaScriptObject placeholder;
+
+ private boolean readyToRun;
+
+ private final LinkedList<ScriptHolder> scriptStack = new LinkedList<ScriptHolder>();
+
+ private class ScriptHolder {
+ private final JavaScriptObject script;
+
+ private final JavaScriptObject placeholder;
+
+ /**
+ * @param script
+ * @param placeholder
+ */
+ public ScriptHolder(JavaScriptObject script,
+ JavaScriptObject placeholder) {
+ this.script = script;
+ this.placeholder = placeholder;
+ }
+
+ /**
+ * Returns the script.
+ *
+ * @return the script
+ */
+ public JavaScriptObject getScript() {
+ return script;
+ }
+
+ /**
+ * Returns the placeholder.
+ *
+ * @return the placeholder
+ */
+ public JavaScriptObject getPlaceholder() {
+ return placeholder;
+ }
+ }
+
+ protected BrowserTreeBuilder(JavaScriptObject document) {
+ super();
+ this.document = document;
+ installExplorerCreateElementNS(document);
+ }
+
+ private static native boolean installExplorerCreateElementNS(
+ JavaScriptObject doc) /*-{
+ if (!doc.createElementNS) {
+ doc.createElementNS = function (uri, local) {
+ if ("http://www.w3.org/1999/xhtml" == uri) {
+ return doc.createElement(local);
+ } else if ("http://www.w3.org/1998/Math/MathML" == uri) {
+ if (!doc.mathplayerinitialized) {
+ var obj = document.createElement("object");
+ obj.setAttribute("id", "mathplayer");
+ obj.setAttribute("classid", "clsid:32F66A20-7614-11D4-BD11-00104BD3F987");
+ document.getElementsByTagName("head")[0].appendChild(obj);
+ document.namespaces.add("m", "http://www.w3.org/1998/Math/MathML", "#mathplayer");
+ doc.mathplayerinitialized = true;
+ }
+ return doc.createElement("m:" + local);
+ } else if ("http://www.w3.org/2000/svg" == uri) {
+ if (!doc.renesisinitialized) {
+ var obj = document.createElement("object");
+ obj.setAttribute("id", "renesis");
+ obj.setAttribute("classid", "clsid:AC159093-1683-4BA2-9DCF-0C350141D7F2");
+ document.getElementsByTagName("head")[0].appendChild(obj);
+ document.namespaces.add("s", "http://www.w3.org/2000/svg", "#renesis");
+ doc.renesisinitialized = true;
+ }
+ return doc.createElement("s:" + local);
+ } else {
+ // throw
+ }
+ }
+ }
+ }-*/;
+
+ private static native boolean hasAttributeNS(JavaScriptObject element,
+ String uri, String localName) /*-{
+ return element.hasAttributeNS(uri, localName);
+ }-*/;
+
+ private static native void setAttributeNS(JavaScriptObject element,
+ String uri, String localName, String value) /*-{
+ element.setAttributeNS(uri, localName, value);
+ }-*/;
+
+ @Override protected void addAttributesToElement(JavaScriptObject element,
+ HtmlAttributes attributes) throws SAXException {
+ try {
+ for (int i = 0; i < attributes.getLength(); i++) {
+ String localName = attributes.getLocalNameNoBoundsCheck(i);
+ String uri = attributes.getURINoBoundsCheck(i);
+ if (!hasAttributeNS(element, uri, localName)) {
+ setAttributeNS(element, uri, localName,
+ attributes.getValueNoBoundsCheck(i));
+ }
+ }
+ } catch (JavaScriptException e) {
+ fatal(e);
+ }
+ }
+
+ private static native void appendChild(JavaScriptObject parent,
+ JavaScriptObject child) /*-{
+ parent.appendChild(child);
+ }-*/;
+
+ private static native JavaScriptObject createTextNode(JavaScriptObject doc,
+ String text) /*-{
+ return doc.createTextNode(text);
+ }-*/;
+
+ private static native JavaScriptObject getLastChild(JavaScriptObject node) /*-{
+ return node.lastChild;
+ }-*/;
+
+ private static native void extendTextNode(JavaScriptObject node, String text) /*-{
+ node.data += text;
+ }-*/;
+
+ @Override protected void appendCharacters(JavaScriptObject parent,
+ String text) throws SAXException {
+ try {
+ if (parent == placeholder) {
+ appendChild(script, createTextNode(document, text));
+
+ }
+ JavaScriptObject lastChild = getLastChild(parent);
+ if (lastChild != null && getNodeType(lastChild) == 3) {
+ extendTextNode(lastChild, text);
+ return;
+ }
+ appendChild(parent, createTextNode(document, text));
+ } catch (JavaScriptException e) {
+ fatal(e);
+ }
+ }
+
+ private static native boolean hasChildNodes(JavaScriptObject element) /*-{
+ return element.hasChildNodes();
+ }-*/;
+
+ private static native JavaScriptObject getFirstChild(
+ JavaScriptObject element) /*-{
+ return element.firstChild;
+ }-*/;
+
+ @Override protected void appendChildrenToNewParent(
+ JavaScriptObject oldParent, JavaScriptObject newParent)
+ throws SAXException {
+ try {
+ while (hasChildNodes(oldParent)) {
+ appendChild(newParent, getFirstChild(oldParent));
+ }
+ } catch (JavaScriptException e) {
+ fatal(e);
+ }
+ }
+
+ private static native JavaScriptObject createComment(JavaScriptObject doc,
+ String text) /*-{
+ return doc.createComment(text);
+ }-*/;
+
+ @Override protected void appendComment(JavaScriptObject parent,
+ String comment) throws SAXException {
+ try {
+ if (parent == placeholder) {
+ appendChild(script, createComment(document, comment));
+ }
+ appendChild(parent, createComment(document, comment));
+ } catch (JavaScriptException e) {
+ fatal(e);
+ }
+ }
+
+ @Override protected void appendCommentToDocument(String comment)
+ throws SAXException {
+ try {
+ appendChild(document, createComment(document, comment));
+ } catch (JavaScriptException e) {
+ fatal(e);
+ }
+ }
+
+ private static native JavaScriptObject createElementNS(
+ JavaScriptObject doc, String ns, String local) /*-{
+ return doc.createElementNS(ns, local);
+ }-*/;
+
+ @Override protected JavaScriptObject createElement(String ns, String name,
+ HtmlAttributes attributes) throws SAXException {
+ try {
+ JavaScriptObject rv = createElementNS(document, ns, name);
+ for (int i = 0; i < attributes.getLength(); i++) {
+ setAttributeNS(rv, attributes.getURINoBoundsCheck(i),
+ attributes.getLocalNameNoBoundsCheck(i),
+ attributes.getValueNoBoundsCheck(i));
+ }
+
+ if ("script" == name) {
+ if (placeholder != null) {
+ scriptStack.addLast(new ScriptHolder(script, placeholder));
+ }
+ script = rv;
+ placeholder = createElementNS(document,
+ "http://n.validator.nu/placeholder/", "script");
+ rv = placeholder;
+ for (int i = 0; i < attributes.getLength(); i++) {
+ setAttributeNS(rv, attributes.getURINoBoundsCheck(i),
+ attributes.getLocalNameNoBoundsCheck(i),
+ attributes.getValueNoBoundsCheck(i));
+ }
+ }
+
+ return rv;
+ } catch (JavaScriptException e) {
+ fatal(e);
+ throw new RuntimeException("Unreachable");
+ }
+ }
+
+ @Override protected JavaScriptObject createHtmlElementSetAsRoot(
+ HtmlAttributes attributes) throws SAXException {
+ try {
+ JavaScriptObject rv = createElementNS(document,
+ "http://www.w3.org/1999/xhtml", "html");
+ for (int i = 0; i < attributes.getLength(); i++) {
+ setAttributeNS(rv, attributes.getURINoBoundsCheck(i),
+ attributes.getLocalNameNoBoundsCheck(i),
+ attributes.getValueNoBoundsCheck(i));
+ }
+ appendChild(document, rv);
+ return rv;
+ } catch (JavaScriptException e) {
+ fatal(e);
+ throw new RuntimeException("Unreachable");
+ }
+ }
+
+ private static native JavaScriptObject getParentNode(
+ JavaScriptObject element) /*-{
+ return element.parentNode;
+ }-*/;
+
+ @Override protected void appendElement(JavaScriptObject child,
+ JavaScriptObject newParent) throws SAXException {
+ try {
+ if (newParent == placeholder) {
+ appendChild(script, cloneNodeDeep(child));
+ }
+ appendChild(newParent, child);
+ } catch (JavaScriptException e) {
+ fatal(e);
+ }
+ }
+
+ @Override protected boolean hasChildren(JavaScriptObject element)
+ throws SAXException {
+ try {
+ return hasChildNodes(element);
+ } catch (JavaScriptException e) {
+ fatal(e);
+ throw new RuntimeException("Unreachable");
+ }
+ }
+
+ private static native void insertBeforeNative(JavaScriptObject parent,
+ JavaScriptObject child, JavaScriptObject sibling) /*-{
+ parent.insertBefore(child, sibling);
+ }-*/;
+
+ private static native int getNodeType(JavaScriptObject node) /*-{
+ return node.nodeType;
+ }-*/;
+
+ private static native JavaScriptObject cloneNodeDeep(JavaScriptObject node) /*-{
+ return node.cloneNode(true);
+ }-*/;
+
+ /**
+ * Returns the document.
+ *
+ * @return the document
+ */
+ JavaScriptObject getDocument() {
+ JavaScriptObject rv = document;
+ document = null;
+ return rv;
+ }
+
+ private static native JavaScriptObject createDocumentFragment(
+ JavaScriptObject doc) /*-{
+ return doc.createDocumentFragment();
+ }-*/;
+
+ JavaScriptObject getDocumentFragment() {
+ JavaScriptObject rv = createDocumentFragment(document);
+ JavaScriptObject rootElt = getFirstChild(document);
+ while (hasChildNodes(rootElt)) {
+ appendChild(rv, getFirstChild(rootElt));
+ }
+ document = null;
+ return rv;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#createJavaScriptObject(String,
+ * java.lang.String, org.xml.sax.Attributes, java.lang.Object)
+ */
+ @Override protected JavaScriptObject createElement(String ns, String name,
+ HtmlAttributes attributes, JavaScriptObject form)
+ throws SAXException {
+ try {
+ JavaScriptObject rv = createElement(ns, name, attributes);
+ // rv.setUserData("nu.validator.form-pointer", form, null);
+ return rv;
+ } catch (JavaScriptException e) {
+ fatal(e);
+ return null;
+ }
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#start()
+ */
+ @Override protected void start(boolean fragment) throws SAXException {
+ script = null;
+ placeholder = null;
+ readyToRun = false;
+ }
+
+ protected void documentMode(DocumentMode mode, String publicIdentifier,
+ String systemIdentifier, boolean html4SpecificAdditionalErrorChecks)
+ throws SAXException {
+ // document.setUserData("nu.validator.document-mode", mode, null);
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#elementPopped(java.lang.String,
+ * java.lang.String, java.lang.Object)
+ */
+ @Override protected void elementPopped(String ns, String name,
+ JavaScriptObject node) throws SAXException {
+ if (node == placeholder) {
+ readyToRun = true;
+ requestSuspension();
+ }
+ }
+
+ private static native void replace(JavaScriptObject oldNode,
+ JavaScriptObject newNode) /*-{
+ oldNode.parentNode.replaceChild(newNode, oldNode);
+ }-*/;
+
+ private static native JavaScriptObject getPreviousSibling(JavaScriptObject node) /*-{
+ return node.previousSibling;
+ }-*/;
+
+ void maybeRunScript() {
+ if (readyToRun) {
+ readyToRun = false;
+ replace(placeholder, script);
+ if (scriptStack.isEmpty()) {
+ script = null;
+ placeholder = null;
+ } else {
+ ScriptHolder scriptHolder = scriptStack.removeLast();
+ script = scriptHolder.getScript();
+ placeholder = scriptHolder.getPlaceholder();
+ }
+ }
+ }
+
+ @Override protected void insertFosterParentedCharacters(String text,
+ JavaScriptObject table, JavaScriptObject stackParent)
+ throws SAXException {
+ try {
+ JavaScriptObject parent = getParentNode(table);
+ if (parent != null) { // always an element if not null
+ JavaScriptObject previousSibling = getPreviousSibling(table);
+ if (previousSibling != null
+ && getNodeType(previousSibling) == 3) {
+ extendTextNode(previousSibling, text);
+ return;
+ }
+ insertBeforeNative(parent, createTextNode(document, text), table);
+ return;
+ }
+ JavaScriptObject lastChild = getLastChild(stackParent);
+ if (lastChild != null && getNodeType(lastChild) == 3) {
+ extendTextNode(lastChild, text);
+ return;
+ }
+ appendChild(stackParent, createTextNode(document, text));
+ } catch (JavaScriptException e) {
+ fatal(e);
+ }
+ }
+
+ @Override protected void insertFosterParentedChild(JavaScriptObject child,
+ JavaScriptObject table, JavaScriptObject stackParent)
+ throws SAXException {
+ JavaScriptObject parent = getParentNode(table);
+ try {
+ if (parent != null && getNodeType(parent) == 1) {
+ insertBeforeNative(parent, child, table);
+ } else {
+ appendChild(stackParent, child);
+ }
+ } catch (JavaScriptException e) {
+ fatal(e);
+ }
+ }
+
+ private static native void removeChild(JavaScriptObject parent,
+ JavaScriptObject child) /*-{
+ parent.removeChild(child);
+ }-*/;
+
+ @Override protected void detachFromParent(JavaScriptObject element)
+ throws SAXException {
+ try {
+ JavaScriptObject parent = getParentNode(element);
+ if (parent != null) {
+ removeChild(parent, element);
+ }
+ } catch (JavaScriptException e) {
+ fatal(e);
+ }
+ }
+}
diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java
new file mode 100644
index 000000000..1d71cdfd6
--- /dev/null
+++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007-2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.gwt;
+
+import java.util.LinkedList;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
+import nu.validator.htmlparser.impl.Tokenizer;
+import nu.validator.htmlparser.impl.UTF16Buffer;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+import com.google.gwt.core.client.JavaScriptObject;
+import com.google.gwt.user.client.Timer;
+
+/**
+ * This class implements an HTML5 parser that exposes data through the DOM
+ * interface.
+ *
+ * <p>By default, when using the constructor without arguments, the
+ * this parser treats XML 1.0-incompatible infosets as fatal errors.
+ * This corresponds to
+ * <code>FATAL</code> as the general XML violation policy. To make the parser
+ * support non-conforming HTML fully per the HTML 5 spec while on the other
+ * hand potentially violating the DOM API contract, set the general XML
+ * violation policy to <code>ALLOW</code>. This does not work with a standard
+ * DOM implementation. Handling all input without fatal errors and without
+ * violating the DOM API contract is possible by setting
+ * the general XML violation policy to <code>ALTER_INFOSET</code>. <em>This
+ * makes the parser non-conforming</em> but is probably the most useful
+ * setting for most applications.
+ *
+ * <p>The doctype is not represented in the tree.
+ *
+ * <p>The document mode is represented as user data <code>DocumentMode</code>
+ * object with the key <code>nu.validator.document-mode</code> on the document
+ * node.
+ *
+ * <p>The form pointer is also stored as user data with the key
+ * <code>nu.validator.form-pointer</code>.
+ *
+ * @version $Id: HtmlDocumentBuilder.java 255 2008-05-29 08:57:38Z hsivonen $
+ * @author hsivonen
+ */
+public class HtmlParser {
+
+ private static final int CHUNK_SIZE = 512;
+
+ private final Tokenizer tokenizer;
+
+ private final BrowserTreeBuilder domTreeBuilder;
+
+ private final StringBuilder documentWriteBuffer = new StringBuilder();
+
+ private ErrorHandler errorHandler;
+
+ private UTF16Buffer stream;
+
+ private int streamLength;
+
+ private boolean lastWasCR;
+
+ private boolean ending;
+
+ private ParseEndListener parseEndListener;
+
+ private final LinkedList<UTF16Buffer> bufferStack = new LinkedList<UTF16Buffer>();
+
+ /**
+ * Instantiates the parser
+ *
+ * @param implementation
+ * the DOM implementation
+ * @param xmlPolicy the policy
+ */
+ public HtmlParser(JavaScriptObject document) {
+ this.domTreeBuilder = new BrowserTreeBuilder(document);
+ this.tokenizer = new ErrorReportingTokenizer(domTreeBuilder);
+ this.domTreeBuilder.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET);
+ this.tokenizer.setCommentPolicy(XmlViolationPolicy.ALTER_INFOSET);
+ this.tokenizer.setContentNonXmlCharPolicy(XmlViolationPolicy.ALTER_INFOSET);
+ this.tokenizer.setContentSpacePolicy(XmlViolationPolicy.ALTER_INFOSET);
+ this.tokenizer.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET);
+ this.tokenizer.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET);
+ }
+
+ /**
+ * Parses a document from a SAX <code>InputSource</code>.
+ * @param is the source
+ * @return the doc
+ * @see javax.xml.parsers.DocumentBuilder#parse(org.xml.sax.InputSource)
+ */
+ public void parse(String source, ParseEndListener callback) throws SAXException {
+ parseEndListener = callback;
+ domTreeBuilder.setFragmentContext(null);
+ tokenize(source, null);
+ }
+
+ /**
+ * @param is
+ * @throws SAXException
+ * @throws IOException
+ * @throws MalformedURLException
+ */
+ private void tokenize(String source, String context) throws SAXException {
+ lastWasCR = false;
+ ending = false;
+ documentWriteBuffer.setLength(0);
+ streamLength = source.length();
+ stream = new UTF16Buffer(source.toCharArray(), 0,
+ (streamLength < CHUNK_SIZE ? streamLength : CHUNK_SIZE));
+ bufferStack.clear();
+ push(stream);
+ domTreeBuilder.setFragmentContext(context == null ? null : context.intern());
+ tokenizer.start();
+ pump();
+ }
+
+ private void pump() throws SAXException {
+ if (ending) {
+ tokenizer.end();
+ domTreeBuilder.getDocument(); // drops the internal reference
+ parseEndListener.parseComplete();
+ // Don't schedule timeout
+ return;
+ }
+
+ int docWriteLen = documentWriteBuffer.length();
+ if (docWriteLen > 0) {
+ char[] newBuf = new char[docWriteLen];
+ documentWriteBuffer.getChars(0, docWriteLen, newBuf, 0);
+ push(new UTF16Buffer(newBuf, 0, docWriteLen));
+ documentWriteBuffer.setLength(0);
+ }
+
+ for (;;) {
+ UTF16Buffer buffer = peek();
+ if (!buffer.hasMore()) {
+ if (buffer == stream) {
+ if (buffer.getEnd() == streamLength) {
+ // Stop parsing
+ tokenizer.eof();
+ ending = true;
+ break;
+ } else {
+ int newEnd = buffer.getStart() + CHUNK_SIZE;
+ buffer.setEnd(newEnd < streamLength ? newEnd
+ : streamLength);
+ continue;
+ }
+ } else {
+ pop();
+ continue;
+ }
+ }
+ // now we have a non-empty buffer
+ buffer.adjust(lastWasCR);
+ lastWasCR = false;
+ if (buffer.hasMore()) {
+ lastWasCR = tokenizer.tokenizeBuffer(buffer);
+ domTreeBuilder.maybeRunScript();
+ break;
+ } else {
+ continue;
+ }
+ }
+
+ // schedule
+ Timer timer = new Timer() {
+
+ @Override public void run() {
+ try {
+ pump();
+ } catch (SAXException e) {
+ ending = true;
+ if (errorHandler != null) {
+ try {
+ errorHandler.fatalError(new SAXParseException(
+ e.getMessage(), null, null, -1, -1, e));
+ } catch (SAXException e1) {
+ }
+ }
+ }
+ }
+
+ };
+ timer.schedule(1);
+ }
+
+ private void push(UTF16Buffer buffer) {
+ bufferStack.addLast(buffer);
+ }
+
+ private UTF16Buffer peek() {
+ return bufferStack.getLast();
+ }
+
+ private void pop() {
+ bufferStack.removeLast();
+ }
+
+ public void documentWrite(String text) throws SAXException {
+ UTF16Buffer buffer = new UTF16Buffer(text.toCharArray(), 0, text.length());
+ while (buffer.hasMore()) {
+ buffer.adjust(lastWasCR);
+ lastWasCR = false;
+ if (buffer.hasMore()) {
+ lastWasCR = tokenizer.tokenizeBuffer(buffer);
+ domTreeBuilder.maybeRunScript();
+ }
+ }
+ }
+
+ /**
+ * @see javax.xml.parsers.DocumentBuilder#setErrorHandler(org.xml.sax.ErrorHandler)
+ */
+ public void setErrorHandler(ErrorHandler errorHandler) {
+ this.errorHandler = errorHandler;
+ domTreeBuilder.setErrorHandler(errorHandler);
+ tokenizer.setErrorHandler(errorHandler);
+ }
+
+ /**
+ * Sets whether comment nodes appear in the tree.
+ * @param ignoreComments <code>true</code> to ignore comments
+ * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean)
+ */
+ public void setIgnoringComments(boolean ignoreComments) {
+ domTreeBuilder.setIgnoringComments(ignoreComments);
+ }
+
+ /**
+ * Sets whether the parser considers scripting to be enabled for noscript treatment.
+ * @param scriptingEnabled <code>true</code> to enable
+ * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean)
+ */
+ public void setScriptingEnabled(boolean scriptingEnabled) {
+ domTreeBuilder.setScriptingEnabled(scriptingEnabled);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java
new file mode 100644
index 000000000..255a02d13
--- /dev/null
+++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.gwt;
+
+import org.xml.sax.SAXException;
+
+import com.google.gwt.core.client.EntryPoint;
+import com.google.gwt.core.client.JavaScriptObject;
+
+/**
+ * GWT entry point that exports a <code>parseHtmlDocument</code> function on
+ * the host window.
+ */
+public class HtmlParserModule implements EntryPoint {
+
+    /**
+     * Removes all children of <code>node</code>, last child first.
+     */
+    private static native void zapChildren(JavaScriptObject node) /*-{
+        while (node.hasChildNodes()) {
+            node.removeChild(node.lastChild);
+        }
+    }-*/;
+
+    /**
+     * Replaces <code>write</code> and <code>writeln</code> on the document
+     * with functions that feed their arguments to the parser.
+     *
+     * @param doc the document whose write functions are replaced
+     * @param parser the parser that receives the written text
+     */
+    private static native void installDocWrite(JavaScriptObject doc, HtmlParser parser) /*-{
+        // Concatenates all arguments as strings. Starting from "" forces
+        // string concatenation even when the first arguments are numbers
+        // (write(1, 2) must produce "12", not 3) and guarantees that a
+        // string is handed to the Java method.
+        function concat(args) {
+            var text = "";
+            for (var i = 0; i < args.length; i++) {
+                text += args[i];
+            }
+            return text;
+        }
+        doc.write = function() {
+            if (arguments.length == 0) {
+                return;
+            }
+            parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)(concat(arguments));
+        };
+        doc.writeln = function() {
+            // With no arguments this writes just the line feed.
+            parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)(concat(arguments) + "\n");
+        };
+    }-*/;
+
+    /**
+     * Empties the document, then parses <code>source</code> into it with
+     * scripting enabled and <code>document.write</code> support installed.
+     *
+     * @param source the markup to parse
+     * @param document the document to parse into
+     * @param readyCallback function called when parsing has finished; a
+     *            newly created function is used when <code>null</code>
+     * @param errorHandler currently unused
+     * @throws SAXException if the first parsing step fails
+     */
+    @SuppressWarnings("unused")
+    private static void parseHtmlDocument(String source, JavaScriptObject document, JavaScriptObject readyCallback, JavaScriptObject errorHandler) throws SAXException {
+        if (readyCallback == null) {
+            readyCallback = JavaScriptObject.createFunction();
+        }
+        zapChildren(document);
+        HtmlParser parser = new HtmlParser(document);
+        parser.setScriptingEnabled(true);
+        // XXX error handler
+
+        installDocWrite(document, parser);
+
+        parser.parse(source, new ParseEndListener(readyCallback));
+    }
+
+    /**
+     * Publishes {@link #parseHtmlDocument} as
+     * <code>$wnd.parseHtmlDocument</code>.
+     */
+    private static native void exportEntryPoints() /*-{
+        $wnd.parseHtmlDocument = @nu.validator.htmlparser.gwt.HtmlParserModule::parseHtmlDocument(Ljava/lang/String;Lcom/google/gwt/core/client/JavaScriptObject;Lcom/google/gwt/core/client/JavaScriptObject;Lcom/google/gwt/core/client/JavaScriptObject;);
+    }-*/;
+
+
+    /**
+     * Exports the entry points when the module is loaded.
+     */
+    public void onModuleLoad() {
+        exportEntryPoints();
+    }
+
+}
diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java
new file mode 100644
index 000000000..43235c5be
--- /dev/null
+++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.gwt;
+
+import com.google.gwt.core.client.JavaScriptObject;
+
+/**
+ * Wraps a JavaScript function so that it can be notified when a parse ends.
+ */
+public class ParseEndListener {
+
+    /** The JavaScript function invoked by {@link #parseComplete()}. */
+    private final JavaScriptObject callback;
+
+    /**
+     * Creates a listener around the given JavaScript function.
+     *
+     * @param callback the function to call when parsing has finished
+     */
+    public ParseEndListener(JavaScriptObject callback) {
+        this.callback = callback;
+    }
+
+    /**
+     * Calls the wrapped JavaScript function without arguments.
+     */
+    public void parseComplete() {
+        invoke(this.callback);
+    }
+
+    /** Invokes <code>fn</code> as a zero-argument JavaScript function. */
+    private static native void invoke(JavaScriptObject fn) /*-{
+        fn();
+    }-*/;
+
+}
diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/HtmlParser.html b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/HtmlParser.html
new file mode 100644
index 000000000..4d9cde81c
--- /dev/null
+++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/HtmlParser.html
@@ -0,0 +1,225 @@
+<!DOCTYPE HTML>
+<html>
+ <head>
+ <title>Live DOM Viewer</title>
+ <script type="text/javascript" language="javascript" src="nu.validator.htmlparser.HtmlParser.nocache.js"></script>
+ <style>
+ h1 { margin: 0; }
+ h2 { font-size: small; margin: 1em 0 0; }
+ p, ul, pre { margin: 0; }
+ p { border: inset thin; }
+ textarea { width: 100%; -width: 99%; height: 8em; border: 0; }
+ iframe { width: 100%; height: 12em; border: 0; }
+/* iframe.large { height: 24em; } */
+ pre { border: inset thin; padding: 0.5em; color: gray; }
+ pre samp { color: black; }
+ #dom { border: inset thin; padding: 0.5em 0.5em 0.5em 1em; color: black; min-height: 5em; font-family: monospace; background: white; }
+ #dom ul { padding: 0 0 0 1em; margin: 0; }
+ #dom li { padding: 0; margin: 0; list-style: none; position: relative; }
+ #dom li li { list-style: disc; }
+ #dom .t1 code { color: purple; font-weight: bold; }
+ #dom .t2 { font-style: normal; font-family: monospace; }
+ #dom .t2 .name { color: black; font-weight: bold; }
+ #dom .t2 .value { color: blue; font-weight: normal; }
+ #dom .t3 code, #dom .t4 code, #dom .t5 code { color: gray; }
+ #dom .t7 code, #dom .t8 code { color: green; }
+ #dom span { font-style: italic; font-family: serif; }
+ #dom .t10 code { color: teal; }
+ #dom .misparented, #dom .misparented code { color: red; font-weight: bold; }
+ #dom.hidden, .hidden { visibility: hidden; margin: 0.5em 0; padding: 0; height: 0; min-height: 0; }
+ pre#log { color: black; font: small monospace; }
+ script + p { border: none; font-size: smaller; margin: 0.8em 0.3em; }
+ </style>
+ <style title="Tree View">
+ #dom li li { list-style: none; }
+ #dom li:first-child::before { position: absolute; top: 0; height: 0.6em; left: -0.75em; width: 0.5em; border-style: none none solid solid; content: ''; border-width: 0.1em; }
+ #dom li:not(:last-child)::after { position: absolute; top: 0; bottom: -0.6em; left: -0.75em; width: 0.5em; border-style: none none solid solid; content: ''; border-width: 0.1em; }
+ </style>
+ <script>
+ // When the user agent reports a Gecko build date of exactly 20060217,
+ // remove the second <style> element of the page (index 1, i.e. the
+ // "Tree View" sheet declared just above this script).
+ // NOTE(review): once `== '20060217'` holds, the `!= '00000000'` test can
+ // never be false; the first comparison may have been meant as a range
+ // check -- confirm against the upstream Live DOM Viewer.
+ if (navigator.userAgent.match('Gecko/(\\d+)') && RegExp.$1 == '20060217' && RegExp.$1 != '00000000') {
+ var style = document.getElementsByTagName('style')[1];
+ style.parentNode.removeChild(style);
+ }
+ </script>
+ </head>
+ <body onload="init()">
+ <h1>Live DOM Viewer</h1>
+ <h2>Markup to test (<a href="data:," id="permalink" rel="bookmark">permalink</a>, <a href="javascript:up()">upload</a>, <a href="javascript:down()">download</a>, <a href="#" onclick="toggleVisibility(this); return false">hide</a>): <span id="updown-status"></span></h2>
+ <p><textarea oninput="updateInput(event)" onkeydown="updateInput(event)">&lt;!DOCTYPE html>
+...</textarea></p>
+ <h2><a href="data:," id="domview">DOM view</a> (<a href="#" onclick="toggleVisibility(this); return false;">hide</a>, <a href="#" onclick="updateDOM()">refresh</a>):</h2>
+ <ul id="dom"></ul>
+ <h2><a href="data:," id="link">Rendered view</a>: (<a href="#" onclick="toggleVisibility(this); return false;">hide</a><!--, <a href="#" onclick="grow(this)">grow</a>-->):</h2>
+ <p><iframe src="blank.html"></iframe></p> <!-- data:, -->
+ <h2>innerHTML view: (<a href="#" onclick="toggleVisibility(this); return false;">show</a>, <a href="#" onclick="updateDOM()">refresh</a>):</h2>
+ <pre class="hidden">&lt;!DOCTYPE HTML>&lt;html><samp></samp>&lt;/html></pre>
+ <h2>Log: (<a href="#" onclick="toggleVisibility(this); return false;">hide</a>):</h2>
+ <pre id="log">Script not loaded.</pre>
+ <script>
+ // Page elements used by the functions below.
+ // The first <iframe>: the document that receives the parsed markup.
+ var iframe = document.getElementsByTagName('iframe')[0];
+ // The first <textarea>: the markup being tested.
+ var textarea = document.getElementsByTagName('textarea')[0];
+ // Despite the name, this is the first <samp> (inside the innerHTML <pre>).
+ var pre = document.getElementsByTagName('samp')[0];
+ // The first <ul>: root of the DOM tree view.
+ var dom = document.getElementsByTagName('ul')[0];
+ var log = document.getElementById('log');
+ var updownStatus = document.getElementById('updown-status');
+ // Timer id of the pending debounced update(), or 0.
+ var delayedUpdater = 0;
+ // Textarea content as of the last completed parse (set in afterParse()).
+ var lastString = '';
+ // While logBuffering is true, record() appends to logBuffer instead of
+ // writing to the log element; updateDOM() flushes and resets both.
+ var logBuffer = '';
+ var logBuffering = false;
+ function updateInput(event) {
+ if (delayedUpdater) {
+ clearTimeout(delayedUpdater);
+ delayedUpdater = 0;
+ }
+ delayedUpdater = setTimeout(update, 100);
+ }
+ function afterParse() {
+ lastString = textarea.value;
+ setTimeout(updateDOM, 100);
+ updown('');
+ }
+ function update() {
+ if (lastString != textarea.value) {
+ logBuffering = true;
+ document.getElementById('link').href = 'data:text/html;charset=utf-8,' + encodeURIComponent(textarea.value);
+ iframe.contentWindow.onerror = function (a, b, c) {
+ record('error: ' + a + ' on line ' + c);
+ }
+ iframe.contentWindow.w = function (s) {
+ record('log: ' + s);
+ }
+ window.parseHtmlDocument(textarea.value, iframe.contentWindow.document, afterParse, null);
+ }
+ }
+ function updateDOM() {
+ while (pre.firstChild) pre.removeChild(pre.firstChild);
+ pre.appendChild(document.createTextNode(iframe.contentWindow.document.documentElement.innerHTML));
+ printDOM(dom, iframe.contentWindow.document);
+ document.getElementById('domview').href = 'data:text/plain;charset=utf-8,<ul class="domTree">' + encodeURIComponent(dom.innerHTML + '</ul>');
+ document.getElementById('permalink').href = '?' + encodeURIComponent(textarea.value);
+ record('rendering mode: ' + iframe.contentWindow.document.compatMode);
+ if (iframe.contentWindow.document.title)
+ record('document.title: ' + iframe.contentWindow.document.title);
+ else
+ record('document has no title');
+ while (log.firstChild != log.lastChild)
+ log.removeChild(log.lastChild);
+ log.firstChild.data = logBuffer;
+ logBuffering = false;
+ logBuffer = '';
+ }
+ function printDOM(ul, node) {
+ while (ul.firstChild) ul.removeChild(ul.firstChild);
+ for (var i = 0; i < node.childNodes.length; i += 1) {
+ var li = document.createElement('li');
+ li.className = 't' + node.childNodes[i].nodeType;
+ if (node.childNodes[i].nodeType == 10) {
+ li.appendChild(document.createTextNode('DOCTYPE: '));
+ }
+ var code = document.createElement('code');
+ code.appendChild(document.createTextNode(node.childNodes[i].nodeName));
+ li.appendChild(code);
+ if (node.childNodes[i].nodeValue) {
+ var span = document.createElement('span');
+ span.appendChild(document.createTextNode(node.childNodes[i].nodeValue));
+ li.appendChild(document.createTextNode(': '));
+ li.appendChild(span);
+ }
+ if (node.childNodes[i].attributes)
+ for (var j = 0; j < node.childNodes[i].attributes.length; j += 1) {
+ if (node.childNodes[i].attributes[j].specified) {
+ var attName = document.createElement('code');
+ attName.appendChild(document.createTextNode(node.childNodes[i].attributes[j].nodeName));
+ attName.className = 'attribute name';
+ var attValue = document.createElement('code');
+ attValue.appendChild(document.createTextNode(node.childNodes[i].attributes[j].nodeValue));
+ attValue.className = 'attribute value';
+ var att = document.createElement('span');
+ att.className = 't2';
+ att.appendChild(attName);
+ att.appendChild(document.createTextNode('="'));
+ att.appendChild(attValue);
+ att.appendChild(document.createTextNode('"'));
+ li.appendChild(document.createTextNode(' '));
+ li.appendChild(att);
+ }
+ }
+ if (node.childNodes[i].parentNode == node) {
+ if (node.childNodes[i].childNodes.length) {
+ var ul2 = document.createElement('ul');
+ li.appendChild(ul2);
+ printDOM(ul2, node.childNodes[i]);
+ }
+ } else {
+ li.className += ' misparented';
+ }
+ ul.appendChild(li);
+ }
+ }
+ function toggleVisibility(link) {
+ var n = link.parentNode.nextSibling;
+ if (n.nodeType == 3 /* text node */) n = n.nextSibling; // we should always do this but in IE, text nodes vanish
+ n.className = (n.className == "hidden") ? '' : 'hidden';
+ link.firstChild.data = n.className == "hidden" ? "show" : "hide";
+ }
+/*
+ function grow(link) {
+ var n = link.parentNode.nextSibling;
+ if (n.nodeType == 3 /-* text node *-/) n = n.nextSibling; // we should always do this but in IE, text nodes vanish
+ n.className = (n.className == "large") ? '' : 'large';
+ link.firstChild.data = n.className == "grow" ? "shrink" : "grow";
+ }
+*/
+ function down() {
+ updown('downloading...');
+ var request = window.XMLHttpRequest ? new XMLHttpRequest() : new ActiveXObject("Microsoft.XMLHTTP");
+ request.onreadystatechange = function () {
+ updown('downloading... ' + request.readyState + '/4');
+ if (request.readyState == 4) {
+ textarea.value = request.responseText;
+ update();
+ updown('downloaded');
+ }
+ };
+ request.open('GET', 'clipboard.cgi', true);
+ request.send(null);
+ }
+ function up() {
+ updown('uploading...');
+ var request = window.XMLHttpRequest ? new XMLHttpRequest() : new ActiveXObject("Microsoft.XMLHTTP");
+ request.onreadystatechange = function () {
+ updown('uploading... ' + request.readyState + '/4');
+ if (request.readyState == 4) {
+ updown('uploaded');
+ }
+ };
+ request.open('POST', 'clipboard.cgi', true);
+ request.setRequestHeader('Content-Type', 'text/plain');
+ request.send(textarea.value);
+ }
+ function init() {
+ var uri = location.search;
+ if (uri)
+ textarea.value = decodeURIComponent(uri.substring(1, uri.length));
+ update();
+ }
+ function record(s) {
+ if (logBuffering)
+ logBuffer += s + '\r\n';
+ else
+ log.appendChild(document.createTextNode(s + '\r\n'));
+ }
+ function updown(s) {
+ while (updownStatus.firstChild) updownStatus.removeChild(updownStatus.firstChild);
+ updownStatus.appendChild(document.createTextNode(s));
+ }
+ </script>
+ <p>This script puts a function <code>w(<var>s</var>)</code> into the
+ global scope of the test page, where <var>s</var> is a string to
+ output to the log. Also, five files are accessible in the current
+ directory for test purposes: <code>image</code> (a GIF image),
+ <code>flash</code> (a Flash file), <code>script</code> (a JS file),
+ <code>style</code> (a CSS file), and <code>document</code> (an HTML
+ file).</p>
+ </body>
+</html> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt
new file mode 100644
index 000000000..bd2f4fcf1
--- /dev/null
+++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt
@@ -0,0 +1,25 @@
+From:
+http://software.hixie.ch/utilities/js/live-dom-viewer/LICENSE
+regarding the upstream of HtmlParser.html:
+
+The MIT License
+
+Copyright (c) 2000, 2006, 2008 Ian Hickson and various contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/blank.html b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/blank.html
new file mode 100644
index 000000000..a8756c9f7
--- /dev/null
+++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/blank.html
@@ -0,0 +1,2 @@
+<!DOCTYPE html>
+<title></title> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/mozilla-export-scripts/README.txt b/parser/html/java/htmlparser/mozilla-export-scripts/README.txt
new file mode 100644
index 000000000..3567b846c
--- /dev/null
+++ b/parser/html/java/htmlparser/mozilla-export-scripts/README.txt
@@ -0,0 +1,25 @@
+These scripts export the Java-to-C++ translator and the Java source files that
+implement the HTML5 parser. The exported translator may be used (with no
+external dependencies) to translate the exported Java source files into Gecko-
+compatible C++.
+
+Hacking the translator itself still requires a working copy of the Java HTML5
+parser repository, but hacking the parser (modifying the Java source files and
+performing the translation) should now be possible using only files committed
+to the mozilla source tree.
+
+Run any of these scripts without arguments to receive usage instructions.
+
+ make-translator-jar.sh: compiles the Java-to-C++ translator into a .jar file
+ export-java-srcs.sh: exports minimal java source files implementing the
+ HTML5 parser
+ export-translator.sh: exports the compiled translator and javaparser.jar
+ export-all.sh: runs the previous two scripts
+ util.sh: provides various shell utility functions to the
+ scripts listed above (does nothing if run directly)
+
+All path arguments may be either absolute or relative. This includes the path
+to the script itself ($0), so the directory from which you run these scripts
+doesn't matter.
+
+Ben Newman (7 July 2009)
diff --git a/parser/html/java/htmlparser/mozilla-export-scripts/export-all.sh b/parser/html/java/htmlparser/mozilla-export-scripts/export-all.sh
new file mode 100644
index 000000000..9ae07d33d
--- /dev/null
+++ b/parser/html/java/htmlparser/mozilla-export-scripts/export-all.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env sh
+#
+# Runs export-translator.sh and export-java-srcs.sh against the given
+# mozilla-central parser/html directory, then prints the translator command
+# to run next.
+
+SCRIPT_DIR=$(dirname "$0")
+# "." is the POSIX spelling of "source"; a plain sh need not provide "source".
+. "$SCRIPT_DIR/util.sh"
+SCRIPT_DIR=$(abs "$SCRIPT_DIR")
+
+if [ $# -eq 1 ]
+then
+    MOZ_PARSER_PATH=$(abs "$1")
+else
+    echo
+    echo "Usage: sh $(basename "$0") /path/to/mozilla-central/parser/html"
+    echo "Note that relative paths will work just fine."
+    echo
+    exit 1
+fi
+
+# Stop at the first failing export so the hint below is only printed when
+# both exports succeeded.
+"$SCRIPT_DIR/export-translator.sh" "$MOZ_PARSER_PATH" || exit 1
+"$SCRIPT_DIR/export-java-srcs.sh" "$MOZ_PARSER_PATH" || exit 1
+
+echo
+echo "Now go to $MOZ_PARSER_PATH and run"
+echo " java -jar javalib/translator.jar javasrc . nsHtml5AtomList.h"
+echo
diff --git a/parser/html/java/htmlparser/mozilla-export-scripts/export-java-srcs.sh b/parser/html/java/htmlparser/mozilla-export-scripts/export-java-srcs.sh
new file mode 100644
index 000000000..6d32b07da
--- /dev/null
+++ b/parser/html/java/htmlparser/mozilla-export-scripts/export-java-srcs.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env sh
+#
+# Copies the *.java files from ../src/nu/validator/htmlparser/impl into
+# <parser path>/javasrc, replacing whatever javasrc directory was there.
+
+SCRIPT_DIR=$(dirname "$0")
+# "." is the POSIX spelling of "source"; a plain sh need not provide "source".
+. "$SCRIPT_DIR/util.sh"
+SCRIPT_DIR=$(abs "$SCRIPT_DIR")
+
+SRCDIR=$(abs "$SCRIPT_DIR/../src/nu/validator/htmlparser/impl")
+
+if [ $# -eq 1 ]
+then
+    MOZ_PARSER_PATH=$(abs "$1")
+else
+    echo
+    echo "Usage: sh $(basename "$0") /path/to/mozilla-central/parser/html"
+    echo "Note that relative paths will work just fine."
+    echo
+    exit 1
+fi
+
+SRCTARGET=$MOZ_PARSER_PATH/javasrc
+
+# Quoted so that whitespace in the path cannot split it into several
+# arguments to rm -rf.
+rm -rf "$SRCTARGET"
+mkdir "$SRCTARGET"
+# Avoid copying the .svn directory:
+cp -rv "$SRCDIR"/*.java "$SRCTARGET"
diff --git a/parser/html/java/htmlparser/mozilla-export-scripts/export-translator.sh b/parser/html/java/htmlparser/mozilla-export-scripts/export-translator.sh
new file mode 100644
index 000000000..d1f4f1c39
--- /dev/null
+++ b/parser/html/java/htmlparser/mozilla-export-scripts/export-translator.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env sh
+
+SCRIPT_DIR=`dirname $0`
+source $SCRIPT_DIR/util.sh
+SCRIPT_DIR=`abs $SCRIPT_DIR`
+
+LIBDIR=`abs $SCRIPT_DIR/../translator-lib`
+
+if [ $# -eq 1 ]
+then
+ MOZ_PARSER_PATH=`abs $1`
+else
+ echo
+ echo "Usage: sh `basename $0` /path/to/mozilla-central/parser/html"
+ echo "Note that relative paths will work just fine."
+ echo "Be sure that you have run `dirname $0`/make-translator-jar.sh before running this script."
+ echo
+ exit 1
+fi
+
+LIBTARGET=$MOZ_PARSER_PATH/javalib
+
+rm -rf $LIBTARGET
+cp -rv $LIBDIR $LIBTARGET
diff --git a/parser/html/java/htmlparser/mozilla-export-scripts/make-translator-jar.sh b/parser/html/java/htmlparser/mozilla-export-scripts/make-translator-jar.sh
new file mode 100644
index 000000000..4f21ae665
--- /dev/null
+++ b/parser/html/java/htmlparser/mozilla-export-scripts/make-translator-jar.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env sh
+#
+# Compiles the sources under ../translator-src into ../translator-bin and
+# packages them as ../translator-lib/translator.jar, next to a copy of the
+# javaparser jar given on the command line.
+
+SCRIPT_DIR=$(dirname "$0")
+# "." is the POSIX spelling of "source"; a plain sh need not provide "source".
+. "$SCRIPT_DIR/util.sh"
+SCRIPT_DIR=$(abs "$SCRIPT_DIR")
+
+SRCDIR=$(abs "$SCRIPT_DIR/../translator-src")
+BINDIR=$(abs "$SCRIPT_DIR/../translator-bin")
+LIBDIR=$(abs "$SCRIPT_DIR/../translator-lib")
+
+if [ $# -eq 1 ]
+then
+    JAVAPARSER_JAR_PATH=$(abs "$1")
+else
+    echo
+    echo "Usage: sh $(basename "$0") /path/to/javaparser-1.0.7.jar"
+    echo "Note that relative paths will work just fine."
+    echo "Obtain javaparser-1.0.7.jar from http://code.google.com/p/javaparser"
+    echo
+    exit 1
+fi
+
+# Recreate the output directories from scratch and stage the javaparser jar.
+# Chained with && so a failed mkdir is reported instead of being masked by
+# the exit status of the final cp.
+set_up() {
+    rm -rf "$BINDIR" && mkdir "$BINDIR" && \
+    rm -rf "$LIBDIR" && mkdir "$LIBDIR" && \
+    cp "$JAVAPARSER_JAR_PATH" "$LIBDIR/javaparser.jar"
+}
+
+# Write the jar manifest naming the translator's main class and class path.
+write_manifest() {
+    rm -f "$LIBDIR/manifest"
+    echo "Main-Class: nu.validator.htmlparser.cpptranslate.Main" > "$LIBDIR/manifest"
+    echo "Class-Path: javaparser.jar" >> "$LIBDIR/manifest"
+}
+
+# Compile every .java file under $SRCDIR into $BINDIR.
+# find -exec ... {} + hands file names to javac as whole arguments, so paths
+# containing whitespace survive (they would not through a pipe into xargs).
+compile_translator() {
+    find "$SRCDIR" -name "*.java" \
+        -exec javac -cp "$LIBDIR/javaparser.jar" -g -d "$BINDIR" {} +
+}
+
+# Package the compiled classes together with the manifest.
+generate_jar() {
+    jar cvfm "$LIBDIR/translator.jar" "$LIBDIR/manifest" -C "$BINDIR" .
+}
+
+# The manifest is only needed while building the jar.
+clean_up() {
+    rm -f "$LIBDIR/manifest"
+}
+
+success_message() {
+    echo
+    echo "Successfully generated directory \"$LIBDIR\" with contents:"
+    echo
+    ls -al "$LIBDIR"
+    echo
+    echo "Now run $(dirname "$0")/export-all.sh with no arguments and follow the usage instructions."
+    echo
+}
+
+# Each step runs only if the previous one succeeded.
+set_up && \
+    compile_translator && \
+    write_manifest && \
+    generate_jar && \
+    clean_up && \
+    success_message
diff --git a/parser/html/java/htmlparser/mozilla-export-scripts/util.sh b/parser/html/java/htmlparser/mozilla-export-scripts/util.sh
new file mode 100644
index 000000000..348ca14f9
--- /dev/null
+++ b/parser/html/java/htmlparser/mozilla-export-scripts/util.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env sh
+
+abs() {
+ local rel
+ local p
+ if [ $# -ne 1 ]
+ then
+ rel=.
+ else
+ rel=$1
+ fi
+ if [ -d $rel ]
+ then
+ pushd $rel > /dev/null
+ p=`pwd`
+ popd > /dev/null
+ else
+ pushd `dirname $rel` > /dev/null
+ p=`pwd`/`basename $rel`
+ popd > /dev/null
+ fi
+ echo $p
+}
diff --git a/parser/html/java/htmlparser/pom.xml b/parser/html/java/htmlparser/pom.xml
new file mode 100644
index 000000000..41f46725f
--- /dev/null
+++ b/parser/html/java/htmlparser/pom.xml
@@ -0,0 +1,240 @@
+<!--
+ * Copyright (c) 2007-2012 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>nu.validator.htmlparser</groupId>
+ <artifactId>htmlparser</artifactId>
+ <packaging>bundle</packaging>
+ <version>1.4</version>
+ <name>htmlparser</name>
+ <url>http://about.validator.nu/htmlparser/</url>
+ <description>The Validator.nu HTML Parser is an implementation of the HTML5 parsing algorithm in Java for applications. The parser is designed to work as a drop-in replacement for the XML parser in applications that already support XHTML 1.x content with an XML parser and use SAX, DOM or XOM to interface with the parser.</description>
+ <!--
+ Usage notes for this POM:
+
+ To build without signing, run:
+ mvn clean source:jar javadoc:jar repository:bundle-create
+ (enter 0 <return> when prompted)
+
+ To build and sign, run:
+ mvn clean source:jar javadoc:jar package gpg:sign repository:bundle-create
+ (enter 0 <return> when prompted)
+
+ This POM file is used for creating the bundle for distribution via the
+ Maven Central Repository. It is not used as part of the normal development
+ process of the parser and the maintainer of the parser (Henri Sivonen)
+ isn't experienced in POM tweaking. If you need this POM to do something
+ that it currently does not do or do something better, you need to write
+ the changes you need yourself and contribute a patch via
+ http://bugzilla.validator.nu/
+ -->
+ <developers>
+ <developer>
+ <id>hsivonen</id>
+ <name>Henri Sivonen</name>
+ <email>hsivonen@iki.fi</email>
+ <url>http://hsivonen.iki.fi/</url>
+ </developer>
+ </developers>
+ <licenses>
+ <license>
+ <name>The MIT License</name>
+ <url>http://www.opensource.org/licenses/mit-license.php</url>
+ <distribution>repo</distribution>
+ </license>
+ <license>
+ <name>The (New) BSD License</name>
+ <url>http://www.opensource.org/licenses/bsd-license.php</url>
+ <distribution>repo</distribution>
+ </license>
+ </licenses>
+ <scm>
+ <connection>scm:hg:http://hg.mozilla.org/projects/htmlparser/</connection>
+ <url>http://hg.mozilla.org/projects/htmlparser/</url>
+ </scm>
+ <build>
+ <sourceDirectory>${project.build.directory}/src</sourceDirectory>
+ <testSourceDirectory>${basedir}/test-src</testSourceDirectory>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration>
+ <source>1.5</source>
+ <target>1.5</target>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-antrun-plugin</artifactId>
+ <version>1.7</version>
+ <dependencies>
+ <dependency>
+ <groupId>com.sun</groupId>
+ <artifactId>tools</artifactId>
+ <version>1.5.0</version>
+ <scope>system</scope>
+ <systemPath>${java.home}/../lib/tools.jar</systemPath>
+ </dependency>
+ </dependencies>
+ <executions>
+ <execution>
+ <id>intitialize-sources</id>
+ <phase>initialize</phase>
+ <goals>
+ <goal>run</goal>
+ </goals>
+ <configuration>
+ <target>
+ <delete dir="${project.build.sourceDirectory}"/>
+ <mkdir dir="${project.build.sourceDirectory}"/>
+ <copy todir="${project.build.sourceDirectory}">
+ <fileset dir="${basedir}/src"/>
+ </copy>
+ </target>
+ </configuration>
+ </execution>
+ <execution>
+ <id>tokenizer-hotspot-workaround</id>
+ <phase>process-sources</phase>
+ <goals>
+ <goal>run</goal>
+ </goals>
+ <configuration>
+ <target>
+ <property name="translator.sources" value="${basedir}/translator-src"/>
+ <property name="translator.classes" value="${project.build.directory}/translator-classes"/>
+ <mkdir dir="${translator.classes}"/>
+ <javac srcdir="${translator.sources}" includes="nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java" destdir="${translator.classes}" includeantruntime="false"/>
+ <java classname="nu.validator.htmlparser.generator.ApplyHotSpotWorkaround">
+ <classpath>
+ <pathelement location="${translator.classes}"/>
+ </classpath>
+ <arg value="${project.build.sourceDirectory}/nu/validator/htmlparser/impl/Tokenizer.java"/>
+ <arg value="${project.build.sourceDirectory}/nu/validator/htmlparser/impl/HotSpotWorkaround.txt"/>
+ </java>
+ </target>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <skip>true</skip>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <version>2.3.7</version>
+ <extensions>true</extensions>
+ <configuration>
+ <archive>
+ <addMavenDescriptor>false</addMavenDescriptor>
+ </archive>
+ <instructions>
+ <Bundle-Name>${project.name}</Bundle-Name>
+ <Bundle-SymbolicName>nu.validator.htmlparser</Bundle-SymbolicName>
+ <Bundle-Version>${project.version}</Bundle-Version>
+ <Bundle-RequiredExecutionEnvironment>J2SE-1.5</Bundle-RequiredExecutionEnvironment>
+ <_removeheaders>Built-By,Bnd-LastModified</_removeheaders>
+ </instructions>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>rpm-maven-plugin</artifactId>
+ <configuration>
+ <release>1</release>
+ <copyright>The MIT License</copyright>
+ <group>Development/Java</group>
+ <workarea>/var/tmp/${project.build.finalName}</workarea>
+ <defineStatements>
+ <defineStatement>_javadir ${rpm.java.dir}</defineStatement>
+ <defineStatement>_javadocdir ${rpm.javadoc.dir}</defineStatement>
+ </defineStatements>
+ <mappings>
+ <mapping>
+ <directory>${rpm.java.dir}</directory>
+ <filemode>644</filemode>
+ <username>root</username>
+ <groupname>root</groupname>
+ <sources>
+ <source>
+ <location>${project.build.directory}/${project.build.finalName}.jar</location>
+ </source>
+ </sources>
+ </mapping>
+ <mapping>
+ <directory>${rpm.javadoc.dir}/${project.build.finalName}</directory>
+ <filemode>644</filemode>
+ <username>root</username>
+ <groupname>root</groupname>
+ <sources>
+ <source>
+ <location>${project.build.directory}/apidocs</location>
+ </source>
+ </sources>
+ </mapping>
+ </mappings>
+ <install>%__ln_s ${project.build.finalName}.jar %{buildroot}%{_javadir}/${project.name}.jar</install>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ <dependencies>
+ <dependency>
+ <groupId>com.ibm.icu</groupId>
+ <artifactId>icu4j</artifactId>
+ <version>4.0.1</version>
+ <scope>compile</scope>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>xom</groupId>
+ <artifactId>xom</artifactId>
+ <version>1.1</version>
+ <scope>compile</scope>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>net.sourceforge.jchardet</groupId>
+ <artifactId>jchardet</artifactId>
+ <version>1.0</version>
+ <scope>compile</scope>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>com.sdicons.jsontools</groupId>
+ <artifactId>jsontools-core</artifactId>
+ <version>1.4</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ <properties>
+ <rpm.java.dir>/usr/share/java</rpm.java.dir>
+ <rpm.javadoc.dir>/usr/share/javadoc</rpm.javadoc.dir>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ </properties>
+</project>
diff --git a/parser/html/java/htmlparser/ruby-gcj/DomUtils.java b/parser/html/java/htmlparser/ruby-gcj/DomUtils.java
new file mode 100644
index 000000000..dc43da83d
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/DomUtils.java
@@ -0,0 +1,36 @@
+import java.util.HashSet;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+import org.w3c.dom.Element;
+
+/**
+ * Static DOM helpers: a registry that keeps documents referenced, plus
+ * get/set of an element's text content.
+ */
+public class DomUtils {
+
+    /**
+     * Documents registered through {@link #pin(Document)}. Membership in this
+     * static set keeps a document referenced from Java until it is unpinned.
+     */
+    private static final HashSet<Document> pinned_list = new HashSet<Document>();
+
+    /**
+     * Adds a document to the pinned set.
+     *
+     * @param d the document to keep referenced
+     */
+    public static synchronized void pin(Document d) {
+        pinned_list.add(d);
+    }
+
+    /**
+     * Removes a document from the pinned set; a no-op if it was not pinned.
+     *
+     * @param d the document to release
+     */
+    public static synchronized void unpin(Document d) {
+        pinned_list.remove(d);
+    }
+
+    /**
+     * Appends all text contained in an element to a buffer, in document
+     * order, descending into child elements. Only text nodes contribute;
+     * other node types (comments, CDATA sections, ...) are skipped.
+     *
+     * @param e the element whose text is collected
+     * @param b the buffer the text is appended to
+     */
+    public static void getElementContent(Element e, StringBuffer b) {
+        for (Node n = e.getFirstChild(); n != null; n = n.getNextSibling()) {
+            if (n.getNodeType() == Node.TEXT_NODE) {
+                b.append(n.getNodeValue());
+            } else if (n.getNodeType() == Node.ELEMENT_NODE) {
+                // Recurse into the child. Recursing on e again, as this code
+                // once did, never terminates.
+                getElementContent((Element) n, b);
+            }
+        }
+    }
+
+    /**
+     * Replaces all child nodes of an element with a single text node.
+     *
+     * @param e the element to modify
+     * @param s the text of the new, only child
+     */
+    public static void setElementContent(Element e, String s) {
+        while (e.hasChildNodes()) {
+            e.removeChild(e.getFirstChild());
+        }
+        e.appendChild(e.getOwnerDocument().createTextNode(s));
+    }
+}
diff --git a/parser/html/java/htmlparser/ruby-gcj/README b/parser/html/java/htmlparser/ruby-gcj/README
new file mode 100644
index 000000000..b368437f7
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/README
@@ -0,0 +1,65 @@
+Disclaimer:
+
+ This code is experimental.
+
+ When some people say experimental, they mean "it may not do what it is
+ intended to do; in fact, it might even wipe out your hard drive". I mean
+ that too. But I mean something more than that.
+
+ In this case, experimental means that I don't even know what it is intended
+ to do. I just have a vague vision, and I am trying out various things in
+ the hopes that one of them will work out.
+
+Vision:
+
+ My vague vision is that I would like to see HTML 5 be a success. For me to
+ consider it to be a success, it needs to be a standard, be interoperable,
+ and be ubiquitous.
+
+ I believe that the Validator.nu parser can be used to bootstrap that
+ process. It is written in Java, has been compiled into JavaScript, and has
+ been translated into C++ based on the Mozilla libraries with the intent of
+ being included in Firefox. It tracks the standard very closely.
+
+ For the moment, the effort is on extending that to another language (Ruby)
+ on a single environment (i.e., Linux). Once that is complete, the intent is
+ to evaluate the results, decide what needs to be changed, and what needs to
+ be done to support other languages and environments.
+
+ The bar I'm setting for myself isn't just another SWIG generated low level
+ interface to a DOM, but rather a best of breed interface; which for Ruby
+ seems to be the one pioneered by Hpricot and adopted by Nokogiri. Success
+ will mean passing all of the tests from one of those two parsers as well as
+ all of the HTML5 tests.
+
+Build instructions:
+
+ You'll need icu4j and chardet jars. If you checked out and ran dldeps you
+ are already all set:
+
+ svn co http://svn.versiondude.net/whattf/build/trunk/ build
+ python build/build.py checkout dldeps
+
+ Fedora 11:
+
+ yum install ruby-devel rubygem-rake java-1.5.0-gcj-devel gcc-c++
+
+ Ubuntu 9.04:
+
+ apt-get install ruby ruby1.8-dev rake gcj g++
+
+ Also at this time, you need to install a jdk (e.g. sun-java6-jdk), simply
+ because the javac that comes with gcj doesn't support -sourcepath, and
+ I haven't spent the time to find a replacement.
+
+ Finally, make sure that libjaxp1.3-java is *not* installed.
+
+ http://gcc.gnu.org/ml/java/2009-06/msg00055.html
+
+ If this is done, you should be all set.
+
+ cd htmlparser/ruby-gcj
+ rake test
+
+ If things are successful, the last lines of the output will list the
+ font attributes and values found in the test/google.html file.
diff --git a/parser/html/java/htmlparser/ruby-gcj/Rakefile b/parser/html/java/htmlparser/ruby-gcj/Rakefile
new file mode 100644
index 000000000..7b5180253
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/Rakefile
@@ -0,0 +1,77 @@
+# Directory holding the third-party jars; override it with the `deps`
+# environment variable.
+deps = ENV['deps'] || '../../dependencies'
+icu4j = "#{deps}/icu4j-4_0.jar"
+chardet = "#{deps}/mozilla/intl/chardet/java/dist/lib/chardet.jar"
+# Last entry, in sort order, of /usr/share/java/libgcj*.jar whose name ends in
+# "gcj", optional digits/dots/dashes, then "jar". This is nil when nothing
+# matches.
+libgcj = Dir['/usr/share/java/libgcj*.jar'].grep(/gcj[-\d.]*jar$/).sort.last
+
+# Building everything means: generated headers, the gcj shared libraries, the
+# extension Makefile, and finally the extension itself.
+task :default => %w(headers libs Makefile validator.so)
+
+# headers
+
+hdb = 'nu/validator/htmlparser/dom/HtmlDocumentBuilder'
+task :headers => %W(headers/DomUtils.h headers/#{hdb}.h)
+
+file 'headers/DomUtils.h' => 'DomUtils.java' do |t|
+ mkdir_p %w(classes headers), :verbose => false
+ sh "javac -d classes #{t.prerequisites.first}"
+ sh "gcjh -force -o #{t.name} -cp #{libgcj}:classes DomUtils"
+end
+
+file "headers/#{hdb}.h" => "../src/#{hdb}.java" do |t|
+ mkdir_p %w(classes headers), :verbose => false
+ sh "javac -cp #{icu4j}:#{chardet} -d classes -sourcepath ../src " +
+ t.prerequisites.first
+ sh "gcjh -force -cp classes -o #{t.name} -cp #{libgcj}:classes " +
+ hdb.gsub('/','.')
+end
+
+# libs
+
+# The three shared libraries the extension links against:
+# lib/libnu-htmlparser.so, lib/libnu-chardet.so and lib/libnu-icu.so.
+task :libs => %w(htmlparser chardet icu).map {|name| "lib/libnu-#{name}.so"}
+
+# All Java sources under ../src except those in a /xom/ directory.
+htmlparser = Dir['../src/**/*.java'].reject {|name| name.include? '/xom/'}
+# Compile those sources together with DomUtils.java into one shared library,
+# with icu4j and chardet on the classpath.
+file 'lib/libnu-htmlparser.so' => htmlparser + ['DomUtils.java'] do |t|
+ mkdir_p 'lib', :verbose => false
+ sh "gcj -shared --classpath=#{icu4j}:#{chardet} -fPIC " +
+ "-o #{t.name} #{t.prerequisites.join(' ')}"
+end
+
+file 'lib/libnu-chardet.so' => chardet do |t|
+ mkdir_p 'lib', :verbose => false
+ sh "gcj -shared -fPIC -o #{t.name} #{t.prerequisites.join(' ')}"
+end
+
+file 'lib/libnu-icu.so' => icu4j do |t|
+ mkdir_p 'lib', :verbose => false
+ sh "gcj -shared -fPIC -o #{t.name} #{t.prerequisites.join(' ')}"
+end
+
+# module
+
+file 'Makefile' do
+ sh "ruby extconf.rb --with-gcj=#{libgcj}"
+end
+
+file 'validator.so' => %w(Makefile validator.cpp headers/DomUtils.h) do
+ system 'make'
+end
+
+file 'nu/validator.so' do
+ mkdir_p 'nu', :verbose => false
+ system 'ln -s -t nu ../validator.so'
+end
+
+# tasks
+
+task :test => [:default, 'nu/validator.so'] do
+ ENV['LD_LIBRARY_PATH']='lib'
+ sh 'ruby test/fonts.rb test/google.html'
+end
+
+task :clean do
+ rm_rf %W(classes lib nu mkmf.log headers/DomUtils.h headers/#{hdb}.h) +
+ Dir['*.o'] + Dir['*.so']
+end
+
+task :clobber => :clean do
+ rm_rf %w(headers Makefile)
+end
diff --git a/parser/html/java/htmlparser/ruby-gcj/extconf.rb b/parser/html/java/htmlparser/ruby-gcj/extconf.rb
new file mode 100644
index 000000000..415cf430a
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/extconf.rb
@@ -0,0 +1,45 @@
+# mkmf configuration for the nu/validator extension: makes sure the JAXP/DOM
+# headers exist (generating them from the libgcj jar when they do not), probes
+# for the three gcj-built libraries, and writes the Makefile.
+require 'mkmf'
+
+# system dependencies
+# Path of the libgcj jar: the value of --with-gcj=..., else the default below.
+gcj = with_config('gcj', '/usr/share/java/libgcj.jar')
+
+# headers for JAXP
+# The header probes in this section run with CC set to g++ and -xc++ added to
+# the preprocessor flags.
+CONFIG['CC'] = 'g++'
+with_cppflags('-xc++') do
+
+ # Only generate headers when Document.h cannot already be found in headers/.
+ unless find_header('org/w3c/dom/Document.h', 'headers')
+
+ # Walk the jar listing, keeping .class entries whose path starts with
+ # javax/ or org/ followed by w3c or xml and whose name contains no '$'.
+ `jar tf #{gcj}`.split.each do |file|
+ next unless file =~ /\.class$/
+ next unless file =~ /^(javax|org)\/(w3c|xml)/
+ next if file.include? '$'
+
+ # headers/<path>.h is the output file; the dotted class name is what gcjh
+ # is asked to process.
+ dest = 'headers/' + file.sub(/\.class$/,'.h')
+ name = file.sub(/\.class$/,'').gsub('/','.')
+
+ # Headers left over from an earlier run are not regenerated.
+ next if File.exist? dest
+
+ cmd = "gcjh -cp #{gcj} -o #{dest} #{name}"
+ puts cmd
+ # Stop generating at the first gcjh failure; the find_header check after
+ # the loop decides whether configuration can continue.
+ break unless system cmd
+ # Patch the generated header in place: "namespace namespace" at the end of
+ # a line becomes "namespace namespace$", and "::namespace::" becomes
+ # "::namespace$::".
+ # NOTE(review): presumably needed because `namespace` is a C++ keyword and
+ # cannot be used as an identifier -- confirm.
+ system "ruby -pi -e '$_.sub!(/namespace namespace$/," +
+ "\"namespace namespace$\")' #{dest}"
+ system "ruby -pi -e '$_.sub!(/::namespace::/," +
+ "\"::namespace$::\")' #{dest}"
+ end
+
+ # Give up if Document.h is still missing after generation.
+ exit unless find_header('org/w3c/dom/Document.h', 'headers')
+ end
+
+ # Produced by the Rakefile's headers task rather than by this script.
+ find_header 'nu/validator/htmlparser/dom/HtmlDocumentBuilder.h', 'headers'
+end
+
+# Java libraries
+# NOTE(review): this assigns Config::CONFIG['CC'], whereas the header section
+# above assigns CONFIG['CC'] -- confirm that the difference is intentional.
+Config::CONFIG['CC'] = 'g++ -shared'
+# Look for the libraries in lib/ unless --with-nu-htmlparser-* says otherwise.
+dir_config('nu-htmlparser', nil, 'lib')
+have_library 'nu-htmlparser'
+have_library 'nu-icu'
+have_library 'nu-chardet'
+
+# Ruby library
+create_makefile 'nu/validator'
diff --git a/parser/html/java/htmlparser/ruby-gcj/test/domencoding.rb b/parser/html/java/htmlparser/ruby-gcj/test/domencoding.rb
new file mode 100644
index 000000000..1beb94c10
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/test/domencoding.rb
@@ -0,0 +1,5 @@
+require 'nu/validator'
+
+ARGV.each do |arg|
+ puts Nu::Validator::parse(open(arg)).root.name
+end
diff --git a/parser/html/java/htmlparser/ruby-gcj/test/fonts.rb b/parser/html/java/htmlparser/ruby-gcj/test/fonts.rb
new file mode 100644
index 000000000..595e3ae06
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/test/fonts.rb
@@ -0,0 +1,11 @@
+require 'nu/validator'
+require 'open-uri'
+
+ARGV.each do |arg|
+ doc = Nu::Validator::parse(open(arg))
+ doc.xpath("//*[local-name()='font']").each do |font|
+ font.attributes.each do |name, attr|
+ puts "#{name} => #{attr.value}"
+ end
+ end
+end
diff --git a/parser/html/java/htmlparser/ruby-gcj/test/google.html b/parser/html/java/htmlparser/ruby-gcj/test/google.html
new file mode 100644
index 000000000..8d2183b29
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/test/google.html
@@ -0,0 +1,10 @@
+<!doctype html><html><head><meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"><title>Google</title><script>window.google={kEI:"vLhASujeGpTU9QT2iOnWAQ",kEXPI:"17259",kCSIE:"17259",kHL:"en"};
+window.google.sn="webhp";window.google.timers={load:{t:{start:(new Date).getTime()}}};try{window.google.pt=window.gtbExternal&&window.gtbExternal.pageT()||window.external&&window.external.pageT}catch(b){}
+window.google.jsrt_kill=1;
+var _gjwl=location;function _gjuc(){var e=_gjwl.href.indexOf("#");if(e>=0){var a=_gjwl.href.substring(e);if(a.indexOf("&q=")>0||a.indexOf("#q=")>=0){a=a.substring(1);if(a.indexOf("#")==-1){for(var c=0;c<a.length;){var d=c;if(a.charAt(d)=="&")++d;var b=a.indexOf("&",d);if(b==-1)b=a.length;var f=a.substring(d,b);if(f.indexOf("fp=")==0){a=a.substring(0,c)+a.substring(b,a.length);b=c}else if(f=="cad=h")return 0;c=b}_gjwl.href="/search?"+a+"&cad=h";return 1}}}return 0}function _gjp(){!(window._gjwl.hash&&
+window._gjuc())&&setTimeout(_gjp,500)};
+window._gjp && _gjp();</script><style>td{line-height:.8em;}.gac_c{line-height:normal;}form{margin-bottom:20px;}body,td,a,p,.h{font-family:arial,sans-serif}.h{color:#36c;font-size:20px}.q{color:#00c}.ts td{padding:0}.ts{border-collapse:collapse}#gbar{height:22px;padding-left:0px}.gbh,.gbd{border-top:1px solid #c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}#guser{padding-bottom:7px !important;text-align:right}#gbar,#guser{font-size:13px;padding-top:1px !important}@media all{.gb1,.gb3{height:22px;margin-right:.5em;vertical-align:top}#gbar{float:left}}a.gb1,a.gb3{color:#00c !important}.gb3{text-decoration:none}</style><script>google.y={};google.x=function(e,g){google.y[e.id]=[e,g];return false};</script></head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onload="document.f.q.focus();if(document.images)new Image().src='/images/nav_logo4.png'" topmargin=3 marginheight=3><textarea id=csi style=display:none></textarea><iframe name=wgjf style="display:none"></iframe><div id=gbar><nobr><b class=gb1>Web</b> <a href="http://images.google.com/imghp?hl=en&tab=wi" class=gb1>Images</a> <a href="http://video.google.com/?hl=en&tab=wv" class=gb1>Video</a> <a href="http://maps.google.com/maps?hl=en&tab=wl" class=gb1>Maps</a> <a href="http://news.google.com/nwshp?hl=en&tab=wn" class=gb1>News</a> <a href="http://www.google.com/prdhp?hl=en&tab=wf" class=gb1>Shopping</a> <a href="http://mail.google.com/mail/?hl=en&tab=wm" class=gb1>Gmail</a> <a href="http://www.google.com/intl/en/options/" class=gb3><u>more</u> &raquo;</a></nobr></div><div id=guser width=100%><nobr><a href="/url?sa=p&pref=ig&pval=3&q=http://www.google.com/ig%3Fhl%3Den%26source%3Diglk&usg=AFQjCNFA18XPfgb7dKnXfKz7x7g1GDH1tg">iGoogle</a> | <a href="https://www.google.com/accounts/Login?hl=en&continue=http://www.google.com/">Sign in</a></nobr></div><div class=gbh style=left:0></div><div class=gbh style=right:0></div><center><br clear=all id=lgpd><img alt="Google" 
height=110 src="/intl/en_ALL/images/logo.gif" width=276 id=logo onload="window.lol&&lol()"><br><br><form action="/search" name=f><table cellpadding=0 cellspacing=0><tr valign=top><td width=25%>&nbsp;</td><td align=center nowrap><input name=hl type=hidden value=en><input type=hidden name=ie value="ISO-8859-1"><input autocomplete="off" maxlength=2048 name=q size=55 title="Google Search" value=""><br><input name=btnG type=submit value="Google Search"><input name=btnI type=submit value="I'm Feeling Lucky"></td><td nowrap width=25% align=left><font size=-2>&nbsp;&nbsp;<a href=/advanced_search?hl=en>Advanced Search</a><br>&nbsp;&nbsp;<a href=/preferences?hl=en>Preferences</a><br>&nbsp;&nbsp;<a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><font size=-1><a href="/aclk?sa=L&ai=CqVchLbNASrv7IZa68gS13KTwAc3__IMB29PoogzB2ZzZExABIMFUUK_O0JX______wFgyQaqBAlP0BcDOBRYhqw&num=1&sig=AGiWqty21CD7ixNXZILwCnH7c_3n9v2-tg&q=http://www.allforgood.org#source=hpp">Find an opportunity to volunteer</a> in your community today.</font><br><br><br><font size=-1><a href="/intl/en/ads/">Advertising&nbsp;Programs</a> - <a href="/services/">Business Solutions</a> - <a href="/intl/en/about.html">About Google</a></font><p><font size=-2>&copy;2009 - <a href="/intl/en/privacy.html">Privacy</a></font></p></center><div id=xjsd></div><div id=xjsi><script>if(google.y)google.y.first=[];if(google.y)google.y.first=[];google.dstr=[];google.rein=[];window.setTimeout(function(){var a=document.createElement("script");a.src="/extern_js/f/CgJlbhICdXMgACswCjggQAgsKzAOOAUsKzAYOAQsKzAlOMmIASwrMCY4BCwrMCc4ACw/1t0T7hspHT4.js";(document.getElementById("xjsd")||document.body).appendChild(a)},0);
+;google.y.first.push(function(){google.ac.i(document.f,document.f.q,'','')});google.xjs&&google.j&&google.j.xi&&google.j.xi()</script></div><script>(function(){
+function a(){google.timers.load.t.ol=(new Date).getTime();google.report&&google.report(google.timers.load,{ei:google.kEI,e:google.kCSIE})}if(window.addEventListener)window.addEventListener("load",a,false);else if(window.attachEvent)window.attachEvent("onload",a);google.timers.load.t.prt=(new Date).getTime();
+})();
+</script> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/ruby-gcj/test/greek.xml b/parser/html/java/htmlparser/ruby-gcj/test/greek.xml
new file mode 100644
index 000000000..a14d23eb1
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/test/greek.xml
@@ -0,0 +1,2 @@
+<?xml version='1.0' encoding='iso-8859-7'?>
+<root/>
diff --git a/parser/html/java/htmlparser/ruby-gcj/validator.cpp b/parser/html/java/htmlparser/ruby-gcj/validator.cpp
new file mode 100644
index 000000000..aadd24abe
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/validator.cpp
@@ -0,0 +1,210 @@
+#include <gcj/cni.h>
+
+#include <java/io/ByteArrayInputStream.h>
+#include <java/lang/String.h>
+#include <java/lang/System.h>
+#include <java/lang/Throwable.h>
+#include <java/util/ArrayList.h>
+#include <javax/xml/xpath/XPath.h>
+#include <javax/xml/xpath/XPathFactory.h>
+#include <javax/xml/xpath/XPathExpression.h>
+#include <javax/xml/xpath/XPathConstants.h>
+#include <javax/xml/parsers/DocumentBuilderFactory.h>
+#include <javax/xml/parsers/DocumentBuilder.h>
+#include <org/w3c/dom/Attr.h>
+#include <org/w3c/dom/Document.h>
+#include <org/w3c/dom/Element.h>
+#include <org/w3c/dom/NodeList.h>
+#include <org/w3c/dom/NamedNodeMap.h>
+#include <org/xml/sax/InputSource.h>
+
+#include "nu/validator/htmlparser/dom/HtmlDocumentBuilder.h"
+
+#include "DomUtils.h"
+
+#include "ruby.h"
+
+using namespace java::io;
+using namespace java::lang;
+using namespace java::util;
+using namespace javax::xml::parsers;
+using namespace javax::xml::xpath;
+using namespace nu::validator::htmlparser::dom;
+using namespace org::w3c::dom;
+using namespace org::xml::sax;
+
+// Ruby classes Jaxp::Document, Jaxp::Attr and Jaxp::Element, assigned in
+// Init_validator and used as the class of every wrapped DOM object.
+static VALUE jaxp_Document;
+static VALUE jaxp_Attr;
+static VALUE jaxp_Element;
+// Interned names, assigned in Init_validator: the "read" method probed on
+// parser input, and the @doc / @element instance variables set on wrappers.
+static ID ID_read;
+static ID ID_doc;
+static ID ID_element;
+
+// Convert a Java string into a Ruby string holding its UTF-8 encoding.
+// Returns nil for a NULL Java reference.
+static VALUE j2r(String *string) {
+  if (string == NULL) return Qnil;
+
+  // Size of the UTF-8 encoding in bytes; this is how big the Ruby string
+  // has to be.
+  jint utf_len = JvGetStringUTFLength(string);
+
+  // Encode straight into the Ruby string's own buffer. That avoids putting
+  // an arbitrarily large variable-length array on the C stack.
+  VALUE result = rb_str_new(NULL, utf_len);
+
+  // The region length is counted in Java chars, not UTF-8 bytes. Passing the
+  // byte count, as the code used to, overruns both the source string and the
+  // buffer as soon as the text contains a non-ASCII character.
+  JvGetStringUTFRegion(string, 0, string->length(), RSTRING(result)->ptr);
+  return result;
+}
+
+// Convert a Ruby string into a Java string. The bytes are handed to
+// JvNewStringUTF as a NUL-terminated UTF-8 C string.
+static String *r2j(VALUE string) {
+  // StringValuePtr raises TypeError for a value that is not a String and
+  // cannot be converted with to_str, instead of dereferencing it blindly.
+  return JvNewStringUTF(StringValuePtr(string));
+}
+
+// release the Java Document associated with this Ruby Document
+//
+// Registered as the free function of every wrapped Document; it undoes the
+// DomUtils::pin call made when the document was wrapped.
+static void vnu_document_free(Document *doc) {
+ DomUtils::unpin(doc);
+}
+
+// Nu::Validator::parse( string|file )
+//
+// Parses the input with HtmlDocumentBuilder and returns it wrapped as a
+// Jaxp::Document. Returns nil, after printing the Java stack trace, when the
+// Java side throws; this is the same convention jaxp_parse follows.
+static VALUE vnu_parse(VALUE self, VALUE input) {
+  // read file-like objects into memory. TODO: buffer such objects
+  if (rb_respond_to(input, ID_read))
+    input = rb_funcall(input, ID_read, 0);
+
+  // Raise TypeError for anything that is not a String or convertible to one,
+  // before its bytes are touched.
+  StringValue(input);
+  long len = RSTRING(input)->len;
+
+  Document *doc;
+  try {
+    HtmlDocumentBuilder *parser = new HtmlDocumentBuilder();
+
+    // convert input in to a ByteArrayInputStream
+    jbyteArray bytes = JvNewByteArray(len);
+    memcpy(elements(bytes), RSTRING(input)->ptr, len);
+    InputSource *source = new InputSource(new ByteArrayInputStream(bytes));
+
+    // parse and pin; the pin is released by vnu_document_free
+    doc = parser->parse(source);
+    DomUtils::pin(doc);
+  } catch (java::lang::Throwable *ex) {
+    // A Java exception must not escape into the Ruby interpreter.
+    ex->printStackTrace();
+    return Qnil;
+  }
+
+  // wrap
+  return Data_Wrap_Struct(jaxp_Document, NULL, vnu_document_free, doc);
+}
+
+// Jaxp::parse( string|file )
+//
+// Parses the input with the default JAXP DocumentBuilder and returns it
+// wrapped as a Jaxp::Document. Returns nil, after printing the Java stack
+// trace, when the Java side throws.
+static VALUE jaxp_parse(VALUE self, VALUE input) {
+  // read file-like objects into memory. TODO: buffer such objects
+  if (rb_respond_to(input, ID_read))
+    input = rb_funcall(input, ID_read, 0);
+
+  // Raise TypeError for anything that is not a String or convertible to one,
+  // before its bytes are touched.
+  StringValue(input);
+  long len = RSTRING(input)->len;
+
+  Document *doc;
+  try {
+    // Creating the factory and builder can throw as well, so it happens
+    // inside the try block.
+    DocumentBuilderFactory *factory = DocumentBuilderFactory::newInstance();
+    DocumentBuilder *parser = factory->newDocumentBuilder();
+
+    jbyteArray bytes = JvNewByteArray(len);
+    memcpy(elements(bytes), RSTRING(input)->ptr, len);
+
+    // parse and pin; the pin is released by vnu_document_free
+    doc = parser->parse(new ByteArrayInputStream(bytes));
+    DomUtils::pin(doc);
+  } catch (java::lang::Throwable *ex) {
+    ex->printStackTrace();
+    return Qnil;
+  }
+
+  return Data_Wrap_Struct(jaxp_Document, NULL, vnu_document_free, doc);
+}
+
+
+// Jaxp::Document#encoding
+//
+// Returns the document's XML encoding as a Ruby string, or nil when the
+// Java side reports none.
+static VALUE jaxp_document_encoding(VALUE rdoc) {
+  Document *document;
+  Data_Get_Struct(rdoc, Document, document);
+
+  String *encoding = document->getXmlEncoding();
+  return j2r(encoding);
+}
+
+// Jaxp::Document#root
+//
+// Returns the document element wrapped as a Jaxp::Element, or nil for a
+// document without one.
+static VALUE jaxp_document_root(VALUE rdoc) {
+  Document *document;
+  Data_Get_Struct(rdoc, Document, document);
+
+  Element *root = document->getDocumentElement();
+  if (root != NULL) {
+    // The wrapper has no free function of its own; it records its owning
+    // document in @doc.
+    VALUE wrapped = Data_Wrap_Struct(jaxp_Element, NULL, NULL, root);
+    rb_ivar_set(wrapped, ID_doc, rdoc);
+    return wrapped;
+  }
+  return Qnil;
+}
+
+// Jaxp::Document#xpath( path )
+//
+// Evaluates the XPath expression against the document and returns an array
+// of Jaxp::Element wrappers, one per node in the result. Returns nil for a
+// document with no document element. Also returns nil, after printing the
+// Java stack trace, when compiling or evaluating the expression throws.
+static VALUE jaxp_document_xpath(VALUE rdoc, VALUE path) {
+  Document *jdoc;
+  Data_Get_Struct(rdoc, Document, jdoc);
+
+  if (jdoc->getDocumentElement() == NULL) return Qnil;
+
+  // Raise TypeError for a path that is not a String or convertible to one,
+  // before its bytes are handed to Java.
+  StringValue(path);
+
+  NodeList *list;
+  try {
+    XPath *xpath = XPathFactory::newInstance()->newXPath();
+    XPathExpression *expr = xpath->compile(r2j(path));
+    list = (NodeList*) expr->evaluate(jdoc, XPathConstants::NODESET);
+  } catch (java::lang::Throwable *ex) {
+    // e.g. a malformed expression; a Java exception must not escape into
+    // the Ruby interpreter.
+    ex->printStackTrace();
+    return Qnil;
+  }
+
+  VALUE result = rb_ary_new();
+  jint count = list->getLength();
+  for (jint i=0; i<count; i++) {
+    // Each wrapper records its owning document in @doc.
+    VALUE relement = Data_Wrap_Struct(jaxp_Element, NULL, NULL, list->item(i));
+    rb_ivar_set(relement, ID_doc, rdoc);
+    rb_ary_push(result, relement);
+  }
+  return result;
+}
+
+// Jaxp::Element#name
+//
+// Returns the element's node name as a Ruby string.
+static VALUE jaxp_element_name(VALUE relement) {
+  Element *element;
+  Data_Get_Struct(relement, Element, element);
+
+  String *node_name = element->getNodeName();
+  return j2r(node_name);
+}
+
+// Jaxp::Element#attributes
+//
+// Returns a hash mapping each attribute name to a Jaxp::Attr wrapper.
+static VALUE jaxp_element_attributes(VALUE relement) {
+  Element *element;
+  Data_Get_Struct(relement, Element, element);
+
+  VALUE attributes = rb_hash_new();
+  NamedNodeMap *nodes = element->getAttributes();
+  jint count = nodes->getLength();
+  for (jint index = 0; index < count; index++) {
+    Attr *attribute = (Attr *) nodes->item(index);
+
+    // Each wrapper records its owning element in @element.
+    VALUE wrapped = Data_Wrap_Struct(jaxp_Attr, NULL, NULL, attribute);
+    rb_ivar_set(wrapped, ID_element, relement);
+
+    VALUE key = j2r(attribute->getName());
+    rb_hash_aset(attributes, key, wrapped);
+  }
+  return attributes;
+}
+
+// Jaxp::Attr#value
+//
+// Returns the attribute's value as a Ruby string.
+static VALUE jaxp_attribute_value(VALUE rattribute) {
+  Attr *attribute;
+  Data_Get_Struct(rattribute, Attr, attribute);
+
+  String *value = attribute->getValue();
+  return j2r(value);
+}
+
+// Function type every method implementation is cast to when it is registered
+// with rb_define_method / rb_define_singleton_method.
+typedef VALUE (ruby_method)(...);
+
+// Nu::Validator module initialization
+//
+// Called by Ruby when the extension is loaded. Starts the Java runtime, then
+// defines the Jaxp and Nu::Validator modules and the Jaxp::Document,
+// Jaxp::Element and Jaxp::Attr classes.
+extern "C" void Init_validator() {
+ // Bring up the gcj runtime, attach this thread to it, and initialize three
+ // classes up front.
+ // NOTE(review): presumably these are initialized explicitly because their
+ // static members are used directly from C++ -- confirm.
+ JvCreateJavaVM(NULL);
+ JvAttachCurrentThread(NULL, NULL);
+ JvInitClass(&DomUtils::class$);
+ JvInitClass(&XPathFactory::class$);
+ JvInitClass(&XPathConstants::class$);
+
+ // Jaxp.parse
+ VALUE jaxp = rb_define_module("Jaxp");
+ rb_define_singleton_method(jaxp, "parse", (ruby_method*)&jaxp_parse, 1);
+
+ // Nu::Validator.parse
+ VALUE nu = rb_define_module("Nu");
+ VALUE validator = rb_define_module_under(nu, "Validator");
+ rb_define_singleton_method(validator, "parse", (ruby_method*)&vnu_parse, 1);
+
+ // All wrapper classes live under Jaxp; both parse methods wrap their result
+ // as a Jaxp::Document.
+ jaxp_Document = rb_define_class_under(jaxp, "Document", rb_cObject);
+ rb_define_method(jaxp_Document, "encoding",
+ (ruby_method*)&jaxp_document_encoding, 0);
+ rb_define_method(jaxp_Document, "root",
+ (ruby_method*)&jaxp_document_root, 0);
+ rb_define_method(jaxp_Document, "xpath",
+ (ruby_method*)&jaxp_document_xpath, 1);
+
+ jaxp_Element = rb_define_class_under(jaxp, "Element", rb_cObject);
+ rb_define_method(jaxp_Element, "name",
+ (ruby_method*)&jaxp_element_name, 0);
+ rb_define_method(jaxp_Element, "attributes",
+ (ruby_method*)&jaxp_element_attributes, 0);
+
+ jaxp_Attr = rb_define_class_under(jaxp, "Attr", rb_cObject);
+ rb_define_method(jaxp_Attr, "value",
+ (ruby_method*)&jaxp_attribute_value, 0);
+
+ // Intern the names used by the functions above.
+ ID_read = rb_intern("read");
+ ID_doc = rb_intern("@doc");
+ ID_element = rb_intern("@element");
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Big5.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5.java
new file mode 100644
index 000000000..00e5f7ca7
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+/**
+ * The {@code big5} encoding: its name, its labels, and factory methods for
+ * its decoder and encoder. The constructor is private; the only instance is
+ * {@link #INSTANCE}.
+ */
+class Big5 extends Encoding {
+
+ /** Labels passed to the {@link Encoding} constructor. */
+ private static final String[] LABELS = {
+ "big5",
+ "big5-hkscs",
+ "cn-big5",
+ "csbig5",
+ "x-x-big5"
+ };
+
+ /** Name passed to the {@link Encoding} constructor. */
+ private static final String NAME = "big5";
+
+ /**
+  * The sole instance. It must stay declared after {@code LABELS}: static
+  * initializers run in textual order and the constructor reads that array.
+  */
+ static final Big5 INSTANCE = new Big5();
+
+ /** Registers this encoding's name and labels with the superclass. */
+ private Big5() {
+ super(NAME, LABELS);
+ }
+
+ /** Returns a new {@code Big5Decoder} bound to this encoding. */
+ @Override public CharsetDecoder newDecoder() {
+ return new Big5Decoder(this);
+ }
+
+ /** Returns a new {@code Big5Encoder} bound to this encoding. */
+ @Override public CharsetEncoder newEncoder() {
+ return new Big5Encoder(this);
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Data.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Data.java
new file mode 100644
index 000000000..9f35be341
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Data.java
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+final class Big5Data {
+
+ private static final String ASTRALNESS = "\uF829\u7A22\u1290\uC5C4\u0007\u0200\u7549\"\u0000\uA000\u3859\u0300\u002C\u573E\uF72B\u6EFC\u90F2\u3B7B\u83E9\uF049\u9DA6\uBBFC\uBEF7\uFDFE\u0C83\uABD1\u7BFF\u7FBF\u1804\u002C\u4840\u2046\u0408\u2A22\u4858\u091A\u5100\u3122\uC000\u5000\uC00D\u6110\uD44C\u9A24\u0180\u0004\u92B2\u0209\u8631\u1242\u8140\u0351\uAB48\u7460\uD5A2\u3E5C\uE361\u1083\u720B\u08A0\u51D6\uE00A\u8100\u1686\uC443\u1135\u6037\u7AE6\u056D\u7D0C\u0E66\u81E0\u7F88\u2420\u2406\u1D03\u340C\u4268\u454A\uF13F\u080D\u8084\uBB00\u0C4D\u6ED6\u97D7\u41DF\u5D3E\uDA68\u305C\uB800\u26E9\u80BC\u0151\uE078\u89A1\u59C0\u9679\u3BCC\u5EDE\uBC2C\uDF9B\u6C5D\u046D\u6043\u4A36\uD860\u073E\uC8C4\u6C69\uD8B1\u8302\u0F88\u0973\u806E\u3B6B\u5A17\uA503\u2D52\u3F40\u1120\u4101\u5024\uB903\u90EE\u1079\u5CAD\u1820\uDA0A\u8060\u9E26\u6E73\u1021\u080E\u4368\u6FB2\u161F\u8AFE\u76B6\u763A\u8262\u1894\u1801\uFE7D\u578D\u1327\u5BD2\u1937\uDB8C\u4862\u0024\u0000\u0010\u8000\u0000\u0000\u0038\u3800\uB9E2\uFD7D\u75F8\uDCF7\u6FF3\uBBF2\uFF4A\uAE3F\u9FC5\uEAFF\uBABA\uBC5D\u9F73\uD8FA\uDED6\u4B25\u975E\u2ADA\u6DB9\u06E6\u9D36\u53F9\u6FC5\uF98A\u49BF\uDB5D\uFFF8\u14A6\uE605\u96F7\u0A99\u00E5\u0800\u3D81\u5002\u0102\uBF49\u475E\u036F\u6280\uEECA\u4819\u6081\u205A\u24F7\u0000\u0004\u0000\u2804\u22C8\u0200\u0000\u2010\u5082\u3040\u0001\u0010\u1284\u0041\u0504\u2000\uC100\u3F7F\uB059\u8AC1\uAFAF\uAC05\u033F\u0204\u7280\u420A\u0426\u02D0\u0EC3\u0958\u0A80\u20B5\u9206\u8B77\u0560\u21C9\u4606\u6038\uC048\u24B4\u84DE\uC0E0\u3364\u3154\u300D\u688A\u5F2B\u0626\u8496\uB108\uE890\uA394\u734F\u50B8\u0D11\uDFA4\u4003\u5D20\u8480\u6160\u51CE\u800A\u58B7\u0050\uE862\u6750\u7220\u1228";
+
+ private static final String TABLE0 = "\u43F0\u4C32\u4603\u45A6\u4578\u7267\u4D77\u45B3\u7CB1\u4CE2\u7CC5\u3B95\u4736\u4744\u4C47\u4C40\u42BF\u3617\u7352\u6E8B\u70D2\u4C57\uA351\u474F\u45DA\u4C85\u7C6C\u4D07\u4AA4\u46A1\u6B23\u7225\u5A54\u1A63\u3E06\u3F61\u664D\u56FB\u0000\u7D95\u591D\u8BB9\u3DF4\u9734\u7BEF\u5BDB\u1D5E\u5AA4\u3625\u9EB0\u5AD1\u5BB7\u5CFC\u676E\u8593\u9945\u7461\u749D\u3875\u1D53\u369E\u6021\u3EEC\u58DE\u3AF5\u7AFC\u9F97\u4161\u890D\u31EA\u0A8A\u325E\u430A\u8484\u9F96\u942F\u4930\u8613\u5896\u974A\u9218\u79D0\u7A32\u6660\u6A29\u889D\u744C\u7BC5\u6782\u7A2C\u524F\u9046\u34E6\u73C4\u5DB9\u74C6\u9FC7\u57B3\u492F\u544C\u4131\u368E\u5818\u7A72\u7B65\u8B8F\u46AE\u6E88\u4181\u5D99\u7BAE\u24BC\u9FC8\u24C1\u24C9\u24CC\u9FC9\u8504\u35BB\u40B4\u9FCA\u44E1\uADFF\u62C1\u706E\u9FCB";
+
+ private static final String TABLE1 = "\u31C0\u31C1\u31C2\u31C3\u31C4\u010C\u31C5\u00D1\u00CD\u31C6\u31C7\u00CB\u1FE8\u31C8\u00CA\u31C9\u31CA\u31CB\u31CC\u010E\u31CD\u31CE\u0100\u00C1\u01CD\u00C0\u0112\u00C9\u011A\u00C8\u014C\u00D3\u01D1\u00D2\u0000\u1EBE\u0000\u1EC0\u00CA\u0101\u00E1\u01CE\u00E0\u0251\u0113\u00E9\u011B\u00E8\u012B\u00ED\u01D0\u00EC\u014D\u00F3\u01D2\u00F2\u016B\u00FA\u01D4\u00F9\u01D6\u01D8\u01DA\u01DC\u00FC\u0000\u1EBF\u0000\u1EC1\u00EA\u0261\u23DA\u23DB";
+
+ private static final String TABLE2 = "\uA3A9\u1145\u0000\u650A\u0000\u0000\u4E3D\u6EDD\u9D4E\u91DF\u0000\u0000\u7735\u6491\u4F1A\u4F28\u4FA8\u5156\u5174\u519C\u51E4\u52A1\u52A8\u533B\u534E\u53D1\u53D8\u56E2\u58F0\u5904\u5907\u5932\u5934\u5B66\u5B9E\u5B9F\u5C9A\u5E86\u603B\u6589\u67FE\u6804\u6865\u6D4E\u70BC\u7535\u7EA4\u7EAC\u7EBA\u7EC7\u7ECF\u7EDF\u7F06\u7F37\u827A\u82CF\u836F\u89C6\u8BBE\u8BE2\u8F66\u8F67\u8F6E\u7411\u7CFC\u7DCD\u6946\u7AC9\u5227\u0000\u0000\u0000\u0000\u918C\u78B8\u915E\u80BC\u0000\u8D0B\u80F6\u09E7\u0000\u0000\u809F\u9EC7\u4CCD\u9DC9\u9E0C\u4C3E\u9DF6\u700E\u9E0A\uA133\u35C1\u0000\u6E9A\u823E\u7519\u0000\u4911\u9A6C\u9A8F\u9F99\u7987\u846C\u1DCA\u05D0\u2AE6\u4E24\u4E81\u4E80\u4E87\u4EBF\u4EEB\u4F37\u344C\u4FBD\u3E48\u5003\u5088\u347D\u3493\u34A5\u5186\u5905\u51DB\u51FC\u5205\u4E89\u5279\u5290\u5327\u35C7\u53A9\u3551\u53B0\u3553\u53C2\u5423\u356D\u3572\u3681\u5493\u54A3\u54B4\u54B9\u54D0\u54EF\u5518\u5523\u5528\u3598\u553F\u35A5\u35BF\u55D7\u35C5\u7D84\u5525\u0000\u0C42\u0D15\u512B\u5590\u2CC6\u39EC\u0341\u8E46\u4DB8\u94E5\u4053\u80BE\u777A\u2C38\u3A34\u47D5\u815D\u69F2\u4DEA\u64DD\u0D7C\u0FB4\u0CD5\u10F4\u648D\u8E7E\u0E96\u0C0B\u0F64\u2CA9\u8256\u44D3\u0000\u0D46\u9A4D\u80E9\u47F4\u4EA7\u2CC2\u9AB2\u3A67\u95F4\u3FED\u3506\u52C7\u97D4\u78C8\u2D44\u9D6E\u9815\u0000\u43D9\u60A5\u64B4\u54E3\u2D4C\u2BCA\u1077\u39FB\u106F\u66DA\u6716\u79A0\u64EA\u5052\u0C43\u8E68\u21A1\u8B4C\u0731\u0000\u480B\u01A9\u3FFA\u5873\u2D8D\u0000\u45C8\u04FC\u6097\u0F4C\u0D96\u5579\u40BB\u43BA\u0000\u4AB4\u2A66\u109D\u81AA\u98F5\u0D9C\u6379\u39FE\u2775\u8DC0\u56A1\u647C\u3E43\u0000\uA601\u0E09\u2ACF\u2CC9\u0000\u10C8\u39C2\u3992\u3A06\u829B\u3578\u5E49\u20C7\u5652\u0F31\u2CB2\u9720\u34BC\u6C3D\u4E3B\u0000\u0000\u7574\u2E8B\u2208\uA65B\u8CCD\u0E7A\u0C34\u681C\u7F93\u10CF\u2803\u2939\u35FB\u51E3\u0E8C\u0F8D\u0EAA\u3F93\u0F30\u0D47\u114F\u0E4C\u0000\u0EAB\u0BA9\u0D48\u10C0\u113D\u3FF9\u2696\u6432\u0FAD\u33F4\u7639\u2BCE\u0D7E\u0D7F\u2C51\u2C55\u3A18\u0E98\u10C7\u0F2E\uA632\u6B
50\u8CD2\u8D99\u8CCA\u95AA\u54CC\u82C4\u55B9\u0000\u9EC3\u9C26\u9AB6\u775E\u2DEE\u7140\u816D\u80EC\u5C1C\u6572\u8134\u3797\u535F\u80BD\u91B6\u0EFA\u0E0F\u0E77\u0EFB\u35DD\u4DEB\u3609\u0CD6\u56AF\u27B5\u10C9\u0E10\u0E78\u1078\u1148\u8207\u1455\u0E79\u4E50\u2DA4\u5A54\u101D\u101E\u10F5\u10F6\u579C\u0E11\u7694\u82CD\u0FB5\u0E7B\u517E\u3703\u0FB6\u1180\u52D8\uA2BD\u49DA\u183A\u4177\u827C\u5899\u5268\u361A\u573D\u7BB2\u5B68\u4800\u4B2C\u9F27\u49E7\u9C1F\u9B8D\u5B74\u313D\u55FB\u35F2\u5689\u4E28\u5902\u1BC1\uF878\u9751\u0086\u4E5B\u4EBB\u353E\u5C23\u5F51\u5FC4\u38FA\u624C\u6535\u6B7A\u6C35\u6C3A\u706C\u722B\u4E2C\u72AD\u48E9\u7F52\u793B\u7CF9\u7F53\u626A\u34C1\u0000\u634B\u8002\u8080\u6612\u6951\u535D\u8864\u89C1\u78B2\u8BA0\u8D1D\u9485\u9578\u957F\u95E8\u8E0F\u97E6\u9875\u98CE\u98DE\u9963\u9810\u9C7C\u9E1F\u9EC4\u6B6F\uF907\u4E37\u0087\u961D\u6237\u94A2\u0000\u503B\u6DFE\u9C73\u9FA6\u3DC9\u888F\u414E\u7077\u5CF5\u4B20\u51CD\u3559\u5D30\u6122\u8A32\u8FA7\u91F6\u7191\u6719\u73BA\u3281\uA107\u3C8B\u1980\u4B10\u78E4\u7402\u51AE\u870F\u4009\u6A63\uA2BA\u4223\u860F\u0A6F\u7A2A\u9947\u8AEA\u9755\u704D\u5324\u207E\u93F4\u76D9\u89E3\u9FA7\u77DD\u4EA3\u4FF0\u50BC\u4E2F\u4F17\u9FA8\u5434\u7D8B\u5892\u58D0\u1DB6\u5E92\u5E99\u5FC2\u2712\u658B\u33F9\u6919\u6A43\u3C63\u6CFF\u0000\u7200\u4505\u738C\u3EDB\u4A13\u5B15\u74B9\u8B83\u5CA4\u5695\u7A93\u7BEC\u7CC3\u7E6C\u82F8\u8597\u9FA9\u8890\u9FAA\u8EB9\u9FAB\u8FCF\u855F\u99E0\u9221\u9FAC\u8DB9\u143F\u4071\u42A2\u5A1A\u0000\u0000\u0000\u9868\u676B\u4276\u573D\u0000\u85D6\u497B\u82BF\u710D\u4C81\u6D74\u5D7B\u6B15\u6FBE\u9FAD\u9FAE\u5B96\u9FAF\u66E7\u7E5B\u6E57\u79CA\u3D88\u44C3\u3256\u2796\u439A\u4536\u0000\u5CD5\u3B1A\u8AF9\u5C78\u3D12\u3551\u5D78\u9FB2\u7157\u4558\u40EC\u1E23\u4C77\u3978\u344A\u01A4\u6C41\u8ACC\u4FB4\u0239\u59BF\u816C\u9856\u98FA\u5F3B\u0B9F\u0000\u21C1\u896D\u4102\u46BB\u9079\u3F07\u9FB3\uA1B5\u40F8\u37D6\u46F7\u6C46\u417C\u86B2\u73FF\u456D\u38D4\u549A\u4561\u451B\u4D89\u4C7B\u4D76\u45EA\u3FC8\u4B0F\u3661\u44DE\u44BD\u41ED
\u5D3E\u5D48\u5D56\u3DFC\u380F\u5DA4\u5DB9\u3820\u3838\u5E42\u5EBD\u5F25\u5F83\u3908\u3914\u393F\u394D\u60D7\u613D\u5CE5\u3989\u61B7\u61B9\u61CF\u39B8\u622C\u6290\u62E5\u6318\u39F8\u56B1\u3A03\u63E2\u63FB\u6407\u645A\u3A4B\u64C0\u5D15\u5621\u9F9F\u3A97\u6586\u3ABD\u65FF\u6653\u3AF2\u6692\u3B22\u6716\u3B42\u67A4\u6800\u3B58\u684A\u6884\u3B72\u3B71\u3B7B\u6909\u6943\u725C\u6964\u699F\u6985\u3BBC\u69D6\u3BDD\u6A65\u6A74\u6A71\u6A82\u3BEC\u6A99\u3BF2\u6AAB\u6AB5\u6AD4\u6AF6\u6B81\u6BC1\u6BEA\u6C75\u6CAA\u3CCB\u6D02\u6D06\u6D26\u6D81\u3CEF\u6DA4\u6DB1\u6E15\u6E18\u6E29\u6E86\u89C0\u6EBB\u6EE2\u6EDA\u9F7F\u6EE8\u6EE9\u6F24\u6F34\u3D46\u3F41\u6F81\u6FBE\u3D6A\u3D75\u71B7\u5C99\u3D8A\u702C\u3D91\u7050\u7054\u706F\u707F\u7089\u0325\u43C1\u35F1\u0ED8\u3ED7\u57BE\u6ED3\u713E\u57E0\u364E\u69A2\u8BE9\u5B74\u7A49\u58E1\u94D9\u7A65\u7A7D\u59AC\u7ABB\u7AB0\u7AC2\u7AC3\u71D1\u648D\u41CA\u7ADA\u7ADD\u7AEA\u41EF\u54B2\u5C01\u7B0B\u7B55\u7B29\u530E\u5CFE\u7BA2\u7B6F\u839C\u5BB4\u6C7F\u7BD0\u8421\u7B92\u7BB8\u5D20\u3DAD\u5C65\u8492\u7BFA\u7C06\u7C35\u5CC1\u7C44\u7C83\u4882\u7CA6\u667D\u4578\u7CC9\u7CC7\u7CE6\u7C74\u7CF3\u7CF5\u7CCE\u7E67\u451D\u6E44\u7D5D\u6ED6\u748D\u7D89\u7DAB\u7135\u7DB3\u7DD2\u4057\u6029\u7DE4\u3D13\u7DF5\u17F9\u7DE5\u836D\u7E1D\u6121\u615A\u7E6E\u7E92\u432B\u946C\u7E27\u7F40\u7F41\u7F47\u7936\u62D0\u99E1\u7F97\u6351\u7FA3\u1661\u0068\u455C\u3766\u4503\u833A\u7FFA\u6489\u8005\u8008\u801D\u8028\u802F\uA087\u6CC3\u803B\u803C\u8061\u2714\u4989\u6626\u3DE3\u66E8\u6725\u80A7\u8A48\u8107\u811A\u58B0\u26F6\u6C7F\u6498\u4FB8\u64E7\u148A\u8218\u185E\u6A53\u4A65\u4A95\u447A\u8229\u0B0D\u6A52\u3D7E\u4FF9\u14FD\u84E2\u8362\u6B0A\u49A7\u3530\u1773\u3DF8\u82AA\u691B\uF994\u41DB\u854B\u82D0\u831A\u0E16\u17B4\u36C1\u317D\u355A\u827B\u82E2\u8318\u3E8B\u6DA3\u6B05\u6B97\u35CE\u3DBF\u831D\u55EC\u8385\u450B\u6DA5\u83AC\u83C1\u83D3\u347E\u6ED4\u6A57\u855A\u3496\u6E42\u2EEF\u8458\u5BE4\u8471\u3DD3\u44E4\u6AA7\u844A\u3CB5\u7958\u84A8\u6B96\u6E77\u6E43\u84DE\u840F\u8391\u44A0\u8493\u84E4\u
5C91\u4240\u5CC0\u4543\u8534\u5AF2\u6E99\u4527\u8573\u4516\u67BF\u8616\u8625\u863B\u85C1\u7088\u8602\u1582\u70CD\uF9B2\u456A\u8628\u3648\u18A2\u53F7\u739A\u867E\u8771\uA0F8\u87EE\u2C27\u87B1\u87DA\u880F\u5661\u866C\u6856\u460F\u8845\u8846\u75E0\u3DB9\u75E4\u885E\u889C\u465B\u88B4\u88B5\u63C1\u88C5\u7777\u770F\u8987\u898A\u89A6\u89A9\u89A7\u89BC\u8A25\u89E7\u7924\u7ABD\u8A9C\u7793\u91FE\u8A90\u7A59\u7AE9\u7B3A\u3F8F\u4713\u7B38\u717C\u8B0C\u8B1F\u5430\u5565\u8B3F\u8B4C\u8B4D\u8AA9\u4A7A\u8B90\u8B9B\u8AAF\u16DF\u4615\u884F\u8C9B\u7D54\u7D8F\uF9D4\u3725\u7D53\u8CD6\u7D98\u7DBD\u8D12\u8D03\u1910\u8CDB\u705C\u8D11\u4CC9\u3ED0\u8D77\u8DA9\u8002\u1014\u498A\u3B7C\u81BC\u710C\u7AE7\u8EAD\u8EB6\u8EC3\u92D4\u8F19\u8F2D\u8365\u8412\u8FA5\u9303\uA29F\u0A50\u8FB3\u492A\u89DE\u853D\u3DBB\u5EF8\u3262\u8FF9\uA014\u86BC\u8501\u2325\u3980\u6ED7\u9037\u853C\u7ABE\u9061\u856C\u860B\u90A8\u8713\u90C4\u86E6\u90AE\u90FD\u9167\u3AF0\u91A9\u91C4\u7CAC\u8933\u1E89\u920E\u6C9F\u9241\u9262\u55B9\u92B9\u8AC6\u3C9B\u8B0C\u55DB\u0D31\u932C\u936B\u8AE1\u8BEB\u708F\u5AC3\u8AE2\u8AE5\u4965\u9244\u8BEC\u8C39\u8BFF\u9373\u945B\u8EBC\u9585\u95A6\u9426\u95A0\u6FF6\u42B9\u267A\u86D8\u127C\u3E2E\u49DF\u6C1C\u967B\u9696\u416C\u96A3\u6ED5\u61DA\u96B6\u78F5\u8AE0\u96BD\u53CC\u49A1\u6CB8\u0274\u6410\u90AF\u90E5\u4AD1\u1915\u330A\u9731\u8642\u9736\u4A0F\u453D\u4585\u4AE9\u7075\u5B41\u971B\u975C\u91D5\u9757\u5B4A\u91EB\u975F\u9425\u50D0\u30B7\u30BC\u9789\u979F\u97B1\u97BE\u97C0\u97D2\u97E0\u546C\u97EE\u741C\u9433\u97FF\u97F5\u941D\u797A\u4AD1\u9834\u9833\u984B\u9866\u3B0E\u7175\u3D51\u0630\u415C\u5706\u98CA\u98B7\u98C8\u98C7\u4AFF\u6D27\u16D3\u55B0\u98E1\u98E6\u98EC\u9378\u9939\u4A29\u4B72\u9857\u9905\u99F5\u9A0C\u9A3B\u9A10\u9A58\u5725\u36C4\u90B1\u9BD5\u9AE0\u9AE2\u9B05\u9AF4\u4C0E\u9B14\u9B2D\u8600\u5034\u9B34\u69A8\u38C3\u307D\u9B50\u9B40\u9D3E\u5A45\u1863\u9B8E\u424B\u9C02\u9BFF\u9C0C\u9E68\u9DD4\u9FB7\uA192\uA1AB\uA0E1\uA123\uA1DF\u9D7E\u9D83\uA134\u9E0E\u6888\u9DC4\u215B\uA193\uA220\u193B\uA233\u9D39\uA0
B9\uA2B4\u9E90\u9E95\u9E9E\u9EA2\u4D34\u9EAA\u9EAF\u4364\u9EC1\u3B60\u39E5\u3D1D\u4F32\u37BE\u8C2B\u9F02\u9F08\u4B96\u9424\u6DA2\u9F17\u9F16\u9F39\u569F\u568A\u9F45\u99B8\u908B\u97F2\u847F\u9F62\u9F69\u7ADC\u9F8E\u7216\u4BBE\u4975\u49BB\u7177\u49F8\u4348\u4A51\u739E\u8BDA\u18FA\u799F\u897E\u8E36\u9369\u93F3\u8A44\u92EC\u9381\u93CB\u896C\u44B9\u7217\u3EEB\u7772\u7A43\u70D0\u4473\u43F8\u717E\u17EF\u70A3\u18BE\u3599\u3EC7\u1885\u542F\u17F8\u3722\u16FB\u1839\u36E1\u1774\u18D1\u5F4B\u3723\u16C0\u575B\u4A25\u13FE\u12A8\u13C6\u14B6\u8503\u36A6\u8503\u8455\u4994\u7165\u3E31\u555C\u3EFB\u7052\u44F4\u36EE\u999D\u6F26\u67F9\u3733\u3C15\u3DE7\u586C\u1922\u6810\u4057\u373F\u40E1\u408B\u410F\u6C21\u54CB\u569E\u66B1\u5692\u0FDF\u0BA8\u0E0D\u93C6\u8B13\u939C\u4EF8\u512B\u3819\u4436\u4EBC\u0465\u037F\u4F4B\u4F8A\u5651\u5A68\u01AB\u03CB\u3999\u030A\u0414\u3435\u4F29\u02C0\u8EB3\u0275\u8ADA\u020C\u4E98\u50CD\u510D\u4FA2\u4F03\u4A0E\u3E8A\u4F42\u502E\u506C\u5081\u4FCC\u4FE5\u5058\u50FC\u5159\u515B\u515D\u515E\u6E76\u3595\u3E39\u3EBF\u6D72\u1884\u3E89\u51A8\u51C3\u05E0\u44DD\u04A3\u0492\u0491\u8D7A\u8A9C\u070E\u5259\u52A4\u0873\u52E1\u936E\u467A\u718C\u438C\u0C20\u49AC\u10E4\u69D1\u0E1D\u7479\u3EDE\u7499\u7414\u7456\u7398\u4B8E\u4ABC\u408D\u53D0\u3584\u720F\u40C9\u55B4\u0345\u54CD\u0BC6\u571D\u925D\u96F4\u9366\u57DD\u578D\u577F\u363E\u58CB\u5A99\u8A46\u16FA\u176F\u1710\u5A2C\u59B8\u928F\u5A7E\u5ACF\u5A12\u5946\u19F3\u1861\u4295\u36F5\u6D05\u7443\u5A21\u5E83\u5A81\u8BD7\u0413\u93E0\u748C\u1303\u7105\u4972\u9408\u89FB\u93BD\u37A0\u5C1E\u5C9E\u5E5E\u5E48\u1996\u197C\u3AEE\u5ECD\u5B4F\u1903\u1904\u3701\u18A0\u36DD\u16FE\u36D3\u812A\u8A47\u1DBA\u3472\u89A8\u5F0C\u5F0E\u1927\u17AB\u5A6B\u173B\u5B44\u8614\u75FD\u8860\u607E\u2860\u262B\u5FDB\u3EB8\u25AF\u25BE\u9088\u6F73\u61C0\u003E\u0046\u261B\u6199\u6198\u6075\u2C9B\u2D07\u46D4\u914D\u6471\u4665\u2B6A\u3A29\u2B22\u3450\u98EA\u2E78\u6337\uA45B\u64B6\u6331\u63D1\u49E3\u2D67\u62A4\u2CA1\u643B\u656B\u6972\u3BF4\u308E\u32AD\u4989\u32AB\u550D\u32E0
\u18D9\u943F\u66CE\u3289\u31B3\u3AE0\u4190\u5584\u8B22\u558F\u16FC\u555B\u5425\u78EE\u3103\u182A\u3234\u3464\u320F\u3182\u42C9\u668E\u6D24\u666B\u4B93\u6630\u7870\u1DEB\u6663\u32D2\u32E1\u661E\u5872\u38D1\u383A\u37BC\u3B99\u37A2\u33FE\u74D0\u3B96\u678F\u462A\u68B6\u681E\u3BC4\u6ABE\u3863\u37D5\u4487\u6A33\u6A52\u6AC9\u6B05\u1912\u6511\u6898\u6A4C\u3BD7\u6A7A\u6B57\u3FC0\u3C9A\u93A0\u92F2\u8BEA\u8ACB\u9289\u801E\u89DC\u9467\u6DA5\u6F0B\u49EC\u6D67\u3F7F\u3D8F\u6E04\u403C\u5A3D\u6E0A\u5847\u6D24\u7842\u713B\u431A\u4276\u70F1\u7250\u7287\u7294\u478F\u4725\u5179\u4AA4\u05EB\u747A\u3EF8\u365F\u4A4A\u4917\u5FE1\u3F06\u3EB1\u4ADF\u8C23\u3F35\u60A7\u3EF3\u74CC\u743C\u9387\u7437\u449F\u6DEA\u4551\u7583\u3F63\u4CD9\u4D06\u3F58\u7555\u7673\uA5C6\u3B19\u7468\u8ACC\u49AB\u498E\u3AFB\u3DCD\u4A4E\u3EFF\u49C5\u48F3\u91FA\u5732\u9342\u8AE3\u1864\u50DF\u5221\u51E7\u7778\u3232\u770E\u770F\u777B\u4697\u3781\u3A5E\u48F0\u7438\u749B\u3EBF\u4ABA\u4AC7\u40C8\u4A96\u61AE\u9307\u5581\u781E\u788D\u7888\u78D2\u73D0\u7959\u7741\u56E3\u410E\u799B\u8496\u79A5\u6A2D\u3EFA\u7A3A\u79F4\u416E\u16E6\u4132\u9235\u79F1\u0D4C\u498C\u0299\u3DBA\u176E\u3597\u556B\u3570\u36AA\u01D4\u0C0D\u7AE2\u5A59\u26F5\u5AAF\u5A9C\u5A0D\u025B\u78F0\u5A2A\u5BC6\u7AFE\u41F9\u7C5D\u7C6D\u4211\u5BB3\u5EBC\u5EA6\u7CCD\u49F9\u17B0\u7C8E\u7C7C\u7CAE\u6AB2\u7DDC\u7E07\u7DD3\u7F4E\u6261\u615C\u7B48\u7D97\u5E82\u426A\u6B75\u0916\u67D6\u004E\u35CF\u57C4\u6412\u63F8\u4962\u7FDD\u7B27\u082C\u5AE9\u5D43\u7B0C\u5E0E\u99E6\u8645\u9A63\u6A1C\u343F\u39E2\u49F7\u65AD\u9A1F\u65A0\u8480\u7127\u6CD1\u44EA\u8137\u4402\u80C6\u8109\u8142\u67B4\u98C3\u6A42\u8262\u8265\u6A51\u8453\u6DA7\u8610\u721B\u5A86\u417F\u1840\u5B2B\u18A1\u5AE4\u18D8\u86A0\uF9BC\u3D8F\u882D\u7422\u5A02\u886E\u4F45\u8887\u88BF\u88E6\u8965\u894D\u5683\u8954\u7785\u7784\u8BF5\u8BD9\u8B9C\u89F9\u3EAD\u84A3\u46F5\u46CF\u37F2\u8A3D\u8A1C\u9448\u5F4D\u922B\u4284\u65D4\u7129\u70C4\u1845\u9D6D\u8C9F\u8CE9\u7DDC\u599A\u77C3\u59F0\u436E\u36D4\u8E2A\u8EA7\u4C09\u8F30\u8F4A\u42F4\u6C58\u
6FBB\u2321\u489B\u6F79\u6E8B\u17DA\u9BE9\u36B5\u492F\u90BB\u9097\u5571\u4906\u91BB\u9404\u8A4B\u4062\u8AFC\u9427\u8C1D\u8C3B\u84E5\u8A2B\u9599\u95A7\u9597\u9596\u8D34\u7445\u3EC2\u48FF\u4A42\u43EA\u3EE7\u3225\u968F\u8EE7\u8E66\u8E65\u3ECC\u49ED\u4A78\u3FEE\u7412\u746B\u3EFC\u9741\u90B0\u6847\u4A1D\u9093\u57DF\u975D\u9368\u8989\u8C26\u8B2F\u63BE\u92BA\u5B11\u8B69\u493C\u73F9\u421B\u979B\u9771\u9938\u0F26\u5DC1\u8BC5\u4AB2\u981F\u94DA\u92F6\u95D7\u91E5\u44C0\u8B50\u4A67\u8B64\u98DC\u8A45\u3F00\u922A\u4925\u8414\u993B\u994D\u7B06\u3DFD\u999B\u4B6F\u99AA\u9A5C\u8B65\u58C8\u6A8F\u9A21\u5AFE\u9A2F\u98F1\u4B90\u9948\u99BC\u4BBD\u4B97\u937D\u5872\u1302\u5822\u49B8\u14E8\u7844\u271F\u3DB8\u68C5\u3D7D\u9458\u3927\u6150\u2781\u296B\u6107\u9C4F\u9C53\u9C7B\u9C35\u9C10\u9B7F\u9BCF\u9E2D\u9B9F\uA1F5\uA0FE\u9D21\u4CAE\u4104\u9E18\u4CB0\u9D0C\uA1B4\uA0ED\uA0F3\u992F\u9DA5\u84BD\u6E12\u6FDF\u6B82\u85FC\u4533\u6DA4\u6E84\u6DF0\u8420\u85EE\u6E00\u37D7\u6064\u79E2\u359C\u3640\u492D\u49DE\u3D62\u93DB\u92BE\u9348\u02BF\u78B9\u9277\u944D\u4FE4\u3440\u9064\u555D\u783D\u7854\u78B6\u784B\u1757\u31C9\u4941\u369A\u4F72\u6FDA\u6FD9\u701E\u701E\u5414\u41B5\u57BB\u58F3\u578A\u9D16\u57D7\u7134\u34AF\u41AC\u71EB\u6C40\u4F97\u5B28\u17B5\u8A49\u610C\u5ACE\u5A0B\u42BC\u4488\u372C\u4B7B\u89FC\u93BB\u93B8\u18D6\u0F1D\u8472\u6CC0\u1413\u42FA\u2C26\u43C1\u5994\u3DB7\u6741\u7DA8\u615B\u60A4\u49B9\u498B\u89FA\u92E5\u73E2\u3EE9\u74B4\u8B63\u189F\u3EE1\u4AB3\u6AD8\u73F3\u73FB\u3ED6\u4A3E\u4A94\u17D9\u4A66\u03A7\u1424\u49E5\u7448\u4916\u70A5\u4976\u9284\u73E6\u935F\u04FE\u9331\u8ACE\u8A16\u9386\u8BE7\u55D5\u4935\u8A82\u716B\u4943\u0CFF\u56A4\u061A\u0BEB\u0CB8\u5502\u79C4\u17FA\u7DFE\u16C2\u4A50\u1852\u452E\u9401\u370A\u8AC0\u49AD\u59B0\u18BF\u1883\u7484\u5AA1\u36E2\u3D5B\u36B0\u925F\u5A79\u8A81\u1862\u9374\u3CCD\u0AB4\u4A96\u398A\u50F4\u3D69\u3D4C\u139C\u7175\u42FB\u8218\u6E0F\u90E4\u44EB\u6D57\u7E4F\u7067\u6CAF\u3CD6\u3FED\u3E2D\u6E02\u6F0C\u3D6F\u03F5\u7551\u36BC\u34C8\u4680\u3EDA\u4871\u59C4\u926E\u493E\u8F
41\u8C1C\u6BC0\u5812\u57C8\u36D6\u1452\u70FE\u4362\u4A71\u2FE3\u12B0\u23BD\u68B9\u6967\u1398\u34E5\u7BF4\u36DF\u8A83\u37D6\u33FA\u4C9F\u6A1A\u36AD\u6CB7\u843E\u44DF\u44CE\u6D26\u6D51\u6C82\u6FDE\u6F17\u7109\u833D\u173A\u83ED\u6C80\u7053\u17DB\u5989\u5A82\u17B3\u5A61\u5A71\u1905\u41FC\u372D\u59EF\u173C\u36C7\u718E\u9390\u669A\u42A5\u5A6E\u5A2B\u4293\u6A2B\u3EF9\u7736\u445B\u42CA\u711D\u4259\u89E1\u4FB0\u6D28\u5CC2\u44CE\u7E4D\u43BD\u6A0C\u4256\u1304\u70A6\u7133\u43E9\u3DA5\u6CDF\uF825\u4A4F\u7E65\u59EB\u5D2F\u3DF3\u5F5C\u4A5D\u17DF\u7DA4\u8426\u5485\u3AFA\u3300\u0214\u577E\u08D5\u0619\u3FE5\u1F9E\uA2B6\u7003\u915B\u5D70\u738F\u7CD3\u8A59\u9420\u4FC8\u7FE7\u72CD\u7310\u7AF4\u7338\u7339\u56F6\u7341\u7348\u3EA9\u7B18\u906C\u71F5\u48F2\u73E1\u81F6\u3ECA\u770C\u3ED1\u6CA2\u56FD\u7419\u741E\u741F\u3EE2\u3EF0\u3EF4\u3EFA\u74D3\u3F0E\u3F53\u7542\u756D\u7572\u758D\u3F7C\u75C8\u75DC\u3FC0\u764D\u3FD7\u7674\u3FDC\u767A\u4F5C\u7188\u5623\u8980\u5869\u401D\u7743\u4039\u6761\u4045\u35DB\u7798\u406A\u406F\u5C5E\u77BE\u77CB\u58F2\u7818\u70B9\u781C\u40A8\u7839\u7847\u7851\u7866\u8448\u5535\u7933\u6803\u7932\u4103\u4109\u7991\u7999\u8FBB\u7A06\u8FBC\u4167\u7A91\u41B2\u7ABC\u8279\u41C4\u7ACF\u7ADB\u41CF\u4E21\u7B62\u7B6C\u7B7B\u7C12\u7C1B\u4260\u427A\u7C7B\u7C9C\u428C\u7CB8\u4294\u7CED\u8F93\u70C0\u0CCF\u7DCF\u7DD4\u7DD0\u7DFD\u7FAE\u7FB4\u729F\u4397\u8020\u8025\u7B39\u802E\u8031\u8054\u3DCC\u57B4\u70A0\u80B7\u80E9\u43ED\u810C\u732A\u810E\u8112\u7560\u8114\u4401\u3B39\u8156\u8159\u815A\u4413\u583A\u817C\u8184\u4425\u8193\u442D\u81A5\u57EF\u81C1\u81E4\u8254\u448F\u82A6\u8276\u82CA\u82D8\u82FF\u44B0\u8357\u9669\u698A\u8405\u70F5\u8464\u60E3\u8488\u4504\u84BE\u84E1\u84F8\u8510\u8538\u8552\u453B\u856F\u8570\u85E0\u4577\u8672\u8692\u86B2\u86EF\u9645\u878B\u4606\u4617\u88AE\u88FF\u8924\u8947\u8991\u7967\u8A29\u8A38\u8A94\u8AB4\u8C51\u8CD4\u8CF2\u8D1C\u4798\u585F\u8DC3\u47ED\u4EEE\u8E3A\u55D8\u5754\u8E71\u55F5\u8EB0\u4837\u8ECE\u8EE2\u8EE4\u8EED\u8EF2\u8FB7\u8FC1\u8FCA\u8FCC\u9033\u99C4\u48AD
\u98E0\u9213\u491E\u9228\u9258\u926B\u92B1\u92AE\u92BF\u92E3\u92EB\u92F3\u92F4\u92FD\u9343\u9384\u93AD\u4945\u4951\u9EBF\u9417\u5301\u941D\u942D\u943E\u496A\u9454\u9479\u952D\u95A2\u49A7\u95F4\u9633\u49E5\u67A0\u4A24\u9740\u4A35\u97B2\u97C2\u5654\u4AE4\u60E8\u98B9\u4B19\u98F1\u5844\u990E\u9919\u51B4\u991C\u9937\u9942\u995D\u9962\u4B70\u99C5\u4B9D\u9A3C\u9B0F\u7A83\u9B69\u9B81\u9BDD\u9BF1\u9BF4\u4C6D\u9C20\u376F\u1BC2\u9D49\u9C3A\u9EFE\u5650\u9D93\u9DBD\u9DC0\u9DFC\u94F6\u8FB6\u9E7B\u9EAC\u9EB1\u9EBD\u9EC6\u94DC\u9EE2\u9EF1\u9EF8\u7AC8\u9F44\u0094\u02B7\u03A0\u691A\u94C3\u59AC\u04D7\u5840\u94C1\u37B9\u05D5\u0615\u0676\u16BA\u5757\u7173\u0AC2\u0ACD\u0BBF\u546A\uF83B\u0BCB\u549E\u0BFB\u0C3B\u0C53\u0C65\u0C7C\u60E7\u0C8D\u567A\u0CB5\u0CDD\u0CED\u0D6F\u0DB2\u0DC8\u6955\u9C2F\u87A5\u0E04\u0E0E\u0ED7\u0F90\u0F2D\u0E73\u5C20\u0FBC\u5E0B\u105C\u104F\u1076\u671E\u107B\u1088\u1096\u3647\u10BF\u10D3\u112F\u113B\u5364\u84AD\u12E3\u1375\u1336\u8B81\u1577\u1619\u17C3\u17C7\u4E78\u70BB\u182D\u196A\u1A2D\u1A45\u1C2A\u1C70\u1CAC\u1EC8\u62C3\u1ED5\u1F15\u7198\u6855\u2045\u69E9\u36C8\u227C\u23D7\u23FA\u272A\u2871\u294F\u82FD\u2967\u2993\u2AD5\u89A5\u2AE8\u8FA0\u2B0E\u97B8\u2B3F\u9847\u9ABD\u2C4C\u0000\u2C88\u2CB7\u5BE8\u2D08\u2D12\u2DB7\u2D95\u2E42\u2F74\u2FCC\u3033\u3066\u331F\u33DE\u5FB1\u6648\u66BF\u7A79\u3567\u35F3\u7201\u49BA\u77D7\u361A\u3716\u7E87\u0346\u58B5\u670E\u6918\u3AA7\u7657\u5FE2\u3E11\u3EB9\u75FE\u209A\u48D0\u4AB8\u4119\u8A9A\u42EE\u430D\u403B\u4334\u4396\u4A45\u05CA\u51D2\u0611\u599F\u1EA8\u3BBE\u3CFF\u4404\u44D6\u5788\u4674\u399B\u472F\u85E8\u99C9\u3762\u21C3\u8B5E\u8B4E\u99D6\u4812\u48FB\u4A15\u7209\u4AC0\u0C78\u5965\u4EA5\u4F86\u0779\u8EDA\u502C\u528F\u573F\u7171\u5299\u5419\u3F4A\u4AA7\u55BC\u5446\u546E\u6B52\u91D4\u3473\u553F\u7632\u555E\u4718\u5562\u5566\u57C7\u493F\u585D\u5066\u34FB\u33CC\u60DE\u5903\u477C\u8948\u5AAE\u5B89\u5C06\u1D90\u57A1\u7151\u6FB6\u6102\u7C12\u9056\u61B2\u4F9A\u8B62\u6402\u644A\u5D5B\u6BF7\u8F36\u6484\u191C\u8AEA\u49F6\u6488\u3FEF\u6512\u
4BC0\u65BF\u66B5\u271B\u9465\u57E1\u6195\u5A27\uF8CD\u4FBB\u56B9\u4521\u66FC\u4E6A\u4934\u9656\u6D8F\u6CBD\u3618\u8977\u6799\u686E\u6411\u685E\u71DF\u68C7\u7B42\u90C0\u0A11\u6926\u9104\u6939\u7A45\u9DF0\u69FA\u9A26\u6A2D\u365F\u6469\u0021\u7983\u6A34\u6B5B\u5D2C\u3519\u83CF\u6B9D\u46D0\u6CA4\u753B\u8865\u6DAE\u58B6\u371C\u258D\u704B\u71CD\u3C54\u7280\u7285\u9281\u217A\u728B\u9330\u72E6\u49D0\u6C39\u949F\u7450\u0EF8\u8827\u88F5\u2926\u8473\u17B1\u6EB8\u4A2A\u1820\u39A4\u36B9\u5C10\u79E3\u453F\u66B6\u9CAD\u98A4\u8943\u77CC\u7858\u56D6\u40DF\u160A\u39A1\u372F\u80E8\u13C5\u71AD\u8366\u79DD\u91A8\u5A67\u4CB7\u70AF\u89AB\u79FD\u7A0A\u7B0B\u7D66\u417A\u7B43\u797E\u8009\u6FB5\uA2DF\u6A03\u8318\u53A2\u6E07\u93BF\u6836\u975D\u816F\u8023\u69B5\u13ED\u322F\u8048\u5D85\u8C30\u8083\u5715\u9823\u8949\u5DAB\u4988\u65BE\u69D5\u53D2\u4AA5\u3F81\u3C11\u6736\u8090\u80F4\u812E\u1FA1\u814F\u8189\u81AF\u821A\u8306\u832F\u838A\u35CA\u8468\u86AA\u48FA\u63E6\u8956\u7808\u9255\u89B8\u43F2\u89E7\u43DF\u89E8\u8B46\u8BD4\u59F8\u8C09\u8F0B\u8FC5\u90EC\u7B51\u9110\u913C\u3DF7\u915E\u4ACA\u8FD0\u728F\u568B\u94E7\u95E9\u95B0\u95B8\u9732\u98D1\u9949\u996A\u99C3\u9A28\u9B0E\u9D5A\u9D9B\u7E9F\u9EF8\u9F23\u4CA4\u9547\uA293\u71A2\uA2FF\u4D91\u9012\uA5CB\u4D9C\u0C9C\u8FBE\u55C1\u8FBA\u24B0\u8FB9\u4A93\u4509\u7E7F\u6F56\u6AB1\u4EEA\u34E4\u8B2C\u789D\u373A\u8E80\u17F5\u8024\u8B6C\u8B99\u7A3E\u66AF\u3DEB\u7655\u3CB7\u5635\u5956\u4E9A\u5E81\u6258\u56BF\u0E6D\u8E0E\u5B6D\u3E88\u4C9E\u63DE\u62D0\u17F6\u187B\u6530\u562D\u5C4A\u541A\u5311\u3DC6\u9D98\u4C7D\u5622\u561E\u7F49\u5ED8\u5975\u3D40\u8770\u4E1C\u0FEA\u0D49\u36BA\u8117\u9D5E\u8D18\u763B\u9C45\u764E\u77B9\u9345\u5432\u8148\u82F7\u5625\u8132\u8418\u80BD\u55EA\u7962\u5643\u5416\u0E9D\u35CE\u5605\u55F1\u66F1\u82E2\u362D\u7534\u55F0\u55BA\u5497\u5572\u0C41\u0C96\u5ED0\u5148\u0E76\u2C62\u0EA2\u9EAB\u7D5A\u55DE\u1075\u629D\u976D\u5494\u8CCD\u71F6\u9176\u63FC\u63B9\u63FE\u5569\u2B43\u9C72\u2EB3\u519A\u34DF\u0DA7\u51A7\u544D\u551E\u5513\u7666\u8E2D\u688A\u75B1\u80
B6\u8804\u8786\u88C7\u81B6\u841C\u10C1\u44EC\u7304\u4706\u5B90\u830B\u6893\u567B\u26F4\u7D2F\u41A3\u7D73\u6ED0\u72B6\u9170\u11D9\u9208\u3CFC\uA6A9\u0EAC\u0EF9\u7266\u1CA2\u474E\u4FC2\u7FF9\u0FEB\u40FA\u9C5D\u651F\u2DA0\u48F3\u47E0\u9D7C\u0FEC\u0E0A\u6062\u75A3\u0FED\u0000\u6048\u1187\u71A3\u7E8E\u9D50\u4E1A\u4E04\u3577\u5B0D\u6CB2\u5367\u36AC\u39DC\u537D\u36A5\u4618\u589A\u4B6E\u822D\u544B\u57AA\u5A95\u0979\u0000\u3A52\u2465\u7374\u9EAC\u4D09\u9BED\u3CFE\u9F30\u4C5B\u4FA9\u959E\u9FDE\u845C\u3DB6\u72B2\u67B3\u3720\u632E\u7D25\u3EF7\u3E2C\u3A2A\u9008\u52CC\u3E74\u367A\u45E9\u048E\u7640\u5AF0\u0EB6\u787A\u7F2E\u58A7\u40BF\u567C\u9B8B\u5D74\u7654\uA434\u9E85\u4CE1\u75F9\u37FB\u6119\u30DA\u43F2\u0000\u565D\u12A9\u57A7\u4963\u9E06\u5234\u70AE\u35AD\u6C4A\u9D7C\u7C56\u9B39\u57DE\u176C\u5C53\u64D3\u94D0\u6335\u7164\u86AD\u0D28\u6D22\u4AE2\u0D71\u0000\u51FE\u1F0F\u5D8E\u9703\u1DD1\u9E81\u904C\u7B1F\u9B02\u5CD1\u7BA3\u6268\u6335\u9AFF\u7BCF\u9B2A\u7C7E\u9B2E\u7C42\u7C86\u9C15\u7BFC\u9B09\u9F17\u9C1B\u493E\u9F5A\u5573\u5BC3\u4FFD\u9E98\u4FF2\u5260\u3E06\u52D1\u5767\u5056\u59B7\u5E12\u97C8\u9DAB\u8F5C\u5469\u97B4\u9940\u97BA\u532C\u6130\u692C\u53DA\u9C0A\u9D02\u4C3B\u9641\u6980\u50A6\u7546\u176D\u99DA\u5273\u0000\u9159\u9681\u915C\u0000\u9151\u8E97\u637F\u6D23\u6ACA\u5611\u918E\u757A\u6285\u03FC\u734F\u7C70\u5C21\u3CFD\u0000\u4919\u76D6\u9B9D\u4E2A\u0CD4\u83BE\u8842\u0000\u5C4A\u69C0\u50ED\u577A\u521F\u5DF5\u4ECE\u6C31\u01F2\u4F39\u549C\u54DA\u529A\u8D82\u35FE\u5F0C\u35F3\u0000\u6B52\u917C\u9FA5\u9B97\u982E\u98B4\u9ABA\u9EA8\u9E84\u717A\u7B14\u0000\u6BFA\u8818\u7F78\u0000\u5620\uA64A\u8E77\u9F53\u0000\u8DD4\u8E4F\u9E1C\u8E01\u6282\u837D\u8E28\u8E75\u7AD3\u4A77\u7A3E\u78D8\u6CEA\u8A67\u7607\u8A5A\u9F26\u6CCE\u87D6\u75C3\uA2B2\u7853\uF840\u8D0C\u72E2\u7371\u8B2D\u7302\u74F1\u8CEB\u4ABB\u862F\u5FBA\u88A0\u44B7\u0000\u183B\u6E05\u0000\u8A7E\u251B\u0000\u60FD\u7667\u9AD7\u9D44\u936E\u9B8F\u87F5\u0000\u880F\u8CF7\u732C\u9721\u9BB0\u35D6\u72B2\u4C07\u7C51\u994A\u6159\u6159\u4C04\u9E96
\u617D\u0000\u575F\u616F\u62A6\u6239\u62CE\u3A5C\u61E2\u53AA\u33F5\u6364\u6802\u35D2\u5D57\u8BC2\u8FDA\u8E39\u0000\u50D9\u1D46\u7906\u5332\u9638\u0F3B\u4065\u0000\u77FE\u0000\u7CC2\u5F1A\u7CDA\u7A2D\u8066\u8063\u7D4D\u7505\u74F2\u8994\u821A\u670C\u8062\u7486\u805B\u74F0\u8103\u7724\u8989\u67CC\u7553\u6ED1\u87A9\u87CE\u81C8\u878C\u8A49\u8CAD\u8B43\u772B\u74F8\u84DA\u3635\u69B2\u8DA6\u0000\u89A9\u7468\u6DB9\u87C1\u4011\u74E7\u3DDB\u7176\u60A4\u619C\u3CD1\u7162\u6077\u0000\u7F71\u8B2D\u7250\u60E9\u4B7E\u5220\u3C18\u3CC7\u5ED7\u7656\u5531\u1944\u12FE\u9903\u6DDC\u70AD\u5CC1\u61AD\u8A0F\u3677\u00EE\u6846\u4F0E\u4562\u5B1F\u634C\u9F50\u9EA6\u626B\u3000\uFF0C\u3001\u3002\uFF0E\u2027\uFF1B\uFF1A\uFF1F\uFF01\uFE30\u2026\u2025\uFE50\uFE51\uFE52\u00B7\uFE54\uFE55\uFE56\uFE57\uFF5C\u2013\uFE31\u2014\uFE33\u2574\uFE34\uFE4F\uFF08\uFF09\uFE35\uFE36\uFF5B\uFF5D\uFE37\uFE38\u3014\u3015\uFE39\uFE3A\u3010\u3011\uFE3B\uFE3C\u300A\u300B\uFE3D\uFE3E\u3008\u3009\uFE3F\uFE40\u300C\u300D\uFE41\uFE42\u300E\u300F\uFE43\uFE44\uFE59\uFE5A\uFE5B\uFE5C\uFE5D\uFE5E\u2018\u2019\u201C\u201D\u301D\u301E\u2035\u2032\uFF03\uFF06\uFF0A\u203B\u00A7\u3003\u25CB\u25CF\u25B3\u25B2\u25CE\u2606\u2605\u25C7\u25C6\u25A1\u25A0\u25BD\u25BC\u32A3\u2105\u00AF\uFFE3\uFF3F\u02CD\uFE49\uFE4A\uFE4D\uFE4E\uFE4B\uFE4C\uFE5F\uFE60\uFE61\uFF0B\uFF0D\u00D7\u00F7\u00B1\u221A\uFF1C\uFF1E\uFF1D\u2266\u2267\u2260\u221E\u2252\u2261\uFE62\uFE63\uFE64\uFE65\uFE66\uFF5E\u2229\u222A\u22A5\u2220\u221F\u22BF\u33D2\u33D1\u222B\u222E\u2235\u2234\u2640\u2642\u2295\u2299\u2191\u2193\u2190\u2192\u2196\u2197\u2199\u2198\u2225\u2223\uFF0F\uFF3C\u2215\uFE68\uFF04\uFFE5\u3012\uFFE0\uFFE1\uFF05\uFF20\u2103\u2109\uFE69\uFE6A\uFE6B\u33D5\u339C\u339D\u339E\u33CE\u33A1\u338E\u338F\u33C4\u00B0\u5159\u515B\u515E\u515D\u5161\u5163\u55E7\u74E9\u7CCE\u2581\u2582\u2583\u2584\u2585\u2586\u2587\u2588\u258F\u258E\u258D\u258C\u258B\u258A\u2589\u253C\u2534\u252C\u2524\u251C\u2594\u2500\u2502\u2595\u250C\u2510\u2514\u2518\u256D\u256E\u2570\u256F\u2550\u255E\u
256A\u2561\u25E2\u25E3\u25E5\u25E4\u2571\u2572\u2573\uFF10\uFF11\uFF12\uFF13\uFF14\uFF15\uFF16\uFF17\uFF18\uFF19\u2160\u2161\u2162\u2163\u2164\u2165\u2166\u2167\u2168\u2169\u3021\u3022\u3023\u3024\u3025\u3026\u3027\u3028\u3029\u5341\u5344\u5345\uFF21\uFF22\uFF23\uFF24\uFF25\uFF26\uFF27\uFF28\uFF29\uFF2A\uFF2B\uFF2C\uFF2D\uFF2E\uFF2F\uFF30\uFF31\uFF32\uFF33\uFF34\uFF35\uFF36\uFF37\uFF38\uFF39\uFF3A\uFF41\uFF42\uFF43\uFF44\uFF45\uFF46\uFF47\uFF48\uFF49\uFF4A\uFF4B\uFF4C\uFF4D\uFF4E\uFF4F\uFF50\uFF51\uFF52\uFF53\uFF54\uFF55\uFF56\uFF57\uFF58\uFF59\uFF5A\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039A\u039B\u039C\u039D\u039E\u039F\u03A0\u03A1\u03A3\u03A4\u03A5\u03A6\u03A7\u03A8\u03A9\u03B1\u03B2\u03B3\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9\u03BA\u03BB\u03BC\u03BD\u03BE\u03BF\u03C0\u03C1\u03C3\u03C4\u03C5\u03C6\u03C7\u03C8\u03C9\u3105\u3106\u3107\u3108\u3109\u310A\u310B\u310C\u310D\u310E\u310F\u3110\u3111\u3112\u3113\u3114\u3115\u3116\u3117\u3118\u3119\u311A\u311B\u311C\u311D\u311E\u311F\u3120\u3121\u3122\u3123\u3124\u3125\u3126\u3127\u3128\u3129\u02D9\u02C9\u02CA\u02C7\u02CB\u2400\u2401\u2402\u2403\u2404\u2405\u2406\u2407\u2408\u2409\u240A\u240B\u240C\u240D\u240E\u240F\u2410\u2411\u2412\u2413\u2414\u2415\u2416\u2417\u2418\u2419\u241A\u241B\u241C\u241D\u241E\u241F\u2421\u20AC";
+
+ private static final String TABLE3 = "\u4E00\u4E59\u4E01\u4E03\u4E43\u4E5D\u4E86\u4E8C\u4EBA\u513F\u5165\u516B\u51E0\u5200\u5201\u529B\u5315\u5341\u535C\u53C8\u4E09\u4E0B\u4E08\u4E0A\u4E2B\u4E38\u51E1\u4E45\u4E48\u4E5F\u4E5E\u4E8E\u4EA1\u5140\u5203\u52FA\u5343\u53C9\u53E3\u571F\u58EB\u5915\u5927\u5973\u5B50\u5B51\u5B53\u5BF8\u5C0F\u5C22\u5C38\u5C71\u5DDD\u5DE5\u5DF1\u5DF2\u5DF3\u5DFE\u5E72\u5EFE\u5F0B\u5F13\u624D\u4E11\u4E10\u4E0D\u4E2D\u4E30\u4E39\u4E4B\u5C39\u4E88\u4E91\u4E95\u4E92\u4E94\u4EA2\u4EC1\u4EC0\u4EC3\u4EC6\u4EC7\u4ECD\u4ECA\u4ECB\u4EC4\u5143\u5141\u5167\u516D\u516E\u516C\u5197\u51F6\u5206\u5207\u5208\u52FB\u52FE\u52FF\u5316\u5339\u5348\u5347\u5345\u535E\u5384\u53CB\u53CA\u53CD\u58EC\u5929\u592B\u592A\u592D\u5B54\u5C11\u5C24\u5C3A\u5C6F\u5DF4\u5E7B\u5EFF\u5F14\u5F15\u5FC3\u6208\u6236\u624B\u624E\u652F\u6587\u6597\u65A4\u65B9\u65E5\u66F0\u6708\u6728\u6B20\u6B62\u6B79\u6BCB\u6BD4\u6BDB\u6C0F\u6C34\u706B\u722A\u7236\u723B\u7247\u7259\u725B\u72AC\u738B\u4E19\u4E16\u4E15\u4E14\u4E18\u4E3B\u4E4D\u4E4F\u4E4E\u4EE5\u4ED8\u4ED4\u4ED5\u4ED6\u4ED7\u4EE3\u4EE4\u4ED9\u4EDE\u5145\u5144\u5189\u518A\u51AC\u51F9\u51FA\u51F8\u520A\u52A0\u529F\u5305\u5306\u5317\u531D\u4EDF\u534A\u5349\u5361\u5360\u536F\u536E\u53BB\u53EF\u53E4\u53F3\u53EC\u53EE\u53E9\u53E8\u53FC\u53F8\u53F5\u53EB\u53E6\u53EA\u53F2\u53F1\u53F0\u53E5\u53ED\u53FB\u56DB\u56DA\u5916\u592E\u5931\u5974\u5976\u5B55\u5B83\u5C3C\u5DE8\u5DE7\u5DE6\u5E02\u5E03\u5E73\u5E7C\u5F01\u5F18\u5F17\u5FC5\u620A\u6253\u6254\u6252\u6251\u65A5\u65E6\u672E\u672C\u672A\u672B\u672D\u6B63\u6BCD\u6C11\u6C10\u6C38\u6C41\u6C40\u6C3E\u72AF\u7384\u7389\u74DC\u74E6\u7518\u751F\u7528\u7529\u7530\u7531\u7532\u7533\u758B\u767D\u76AE\u76BF\u76EE\u77DB\u77E2\u77F3\u793A\u79BE\u7A74\u7ACB\u4E1E\u4E1F\u4E52\u4E53\u4E69\u4E99\u4EA4\u4EA6\u4EA5\u4EFF\u4F09\u4F19\u4F0A\u4F15\u4F0D\u4F10\u4F11\u4F0F\u4EF2\u4EF6\u4EFB\u4EF0\u4EF3\u4EFD\u4F01\u4F0B\u5149\u5147\u5146\u5148\u5168\u5171\u518D\u51B0\u5217\u5211\u5212\u520E\u5216\u52A3\u5308\u5321\u5320\u53
70\u5371\u5409\u540F\u540C\u540A\u5410\u5401\u540B\u5404\u5411\u540D\u5408\u5403\u540E\u5406\u5412\u56E0\u56DE\u56DD\u5733\u5730\u5728\u572D\u572C\u572F\u5729\u5919\u591A\u5937\u5938\u5984\u5978\u5983\u597D\u5979\u5982\u5981\u5B57\u5B58\u5B87\u5B88\u5B85\u5B89\u5BFA\u5C16\u5C79\u5DDE\u5E06\u5E76\u5E74\u5F0F\u5F1B\u5FD9\u5FD6\u620E\u620C\u620D\u6210\u6263\u625B\u6258\u6536\u65E9\u65E8\u65EC\u65ED\u66F2\u66F3\u6709\u673D\u6734\u6731\u6735\u6B21\u6B64\u6B7B\u6C16\u6C5D\u6C57\u6C59\u6C5F\u6C60\u6C50\u6C55\u6C61\u6C5B\u6C4D\u6C4E\u7070\u725F\u725D\u767E\u7AF9\u7C73\u7CF8\u7F36\u7F8A\u7FBD\u8001\u8003\u800C\u8012\u8033\u807F\u8089\u808B\u808C\u81E3\u81EA\u81F3\u81FC\u820C\u821B\u821F\u826E\u8272\u827E\u866B\u8840\u884C\u8863\u897F\u9621\u4E32\u4EA8\u4F4D\u4F4F\u4F47\u4F57\u4F5E\u4F34\u4F5B\u4F55\u4F30\u4F50\u4F51\u4F3D\u4F3A\u4F38\u4F43\u4F54\u4F3C\u4F46\u4F63\u4F5C\u4F60\u4F2F\u4F4E\u4F36\u4F59\u4F5D\u4F48\u4F5A\u514C\u514B\u514D\u5175\u51B6\u51B7\u5225\u5224\u5229\u522A\u5228\u52AB\u52A9\u52AA\u52AC\u5323\u5373\u5375\u541D\u542D\u541E\u543E\u5426\u544E\u5427\u5446\u5443\u5433\u5448\u5442\u541B\u5429\u544A\u5439\u543B\u5438\u542E\u5435\u5436\u5420\u543C\u5440\u5431\u542B\u541F\u542C\u56EA\u56F0\u56E4\u56EB\u574A\u5751\u5740\u574D\u5747\u574E\u573E\u5750\u574F\u573B\u58EF\u593E\u599D\u5992\u59A8\u599E\u59A3\u5999\u5996\u598D\u59A4\u5993\u598A\u59A5\u5B5D\u5B5C\u5B5A\u5B5B\u5B8C\u5B8B\u5B8F\u5C2C\u5C40\u5C41\u5C3F\u5C3E\u5C90\u5C91\u5C94\u5C8C\u5DEB\u5E0C\u5E8F\u5E87\u5E8A\u5EF7\u5F04\u5F1F\u5F64\u5F62\u5F77\u5F79\u5FD8\u5FCC\u5FD7\u5FCD\u5FF1\u5FEB\u5FF8\u5FEA\u6212\u6211\u6284\u6297\u6296\u6280\u6276\u6289\u626D\u628A\u627C\u627E\u6279\u6273\u6292\u626F\u6298\u626E\u6295\u6293\u6291\u6286\u6539\u653B\u6538\u65F1\u66F4\u675F\u674E\u674F\u6750\u6751\u675C\u6756\u675E\u6749\u6746\u6760\u6753\u6757\u6B65\u6BCF\u6C42\u6C5E\u6C99\u6C81\u6C88\u6C89\u6C85\u6C9B\u6C6A\u6C7A\u6C90\u6C70\u6C8C\u6C68\u6C96\u6C92\u6C7D\u6C83\u6C72\u6C7E\u6C74\u6C86\u6C76\u6C8D\u6C94\u6C98\u6C82\u7076
\u707C\u707D\u7078\u7262\u7261\u7260\u72C4\u72C2\u7396\u752C\u752B\u7537\u7538\u7682\u76EF\u77E3\u79C1\u79C0\u79BF\u7A76\u7CFB\u7F55\u8096\u8093\u809D\u8098\u809B\u809A\u80B2\u826F\u8292\u828B\u828D\u898B\u89D2\u8A00\u8C37\u8C46\u8C55\u8C9D\u8D64\u8D70\u8DB3\u8EAB\u8ECA\u8F9B\u8FB0\u8FC2\u8FC6\u8FC5\u8FC4\u5DE1\u9091\u90A2\u90AA\u90A6\u90A3\u9149\u91C6\u91CC\u9632\u962E\u9631\u962A\u962C\u4E26\u4E56\u4E73\u4E8B\u4E9B\u4E9E\u4EAB\u4EAC\u4F6F\u4F9D\u4F8D\u4F73\u4F7F\u4F6C\u4F9B\u4F8B\u4F86\u4F83\u4F70\u4F75\u4F88\u4F69\u4F7B\u4F96\u4F7E\u4F8F\u4F91\u4F7A\u5154\u5152\u5155\u5169\u5177\u5176\u5178\u51BD\u51FD\u523B\u5238\u5237\u523A\u5230\u522E\u5236\u5241\u52BE\u52BB\u5352\u5354\u5353\u5351\u5366\u5377\u5378\u5379\u53D6\u53D4\u53D7\u5473\u5475\u5496\u5478\u5495\u5480\u547B\u5477\u5484\u5492\u5486\u547C\u5490\u5471\u5476\u548C\u549A\u5462\u5468\u548B\u547D\u548E\u56FA\u5783\u5777\u576A\u5769\u5761\u5766\u5764\u577C\u591C\u5949\u5947\u5948\u5944\u5954\u59BE\u59BB\u59D4\u59B9\u59AE\u59D1\u59C6\u59D0\u59CD\u59CB\u59D3\u59CA\u59AF\u59B3\u59D2\u59C5\u5B5F\u5B64\u5B63\u5B97\u5B9A\u5B98\u5B9C\u5B99\u5B9B\u5C1A\u5C48\u5C45\u5C46\u5CB7\u5CA1\u5CB8\u5CA9\u5CAB\u5CB1\u5CB3\u5E18\u5E1A\u5E16\u5E15\u5E1B\u5E11\u5E78\u5E9A\u5E97\u5E9C\u5E95\u5E96\u5EF6\u5F26\u5F27\u5F29\u5F80\u5F81\u5F7F\u5F7C\u5FDD\u5FE0\u5FFD\u5FF5\u5FFF\u600F\u6014\u602F\u6035\u6016\u602A\u6015\u6021\u6027\u6029\u602B\u601B\u6216\u6215\u623F\u623E\u6240\u627F\u62C9\u62CC\u62C4\u62BF\u62C2\u62B9\u62D2\u62DB\u62AB\u62D3\u62D4\u62CB\u62C8\u62A8\u62BD\u62BC\u62D0\u62D9\u62C7\u62CD\u62B5\u62DA\u62B1\u62D8\u62D6\u62D7\u62C6\u62AC\u62CE\u653E\u65A7\u65BC\u65FA\u6614\u6613\u660C\u6606\u6602\u660E\u6600\u660F\u6615\u660A\u6607\u670D\u670B\u676D\u678B\u6795\u6771\u679C\u6773\u6777\u6787\u679D\u6797\u676F\u6770\u677F\u6789\u677E\u6790\u6775\u679A\u6793\u677C\u676A\u6772\u6B23\u6B66\u6B67\u6B7F\u6C13\u6C1B\u6CE3\u6CE8\u6CF3\u6CB1\u6CCC\u6CE5\u6CB3\u6CBD\u6CBE\u6CBC\u6CE2\u6CAB\u6CD5\u6CD3\u6CB8\u6CC4\u6CB9\u6CC1\u6CAE\u6CD7\u
6CC5\u6CF1\u6CBF\u6CBB\u6CE1\u6CDB\u6CCA\u6CAC\u6CEF\u6CDC\u6CD6\u6CE0\u7095\u708E\u7092\u708A\u7099\u722C\u722D\u7238\u7248\u7267\u7269\u72C0\u72CE\u72D9\u72D7\u72D0\u73A9\u73A8\u739F\u73AB\u73A5\u753D\u759D\u7599\u759A\u7684\u76C2\u76F2\u76F4\u77E5\u77FD\u793E\u7940\u7941\u79C9\u79C8\u7A7A\u7A79\u7AFA\u7CFE\u7F54\u7F8C\u7F8B\u8005\u80BA\u80A5\u80A2\u80B1\u80A1\u80AB\u80A9\u80B4\u80AA\u80AF\u81E5\u81FE\u820D\u82B3\u829D\u8299\u82AD\u82BD\u829F\u82B9\u82B1\u82AC\u82A5\u82AF\u82B8\u82A3\u82B0\u82BE\u82B7\u864E\u8671\u521D\u8868\u8ECB\u8FCE\u8FD4\u8FD1\u90B5\u90B8\u90B1\u90B6\u91C7\u91D1\u9577\u9580\u961C\u9640\u963F\u963B\u9644\u9642\u96B9\u96E8\u9752\u975E\u4E9F\u4EAD\u4EAE\u4FE1\u4FB5\u4FAF\u4FBF\u4FE0\u4FD1\u4FCF\u4FDD\u4FC3\u4FB6\u4FD8\u4FDF\u4FCA\u4FD7\u4FAE\u4FD0\u4FC4\u4FC2\u4FDA\u4FCE\u4FDE\u4FB7\u5157\u5192\u5191\u51A0\u524E\u5243\u524A\u524D\u524C\u524B\u5247\u52C7\u52C9\u52C3\u52C1\u530D\u5357\u537B\u539A\u53DB\u54AC\u54C0\u54A8\u54CE\u54C9\u54B8\u54A6\u54B3\u54C7\u54C2\u54BD\u54AA\u54C1\u54C4\u54C8\u54AF\u54AB\u54B1\u54BB\u54A9\u54A7\u54BF\u56FF\u5782\u578B\u57A0\u57A3\u57A2\u57CE\u57AE\u5793\u5955\u5951\u594F\u594E\u5950\u59DC\u59D8\u59FF\u59E3\u59E8\u5A03\u59E5\u59EA\u59DA\u59E6\u5A01\u59FB\u5B69\u5BA3\u5BA6\u5BA4\u5BA2\u5BA5\u5C01\u5C4E\u5C4F\u5C4D\u5C4B\u5CD9\u5CD2\u5DF7\u5E1D\u5E25\u5E1F\u5E7D\u5EA0\u5EA6\u5EFA\u5F08\u5F2D\u5F65\u5F88\u5F85\u5F8A\u5F8B\u5F87\u5F8C\u5F89\u6012\u601D\u6020\u6025\u600E\u6028\u604D\u6070\u6068\u6062\u6046\u6043\u606C\u606B\u606A\u6064\u6241\u62DC\u6316\u6309\u62FC\u62ED\u6301\u62EE\u62FD\u6307\u62F1\u62F7\u62EF\u62EC\u62FE\u62F4\u6311\u6302\u653F\u6545\u65AB\u65BD\u65E2\u6625\u662D\u6620\u6627\u662F\u661F\u6628\u6631\u6624\u66F7\u67FF\u67D3\u67F1\u67D4\u67D0\u67EC\u67B6\u67AF\u67F5\u67E9\u67EF\u67C4\u67D1\u67B4\u67DA\u67E5\u67B8\u67CF\u67DE\u67F3\u67B0\u67D9\u67E2\u67DD\u67D2\u6B6A\u6B83\u6B86\u6BB5\u6BD2\u6BD7\u6C1F\u6CC9\u6D0B\u6D32\u6D2A\u6D41\u6D25\u6D0C\u6D31\u6D1E\u6D17\u6D3B\u6D3D\u6D3E\u6D36\u6D1B\u6CF5\u6D39\u6D
27\u6D38\u6D29\u6D2E\u6D35\u6D0E\u6D2B\u70AB\u70BA\u70B3\u70AC\u70AF\u70AD\u70B8\u70AE\u70A4\u7230\u7272\u726F\u7274\u72E9\u72E0\u72E1\u73B7\u73CA\u73BB\u73B2\u73CD\u73C0\u73B3\u751A\u752D\u754F\u754C\u754E\u754B\u75AB\u75A4\u75A5\u75A2\u75A3\u7678\u7686\u7687\u7688\u76C8\u76C6\u76C3\u76C5\u7701\u76F9\u76F8\u7709\u770B\u76FE\u76FC\u7707\u77DC\u7802\u7814\u780C\u780D\u7946\u7949\u7948\u7947\u79B9\u79BA\u79D1\u79D2\u79CB\u7A7F\u7A81\u7AFF\u7AFD\u7C7D\u7D02\u7D05\u7D00\u7D09\u7D07\u7D04\u7D06\u7F38\u7F8E\u7FBF\u8004\u8010\u800D\u8011\u8036\u80D6\u80E5\u80DA\u80C3\u80C4\u80CC\u80E1\u80DB\u80CE\u80DE\u80E4\u80DD\u81F4\u8222\u82E7\u8303\u8305\u82E3\u82DB\u82E6\u8304\u82E5\u8302\u8309\u82D2\u82D7\u82F1\u8301\u82DC\u82D4\u82D1\u82DE\u82D3\u82DF\u82EF\u8306\u8650\u8679\u867B\u867A\u884D\u886B\u8981\u89D4\u8A08\u8A02\u8A03\u8C9E\u8CA0\u8D74\u8D73\u8DB4\u8ECD\u8ECC\u8FF0\u8FE6\u8FE2\u8FEA\u8FE5\u8FED\u8FEB\u8FE4\u8FE8\u90CA\u90CE\u90C1\u90C3\u914B\u914A\u91CD\u9582\u9650\u964B\u964C\u964D\u9762\u9769\u97CB\u97ED\u97F3\u9801\u98A8\u98DB\u98DF\u9996\u9999\u4E58\u4EB3\u500C\u500D\u5023\u4FEF\u5026\u5025\u4FF8\u5029\u5016\u5006\u503C\u501F\u501A\u5012\u5011\u4FFA\u5000\u5014\u5028\u4FF1\u5021\u500B\u5019\u5018\u4FF3\u4FEE\u502D\u502A\u4FFE\u502B\u5009\u517C\u51A4\u51A5\u51A2\u51CD\u51CC\u51C6\u51CB\u5256\u525C\u5254\u525B\u525D\u532A\u537F\u539F\u539D\u53DF\u54E8\u5510\u5501\u5537\u54FC\u54E5\u54F2\u5506\u54FA\u5514\u54E9\u54ED\u54E1\u5509\u54EE\u54EA\u54E6\u5527\u5507\u54FD\u550F\u5703\u5704\u57C2\u57D4\u57CB\u57C3\u5809\u590F\u5957\u5958\u595A\u5A11\u5A18\u5A1C\u5A1F\u5A1B\u5A13\u59EC\u5A20\u5A23\u5A29\u5A25\u5A0C\u5A09\u5B6B\u5C58\u5BB0\u5BB3\u5BB6\u5BB4\u5BAE\u5BB5\u5BB9\u5BB8\u5C04\u5C51\u5C55\u5C50\u5CED\u5CFD\u5CFB\u5CEA\u5CE8\u5CF0\u5CF6\u5D01\u5CF4\u5DEE\u5E2D\u5E2B\u5EAB\u5EAD\u5EA7\u5F31\u5F92\u5F91\u5F90\u6059\u6063\u6065\u6050\u6055\u606D\u6069\u606F\u6084\u609F\u609A\u608D\u6094\u608C\u6085\u6096\u6247\u62F3\u6308\u62FF\u634E\u633E\u632F\u6355\u6342\u6346\u634F\u6349
\u633A\u6350\u633D\u632A\u632B\u6328\u634D\u634C\u6548\u6549\u6599\u65C1\u65C5\u6642\u6649\u664F\u6643\u6652\u664C\u6645\u6641\u66F8\u6714\u6715\u6717\u6821\u6838\u6848\u6846\u6853\u6839\u6842\u6854\u6829\u68B3\u6817\u684C\u6851\u683D\u67F4\u6850\u6840\u683C\u6843\u682A\u6845\u6813\u6818\u6841\u6B8A\u6B89\u6BB7\u6C23\u6C27\u6C28\u6C26\u6C24\u6CF0\u6D6A\u6D95\u6D88\u6D87\u6D66\u6D78\u6D77\u6D59\u6D93\u6D6C\u6D89\u6D6E\u6D5A\u6D74\u6D69\u6D8C\u6D8A\u6D79\u6D85\u6D65\u6D94\u70CA\u70D8\u70E4\u70D9\u70C8\u70CF\u7239\u7279\u72FC\u72F9\u72FD\u72F8\u72F7\u7386\u73ED\u7409\u73EE\u73E0\u73EA\u73DE\u7554\u755D\u755C\u755A\u7559\u75BE\u75C5\u75C7\u75B2\u75B3\u75BD\u75BC\u75B9\u75C2\u75B8\u768B\u76B0\u76CA\u76CD\u76CE\u7729\u771F\u7720\u7728\u77E9\u7830\u7827\u7838\u781D\u7834\u7837\u7825\u782D\u7820\u781F\u7832\u7955\u7950\u7960\u795F\u7956\u795E\u795D\u7957\u795A\u79E4\u79E3\u79E7\u79DF\u79E6\u79E9\u79D8\u7A84\u7A88\u7AD9\u7B06\u7B11\u7C89\u7D21\u7D17\u7D0B\u7D0A\u7D20\u7D22\u7D14\u7D10\u7D15\u7D1A\u7D1C\u7D0D\u7D19\u7D1B\u7F3A\u7F5F\u7F94\u7FC5\u7FC1\u8006\u8018\u8015\u8019\u8017\u803D\u803F\u80F1\u8102\u80F0\u8105\u80ED\u80F4\u8106\u80F8\u80F3\u8108\u80FD\u810A\u80FC\u80EF\u81ED\u81EC\u8200\u8210\u822A\u822B\u8228\u822C\u82BB\u832B\u8352\u8354\u834A\u8338\u8350\u8349\u8335\u8334\u834F\u8332\u8339\u8336\u8317\u8340\u8331\u8328\u8343\u8654\u868A\u86AA\u8693\u86A4\u86A9\u868C\u86A3\u869C\u8870\u8877\u8881\u8882\u887D\u8879\u8A18\u8A10\u8A0E\u8A0C\u8A15\u8A0A\u8A17\u8A13\u8A16\u8A0F\u8A11\u8C48\u8C7A\u8C79\u8CA1\u8CA2\u8D77\u8EAC\u8ED2\u8ED4\u8ECF\u8FB1\u9001\u9006\u8FF7\u9000\u8FFA\u8FF4\u9003\u8FFD\u9005\u8FF8\u9095\u90E1\u90DD\u90E2\u9152\u914D\u914C\u91D8\u91DD\u91D7\u91DC\u91D9\u9583\u9662\u9663\u9661\u965B\u965D\u9664\u9658\u965E\u96BB\u98E2\u99AC\u9AA8\u9AD8\u9B25\u9B32\u9B3C\u4E7E\u507A\u507D\u505C\u5047\u5043\u504C\u505A\u5049\u5065\u5076\u504E\u5055\u5075\u5074\u5077\u504F\u500F\u506F\u506D\u515C\u5195\u51F0\u526A\u526F\u52D2\u52D9\u52D8\u52D5\u5310\u530F\u5319\u533F\u
5340\u533E\u53C3\u66FC\u5546\u556A\u5566\u5544\u555E\u5561\u5543\u554A\u5531\u5556\u554F\u5555\u552F\u5564\u5538\u552E\u555C\u552C\u5563\u5533\u5541\u5557\u5708\u570B\u5709\u57DF\u5805\u580A\u5806\u57E0\u57E4\u57FA\u5802\u5835\u57F7\u57F9\u5920\u5962\u5A36\u5A41\u5A49\u5A66\u5A6A\u5A40\u5A3C\u5A62\u5A5A\u5A46\u5A4A\u5B70\u5BC7\u5BC5\u5BC4\u5BC2\u5BBF\u5BC6\u5C09\u5C08\u5C07\u5C60\u5C5C\u5C5D\u5D07\u5D06\u5D0E\u5D1B\u5D16\u5D22\u5D11\u5D29\u5D14\u5D19\u5D24\u5D27\u5D17\u5DE2\u5E38\u5E36\u5E33\u5E37\u5EB7\u5EB8\u5EB6\u5EB5\u5EBE\u5F35\u5F37\u5F57\u5F6C\u5F69\u5F6B\u5F97\u5F99\u5F9E\u5F98\u5FA1\u5FA0\u5F9C\u607F\u60A3\u6089\u60A0\u60A8\u60CB\u60B4\u60E6\u60BD\u60C5\u60BB\u60B5\u60DC\u60BC\u60D8\u60D5\u60C6\u60DF\u60B8\u60DA\u60C7\u621A\u621B\u6248\u63A0\u63A7\u6372\u6396\u63A2\u63A5\u6377\u6367\u6398\u63AA\u6371\u63A9\u6389\u6383\u639B\u636B\u63A8\u6384\u6388\u6399\u63A1\u63AC\u6392\u638F\u6380\u637B\u6369\u6368\u637A\u655D\u6556\u6551\u6559\u6557\u555F\u654F\u6558\u6555\u6554\u659C\u659B\u65AC\u65CF\u65CB\u65CC\u65CE\u665D\u665A\u6664\u6668\u6666\u665E\u66F9\u52D7\u671B\u6881\u68AF\u68A2\u6893\u68B5\u687F\u6876\u68B1\u68A7\u6897\u68B0\u6883\u68C4\u68AD\u6886\u6885\u6894\u689D\u68A8\u689F\u68A1\u6882\u6B32\u6BBA\u6BEB\u6BEC\u6C2B\u6D8E\u6DBC\u6DF3\u6DD9\u6DB2\u6DE1\u6DCC\u6DE4\u6DFB\u6DFA\u6E05\u6DC7\u6DCB\u6DAF\u6DD1\u6DAE\u6DDE\u6DF9\u6DB8\u6DF7\u6DF5\u6DC5\u6DD2\u6E1A\u6DB5\u6DDA\u6DEB\u6DD8\u6DEA\u6DF1\u6DEE\u6DE8\u6DC6\u6DC4\u6DAA\u6DEC\u6DBF\u6DE6\u70F9\u7109\u710A\u70FD\u70EF\u723D\u727D\u7281\u731C\u731B\u7316\u7313\u7319\u7387\u7405\u740A\u7403\u7406\u73FE\u740D\u74E0\u74F6\u74F7\u751C\u7522\u7565\u7566\u7562\u7570\u758F\u75D4\u75D5\u75B5\u75CA\u75CD\u768E\u76D4\u76D2\u76DB\u7737\u773E\u773C\u7736\u7738\u773A\u786B\u7843\u784E\u7965\u7968\u796D\u79FB\u7A92\u7A95\u7B20\u7B28\u7B1B\u7B2C\u7B26\u7B19\u7B1E\u7B2E\u7C92\u7C97\u7C95\u7D46\u7D43\u7D71\u7D2E\u7D39\u7D3C\u7D40\u7D30\u7D33\u7D44\u7D2F\u7D42\u7D32\u7D31\u7F3D\u7F9E\u7F9A\u7FCC\u7FCE\u7FD2\u801C\u804A\u80
46\u812F\u8116\u8123\u812B\u8129\u8130\u8124\u8202\u8235\u8237\u8236\u8239\u838E\u839E\u8398\u8378\u83A2\u8396\u83BD\u83AB\u8392\u838A\u8393\u8389\u83A0\u8377\u837B\u837C\u8386\u83A7\u8655\u5F6A\u86C7\u86C0\u86B6\u86C4\u86B5\u86C6\u86CB\u86B1\u86AF\u86C9\u8853\u889E\u8888\u88AB\u8892\u8896\u888D\u888B\u8993\u898F\u8A2A\u8A1D\u8A23\u8A25\u8A31\u8A2D\u8A1F\u8A1B\u8A22\u8C49\u8C5A\u8CA9\u8CAC\u8CAB\u8CA8\u8CAA\u8CA7\u8D67\u8D66\u8DBE\u8DBA\u8EDB\u8EDF\u9019\u900D\u901A\u9017\u9023\u901F\u901D\u9010\u9015\u901E\u9020\u900F\u9022\u9016\u901B\u9014\u90E8\u90ED\u90FD\u9157\u91CE\u91F5\u91E6\u91E3\u91E7\u91ED\u91E9\u9589\u966A\u9675\u9673\u9678\u9670\u9674\u9676\u9677\u966C\u96C0\u96EA\u96E9\u7AE0\u7ADF\u9802\u9803\u9B5A\u9CE5\u9E75\u9E7F\u9EA5\u9EBB\u50A2\u508D\u5085\u5099\u5091\u5080\u5096\u5098\u509A\u6700\u51F1\u5272\u5274\u5275\u5269\u52DE\u52DD\u52DB\u535A\u53A5\u557B\u5580\u55A7\u557C\u558A\u559D\u5598\u5582\u559C\u55AA\u5594\u5587\u558B\u5583\u55B3\u55AE\u559F\u553E\u55B2\u559A\u55BB\u55AC\u55B1\u557E\u5589\u55AB\u5599\u570D\u582F\u582A\u5834\u5824\u5830\u5831\u5821\u581D\u5820\u58F9\u58FA\u5960\u5A77\u5A9A\u5A7F\u5A92\u5A9B\u5AA7\u5B73\u5B71\u5BD2\u5BCC\u5BD3\u5BD0\u5C0A\u5C0B\u5C31\u5D4C\u5D50\u5D34\u5D47\u5DFD\u5E45\u5E3D\u5E40\u5E43\u5E7E\u5ECA\u5EC1\u5EC2\u5EC4\u5F3C\u5F6D\u5FA9\u5FAA\u5FA8\u60D1\u60E1\u60B2\u60B6\u60E0\u611C\u6123\u60FA\u6115\u60F0\u60FB\u60F4\u6168\u60F1\u610E\u60F6\u6109\u6100\u6112\u621F\u6249\u63A3\u638C\u63CF\u63C0\u63E9\u63C9\u63C6\u63CD\u63D2\u63E3\u63D0\u63E1\u63D6\u63ED\u63EE\u6376\u63F4\u63EA\u63DB\u6452\u63DA\u63F9\u655E\u6566\u6562\u6563\u6591\u6590\u65AF\u666E\u6670\u6674\u6676\u666F\u6691\u667A\u667E\u6677\u66FE\u66FF\u671F\u671D\u68FA\u68D5\u68E0\u68D8\u68D7\u6905\u68DF\u68F5\u68EE\u68E7\u68F9\u68D2\u68F2\u68E3\u68CB\u68CD\u690D\u6912\u690E\u68C9\u68DA\u696E\u68FB\u6B3E\u6B3A\u6B3D\u6B98\u6B96\u6BBC\u6BEF\u6C2E\u6C2F\u6C2C\u6E2F\u6E38\u6E54\u6E21\u6E32\u6E67\u6E4A\u6E20\u6E25\u6E23\u6E1B\u6E5B\u6E58\u6E24\u6E56\u6E6E\u6E2D\u6E26
\u6E6F\u6E34\u6E4D\u6E3A\u6E2C\u6E43\u6E1D\u6E3E\u6ECB\u6E89\u6E19\u6E4E\u6E63\u6E44\u6E72\u6E69\u6E5F\u7119\u711A\u7126\u7130\u7121\u7136\u716E\u711C\u724C\u7284\u7280\u7336\u7325\u7334\u7329\u743A\u742A\u7433\u7422\u7425\u7435\u7436\u7434\u742F\u741B\u7426\u7428\u7525\u7526\u756B\u756A\u75E2\u75DB\u75E3\u75D9\u75D8\u75DE\u75E0\u767B\u767C\u7696\u7693\u76B4\u76DC\u774F\u77ED\u785D\u786C\u786F\u7A0D\u7A08\u7A0B\u7A05\u7A00\u7A98\u7A97\u7A96\u7AE5\u7AE3\u7B49\u7B56\u7B46\u7B50\u7B52\u7B54\u7B4D\u7B4B\u7B4F\u7B51\u7C9F\u7CA5\u7D5E\u7D50\u7D68\u7D55\u7D2B\u7D6E\u7D72\u7D61\u7D66\u7D62\u7D70\u7D73\u5584\u7FD4\u7FD5\u800B\u8052\u8085\u8155\u8154\u814B\u8151\u814E\u8139\u8146\u813E\u814C\u8153\u8174\u8212\u821C\u83E9\u8403\u83F8\u840D\u83E0\u83C5\u840B\u83C1\u83EF\u83F1\u83F4\u8457\u840A\u83F0\u840C\u83CC\u83FD\u83F2\u83CA\u8438\u840E\u8404\u83DC\u8407\u83D4\u83DF\u865B\u86DF\u86D9\u86ED\u86D4\u86DB\u86E4\u86D0\u86DE\u8857\u88C1\u88C2\u88B1\u8983\u8996\u8A3B\u8A60\u8A55\u8A5E\u8A3C\u8A41\u8A54\u8A5B\u8A50\u8A46\u8A34\u8A3A\u8A36\u8A56\u8C61\u8C82\u8CAF\u8CBC\u8CB3\u8CBD\u8CC1\u8CBB\u8CC0\u8CB4\u8CB7\u8CB6\u8CBF\u8CB8\u8D8A\u8D85\u8D81\u8DCE\u8DDD\u8DCB\u8DDA\u8DD1\u8DCC\u8DDB\u8DC6\u8EFB\u8EF8\u8EFC\u8F9C\u902E\u9035\u9031\u9038\u9032\u9036\u9102\u90F5\u9109\u90FE\u9163\u9165\u91CF\u9214\u9215\u9223\u9209\u921E\u920D\u9210\u9207\u9211\u9594\u958F\u958B\u9591\u9593\u9592\u958E\u968A\u968E\u968B\u967D\u9685\u9686\u968D\u9672\u9684\u96C1\u96C5\u96C4\u96C6\u96C7\u96EF\u96F2\u97CC\u9805\u9806\u9808\u98E7\u98EA\u98EF\u98E9\u98F2\u98ED\u99AE\u99AD\u9EC3\u9ECD\u9ED1\u4E82\u50AD\u50B5\u50B2\u50B3\u50C5\u50BE\u50AC\u50B7\u50BB\u50AF\u50C7\u527F\u5277\u527D\u52DF\u52E6\u52E4\u52E2\u52E3\u532F\u55DF\u55E8\u55D3\u55E6\u55CE\u55DC\u55C7\u55D1\u55E3\u55E4\u55EF\u55DA\u55E1\u55C5\u55C6\u55E5\u55C9\u5712\u5713\u585E\u5851\u5858\u5857\u585A\u5854\u586B\u584C\u586D\u584A\u5862\u5852\u584B\u5967\u5AC1\u5AC9\u5ACC\u5ABE\u5ABD\u5ABC\u5AB3\u5AC2\u5AB2\u5D69\u5D6F\u5E4C\u5E79\u5EC9\u5EC8\u5F12\u
5F59\u5FAC\u5FAE\u611A\u610F\u6148\u611F\u60F3\u611B\u60F9\u6101\u6108\u614E\u614C\u6144\u614D\u613E\u6134\u6127\u610D\u6106\u6137\u6221\u6222\u6413\u643E\u641E\u642A\u642D\u643D\u642C\u640F\u641C\u6414\u640D\u6436\u6416\u6417\u6406\u656C\u659F\u65B0\u6697\u6689\u6687\u6688\u6696\u6684\u6698\u668D\u6703\u6994\u696D\u695A\u6977\u6960\u6954\u6975\u6930\u6982\u694A\u6968\u696B\u695E\u6953\u6979\u6986\u695D\u6963\u695B\u6B47\u6B72\u6BC0\u6BBF\u6BD3\u6BFD\u6EA2\u6EAF\u6ED3\u6EB6\u6EC2\u6E90\u6E9D\u6EC7\u6EC5\u6EA5\u6E98\u6EBC\u6EBA\u6EAB\u6ED1\u6E96\u6E9C\u6EC4\u6ED4\u6EAA\u6EA7\u6EB4\u714E\u7159\u7169\u7164\u7149\u7167\u715C\u716C\u7166\u714C\u7165\u715E\u7146\u7168\u7156\u723A\u7252\u7337\u7345\u733F\u733E\u746F\u745A\u7455\u745F\u745E\u7441\u743F\u7459\u745B\u745C\u7576\u7578\u7600\u75F0\u7601\u75F2\u75F1\u75FA\u75FF\u75F4\u75F3\u76DE\u76DF\u775B\u776B\u7766\u775E\u7763\u7779\u776A\u776C\u775C\u7765\u7768\u7762\u77EE\u788E\u78B0\u7897\u7898\u788C\u7889\u787C\u7891\u7893\u787F\u797A\u797F\u7981\u842C\u79BD\u7A1C\u7A1A\u7A20\u7A14\u7A1F\u7A1E\u7A9F\u7AA0\u7B77\u7BC0\u7B60\u7B6E\u7B67\u7CB1\u7CB3\u7CB5\u7D93\u7D79\u7D91\u7D81\u7D8F\u7D5B\u7F6E\u7F69\u7F6A\u7F72\u7FA9\u7FA8\u7FA4\u8056\u8058\u8086\u8084\u8171\u8170\u8178\u8165\u816E\u8173\u816B\u8179\u817A\u8166\u8205\u8247\u8482\u8477\u843D\u8431\u8475\u8466\u846B\u8449\u846C\u845B\u843C\u8435\u8461\u8463\u8469\u846D\u8446\u865E\u865C\u865F\u86F9\u8713\u8708\u8707\u8700\u86FE\u86FB\u8702\u8703\u8706\u870A\u8859\u88DF\u88D4\u88D9\u88DC\u88D8\u88DD\u88E1\u88CA\u88D5\u88D2\u899C\u89E3\u8A6B\u8A72\u8A73\u8A66\u8A69\u8A70\u8A87\u8A7C\u8A63\u8AA0\u8A71\u8A85\u8A6D\u8A62\u8A6E\u8A6C\u8A79\u8A7B\u8A3E\u8A68\u8C62\u8C8A\u8C89\u8CCA\u8CC7\u8CC8\u8CC4\u8CB2\u8CC3\u8CC2\u8CC5\u8DE1\u8DDF\u8DE8\u8DEF\u8DF3\u8DFA\u8DEA\u8DE4\u8DE6\u8EB2\u8F03\u8F09\u8EFE\u8F0A\u8F9F\u8FB2\u904B\u904A\u9053\u9042\u9054\u903C\u9055\u9050\u9047\u904F\u904E\u904D\u9051\u903E\u9041\u9112\u9117\u916C\u916A\u9169\u91C9\u9237\u9257\u9238\u923D\u9240\u923E\u92
5B\u924B\u9264\u9251\u9234\u9249\u924D\u9245\u9239\u923F\u925A\u9598\u9698\u9694\u9695\u96CD\u96CB\u96C9\u96CA\u96F7\u96FB\u96F9\u96F6\u9756\u9774\u9776\u9810\u9811\u9813\u980A\u9812\u980C\u98FC\u98F4\u98FD\u98FE\u99B3\u99B1\u99B4\u9AE1\u9CE9\u9E82\u9F0E\u9F13\u9F20\u50E7\u50EE\u50E5\u50D6\u50ED\u50DA\u50D5\u50CF\u50D1\u50F1\u50CE\u50E9\u5162\u51F3\u5283\u5282\u5331\u53AD\u55FE\u5600\u561B\u5617\u55FD\u5614\u5606\u5609\u560D\u560E\u55F7\u5616\u561F\u5608\u5610\u55F6\u5718\u5716\u5875\u587E\u5883\u5893\u588A\u5879\u5885\u587D\u58FD\u5925\u5922\u5924\u596A\u5969\u5AE1\u5AE6\u5AE9\u5AD7\u5AD6\u5AD8\u5AE3\u5B75\u5BDE\u5BE7\u5BE1\u5BE5\u5BE6\u5BE8\u5BE2\u5BE4\u5BDF\u5C0D\u5C62\u5D84\u5D87\u5E5B\u5E63\u5E55\u5E57\u5E54\u5ED3\u5ED6\u5F0A\u5F46\u5F70\u5FB9\u6147\u613F\u614B\u6177\u6162\u6163\u615F\u615A\u6158\u6175\u622A\u6487\u6458\u6454\u64A4\u6478\u645F\u647A\u6451\u6467\u6434\u646D\u647B\u6572\u65A1\u65D7\u65D6\u66A2\u66A8\u669D\u699C\u69A8\u6995\u69C1\u69AE\u69D3\u69CB\u699B\u69B7\u69BB\u69AB\u69B4\u69D0\u69CD\u69AD\u69CC\u69A6\u69C3\u69A3\u6B49\u6B4C\u6C33\u6F33\u6F14\u6EFE\u6F13\u6EF4\u6F29\u6F3E\u6F20\u6F2C\u6F0F\u6F02\u6F22\u6EFF\u6EEF\u6F06\u6F31\u6F38\u6F32\u6F23\u6F15\u6F2B\u6F2F\u6F88\u6F2A\u6EEC\u6F01\u6EF2\u6ECC\u6EF7\u7194\u7199\u717D\u718A\u7184\u7192\u723E\u7292\u7296\u7344\u7350\u7464\u7463\u746A\u7470\u746D\u7504\u7591\u7627\u760D\u760B\u7609\u7613\u76E1\u76E3\u7784\u777D\u777F\u7761\u78C1\u789F\u78A7\u78B3\u78A9\u78A3\u798E\u798F\u798D\u7A2E\u7A31\u7AAA\u7AA9\u7AED\u7AEF\u7BA1\u7B95\u7B8B\u7B75\u7B97\u7B9D\u7B94\u7B8F\u7BB8\u7B87\u7B84\u7CB9\u7CBD\u7CBE\u7DBB\u7DB0\u7D9C\u7DBD\u7DBE\u7DA0\u7DCA\u7DB4\u7DB2\u7DB1\u7DBA\u7DA2\u7DBF\u7DB5\u7DB8\u7DAD\u7DD2\u7DC7\u7DAC\u7F70\u7FE0\u7FE1\u7FDF\u805E\u805A\u8087\u8150\u8180\u818F\u8188\u818A\u817F\u8182\u81E7\u81FA\u8207\u8214\u821E\u824B\u84C9\u84BF\u84C6\u84C4\u8499\u849E\u84B2\u849C\u84CB\u84B8\u84C0\u84D3\u8490\u84BC\u84D1\u84CA\u873F\u871C\u873B\u8722\u8725\u8734\u8718\u8755\u8737\u8729\u88F3\u8902\u88F4
\u88F9\u88F8\u88FD\u88E8\u891A\u88EF\u8AA6\u8A8C\u8A9E\u8AA3\u8A8D\u8AA1\u8A93\u8AA4\u8AAA\u8AA5\u8AA8\u8A98\u8A91\u8A9A\u8AA7\u8C6A\u8C8D\u8C8C\u8CD3\u8CD1\u8CD2\u8D6B\u8D99\u8D95\u8DFC\u8F14\u8F12\u8F15\u8F13\u8FA3\u9060\u9058\u905C\u9063\u9059\u905E\u9062\u905D\u905B\u9119\u9118\u911E\u9175\u9178\u9177\u9174\u9278\u9280\u9285\u9298\u9296\u927B\u9293\u929C\u92A8\u927C\u9291\u95A1\u95A8\u95A9\u95A3\u95A5\u95A4\u9699\u969C\u969B\u96CC\u96D2\u9700\u977C\u9785\u97F6\u9817\u9818\u98AF\u98B1\u9903\u9905\u990C\u9909\u99C1\u9AAF\u9AB0\u9AE6\u9B41\u9B42\u9CF4\u9CF6\u9CF3\u9EBC\u9F3B\u9F4A\u5104\u5100\u50FB\u50F5\u50F9\u5102\u5108\u5109\u5105\u51DC\u5287\u5288\u5289\u528D\u528A\u52F0\u53B2\u562E\u563B\u5639\u5632\u563F\u5634\u5629\u5653\u564E\u5657\u5674\u5636\u562F\u5630\u5880\u589F\u589E\u58B3\u589C\u58AE\u58A9\u58A6\u596D\u5B09\u5AFB\u5B0B\u5AF5\u5B0C\u5B08\u5BEE\u5BEC\u5BE9\u5BEB\u5C64\u5C65\u5D9D\u5D94\u5E62\u5E5F\u5E61\u5EE2\u5EDA\u5EDF\u5EDD\u5EE3\u5EE0\u5F48\u5F71\u5FB7\u5FB5\u6176\u6167\u616E\u615D\u6155\u6182\u617C\u6170\u616B\u617E\u61A7\u6190\u61AB\u618E\u61AC\u619A\u61A4\u6194\u61AE\u622E\u6469\u646F\u6479\u649E\u64B2\u6488\u6490\u64B0\u64A5\u6493\u6495\u64A9\u6492\u64AE\u64AD\u64AB\u649A\u64AC\u6499\u64A2\u64B3\u6575\u6577\u6578\u66AE\u66AB\u66B4\u66B1\u6A23\u6A1F\u69E8\u6A01\u6A1E\u6A19\u69FD\u6A21\u6A13\u6A0A\u69F3\u6A02\u6A05\u69ED\u6A11\u6B50\u6B4E\u6BA4\u6BC5\u6BC6\u6F3F\u6F7C\u6F84\u6F51\u6F66\u6F54\u6F86\u6F6D\u6F5B\u6F78\u6F6E\u6F8E\u6F7A\u6F70\u6F64\u6F97\u6F58\u6ED5\u6F6F\u6F60\u6F5F\u719F\u71AC\u71B1\u71A8\u7256\u729B\u734E\u7357\u7469\u748B\u7483\u747E\u7480\u757F\u7620\u7629\u761F\u7624\u7626\u7621\u7622\u769A\u76BA\u76E4\u778E\u7787\u778C\u7791\u778B\u78CB\u78C5\u78BA\u78CA\u78BE\u78D5\u78BC\u78D0\u7A3F\u7A3C\u7A40\u7A3D\u7A37\u7A3B\u7AAF\u7AAE\u7BAD\u7BB1\u7BC4\u7BB4\u7BC6\u7BC7\u7BC1\u7BA0\u7BCC\u7CCA\u7DE0\u7DF4\u7DEF\u7DFB\u7DD8\u7DEC\u7DDD\u7DE8\u7DE3\u7DDA\u7DDE\u7DE9\u7D9E\u7DD9\u7DF2\u7DF9\u7F75\u7F77\u7FAF\u7FE9\u8026\u819B\u819C\u819D\u
81A0\u819A\u8198\u8517\u853D\u851A\u84EE\u852C\u852D\u8513\u8511\u8523\u8521\u8514\u84EC\u8525\u84FF\u8506\u8782\u8774\u8776\u8760\u8766\u8778\u8768\u8759\u8757\u874C\u8753\u885B\u885D\u8910\u8907\u8912\u8913\u8915\u890A\u8ABC\u8AD2\u8AC7\u8AC4\u8A95\u8ACB\u8AF8\u8AB2\u8AC9\u8AC2\u8ABF\u8AB0\u8AD6\u8ACD\u8AB6\u8AB9\u8ADB\u8C4C\u8C4E\u8C6C\u8CE0\u8CDE\u8CE6\u8CE4\u8CEC\u8CED\u8CE2\u8CE3\u8CDC\u8CEA\u8CE1\u8D6D\u8D9F\u8DA3\u8E2B\u8E10\u8E1D\u8E22\u8E0F\u8E29\u8E1F\u8E21\u8E1E\u8EBA\u8F1D\u8F1B\u8F1F\u8F29\u8F26\u8F2A\u8F1C\u8F1E\u8F25\u9069\u906E\u9068\u906D\u9077\u9130\u912D\u9127\u9131\u9187\u9189\u918B\u9183\u92C5\u92BB\u92B7\u92EA\u92AC\u92E4\u92C1\u92B3\u92BC\u92D2\u92C7\u92F0\u92B2\u95AD\u95B1\u9704\u9706\u9707\u9709\u9760\u978D\u978B\u978F\u9821\u982B\u981C\u98B3\u990A\u9913\u9912\u9918\u99DD\u99D0\u99DF\u99DB\u99D1\u99D5\u99D2\u99D9\u9AB7\u9AEE\u9AEF\u9B27\u9B45\u9B44\u9B77\u9B6F\u9D06\u9D09\u9D03\u9EA9\u9EBE\u9ECE\u58A8\u9F52\u5112\u5118\u5114\u5110\u5115\u5180\u51AA\u51DD\u5291\u5293\u52F3\u5659\u566B\u5679\u5669\u5664\u5678\u566A\u5668\u5665\u5671\u566F\u566C\u5662\u5676\u58C1\u58BE\u58C7\u58C5\u596E\u5B1D\u5B34\u5B78\u5BF0\u5C0E\u5F4A\u61B2\u6191\u61A9\u618A\u61CD\u61B6\u61BE\u61CA\u61C8\u6230\u64C5\u64C1\u64CB\u64BB\u64BC\u64DA\u64C4\u64C7\u64C2\u64CD\u64BF\u64D2\u64D4\u64BE\u6574\u66C6\u66C9\u66B9\u66C4\u66C7\u66B8\u6A3D\u6A38\u6A3A\u6A59\u6A6B\u6A58\u6A39\u6A44\u6A62\u6A61\u6A4B\u6A47\u6A35\u6A5F\u6A48\u6B59\u6B77\u6C05\u6FC2\u6FB1\u6FA1\u6FC3\u6FA4\u6FC1\u6FA7\u6FB3\u6FC0\u6FB9\u6FB6\u6FA6\u6FA0\u6FB4\u71BE\u71C9\u71D0\u71D2\u71C8\u71D5\u71B9\u71CE\u71D9\u71DC\u71C3\u71C4\u7368\u749C\u74A3\u7498\u749F\u749E\u74E2\u750C\u750D\u7634\u7638\u763A\u76E7\u76E5\u77A0\u779E\u779F\u77A5\u78E8\u78DA\u78EC\u78E7\u79A6\u7A4D\u7A4E\u7A46\u7A4C\u7A4B\u7ABA\u7BD9\u7C11\u7BC9\u7BE4\u7BDB\u7BE1\u7BE9\u7BE6\u7CD5\u7CD6\u7E0A\u7E11\u7E08\u7E1B\u7E23\u7E1E\u7E1D\u7E09\u7E10\u7F79\u7FB2\u7FF0\u7FF1\u7FEE\u8028\u81B3\u81A9\u81A8\u81FB\u8208\u8258\u8259\u854A\u8559\u8548\u85
68\u8569\u8543\u8549\u856D\u856A\u855E\u8783\u879F\u879E\u87A2\u878D\u8861\u892A\u8932\u8925\u892B\u8921\u89AA\u89A6\u8AE6\u8AFA\u8AEB\u8AF1\u8B00\u8ADC\u8AE7\u8AEE\u8AFE\u8B01\u8B02\u8AF7\u8AED\u8AF3\u8AF6\u8AFC\u8C6B\u8C6D\u8C93\u8CF4\u8E44\u8E31\u8E34\u8E42\u8E39\u8E35\u8F3B\u8F2F\u8F38\u8F33\u8FA8\u8FA6\u9075\u9074\u9078\u9072\u907C\u907A\u9134\u9192\u9320\u9336\u92F8\u9333\u932F\u9322\u92FC\u932B\u9304\u931A\u9310\u9326\u9321\u9315\u932E\u9319\u95BB\u96A7\u96A8\u96AA\u96D5\u970E\u9711\u9716\u970D\u9713\u970F\u975B\u975C\u9766\u9798\u9830\u9838\u983B\u9837\u982D\u9839\u9824\u9910\u9928\u991E\u991B\u9921\u991A\u99ED\u99E2\u99F1\u9AB8\u9ABC\u9AFB\u9AED\u9B28\u9B91\u9D15\u9D23\u9D26\u9D28\u9D12\u9D1B\u9ED8\u9ED4\u9F8D\u9F9C\u512A\u511F\u5121\u5132\u52F5\u568E\u5680\u5690\u5685\u5687\u568F\u58D5\u58D3\u58D1\u58CE\u5B30\u5B2A\u5B24\u5B7A\u5C37\u5C68\u5DBC\u5DBA\u5DBD\u5DB8\u5E6B\u5F4C\u5FBD\u61C9\u61C2\u61C7\u61E6\u61CB\u6232\u6234\u64CE\u64CA\u64D8\u64E0\u64F0\u64E6\u64EC\u64F1\u64E2\u64ED\u6582\u6583\u66D9\u66D6\u6A80\u6A94\u6A84\u6AA2\u6A9C\u6ADB\u6AA3\u6A7E\u6A97\u6A90\u6AA0\u6B5C\u6BAE\u6BDA\u6C08\u6FD8\u6FF1\u6FDF\u6FE0\u6FDB\u6FE4\u6FEB\u6FEF\u6F80\u6FEC\u6FE1\u6FE9\u6FD5\u6FEE\u6FF0\u71E7\u71DF\u71EE\u71E6\u71E5\u71ED\u71EC\u71F4\u71E0\u7235\u7246\u7370\u7372\u74A9\u74B0\u74A6\u74A8\u7646\u7642\u764C\u76EA\u77B3\u77AA\u77B0\u77AC\u77A7\u77AD\u77EF\u78F7\u78FA\u78F4\u78EF\u7901\u79A7\u79AA\u7A57\u7ABF\u7C07\u7C0D\u7BFE\u7BF7\u7C0C\u7BE0\u7CE0\u7CDC\u7CDE\u7CE2\u7CDF\u7CD9\u7CDD\u7E2E\u7E3E\u7E46\u7E37\u7E32\u7E43\u7E2B\u7E3D\u7E31\u7E45\u7E41\u7E34\u7E39\u7E48\u7E35\u7E3F\u7E2F\u7F44\u7FF3\u7FFC\u8071\u8072\u8070\u806F\u8073\u81C6\u81C3\u81BA\u81C2\u81C0\u81BF\u81BD\u81C9\u81BE\u81E8\u8209\u8271\u85AA\u8584\u857E\u859C\u8591\u8594\u85AF\u859B\u8587\u85A8\u858A\u8667\u87C0\u87D1\u87B3\u87D2\u87C6\u87AB\u87BB\u87BA\u87C8\u87CB\u893B\u8936\u8944\u8938\u893D\u89AC\u8B0E\u8B17\u8B19\u8B1B\u8B0A\u8B20\u8B1D\u8B04\u8B10\u8C41\u8C3F\u8C73\u8CFA\u8CFD\u8CFC\u8CF8\u8CFB
\u8DA8\u8E49\u8E4B\u8E48\u8E4A\u8F44\u8F3E\u8F42\u8F45\u8F3F\u907F\u907D\u9084\u9081\u9082\u9080\u9139\u91A3\u919E\u919C\u934D\u9382\u9328\u9375\u934A\u9365\u934B\u9318\u937E\u936C\u935B\u9370\u935A\u9354\u95CA\u95CB\u95CC\u95C8\u95C6\u96B1\u96B8\u96D6\u971C\u971E\u97A0\u97D3\u9846\u98B6\u9935\u9A01\u99FF\u9BAE\u9BAB\u9BAA\u9BAD\u9D3B\u9D3F\u9E8B\u9ECF\u9EDE\u9EDC\u9EDD\u9EDB\u9F3E\u9F4B\u53E2\u5695\u56AE\u58D9\u58D8\u5B38\u5F5D\u61E3\u6233\u64F4\u64F2\u64FE\u6506\u64FA\u64FB\u64F7\u65B7\u66DC\u6726\u6AB3\u6AAC\u6AC3\u6ABB\u6AB8\u6AC2\u6AAE\u6AAF\u6B5F\u6B78\u6BAF\u7009\u700B\u6FFE\u7006\u6FFA\u7011\u700F\u71FB\u71FC\u71FE\u71F8\u7377\u7375\u74A7\u74BF\u7515\u7656\u7658\u7652\u77BD\u77BF\u77BB\u77BC\u790E\u79AE\u7A61\u7A62\u7A60\u7AC4\u7AC5\u7C2B\u7C27\u7C2A\u7C1E\u7C23\u7C21\u7CE7\u7E54\u7E55\u7E5E\u7E5A\u7E61\u7E52\u7E59\u7F48\u7FF9\u7FFB\u8077\u8076\u81CD\u81CF\u820A\u85CF\u85A9\u85CD\u85D0\u85C9\u85B0\u85BA\u85B9\u85A6\u87EF\u87EC\u87F2\u87E0\u8986\u89B2\u89F4\u8B28\u8B39\u8B2C\u8B2B\u8C50\u8D05\u8E59\u8E63\u8E66\u8E64\u8E5F\u8E55\u8EC0\u8F49\u8F4D\u9087\u9083\u9088\u91AB\u91AC\u91D0\u9394\u938A\u9396\u93A2\u93B3\u93AE\u93AC\u93B0\u9398\u939A\u9397\u95D4\u95D6\u95D0\u95D5\u96E2\u96DC\u96D9\u96DB\u96DE\u9724\u97A3\u97A6\u97AD\u97F9\u984D\u984F\u984C\u984E\u9853\u98BA\u993E\u993F\u993D\u992E\u99A5\u9A0E\u9AC1\u9B03\u9B06\u9B4F\u9B4E\u9B4D\u9BCA\u9BC9\u9BFD\u9BC8\u9BC0\u9D51\u9D5D\u9D60\u9EE0\u9F15\u9F2C\u5133\u56A5\u58DE\u58DF\u58E2\u5BF5\u9F90\u5EEC\u61F2\u61F7\u61F6\u61F5\u6500\u650F\u66E0\u66DD\u6AE5\u6ADD\u6ADA\u6AD3\u701B\u701F\u7028\u701A\u701D\u7015\u7018\u7206\u720D\u7258\u72A2\u7378\u737A\u74BD\u74CA\u74E3\u7587\u7586\u765F\u7661\u77C7\u7919\u79B1\u7A6B\u7A69\u7C3E\u7C3F\u7C38\u7C3D\u7C37\u7C40\u7E6B\u7E6D\u7E79\u7E69\u7E6A\u7F85\u7E73\u7FB6\u7FB9\u7FB8\u81D8\u85E9\u85DD\u85EA\u85D5\u85E4\u85E5\u85F7\u87FB\u8805\u880D\u87F9\u87FE\u8960\u895F\u8956\u895E\u8B41\u8B5C\u8B58\u8B49\u8B5A\u8B4E\u8B4F\u8B46\u8B59\u8D08\u8D0A\u8E7C\u8E72\u8E87\u8E76\u8E6C\u8E7A\u
8E74\u8F54\u8F4E\u8FAD\u908A\u908B\u91B1\u91AE\u93E1\u93D1\u93DF\u93C3\u93C8\u93DC\u93DD\u93D6\u93E2\u93CD\u93D8\u93E4\u93D7\u93E8\u95DC\u96B4\u96E3\u972A\u9727\u9761\u97DC\u97FB\u985E\u9858\u985B\u98BC\u9945\u9949\u9A16\u9A19\u9B0D\u9BE8\u9BE7\u9BD6\u9BDB\u9D89\u9D61\u9D72\u9D6A\u9D6C\u9E92\u9E97\u9E93\u9EB4\u52F8\u56A8\u56B7\u56B6\u56B4\u56BC\u58E4\u5B40\u5B43\u5B7D\u5BF6\u5DC9\u61F8\u61FA\u6518\u6514\u6519\u66E6\u6727\u6AEC\u703E\u7030\u7032\u7210\u737B\u74CF\u7662\u7665\u7926\u792A\u792C\u792B\u7AC7\u7AF6\u7C4C\u7C43\u7C4D\u7CEF\u7CF0\u8FAE\u7E7D\u7E7C\u7E82\u7F4C\u8000\u81DA\u8266\u85FB\u85F9\u8611\u85FA\u8606\u860B\u8607\u860A\u8814\u8815\u8964\u89BA\u89F8\u8B70\u8B6C\u8B66\u8B6F\u8B5F\u8B6B\u8D0F\u8D0D\u8E89\u8E81\u8E85\u8E82\u91B4\u91CB\u9418\u9403\u93FD\u95E1\u9730\u98C4\u9952\u9951\u99A8\u9A2B\u9A30\u9A37\u9A35\u9C13\u9C0D\u9E79\u9EB5\u9EE8\u9F2F\u9F5F\u9F63\u9F61\u5137\u5138\u56C1\u56C0\u56C2\u5914\u5C6C\u5DCD\u61FC\u61FE\u651D\u651C\u6595\u66E9\u6AFB\u6B04\u6AFA\u6BB2\u704C\u721B\u72A7\u74D6\u74D4\u7669\u77D3\u7C50\u7E8F\u7E8C\u7FBC\u8617\u862D\u861A\u8823\u8822\u8821\u881F\u896A\u896C\u89BD\u8B74\u8B77\u8B7D\u8D13\u8E8A\u8E8D\u8E8B\u8F5F\u8FAF\u91BA\u942E\u9433\u9435\u943A\u9438\u9432\u942B\u95E2\u9738\u9739\u9732\u97FF\u9867\u9865\u9957\u9A45\u9A43\u9A40\u9A3E\u9ACF\u9B54\u9B51\u9C2D\u9C25\u9DAF\u9DB4\u9DC2\u9DB8\u9E9D\u9EEF\u9F19\u9F5C\u9F66\u9F67\u513C\u513B\u56C8\u56CA\u56C9\u5B7F\u5DD4\u5DD2\u5F4E\u61FF\u6524\u6B0A\u6B61\u7051\u7058\u7380\u74E4\u758A\u766E\u766C\u79B3\u7C60\u7C5F\u807E\u807D\u81DF\u8972\u896F\u89FC\u8B80\u8D16\u8D17\u8E91\u8E93\u8F61\u9148\u9444\u9451\u9452\u973D\u973E\u97C3\u97C1\u986B\u9955\u9A55\u9A4D\u9AD2\u9B1A\u9C49\u9C31\u9C3E\u9C3B\u9DD3\u9DD7\u9F34\u9F6C\u9F6A\u9F94\u56CC\u5DD6\u6200\u6523\u652B\u652A\u66EC\u6B10\u74DA\u7ACA\u7C64\u7C63\u7C65\u7E93\u7E96\u7E94\u81E2\u8638\u863F\u8831\u8B8A\u9090\u908F\u9463\u9460\u9464\u9768\u986F\u995C\u9A5A\u9A5B\u9A57\u9AD3\u9AD4\u9AD1\u9C54\u9C57\u9C56\u9DE5\u9E9F\u9EF4\u56D1\u58E9\u65
2C\u705E\u7671\u7672\u77D7\u7F50\u7F88\u8836\u8839\u8862\u8B93\u8B92\u8B96\u8277\u8D1B\u91C0\u946A\u9742\u9748\u9744\u97C6\u9870\u9A5F\u9B22\u9B58\u9C5F\u9DF9\u9DFA\u9E7C\u9E7D\u9F07\u9F77\u9F72\u5EF3\u6B16\u7063\u7C6C\u7C6E\u883B\u89C0\u8EA1\u91C1\u9472\u9470\u9871\u995E\u9AD6\u9B23\u9ECC\u7064\u77DA\u8B9A\u9477\u97C9\u9A62\u9A65\u7E9C\u8B9C\u8EAA\u91C5\u947D\u947E\u947C\u9C77\u9C78\u9EF7\u8C54\u947F\u9E1A\u7228\u9A6A\u9B31\u9E1B\u9E1E\u7C72\u2460\u2461\u2462\u2463\u2464\u2465\u2466\u2467\u2468\u2469\u2474\u2475\u2476\u2477\u2478\u2479\u247A\u247B\u247C\u247D\u2170\u2171\u2172\u2173\u2174\u2175\u2176\u2177\u2178\u2179\u4E36\u4E3F\u4E85\u4EA0\u5182\u5196\u51AB\u52F9\u5338\u5369\u53B6\u590A\u5B80\u5DDB\u2F33\u5E7F\u5EF4\u5F50\u5F61\u6534\u65E0\u7592\u7676\u8FB5\u96B6\u00A8\u02C6\u30FD\u30FE\u309D\u309E\u3003\u4EDD\u3005\u3006\u3007\u30FC\uFF3B\uFF3D\u273D\u3041\u3042\u3043\u3044\u3045\u3046\u3047\u3048\u3049\u304A\u304B\u304C\u304D\u304E\u304F\u3050\u3051\u3052\u3053\u3054\u3055\u3056\u3057\u3058\u3059\u305A\u305B\u305C\u305D\u305E\u305F\u3060\u3061\u3062\u3063\u3064\u3065\u3066\u3067\u3068\u3069\u306A\u306B\u306C\u306D\u306E\u306F\u3070\u3071\u3072\u3073\u3074\u3075\u3076\u3077\u3078\u3079\u307A\u307B\u307C\u307D\u307E\u307F\u3080\u3081\u3082\u3083\u3084\u3085\u3086\u3087\u3088\u3089\u308A\u308B\u308C\u308D\u308E\u308F\u3090\u3091\u3092\u3093\u30A1\u30A2\u30A3\u30A4\u30A5\u30A6\u30A7\u30A8\u30A9\u30AA\u30AB\u30AC\u30AD\u30AE\u30AF\u30B0\u30B1\u30B2\u30B3\u30B4\u30B5\u30B6\u30B7\u30B8\u30B9\u30BA\u30BB\u30BC\u30BD\u30BE\u30BF\u30C0\u30C1\u30C2\u30C3\u30C4\u30C5\u30C6\u30C7\u30C8\u30C9\u30CA\u30CB\u30CC\u30CD\u30CE\u30CF\u30D0\u30D1\u30D2\u30D3\u30D4\u30D5\u30D6\u30D7\u30D8\u30D9\u30DA\u30DB\u30DC\u30DD\u30DE\u30DF\u30E0\u30E1\u30E2\u30E3\u30E4\u30E5\u30E6\u30E7\u30E8\u30E9\u30EA\u30EB\u30EC\u30ED\u30EE\u30EF\u30F0\u30F1\u30F2\u30F3\u30F4\u30F5\u30F6\u0410\u0411\u0412\u0413\u0414\u0415\u0401\u0416\u0417\u0418\u0419\u041A\u041B\u041C\u041D\u041E\u041F\u0420\u0421\u0422
\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042A\u042B\u042C\u042D\u042E\u042F\u0430\u0431\u0432\u0433\u0434\u0435\u0451\u0436\u0437\u0438\u0439\u043A\u043B\u043C\u043D\u043E\u043F\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044A\u044B\u044C\u044D\u044E\u044F\u21E7\u21B8\u21B9\u31CF\u00CC\u4E5A\u008A\u5202\u4491\u9FB0\u5188\u9FB1\u7607";
+
+ private static final String TABLE4 = "\uFFE2\uFFE4\uFF07\uFF02\u3231\u2116\u2121\u309B\u309C\u2E80\u2E84\u2E86\u2E87\u2E88\u2E8A\u2E8C\u2E8D\u2E95\u2E9C\u2E9D\u2EA5\u2EA7\u2EAA\u2EAC\u2EAE\u2EB6\u2EBC\u2EBE\u2EC6\u2ECA\u2ECC\u2ECD\u2ECF\u2ED6\u2ED7\u2EDE\u2EE3\u0000\u0000\u0000\u0283\u0250\u025B\u0254\u0275\u0153\u00F8\u014B\u028A\u026A\u4E42\u4E5C\u51F5\u531A\u5382\u4E07\u4E0C\u4E47\u4E8D\u56D7\uFA0C\u5C6E\u5F73\u4E0F\u5187\u4E0E\u4E2E\u4E93\u4EC2\u4EC9\u4EC8\u5198\u52FC\u536C\u53B9\u5720\u5903\u592C\u5C10\u5DFF\u65E1\u6BB3\u6BCC\u6C14\u723F\u4E31\u4E3C\u4EE8\u4EDC\u4EE9\u4EE1\u4EDD\u4EDA\u520C\u531C\u534C\u5722\u5723\u5917\u592F\u5B81\u5B84\u5C12\u5C3B\u5C74\u5C73\u5E04\u5E80\u5E82\u5FC9\u6209\u6250\u6C15\u6C36\u6C43\u6C3F\u6C3B\u72AE\u72B0\u738A\u79B8\u808A\u961E\u4F0E\u4F18\u4F2C\u4EF5\u4F14\u4EF1\u4F00\u4EF7\u4F08\u4F1D\u4F02\u4F05\u4F22\u4F13\u4F04\u4EF4\u4F12\u51B1\u5213\u5209\u5210\u52A6\u5322\u531F\u534D\u538A\u5407\u56E1\u56DF\u572E\u572A\u5734\u593C\u5980\u597C\u5985\u597B\u597E\u5977\u597F\u5B56\u5C15\u5C25\u5C7C\u5C7A\u5C7B\u5C7E\u5DDF\u5E75\u5E84\u5F02\u5F1A\u5F74\u5FD5\u5FD4\u5FCF\u625C\u625E\u6264\u6261\u6266\u6262\u6259\u6260\u625A\u6265\u65EF\u65EE\u673E\u6739\u6738\u673B\u673A\u673F\u673C\u6733\u6C18\u6C46\u6C52\u6C5C\u6C4F\u6C4A\u6C54\u6C4B\u6C4C\u7071\u725E\u72B4\u72B5\u738E\u752A\u767F\u7A75\u7F51\u8278\u827C\u8280\u827D\u827F\u864D\u897E\u9099\u9097\u9098\u909B\u9094\u9622\u9624\u9620\u9623\u4F56\u4F3B\u4F62\u4F49\u4F53\u4F64\u4F3E\u4F67\u4F52\u4F5F\u4F41\u4F58\u4F2D\u4F33\u4F3F\u4F61\u518F\u51B9\u521C\u521E\u5221\u52AD\u52AE\u5309\u5363\u5372\u538E\u538F\u5430\u5437\u542A\u5454\u5445\u5419\u541C\u5425\u5418\u543D\u544F\u5441\u5428\u5424\u5447\u56EE\u56E7\u56E5\u5741\u5745\u574C\u5749\u574B\u5752\u5906\u5940\u59A6\u5998\u59A0\u5997\u598E\u59A2\u5990\u598F\u59A7\u59A1\u5B8E\u5B92\u5C28\u5C2A\u5C8D\u5C8F\u5C88\u5C8B\u5C89\u5C92\u5C8A\u5C86\u5C93\u5C95\u5DE0\u5E0A\u5E0E\u5E8B\u5E89\u5E8C\u5E88\u5E8D\u5F05\u5F1D\u5F78\u5F76\u5FD2\u5FD1\u5FD0\u5F
ED\u5FE8\u5FEE\u5FF3\u5FE1\u5FE4\u5FE3\u5FFA\u5FEF\u5FF7\u5FFB\u6000\u5FF4\u623A\u6283\u628C\u628E\u628F\u6294\u6287\u6271\u627B\u627A\u6270\u6281\u6288\u6277\u627D\u6272\u6274\u6537\u65F0\u65F4\u65F3\u65F2\u65F5\u6745\u6747\u6759\u6755\u674C\u6748\u675D\u674D\u675A\u674B\u6BD0\u6C19\u6C1A\u6C78\u6C67\u6C6B\u6C84\u6C8B\u6C8F\u6C71\u6C6F\u6C69\u6C9A\u6C6D\u6C87\u6C95\u6C9C\u6C66\u6C73\u6C65\u6C7B\u6C8E\u7074\u707A\u7263\u72BF\u72BD\u72C3\u72C6\u72C1\u72BA\u72C5\u7395\u7397\u7393\u7394\u7392\u753A\u7539\u7594\u7595\u7681\u793D\u8034\u8095\u8099\u8090\u8092\u809C\u8290\u828F\u8285\u828E\u8291\u8293\u828A\u8283\u8284\u8C78\u8FC9\u8FBF\u909F\u90A1\u90A5\u909E\u90A7\u90A0\u9630\u9628\u962F\u962D\u4E33\u4F98\u4F7C\u4F85\u4F7D\u4F80\u4F87\u4F76\u4F74\u4F89\u4F84\u4F77\u4F4C\u4F97\u4F6A\u4F9A\u4F79\u4F81\u4F78\u4F90\u4F9C\u4F94\u4F9E\u4F92\u4F82\u4F95\u4F6B\u4F6E\u519E\u51BC\u51BE\u5235\u5232\u5233\u5246\u5231\u52BC\u530A\u530B\u533C\u5392\u5394\u5487\u547F\u5481\u5491\u5482\u5488\u546B\u547A\u547E\u5465\u546C\u5474\u5466\u548D\u546F\u5461\u5460\u5498\u5463\u5467\u5464\u56F7\u56F9\u576F\u5772\u576D\u576B\u5771\u5770\u5776\u5780\u5775\u577B\u5773\u5774\u5762\u5768\u577D\u590C\u5945\u59B5\u59BA\u59CF\u59CE\u59B2\u59CC\u59C1\u59B6\u59BC\u59C3\u59D6\u59B1\u59BD\u59C0\u59C8\u59B4\u59C7\u5B62\u5B65\u5B93\u5B95\u5C44\u5C47\u5CAE\u5CA4\u5CA0\u5CB5\u5CAF\u5CA8\u5CAC\u5C9F\u5CA3\u5CAD\u5CA2\u5CAA\u5CA7\u5C9D\u5CA5\u5CB6\u5CB0\u5CA6\u5E17\u5E14\u5E19\u5F28\u5F22\u5F23\u5F24\u5F54\u5F82\u5F7E\u5F7D\u5FDE\u5FE5\u602D\u6026\u6019\u6032\u600B\u6034\u600A\u6017\u6033\u601A\u601E\u602C\u6022\u600D\u6010\u602E\u6013\u6011\u600C\u6009\u601C\u6214\u623D\u62AD\u62B4\u62D1\u62BE\u62AA\u62B6\u62CA\u62AE\u62B3\u62AF\u62BB\u62A9\u62B0\u62B8\u653D\u65A8\u65BB\u6609\u65FC\u6604\u6612\u6608\u65FB\u6603\u660B\u660D\u6605\u65FD\u6611\u6610\u66F6\u670A\u6785\u676C\u678E\u6792\u6776\u677B\u6798\u6786\u6784\u6774\u678D\u678C\u677A\u679F\u6791\u6799\u6783\u677D\u6781\u6778\u6779\u6794\u6B25\u6B80\u6B7E\u6BDE
\u6C1D\u6C93\u6CEC\u6CEB\u6CEE\u6CD9\u6CB6\u6CD4\u6CAD\u6CE7\u6CB7\u6CD0\u6CC2\u6CBA\u6CC3\u6CC6\u6CED\u6CF2\u6CD2\u6CDD\u6CB4\u6C8A\u6C9D\u6C80\u6CDE\u6CC0\u6D30\u6CCD\u6CC7\u6CB0\u6CF9\u6CCF\u6CE9\u6CD1\u7094\u7098\u7085\u7093\u7086\u7084\u7091\u7096\u7082\u709A\u7083\u726A\u72D6\u72CB\u72D8\u72C9\u72DC\u72D2\u72D4\u72DA\u72CC\u72D1\u73A4\u73A1\u73AD\u73A6\u73A2\u73A0\u73AC\u739D\u74DD\u74E8\u753F\u7540\u753E\u758C\u7598\u76AF\u76F3\u76F1\u76F0\u76F5\u77F8\u77FC\u77F9\u77FB\u77FA\u77F7\u7942\u793F\u79C5\u7A78\u7A7B\u7AFB\u7C75\u7CFD\u8035\u808F\u80AE\u80A3\u80B8\u80B5\u80AD\u8220\u82A0\u82C0\u82AB\u829A\u8298\u829B\u82B5\u82A7\u82AE\u82BC\u829E\u82BA\u82B4\u82A8\u82A1\u82A9\u82C2\u82A4\u82C3\u82B6\u82A2\u8670\u866F\u866D\u866E\u8C56\u8FD2\u8FCB\u8FD3\u8FCD\u8FD6\u8FD5\u8FD7\u90B2\u90B4\u90AF\u90B3\u90B0\u9639\u963D\u963C\u963A\u9643\u4FCD\u4FC5\u4FD3\u4FB2\u4FC9\u4FCB\u4FC1\u4FD4\u4FDC\u4FD9\u4FBB\u4FB3\u4FDB\u4FC7\u4FD6\u4FBA\u4FC0\u4FB9\u4FEC\u5244\u5249\u52C0\u52C2\u533D\u537C\u5397\u5396\u5399\u5398\u54BA\u54A1\u54AD\u54A5\u54CF\u54C3\u830D\u54B7\u54AE\u54D6\u54B6\u54C5\u54C6\u54A0\u5470\u54BC\u54A2\u54BE\u5472\u54DE\u54B0\u57B5\u579E\u579F\u57A4\u578C\u5797\u579D\u579B\u5794\u5798\u578F\u5799\u57A5\u579A\u5795\u58F4\u590D\u5953\u59E1\u59DE\u59EE\u5A00\u59F1\u59DD\u59FA\u59FD\u59FC\u59F6\u59E4\u59F2\u59F7\u59DB\u59E9\u59F3\u59F5\u59E0\u59FE\u59F4\u59ED\u5BA8\u5C4C\u5CD0\u5CD8\u5CCC\u5CD7\u5CCB\u5CDB\u5CDE\u5CDA\u5CC9\u5CC7\u5CCA\u5CD6\u5CD3\u5CD4\u5CCF\u5CC8\u5CC6\u5CCE\u5CDF\u5CF8\u5DF9\u5E21\u5E22\u5E23\u5E20\u5E24\u5EB0\u5EA4\u5EA2\u5E9B\u5EA3\u5EA5\u5F07\u5F2E\u5F56\u5F86\u6037\u6039\u6054\u6072\u605E\u6045\u6053\u6047\u6049\u605B\u604C\u6040\u6042\u605F\u6024\u6044\u6058\u6066\u606E\u6242\u6243\u62CF\u630D\u630B\u62F5\u630E\u6303\u62EB\u62F9\u630F\u630C\u62F8\u62F6\u6300\u6313\u6314\u62FA\u6315\u62FB\u62F0\u6541\u6543\u65AA\u65BF\u6636\u6621\u6632\u6635\u661C\u6626\u6622\u6633\u662B\u663A\u661D\u6634\u6639\u662E\u670F\u6710\u67C1\u67F2\u67C8\u67BA\u67DC\u
67BB\u67F8\u67D8\u67C0\u67B7\u67C5\u67EB\u67E4\u67DF\u67B5\u67CD\u67B3\u67F7\u67F6\u67EE\u67E3\u67C2\u67B9\u67CE\u67E7\u67F0\u67B2\u67FC\u67C6\u67ED\u67CC\u67AE\u67E6\u67DB\u67FA\u67C9\u67CA\u67C3\u67EA\u67CB\u6B28\u6B82\u6B84\u6BB6\u6BD6\u6BD8\u6BE0\u6C20\u6C21\u6D28\u6D34\u6D2D\u6D1F\u6D3C\u6D3F\u6D12\u6D0A\u6CDA\u6D33\u6D04\u6D19\u6D3A\u6D1A\u6D11\u6D00\u6D1D\u6D42\u6D01\u6D18\u6D37\u6D03\u6D0F\u6D40\u6D07\u6D20\u6D2C\u6D08\u6D22\u6D09\u6D10\u70B7\u709F\u70BE\u70B1\u70B0\u70A1\u70B4\u70B5\u70A9\u7241\u7249\u724A\u726C\u7270\u7273\u726E\u72CA\u72E4\u72E8\u72EB\u72DF\u72EA\u72E6\u72E3\u7385\u73CC\u73C2\u73C8\u73C5\u73B9\u73B6\u73B5\u73B4\u73EB\u73BF\u73C7\u73BE\u73C3\u73C6\u73B8\u73CB\u74EC\u74EE\u752E\u7547\u7548\u75A7\u75AA\u7679\u76C4\u7708\u7703\u7704\u7705\u770A\u76F7\u76FB\u76FA\u77E7\u77E8\u7806\u7811\u7812\u7805\u7810\u780F\u780E\u7809\u7803\u7813\u794A\u794C\u794B\u7945\u7944\u79D5\u79CD\u79CF\u79D6\u79CE\u7A80\u7A7E\u7AD1\u7B00\u7B01\u7C7A\u7C78\u7C79\u7C7F\u7C80\u7C81\u7D03\u7D08\u7D01\u7F58\u7F91\u7F8D\u7FBE\u8007\u800E\u800F\u8014\u8037\u80D8\u80C7\u80E0\u80D1\u80C8\u80C2\u80D0\u80C5\u80E3\u80D9\u80DC\u80CA\u80D5\u80C9\u80CF\u80D7\u80E6\u80CD\u81FF\u8221\u8294\u82D9\u82FE\u82F9\u8307\u82E8\u8300\u82D5\u833A\u82EB\u82D6\u82F4\u82EC\u82E1\u82F2\u82F5\u830C\u82FB\u82F6\u82F0\u82EA\u82E4\u82E0\u82FA\u82F3\u82ED\u8677\u8674\u867C\u8673\u8841\u884E\u8867\u886A\u8869\u89D3\u8A04\u8A07\u8D72\u8FE3\u8FE1\u8FEE\u8FE0\u90F1\u90BD\u90BF\u90D5\u90C5\u90BE\u90C7\u90CB\u90C8\u91D4\u91D3\u9654\u964F\u9651\u9653\u964A\u964E\u501E\u5005\u5007\u5013\u5022\u5030\u501B\u4FF5\u4FF4\u5033\u5037\u502C\u4FF6\u4FF7\u5017\u501C\u5020\u5027\u5035\u502F\u5031\u500E\u515A\u5194\u5193\u51CA\u51C4\u51C5\u51C8\u51CE\u5261\u525A\u5252\u525E\u525F\u5255\u5262\u52CD\u530E\u539E\u5526\u54E2\u5517\u5512\u54E7\u54F3\u54E4\u551A\u54FF\u5504\u5508\u54EB\u5511\u5505\u54F1\u550A\u54FB\u54F7\u54F8\u54E0\u550E\u5503\u550B\u5701\u5702\u57CC\u5832\u57D5\u57D2\u57BA\u57C6\u57BD\u57BC\u57B8\u57B6\u57
BF\u57C7\u57D0\u57B9\u57C1\u590E\u594A\u5A19\u5A16\u5A2D\u5A2E\u5A15\u5A0F\u5A17\u5A0A\u5A1E\u5A33\u5B6C\u5BA7\u5BAD\u5BAC\u5C03\u5C56\u5C54\u5CEC\u5CFF\u5CEE\u5CF1\u5CF7\u5D00\u5CF9\u5E29\u5E28\u5EA8\u5EAE\u5EAA\u5EAC\u5F33\u5F30\u5F67\u605D\u605A\u6067\u6041\u60A2\u6088\u6080\u6092\u6081\u609D\u6083\u6095\u609B\u6097\u6087\u609C\u608E\u6219\u6246\u62F2\u6310\u6356\u632C\u6344\u6345\u6336\u6343\u63E4\u6339\u634B\u634A\u633C\u6329\u6341\u6334\u6358\u6354\u6359\u632D\u6347\u6333\u635A\u6351\u6338\u6357\u6340\u6348\u654A\u6546\u65C6\u65C3\u65C4\u65C2\u664A\u665F\u6647\u6651\u6712\u6713\u681F\u681A\u6849\u6832\u6833\u683B\u684B\u684F\u6816\u6831\u681C\u6835\u682B\u682D\u682F\u684E\u6844\u6834\u681D\u6812\u6814\u6826\u6828\u682E\u684D\u683A\u6825\u6820\u6B2C\u6B2F\u6B2D\u6B31\u6B34\u6B6D\u8082\u6B88\u6BE6\u6BE4\u6BE8\u6BE3\u6BE2\u6BE7\u6C25\u6D7A\u6D63\u6D64\u6D76\u6D0D\u6D61\u6D92\u6D58\u6D62\u6D6D\u6D6F\u6D91\u6D8D\u6DEF\u6D7F\u6D86\u6D5E\u6D67\u6D60\u6D97\u6D70\u6D7C\u6D5F\u6D82\u6D98\u6D2F\u6D68\u6D8B\u6D7E\u6D80\u6D84\u6D16\u6D83\u6D7B\u6D7D\u6D75\u6D90\u70DC\u70D3\u70D1\u70DD\u70CB\u7F39\u70E2\u70D7\u70D2\u70DE\u70E0\u70D4\u70CD\u70C5\u70C6\u70C7\u70DA\u70CE\u70E1\u7242\u7278\u7277\u7276\u7300\u72FA\u72F4\u72FE\u72F6\u72F3\u72FB\u7301\u73D3\u73D9\u73E5\u73D6\u73BC\u73E7\u73E3\u73E9\u73DC\u73D2\u73DB\u73D4\u73DD\u73DA\u73D7\u73D8\u73E8\u74DE\u74DF\u74F4\u74F5\u7521\u755B\u755F\u75B0\u75C1\u75BB\u75C4\u75C0\u75BF\u75B6\u75BA\u768A\u76C9\u771D\u771B\u7710\u7713\u7712\u7723\u7711\u7715\u7719\u771A\u7722\u7727\u7823\u782C\u7822\u7835\u782F\u7828\u782E\u782B\u7821\u7829\u7833\u782A\u7831\u7954\u795B\u794F\u795C\u7953\u7952\u7951\u79EB\u79EC\u79E0\u79EE\u79ED\u79EA\u79DC\u79DE\u79DD\u7A86\u7A89\u7A85\u7A8B\u7A8C\u7A8A\u7A87\u7AD8\u7B10\u7B04\u7B13\u7B05\u7B0F\u7B08\u7B0A\u7B0E\u7B09\u7B12\u7C84\u7C91\u7C8A\u7C8C\u7C88\u7C8D\u7C85\u7D1E\u7D1D\u7D11\u7D0E\u7D18\u7D16\u7D13\u7D1F\u7D12\u7D0F\u7D0C\u7F5C\u7F61\u7F5E\u7F60\u7F5D\u7F5B\u7F96\u7F92\u7FC3\u7FC2\u7FC0\u8016\u803E
\u8039\u80FA\u80F2\u80F9\u80F5\u8101\u80FB\u8100\u8201\u822F\u8225\u8333\u832D\u8344\u8319\u8351\u8325\u8356\u833F\u8341\u8326\u831C\u8322\u8342\u834E\u831B\u832A\u8308\u833C\u834D\u8316\u8324\u8320\u8337\u832F\u8329\u8347\u8345\u834C\u8353\u831E\u832C\u834B\u8327\u8348\u8653\u8652\u86A2\u86A8\u8696\u868D\u8691\u869E\u8687\u8697\u8686\u868B\u869A\u8685\u86A5\u8699\u86A1\u86A7\u8695\u8698\u868E\u869D\u8690\u8694\u8843\u8844\u886D\u8875\u8876\u8872\u8880\u8871\u887F\u886F\u8883\u887E\u8874\u887C\u8A12\u8C47\u8C57\u8C7B\u8CA4\u8CA3\u8D76\u8D78\u8DB5\u8DB7\u8DB6\u8ED1\u8ED3\u8FFE\u8FF5\u9002\u8FFF\u8FFB\u9004\u8FFC\u8FF6\u90D6\u90E0\u90D9\u90DA\u90E3\u90DF\u90E5\u90D8\u90DB\u90D7\u90DC\u90E4\u9150\u914E\u914F\u91D5\u91E2\u91DA\u965C\u965F\u96BC\u98E3\u9ADF\u9B2F\u4E7F\u5070\u506A\u5061\u505E\u5060\u5053\u504B\u505D\u5072\u5048\u504D\u5041\u505B\u504A\u5062\u5015\u5045\u505F\u5069\u506B\u5063\u5064\u5046\u5040\u506E\u5073\u5057\u5051\u51D0\u526B\u526D\u526C\u526E\u52D6\u52D3\u532D\u539C\u5575\u5576\u553C\u554D\u5550\u5534\u552A\u5551\u5562\u5536\u5535\u5530\u5552\u5545\u550C\u5532\u5565\u554E\u5539\u5548\u552D\u553B\u5540\u554B\u570A\u5707\u57FB\u5814\u57E2\u57F6\u57DC\u57F4\u5800\u57ED\u57FD\u5808\u57F8\u580B\u57F3\u57CF\u5807\u57EE\u57E3\u57F2\u57E5\u57EC\u57E1\u580E\u57FC\u5810\u57E7\u5801\u580C\u57F1\u57E9\u57F0\u580D\u5804\u595C\u5A60\u5A58\u5A55\u5A67\u5A5E\u5A38\u5A35\u5A6D\u5A50\u5A5F\u5A65\u5A6C\u5A53\u5A64\u5A57\u5A43\u5A5D\u5A52\u5A44\u5A5B\u5A48\u5A8E\u5A3E\u5A4D\u5A39\u5A4C\u5A70\u5A69\u5A47\u5A51\u5A56\u5A42\u5A5C\u5B72\u5B6E\u5BC1\u5BC0\u5C59\u5D1E\u5D0B\u5D1D\u5D1A\u5D20\u5D0C\u5D28\u5D0D\u5D26\u5D25\u5D0F\u5D30\u5D12\u5D23\u5D1F\u5D2E\u5E3E\u5E34\u5EB1\u5EB4\u5EB9\u5EB2\u5EB3\u5F36\u5F38\u5F9B\u5F96\u5F9F\u608A\u6090\u6086\u60BE\u60B0\u60BA\u60D3\u60D4\u60CF\u60E4\u60D9\u60DD\u60C8\u60B1\u60DB\u60B7\u60CA\u60BF\u60C3\u60CD\u60C0\u6332\u6365\u638A\u6382\u637D\u63BD\u639E\u63AD\u639D\u6397\u63AB\u638E\u636F\u6387\u6390\u636E\u63AF\u6375\u639C\u636D\u63AE\u
637C\u63A4\u633B\u639F\u6378\u6385\u6381\u6391\u638D\u6370\u6553\u65CD\u6665\u6661\u665B\u6659\u665C\u6662\u6718\u6879\u6887\u6890\u689C\u686D\u686E\u68AE\u68AB\u6956\u686F\u68A3\u68AC\u68A9\u6875\u6874\u68B2\u688F\u6877\u6892\u687C\u686B\u6872\u68AA\u6880\u6871\u687E\u689B\u6896\u688B\u68A0\u6889\u68A4\u6878\u687B\u6891\u688C\u688A\u687D\u6B36\u6B33\u6B37\u6B38\u6B91\u6B8F\u6B8D\u6B8E\u6B8C\u6C2A\u6DC0\u6DAB\u6DB4\u6DB3\u6E74\u6DAC\u6DE9\u6DE2\u6DB7\u6DF6\u6DD4\u6E00\u6DC8\u6DE0\u6DDF\u6DD6\u6DBE\u6DE5\u6DDC\u6DDD\u6DDB\u6DF4\u6DCA\u6DBD\u6DED\u6DF0\u6DBA\u6DD5\u6DC2\u6DCF\u6DC9\u6DD0\u6DF2\u6DD3\u6DFD\u6DD7\u6DCD\u6DE3\u6DBB\u70FA\u710D\u70F7\u7117\u70F4\u710C\u70F0\u7104\u70F3\u7110\u70FC\u70FF\u7106\u7113\u7100\u70F8\u70F6\u710B\u7102\u710E\u727E\u727B\u727C\u727F\u731D\u7317\u7307\u7311\u7318\u730A\u7308\u72FF\u730F\u731E\u7388\u73F6\u73F8\u73F5\u7404\u7401\u73FD\u7407\u7400\u73FA\u73FC\u73FF\u740C\u740B\u73F4\u7408\u7564\u7563\u75CE\u75D2\u75CF\u75CB\u75CC\u75D1\u75D0\u768F\u7689\u76D3\u7739\u772F\u772D\u7731\u7732\u7734\u7733\u773D\u7725\u773B\u7735\u7848\u7852\u7849\u784D\u784A\u784C\u7826\u7845\u7850\u7964\u7967\u7969\u796A\u7963\u796B\u7961\u79BB\u79FA\u79F8\u79F6\u79F7\u7A8F\u7A94\u7A90\u7B35\u7B47\u7B34\u7B25\u7B30\u7B22\u7B24\u7B33\u7B18\u7B2A\u7B1D\u7B31\u7B2B\u7B2D\u7B2F\u7B32\u7B38\u7B1A\u7B23\u7C94\u7C98\u7C96\u7CA3\u7D35\u7D3D\u7D38\u7D36\u7D3A\u7D45\u7D2C\u7D29\u7D41\u7D47\u7D3E\u7D3F\u7D4A\u7D3B\u7D28\u7F63\u7F95\u7F9C\u7F9D\u7F9B\u7FCA\u7FCB\u7FCD\u7FD0\u7FD1\u7FC7\u7FCF\u7FC9\u801F\u801E\u801B\u8047\u8043\u8048\u8118\u8125\u8119\u811B\u812D\u811F\u812C\u811E\u8121\u8115\u8127\u811D\u8122\u8211\u8238\u8233\u823A\u8234\u8232\u8274\u8390\u83A3\u83A8\u838D\u837A\u8373\u83A4\u8374\u838F\u8381\u8395\u8399\u8375\u8394\u83A9\u837D\u8383\u838C\u839D\u839B\u83AA\u838B\u837E\u83A5\u83AF\u8388\u8397\u83B0\u837F\u83A6\u8387\u83AE\u8376\u839A\u8659\u8656\u86BF\u86B7\u86C2\u86C1\u86C5\u86BA\u86B0\u86C8\u86B9\u86B3\u86B8\u86CC\u86B4\u86BB\u86BC\u86C3\u86BD\u86
BE\u8852\u8889\u8895\u88A8\u88A2\u88AA\u889A\u8891\u88A1\u889F\u8898\u88A7\u8899\u889B\u8897\u88A4\u88AC\u888C\u8893\u888E\u8982\u89D6\u89D9\u89D5\u8A30\u8A27\u8A2C\u8A1E\u8C39\u8C3B\u8C5C\u8C5D\u8C7D\u8CA5\u8D7D\u8D7B\u8D79\u8DBC\u8DC2\u8DB9\u8DBF\u8DC1\u8ED8\u8EDE\u8EDD\u8EDC\u8ED7\u8EE0\u8EE1\u9024\u900B\u9011\u901C\u900C\u9021\u90EF\u90EA\u90F0\u90F4\u90F2\u90F3\u90D4\u90EB\u90EC\u90E9\u9156\u9158\u915A\u9153\u9155\u91EC\u91F4\u91F1\u91F3\u91F8\u91E4\u91F9\u91EA\u91EB\u91F7\u91E8\u91EE\u957A\u9586\u9588\u967C\u966D\u966B\u9671\u966F\u96BF\u976A\u9804\u98E5\u9997\u509B\u5095\u5094\u509E\u508B\u50A3\u5083\u508C\u508E\u509D\u5068\u509C\u5092\u5082\u5087\u515F\u51D4\u5312\u5311\u53A4\u53A7\u5591\u55A8\u55A5\u55AD\u5577\u5645\u55A2\u5593\u5588\u558F\u55B5\u5581\u55A3\u5592\u55A4\u557D\u558C\u55A6\u557F\u5595\u55A1\u558E\u570C\u5829\u5837\u5819\u581E\u5827\u5823\u5828\u57F5\u5848\u5825\u581C\u581B\u5833\u583F\u5836\u582E\u5839\u5838\u582D\u582C\u583B\u5961\u5AAF\u5A94\u5A9F\u5A7A\u5AA2\u5A9E\u5A78\u5AA6\u5A7C\u5AA5\u5AAC\u5A95\u5AAE\u5A37\u5A84\u5A8A\u5A97\u5A83\u5A8B\u5AA9\u5A7B\u5A7D\u5A8C\u5A9C\u5A8F\u5A93\u5A9D\u5BEA\u5BCD\u5BCB\u5BD4\u5BD1\u5BCA\u5BCE\u5C0C\u5C30\u5D37\u5D43\u5D6B\u5D41\u5D4B\u5D3F\u5D35\u5D51\u5D4E\u5D55\u5D33\u5D3A\u5D52\u5D3D\u5D31\u5D59\u5D42\u5D39\u5D49\u5D38\u5D3C\u5D32\u5D36\u5D40\u5D45\u5E44\u5E41\u5F58\u5FA6\u5FA5\u5FAB\u60C9\u60B9\u60CC\u60E2\u60CE\u60C4\u6114\u60F2\u610A\u6116\u6105\u60F5\u6113\u60F8\u60FC\u60FE\u60C1\u6103\u6118\u611D\u6110\u60FF\u6104\u610B\u624A\u6394\u63B1\u63B0\u63CE\u63E5\u63E8\u63EF\u63C3\u649D\u63F3\u63CA\u63E0\u63F6\u63D5\u63F2\u63F5\u6461\u63DF\u63BE\u63DD\u63DC\u63C4\u63D8\u63D3\u63C2\u63C7\u63CC\u63CB\u63C8\u63F0\u63D7\u63D9\u6532\u6567\u656A\u6564\u655C\u6568\u6565\u658C\u659D\u659E\u65AE\u65D0\u65D2\u667C\u666C\u667B\u6680\u6671\u6679\u666A\u6672\u6701\u690C\u68D3\u6904\u68DC\u692A\u68EC\u68EA\u68F1\u690F\u68D6\u68F7\u68EB\u68E4\u68F6\u6913\u6910\u68F3\u68E1\u6907\u68CC\u6908\u6970\u68B4\u6911\u68EF\u68C6
\u6914\u68F8\u68D0\u68FD\u68FC\u68E8\u690B\u690A\u6917\u68CE\u68C8\u68DD\u68DE\u68E6\u68F4\u68D1\u6906\u68D4\u68E9\u6915\u6925\u68C7\u6B39\u6B3B\u6B3F\u6B3C\u6B94\u6B97\u6B99\u6B95\u6BBD\u6BF0\u6BF2\u6BF3\u6C30\u6DFC\u6E46\u6E47\u6E1F\u6E49\u6E88\u6E3C\u6E3D\u6E45\u6E62\u6E2B\u6E3F\u6E41\u6E5D\u6E73\u6E1C\u6E33\u6E4B\u6E40\u6E51\u6E3B\u6E03\u6E2E\u6E5E\u6E68\u6E5C\u6E61\u6E31\u6E28\u6E60\u6E71\u6E6B\u6E39\u6E22\u6E30\u6E53\u6E65\u6E27\u6E78\u6E64\u6E77\u6E55\u6E79\u6E52\u6E66\u6E35\u6E36\u6E5A\u7120\u711E\u712F\u70FB\u712E\u7131\u7123\u7125\u7122\u7132\u711F\u7128\u713A\u711B\u724B\u725A\u7288\u7289\u7286\u7285\u728B\u7312\u730B\u7330\u7322\u7331\u7333\u7327\u7332\u732D\u7326\u7323\u7335\u730C\u742E\u742C\u7430\u742B\u7416\u741A\u7421\u742D\u7431\u7424\u7423\u741D\u7429\u7420\u7432\u74FB\u752F\u756F\u756C\u75E7\u75DA\u75E1\u75E6\u75DD\u75DF\u75E4\u75D7\u7695\u7692\u76DA\u7746\u7747\u7744\u774D\u7745\u774A\u774E\u774B\u774C\u77DE\u77EC\u7860\u7864\u7865\u785C\u786D\u7871\u786A\u786E\u7870\u7869\u7868\u785E\u7862\u7974\u7973\u7972\u7970\u7A02\u7A0A\u7A03\u7A0C\u7A04\u7A99\u7AE6\u7AE4\u7B4A\u7B3B\u7B44\u7B48\u7B4C\u7B4E\u7B40\u7B58\u7B45\u7CA2\u7C9E\u7CA8\u7CA1\u7D58\u7D6F\u7D63\u7D53\u7D56\u7D67\u7D6A\u7D4F\u7D6D\u7D5C\u7D6B\u7D52\u7D54\u7D69\u7D51\u7D5F\u7D4E\u7F3E\u7F3F\u7F65\u7F66\u7FA2\u7FA0\u7FA1\u7FD7\u8051\u804F\u8050\u80FE\u80D4\u8143\u814A\u8152\u814F\u8147\u813D\u814D\u813A\u81E6\u81EE\u81F7\u81F8\u81F9\u8204\u823C\u823D\u823F\u8275\u833B\u83CF\u83F9\u8423\u83C0\u83E8\u8412\u83E7\u83E4\u83FC\u83F6\u8410\u83C6\u83C8\u83EB\u83E3\u83BF\u8401\u83DD\u83E5\u83D8\u83FF\u83E1\u83CB\u83CE\u83D6\u83F5\u83C9\u8409\u840F\u83DE\u8411\u8406\u83C2\u83F3\u83D5\u83FA\u83C7\u83D1\u83EA\u8413\u83C3\u83EC\u83EE\u83C4\u83FB\u83D7\u83E2\u841B\u83DB\u83FE\u86D8\u86E2\u86E6\u86D3\u86E3\u86DA\u86EA\u86DD\u86EB\u86DC\u86EC\u86E9\u86D7\u86E8\u86D1\u8848\u8856\u8855\u88BA\u88D7\u88B9\u88B8\u88C0\u88BE\u88B6\u88BC\u88B7\u88BD\u88B2\u8901\u88C9\u8995\u8998\u8997\u89DD\u89DA\u89DB\u8A4E\u
8A4D\u8A39\u8A59\u8A40\u8A57\u8A58\u8A44\u8A45\u8A52\u8A48\u8A51\u8A4A\u8A4C\u8A4F\u8C5F\u8C81\u8C80\u8CBA\u8CBE\u8CB0\u8CB9\u8CB5\u8D84\u8D80\u8D89\u8DD8\u8DD3\u8DCD\u8DC7\u8DD6\u8DDC\u8DCF\u8DD5\u8DD9\u8DC8\u8DD7\u8DC5\u8EEF\u8EF7\u8EFA\u8EF9\u8EE6\u8EEE\u8EE5\u8EF5\u8EE7\u8EE8\u8EF6\u8EEB\u8EF1\u8EEC\u8EF4\u8EE9\u902D\u9034\u902F\u9106\u912C\u9104\u90FF\u90FC\u9108\u90F9\u90FB\u9101\u9100\u9107\u9105\u9103\u9161\u9164\u915F\u9162\u9160\u9201\u920A\u9225\u9203\u921A\u9226\u920F\u920C\u9200\u9212\u91FF\u91FD\u9206\u9204\u9227\u9202\u921C\u9224\u9219\u9217\u9205\u9216\u957B\u958D\u958C\u9590\u9687\u967E\u9688\u9689\u9683\u9680\u96C2\u96C8\u96C3\u96F1\u96F0\u976C\u9770\u976E\u9807\u98A9\u98EB\u9CE6\u9EF9\u4E83\u4E84\u4EB6\u50BD\u50BF\u50C6\u50AE\u50C4\u50CA\u50B4\u50C8\u50C2\u50B0\u50C1\u50BA\u50B1\u50CB\u50C9\u50B6\u50B8\u51D7\u527A\u5278\u527B\u527C\u55C3\u55DB\u55CC\u55D0\u55CB\u55CA\u55DD\u55C0\u55D4\u55C4\u55E9\u55BF\u55D2\u558D\u55CF\u55D5\u55E2\u55D6\u55C8\u55F2\u55CD\u55D9\u55C2\u5714\u5853\u5868\u5864\u584F\u584D\u5849\u586F\u5855\u584E\u585D\u5859\u5865\u585B\u583D\u5863\u5871\u58FC\u5AC7\u5AC4\u5ACB\u5ABA\u5AB8\u5AB1\u5AB5\u5AB0\u5ABF\u5AC8\u5ABB\u5AC6\u5AB7\u5AC0\u5ACA\u5AB4\u5AB6\u5ACD\u5AB9\u5A90\u5BD6\u5BD8\u5BD9\u5C1F\u5C33\u5D71\u5D63\u5D4A\u5D65\u5D72\u5D6C\u5D5E\u5D68\u5D67\u5D62\u5DF0\u5E4F\u5E4E\u5E4A\u5E4D\u5E4B\u5EC5\u5ECC\u5EC6\u5ECB\u5EC7\u5F40\u5FAF\u5FAD\u60F7\u6149\u614A\u612B\u6145\u6136\u6132\u612E\u6146\u612F\u614F\u6129\u6140\u6220\u9168\u6223\u6225\u6224\u63C5\u63F1\u63EB\u6410\u6412\u6409\u6420\u6424\u6433\u6443\u641F\u6415\u6418\u6439\u6437\u6422\u6423\u640C\u6426\u6430\u6428\u6441\u6435\u642F\u640A\u641A\u6440\u6425\u6427\u640B\u63E7\u641B\u642E\u6421\u640E\u656F\u6592\u65D3\u6686\u668C\u6695\u6690\u668B\u668A\u6699\u6694\u6678\u6720\u6966\u695F\u6938\u694E\u6962\u6971\u693F\u6945\u696A\u6939\u6942\u6957\u6959\u697A\u6948\u6949\u6935\u696C\u6933\u693D\u6965\u68F0\u6978\u6934\u6969\u6940\u696F\u6944\u6976\u6958\u6941\u6974\u694C\u69
3B\u694B\u6937\u695C\u694F\u6951\u6932\u6952\u692F\u697B\u693C\u6B46\u6B45\u6B43\u6B42\u6B48\u6B41\u6B9B\uFA0D\u6BFB\u6BFC\u6BF9\u6BF7\u6BF8\u6E9B\u6ED6\u6EC8\u6E8F\u6EC0\u6E9F\u6E93\u6E94\u6EA0\u6EB1\u6EB9\u6EC6\u6ED2\u6EBD\u6EC1\u6E9E\u6EC9\u6EB7\u6EB0\u6ECD\u6EA6\u6ECF\u6EB2\u6EBE\u6EC3\u6EDC\u6ED8\u6E99\u6E92\u6E8E\u6E8D\u6EA4\u6EA1\u6EBF\u6EB3\u6ED0\u6ECA\u6E97\u6EAE\u6EA3\u7147\u7154\u7152\u7163\u7160\u7141\u715D\u7162\u7172\u7178\u716A\u7161\u7142\u7158\u7143\u714B\u7170\u715F\u7150\u7153\u7144\u714D\u715A\u724F\u728D\u728C\u7291\u7290\u728E\u733C\u7342\u733B\u733A\u7340\u734A\u7349\u7444\u744A\u744B\u7452\u7451\u7457\u7440\u744F\u7450\u744E\u7442\u7446\u744D\u7454\u74E1\u74FF\u74FE\u74FD\u751D\u7579\u7577\u6983\u75EF\u760F\u7603\u75F7\u75FE\u75FC\u75F9\u75F8\u7610\u75FB\u75F6\u75ED\u75F5\u75FD\u7699\u76B5\u76DD\u7755\u775F\u7760\u7752\u7756\u775A\u7769\u7767\u7754\u7759\u776D\u77E0\u7887\u789A\u7894\u788F\u7884\u7895\u7885\u7886\u78A1\u7883\u7879\u7899\u7880\u7896\u787B\u797C\u7982\u797D\u7979\u7A11\u7A18\u7A19\u7A12\u7A17\u7A15\u7A22\u7A13\u7A1B\u7A10\u7AA3\u7AA2\u7A9E\u7AEB\u7B66\u7B64\u7B6D\u7B74\u7B69\u7B72\u7B65\u7B73\u7B71\u7B70\u7B61\u7B78\u7B76\u7B63\u7CB2\u7CB4\u7CAF\u7D88\u7D86\u7D80\u7D8D\u7D7F\u7D85\u7D7A\u7D8E\u7D7B\u7D83\u7D7C\u7D8C\u7D94\u7D84\u7D7D\u7D92\u7F6D\u7F6B\u7F67\u7F68\u7F6C\u7FA6\u7FA5\u7FA7\u7FDB\u7FDC\u8021\u8164\u8160\u8177\u815C\u8169\u815B\u8162\u8172\u6721\u815E\u8176\u8167\u816F\u8144\u8161\u821D\u8249\u8244\u8240\u8242\u8245\u84F1\u843F\u8456\u8476\u8479\u848F\u848D\u8465\u8451\u8440\u8486\u8467\u8430\u844D\u847D\u845A\u8459\u8474\u8473\u845D\u8507\u845E\u8437\u843A\u8434\u847A\u8443\u8478\u8432\u8445\u8429\u83D9\u844B\u842F\u8442\u842D\u845F\u8470\u8439\u844E\u844C\u8452\u846F\u84C5\u848E\u843B\u8447\u8436\u8433\u8468\u847E\u8444\u842B\u8460\u8454\u846E\u8450\u870B\u8704\u86F7\u870C\u86FA\u86D6\u86F5\u874D\u86F8\u870E\u8709\u8701\u86F6\u870D\u8705\u88D6\u88CB\u88CD\u88CE\u88DE\u88DB\u88DA\u88CC\u88D0\u8985\u899B\u89DF\u89E5
\u89E4\u89E1\u89E0\u89E2\u89DC\u89E6\u8A76\u8A86\u8A7F\u8A61\u8A3F\u8A77\u8A82\u8A84\u8A75\u8A83\u8A81\u8A74\u8A7A\u8C3C\u8C4B\u8C4A\u8C65\u8C64\u8C66\u8C86\u8C84\u8C85\u8CCC\u8D68\u8D69\u8D91\u8D8C\u8D8E\u8D8F\u8D8D\u8D93\u8D94\u8D90\u8D92\u8DF0\u8DE0\u8DEC\u8DF1\u8DEE\u8DD0\u8DE9\u8DE3\u8DE2\u8DE7\u8DF2\u8DEB\u8DF4\u8F06\u8EFF\u8F01\u8F00\u8F05\u8F07\u8F08\u8F02\u8F0B\u9052\u903F\u9044\u9049\u903D\u9110\u910D\u910F\u9111\u9116\u9114\u910B\u910E\u916E\u916F\u9248\u9252\u9230\u923A\u9266\u9233\u9265\u925E\u9283\u922E\u924A\u9246\u926D\u926C\u924F\u9260\u9267\u926F\u9236\u9261\u9270\u9231\u9254\u9263\u9250\u9272\u924E\u9253\u924C\u9256\u9232\u959F\u959C\u959E\u959B\u9692\u9693\u9691\u9697\u96CE\u96FA\u96FD\u96F8\u96F5\u9773\u9777\u9778\u9772\u980F\u980D\u980E\u98AC\u98F6\u98F9\u99AF\u99B2\u99B0\u99B5\u9AAD\u9AAB\u9B5B\u9CEA\u9CED\u9CE7\u9E80\u9EFD\u50E6\u50D4\u50D7\u50E8\u50F3\u50DB\u50EA\u50DD\u50E4\u50D3\u50EC\u50F0\u50EF\u50E3\u50E0\u51D8\u5280\u5281\u52E9\u52EB\u5330\u53AC\u5627\u5615\u560C\u5612\u55FC\u560F\u561C\u5601\u5613\u5602\u55FA\u561D\u5604\u55FF\u55F9\u5889\u587C\u5890\u5898\u5886\u5881\u587F\u5874\u588B\u587A\u5887\u5891\u588E\u5876\u5882\u5888\u587B\u5894\u588F\u58FE\u596B\u5ADC\u5AEE\u5AE5\u5AD5\u5AEA\u5ADA\u5AED\u5AEB\u5AF3\u5AE2\u5AE0\u5ADB\u5AEC\u5ADE\u5ADD\u5AD9\u5AE8\u5ADF\u5B77\u5BE0\u5BE3\u5C63\u5D82\u5D80\u5D7D\u5D86\u5D7A\u5D81\u5D77\u5D8A\u5D89\u5D88\u5D7E\u5D7C\u5D8D\u5D79\u5D7F\u5E58\u5E59\u5E53\u5ED8\u5ED1\u5ED7\u5ECE\u5EDC\u5ED5\u5ED9\u5ED2\u5ED4\u5F44\u5F43\u5F6F\u5FB6\u612C\u6128\u6141\u615E\u6171\u6173\u6152\u6153\u6172\u616C\u6180\u6174\u6154\u617A\u615B\u6165\u613B\u616A\u6161\u6156\u6229\u6227\u622B\u642B\u644D\u645B\u645D\u6474\u6476\u6472\u6473\u647D\u6475\u6466\u64A6\u644E\u6482\u645E\u645C\u644B\u6453\u6460\u6450\u647F\u643F\u646C\u646B\u6459\u6465\u6477\u6573\u65A0\u66A1\u66A0\u669F\u6705\u6704\u6722\u69B1\u69B6\u69C9\u69A0\u69CE\u6996\u69B0\u69AC\u69BC\u6991\u6999\u698E\u69A7\u698D\u69A9\u69BE\u69AF\u69BF\u69C4\u69BD\u69A4\u
69D4\u69B9\u69CA\u699A\u69CF\u69B3\u6993\u69AA\u69A1\u699E\u69D9\u6997\u6990\u69C2\u69B5\u69A5\u69C6\u6B4A\u6B4D\u6B4B\u6B9E\u6B9F\u6BA0\u6BC3\u6BC4\u6BFE\u6ECE\u6EF5\u6EF1\u6F03\u6F25\u6EF8\u6F37\u6EFB\u6F2E\u6F09\u6F4E\u6F19\u6F1A\u6F27\u6F18\u6F3B\u6F12\u6EED\u6F0A\u6F36\u6F73\u6EF9\u6EEE\u6F2D\u6F40\u6F30\u6F3C\u6F35\u6EEB\u6F07\u6F0E\u6F43\u6F05\u6EFD\u6EF6\u6F39\u6F1C\u6EFC\u6F3A\u6F1F\u6F0D\u6F1E\u6F08\u6F21\u7187\u7190\u7189\u7180\u7185\u7182\u718F\u717B\u7186\u7181\u7197\u7244\u7253\u7297\u7295\u7293\u7343\u734D\u7351\u734C\u7462\u7473\u7471\u7475\u7472\u7467\u746E\u7500\u7502\u7503\u757D\u7590\u7616\u7608\u760C\u7615\u7611\u760A\u7614\u76B8\u7781\u777C\u7785\u7782\u776E\u7780\u776F\u777E\u7783\u78B2\u78AA\u78B4\u78AD\u78A8\u787E\u78AB\u789E\u78A5\u78A0\u78AC\u78A2\u78A4\u7998\u798A\u798B\u7996\u7995\u7994\u7993\u7997\u7988\u7992\u7990\u7A2B\u7A4A\u7A30\u7A2F\u7A28\u7A26\u7AA8\u7AAB\u7AAC\u7AEE\u7B88\u7B9C\u7B8A\u7B91\u7B90\u7B96\u7B8D\u7B8C\u7B9B\u7B8E\u7B85\u7B98\u5284\u7B99\u7BA4\u7B82\u7CBB\u7CBF\u7CBC\u7CBA\u7DA7\u7DB7\u7DC2\u7DA3\u7DAA\u7DC1\u7DC0\u7DC5\u7D9D\u7DCE\u7DC4\u7DC6\u7DCB\u7DCC\u7DAF\u7DB9\u7D96\u7DBC\u7D9F\u7DA6\u7DAE\u7DA9\u7DA1\u7DC9\u7F73\u7FE2\u7FE3\u7FE5\u7FDE\u8024\u805D\u805C\u8189\u8186\u8183\u8187\u818D\u818C\u818B\u8215\u8497\u84A4\u84A1\u849F\u84BA\u84CE\u84C2\u84AC\u84AE\u84AB\u84B9\u84B4\u84C1\u84CD\u84AA\u849A\u84B1\u84D0\u849D\u84A7\u84BB\u84A2\u8494\u84C7\u84CC\u849B\u84A9\u84AF\u84A8\u84D6\u8498\u84B6\u84CF\u84A0\u84D7\u84D4\u84D2\u84DB\u84B0\u8491\u8661\u8733\u8723\u8728\u876B\u8740\u872E\u871E\u8721\u8719\u871B\u8743\u872C\u8741\u873E\u8746\u8720\u8732\u872A\u872D\u873C\u8712\u873A\u8731\u8735\u8742\u8726\u8727\u8738\u8724\u871A\u8730\u8711\u88F7\u88E7\u88F1\u88F2\u88FA\u88FE\u88EE\u88FC\u88F6\u88FB\u88F0\u88EC\u88EB\u899D\u89A1\u899F\u899E\u89E9\u89EB\u89E8\u8AAB\u8A99\u8A8B\u8A92\u8A8F\u8A96\u8C3D\u8C68\u8C69\u8CD5\u8CCF\u8CD7\u8D96\u8E09\u8E02\u8DFF\u8E0D\u8DFD\u8E0A\u8E03\u8E07\u8E06\u8E05\u8DFE\u8E00\u8E04\u8F10\u8F
11\u8F0E\u8F0D\u9123\u911C\u9120\u9122\u911F\u911D\u911A\u9124\u9121\u911B\u917A\u9172\u9179\u9173\u92A5\u92A4\u9276\u929B\u927A\u92A0\u9294\u92AA\u928D\u92A6\u929A\u92AB\u9279\u9297\u927F\u92A3\u92EE\u928E\u9282\u9295\u92A2\u927D\u9288\u92A1\u928A\u9286\u928C\u9299\u92A7\u927E\u9287\u92A9\u929D\u928B\u922D\u969E\u96A1\u96FF\u9758\u977D\u977A\u977E\u9783\u9780\u9782\u977B\u9784\u9781\u977F\u97CE\u97CD\u9816\u98AD\u98AE\u9902\u9900\u9907\u999D\u999C\u99C3\u99B9\u99BB\u99BA\u99C2\u99BD\u99C7\u9AB1\u9AE3\u9AE7\u9B3E\u9B3F\u9B60\u9B61\u9B5F\u9CF1\u9CF2\u9CF5\u9EA7\u50FF\u5103\u5130\u50F8\u5106\u5107\u50F6\u50FE\u510B\u510C\u50FD\u510A\u528B\u528C\u52F1\u52EF\u5648\u5642\u564C\u5635\u5641\u564A\u5649\u5646\u5658\u565A\u5640\u5633\u563D\u562C\u563E\u5638\u562A\u563A\u571A\u58AB\u589D\u58B1\u58A0\u58A3\u58AF\u58AC\u58A5\u58A1\u58FF\u5AFF\u5AF4\u5AFD\u5AF7\u5AF6\u5B03\u5AF8\u5B02\u5AF9\u5B01\u5B07\u5B05\u5B0F\u5C67\u5D99\u5D97\u5D9F\u5D92\u5DA2\u5D93\u5D95\u5DA0\u5D9C\u5DA1\u5D9A\u5D9E\u5E69\u5E5D\u5E60\u5E5C\u7DF3\u5EDB\u5EDE\u5EE1\u5F49\u5FB2\u618B\u6183\u6179\u61B1\u61B0\u61A2\u6189\u619B\u6193\u61AF\u61AD\u619F\u6192\u61AA\u61A1\u618D\u6166\u61B3\u622D\u646E\u6470\u6496\u64A0\u6485\u6497\u649C\u648F\u648B\u648A\u648C\u64A3\u649F\u6468\u64B1\u6498\u6576\u657A\u6579\u657B\u65B2\u65B3\u66B5\u66B0\u66A9\u66B2\u66B7\u66AA\u66AF\u6A00\u6A06\u6A17\u69E5\u69F8\u6A15\u69F1\u69E4\u6A20\u69FF\u69EC\u69E2\u6A1B\u6A1D\u69FE\u6A27\u69F2\u69EE\u6A14\u69F7\u69E7\u6A40\u6A08\u69E6\u69FB\u6A0D\u69FC\u69EB\u6A09\u6A04\u6A18\u6A25\u6A0F\u69F6\u6A26\u6A07\u69F4\u6A16\u6B51\u6BA5\u6BA3\u6BA2\u6BA6\u6C01\u6C00\u6BFF\u6C02\u6F41\u6F26\u6F7E\u6F87\u6FC6\u6F92\u6F8D\u6F89\u6F8C\u6F62\u6F4F\u6F85\u6F5A\u6F96\u6F76\u6F6C\u6F82\u6F55\u6F72\u6F52\u6F50\u6F57\u6F94\u6F93\u6F5D\u6F00\u6F61\u6F6B\u6F7D\u6F67\u6F90\u6F53\u6F8B\u6F69\u6F7F\u6F95\u6F63\u6F77\u6F6A\u6F7B\u71B2\u71AF\u719B\u71B0\u71A0\u719A\u71A9\u71B5\u719D\u71A5\u719E\u71A4\u71A1\u71AA\u719C\u71A7\u71B3\u7298\u729A\u7358\u7352\u735E\u735F
\u7360\u735D\u735B\u7361\u735A\u7359\u7362\u7487\u7489\u748A\u7486\u7481\u747D\u7485\u7488\u747C\u7479\u7508\u7507\u757E\u7625\u761E\u7619\u761D\u761C\u7623\u761A\u7628\u761B\u769C\u769D\u769E\u769B\u778D\u778F\u7789\u7788\u78CD\u78BB\u78CF\u78CC\u78D1\u78CE\u78D4\u78C8\u78C3\u78C4\u78C9\u799A\u79A1\u79A0\u799C\u79A2\u799B\u6B76\u7A39\u7AB2\u7AB4\u7AB3\u7BB7\u7BCB\u7BBE\u7BAC\u7BCE\u7BAF\u7BB9\u7BCA\u7BB5\u7CC5\u7CC8\u7CCC\u7CCB\u7DF7\u7DDB\u7DEA\u7DE7\u7DD7\u7DE1\u7E03\u7DFA\u7DE6\u7DF6\u7DF1\u7DF0\u7DEE\u7DDF\u7F76\u7FAC\u7FB0\u7FAD\u7FED\u7FEB\u7FEA\u7FEC\u7FE6\u7FE8\u8064\u8067\u81A3\u819F\u819E\u8195\u81A2\u8199\u8197\u8216\u824F\u8253\u8252\u8250\u824E\u8251\u8524\u853B\u850F\u8500\u8529\u850E\u8509\u850D\u851F\u850A\u8527\u851C\u84FB\u852B\u84FA\u8508\u850C\u84F4\u852A\u84F2\u8515\u84F7\u84EB\u84F3\u84FC\u8512\u84EA\u84E9\u8516\u84FE\u8528\u851D\u852E\u8502\u84FD\u851E\u84F6\u8531\u8526\u84E7\u84E8\u84F0\u84EF\u84F9\u8518\u8520\u8530\u850B\u8519\u852F\u8662\u8756\u8763\u8764\u8777\u87E1\u8773\u8758\u8754\u875B\u8752\u8761\u875A\u8751\u875E\u876D\u876A\u8750\u874E\u875F\u875D\u876F\u876C\u877A\u876E\u875C\u8765\u874F\u877B\u8775\u8762\u8767\u8769\u885A\u8905\u890C\u8914\u890B\u8917\u8918\u8919\u8906\u8916\u8911\u890E\u8909\u89A2\u89A4\u89A3\u89ED\u89F0\u89EC\u8ACF\u8AC6\u8AB8\u8AD3\u8AD1\u8AD4\u8AD5\u8ABB\u8AD7\u8ABE\u8AC0\u8AC5\u8AD8\u8AC3\u8ABA\u8ABD\u8AD9\u8C3E\u8C4D\u8C8F\u8CE5\u8CDF\u8CD9\u8CE8\u8CDA\u8CDD\u8CE7\u8DA0\u8D9C\u8DA1\u8D9B\u8E20\u8E23\u8E25\u8E24\u8E2E\u8E15\u8E1B\u8E16\u8E11\u8E19\u8E26\u8E27\u8E14\u8E12\u8E18\u8E13\u8E1C\u8E17\u8E1A\u8F2C\u8F24\u8F18\u8F1A\u8F20\u8F23\u8F16\u8F17\u9073\u9070\u906F\u9067\u906B\u912F\u912B\u9129\u912A\u9132\u9126\u912E\u9185\u9186\u918A\u9181\u9182\u9184\u9180\u92D0\u92C3\u92C4\u92C0\u92D9\u92B6\u92CF\u92F1\u92DF\u92D8\u92E9\u92D7\u92DD\u92CC\u92EF\u92C2\u92E8\u92CA\u92C8\u92CE\u92E6\u92CD\u92D5\u92C9\u92E0\u92DE\u92E7\u92D1\u92D3\u92B5\u92E1\u92C6\u92B4\u957C\u95AC\u95AB\u95AE\u95B0\u96A4\u96A2\u96D3\u9705\u
9708\u9702\u975A\u978A\u978E\u9788\u97D0\u97CF\u981E\u981D\u9826\u9829\u9828\u9820\u981B\u9827\u98B2\u9908\u98FA\u9911\u9914\u9916\u9917\u9915\u99DC\u99CD\u99CF\u99D3\u99D4\u99CE\u99C9\u99D6\u99D8\u99CB\u99D7\u99CC\u9AB3\u9AEC\u9AEB\u9AF3\u9AF2\u9AF1\u9B46\u9B43\u9B67\u9B74\u9B71\u9B66\u9B76\u9B75\u9B70\u9B68\u9B64\u9B6C\u9CFC\u9CFA\u9CFD\u9CFF\u9CF7\u9D07\u9D00\u9CF9\u9CFB\u9D08\u9D05\u9D04\u9E83\u9ED3\u9F0F\u9F10\u511C\u5113\u5117\u511A\u5111\u51DE\u5334\u53E1\u5670\u5660\u566E\u5673\u5666\u5663\u566D\u5672\u565E\u5677\u571C\u571B\u58C8\u58BD\u58C9\u58BF\u58BA\u58C2\u58BC\u58C6\u5B17\u5B19\u5B1B\u5B21\u5B14\u5B13\u5B10\u5B16\u5B28\u5B1A\u5B20\u5B1E\u5BEF\u5DAC\u5DB1\u5DA9\u5DA7\u5DB5\u5DB0\u5DAE\u5DAA\u5DA8\u5DB2\u5DAD\u5DAF\u5DB4\u5E67\u5E68\u5E66\u5E6F\u5EE9\u5EE7\u5EE6\u5EE8\u5EE5\u5F4B\u5FBC\u619D\u61A8\u6196\u61C5\u61B4\u61C6\u61C1\u61CC\u61BA\u61BF\u61B8\u618C\u64D7\u64D6\u64D0\u64CF\u64C9\u64BD\u6489\u64C3\u64DB\u64F3\u64D9\u6533\u657F\u657C\u65A2\u66C8\u66BE\u66C0\u66CA\u66CB\u66CF\u66BD\u66BB\u66BA\u66CC\u6723\u6A34\u6A66\u6A49\u6A67\u6A32\u6A68\u6A3E\u6A5D\u6A6D\u6A76\u6A5B\u6A51\u6A28\u6A5A\u6A3B\u6A3F\u6A41\u6A6A\u6A64\u6A50\u6A4F\u6A54\u6A6F\u6A69\u6A60\u6A3C\u6A5E\u6A56\u6A55\u6A4D\u6A4E\u6A46\u6B55\u6B54\u6B56\u6BA7\u6BAA\u6BAB\u6BC8\u6BC7\u6C04\u6C03\u6C06\u6FAD\u6FCB\u6FA3\u6FC7\u6FBC\u6FCE\u6FC8\u6F5E\u6FC4\u6FBD\u6F9E\u6FCA\u6FA8\u7004\u6FA5\u6FAE\u6FBA\u6FAC\u6FAA\u6FCF\u6FBF\u6FB8\u6FA2\u6FC9\u6FAB\u6FCD\u6FAF\u6FB2\u6FB0\u71C5\u71C2\u71BF\u71B8\u71D6\u71C0\u71C1\u71CB\u71D4\u71CA\u71C7\u71CF\u71BD\u71D8\u71BC\u71C6\u71DA\u71DB\u729D\u729E\u7369\u7366\u7367\u736C\u7365\u736B\u736A\u747F\u749A\u74A0\u7494\u7492\u7495\u74A1\u750B\u7580\u762F\u762D\u7631\u763D\u7633\u763C\u7635\u7632\u7630\u76BB\u76E6\u779A\u779D\u77A1\u779C\u779B\u77A2\u77A3\u7795\u7799\u7797\u78DD\u78E9\u78E5\u78EA\u78DE\u78E3\u78DB\u78E1\u78E2\u78ED\u78DF\u78E0\u79A4\u7A44\u7A48\u7A47\u7AB6\u7AB8\u7AB5\u7AB1\u7AB7\u7BDE\u7BE3\u7BE7\u7BDD\u7BD5\u7BE5\u7BDA\u7BE8\u7BF9\u7BD4\u7B
EA\u7BE2\u7BDC\u7BEB\u7BD8\u7BDF\u7CD2\u7CD4\u7CD7\u7CD0\u7CD1\u7E12\u7E21\u7E17\u7E0C\u7E1F\u7E20\u7E13\u7E0E\u7E1C\u7E15\u7E1A\u7E22\u7E0B\u7E0F\u7E16\u7E0D\u7E14\u7E25\u7E24\u7F43\u7F7B\u7F7C\u7F7A\u7FB1\u7FEF\u802A\u8029\u806C\u81B1\u81A6\u81AE\u81B9\u81B5\u81AB\u81B0\u81AC\u81B4\u81B2\u81B7\u81A7\u81F2\u8255\u8256\u8257\u8556\u8545\u856B\u854D\u8553\u8561\u8558\u8540\u8546\u8564\u8541\u8562\u8544\u8551\u8547\u8563\u853E\u855B\u8571\u854E\u856E\u8575\u8555\u8567\u8560\u858C\u8566\u855D\u8554\u8565\u856C\u8663\u8665\u8664\u879B\u878F\u8797\u8793\u8792\u8788\u8781\u8796\u8798\u8779\u8787\u87A3\u8785\u8790\u8791\u879D\u8784\u8794\u879C\u879A\u8789\u891E\u8926\u8930\u892D\u892E\u8927\u8931\u8922\u8929\u8923\u892F\u892C\u891F\u89F1\u8AE0\u8AE2\u8AF2\u8AF4\u8AF5\u8ADD\u8B14\u8AE4\u8ADF\u8AF0\u8AC8\u8ADE\u8AE1\u8AE8\u8AFF\u8AEF\u8AFB\u8C91\u8C92\u8C90\u8CF5\u8CEE\u8CF1\u8CF0\u8CF3\u8D6C\u8D6E\u8DA5\u8DA7\u8E33\u8E3E\u8E38\u8E40\u8E45\u8E36\u8E3C\u8E3D\u8E41\u8E30\u8E3F\u8EBD\u8F36\u8F2E\u8F35\u8F32\u8F39\u8F37\u8F34\u9076\u9079\u907B\u9086\u90FA\u9133\u9135\u9136\u9193\u9190\u9191\u918D\u918F\u9327\u931E\u9308\u931F\u9306\u930F\u937A\u9338\u933C\u931B\u9323\u9312\u9301\u9346\u932D\u930E\u930D\u92CB\u931D\u92FA\u9325\u9313\u92F9\u92F7\u9334\u9302\u9324\u92FF\u9329\u9339\u9335\u932A\u9314\u930C\u930B\u92FE\u9309\u9300\u92FB\u9316\u95BC\u95CD\u95BE\u95B9\u95BA\u95B6\u95BF\u95B5\u95BD\u96A9\u96D4\u970B\u9712\u9710\u9799\u9797\u9794\u97F0\u97F8\u9835\u982F\u9832\u9924\u991F\u9927\u9929\u999E\u99EE\u99EC\u99E5\u99E4\u99F0\u99E3\u99EA\u99E9\u99E7\u9AB9\u9ABF\u9AB4\u9ABB\u9AF6\u9AFA\u9AF9\u9AF7\u9B33\u9B80\u9B85\u9B87\u9B7C\u9B7E\u9B7B\u9B82\u9B93\u9B92\u9B90\u9B7A\u9B95\u9B7D\u9B88\u9D25\u9D17\u9D20\u9D1E\u9D14\u9D29\u9D1D\u9D18\u9D22\u9D10\u9D19\u9D1F\u9E88\u9E86\u9E87\u9EAE\u9EAD\u9ED5\u9ED6\u9EFA\u9F12\u9F3D\u5126\u5125\u5122\u5124\u5120\u5129\u52F4\u5693\u568C\u568D\u5686\u5684\u5683\u567E\u5682\u567F\u5681\u58D6\u58D4\u58CF\u58D2\u5B2D\u5B25\u5B32\u5B23\u5B2C\u5B27\u5B26
\u5B2F\u5B2E\u5B7B\u5BF1\u5BF2\u5DB7\u5E6C\u5E6A\u5FBE\u5FBB\u61C3\u61B5\u61BC\u61E7\u61E0\u61E5\u61E4\u61E8\u61DE\u64EF\u64E9\u64E3\u64EB\u64E4\u64E8\u6581\u6580\u65B6\u65DA\u66D2\u6A8D\u6A96\u6A81\u6AA5\u6A89\u6A9F\u6A9B\u6AA1\u6A9E\u6A87\u6A93\u6A8E\u6A95\u6A83\u6AA8\u6AA4\u6A91\u6A7F\u6AA6\u6A9A\u6A85\u6A8C\u6A92\u6B5B\u6BAD\u6C09\u6FCC\u6FA9\u6FF4\u6FD4\u6FE3\u6FDC\u6FED\u6FE7\u6FE6\u6FDE\u6FF2\u6FDD\u6FE2\u6FE8\u71E1\u71F1\u71E8\u71F2\u71E4\u71F0\u71E2\u7373\u736E\u736F\u7497\u74B2\u74AB\u7490\u74AA\u74AD\u74B1\u74A5\u74AF\u7510\u7511\u7512\u750F\u7584\u7643\u7648\u7649\u7647\u76A4\u76E9\u77B5\u77AB\u77B2\u77B7\u77B6\u77B4\u77B1\u77A8\u77F0\u78F3\u78FD\u7902\u78FB\u78FC\u78F2\u7905\u78F9\u78FE\u7904\u79AB\u79A8\u7A5C\u7A5B\u7A56\u7A58\u7A54\u7A5A\u7ABE\u7AC0\u7AC1\u7C05\u7C0F\u7BF2\u7C00\u7BFF\u7BFB\u7C0E\u7BF4\u7C0B\u7BF3\u7C02\u7C09\u7C03\u7C01\u7BF8\u7BFD\u7C06\u7BF0\u7BF1\u7C10\u7C0A\u7CE8\u7E2D\u7E3C\u7E42\u7E33\u9848\u7E38\u7E2A\u7E49\u7E40\u7E47\u7E29\u7E4C\u7E30\u7E3B\u7E36\u7E44\u7E3A\u7F45\u7F7F\u7F7E\u7F7D\u7FF4\u7FF2\u802C\u81BB\u81C4\u81CC\u81CA\u81C5\u81C7\u81BC\u81E9\u825B\u825A\u825C\u8583\u8580\u858F\u85A7\u8595\u85A0\u858B\u85A3\u857B\u85A4\u859A\u859E\u8577\u857C\u8589\u85A1\u857A\u8578\u8557\u858E\u8596\u8586\u858D\u8599\u859D\u8581\u85A2\u8582\u8588\u8585\u8579\u8576\u8598\u8590\u859F\u8668\u87BE\u87AA\u87AD\u87C5\u87B0\u87AC\u87B9\u87B5\u87BC\u87AE\u87C9\u87C3\u87C2\u87CC\u87B7\u87AF\u87C4\u87CA\u87B4\u87B6\u87BF\u87B8\u87BD\u87DE\u87B2\u8935\u8933\u893C\u893E\u8941\u8952\u8937\u8942\u89AD\u89AF\u89AE\u89F2\u89F3\u8B1E\u8B18\u8B16\u8B11\u8B05\u8B0B\u8B22\u8B0F\u8B12\u8B15\u8B07\u8B0D\u8B08\u8B06\u8B1C\u8B13\u8B1A\u8C4F\u8C70\u8C72\u8C71\u8C6F\u8C95\u8C94\u8CF9\u8D6F\u8E4E\u8E4D\u8E53\u8E50\u8E4C\u8E47\u8F43\u8F40\u9085\u907E\u9138\u919A\u91A2\u919B\u9199\u919F\u91A1\u919D\u91A0\u93A1\u9383\u93AF\u9364\u9356\u9347\u937C\u9358\u935C\u9376\u9349\u9350\u9351\u9360\u936D\u938F\u934C\u936A\u9379\u9357\u9355\u9352\u934F\u9371\u9377\u937B\u9361\u
935E\u9363\u9367\u9380\u934E\u9359\u95C7\u95C0\u95C9\u95C3\u95C5\u95B7\u96AE\u96B0\u96AC\u9720\u971F\u9718\u971D\u9719\u979A\u97A1\u979C\u979E\u979D\u97D5\u97D4\u97F1\u9841\u9844\u984A\u9849\u9845\u9843\u9925\u992B\u992C\u992A\u9933\u9932\u992F\u992D\u9931\u9930\u9998\u99A3\u99A1\u9A02\u99FA\u99F4\u99F7\u99F9\u99F8\u99F6\u99FB\u99FD\u99FE\u99FC\u9A03\u9ABE\u9AFE\u9AFD\u9B01\u9AFC\u9B48\u9B9A\u9BA8\u9B9E\u9B9B\u9BA6\u9BA1\u9BA5\u9BA4\u9B86\u9BA2\u9BA0\u9BAF\u9D33\u9D41\u9D67\u9D36\u9D2E\u9D2F\u9D31\u9D38\u9D30\u9D45\u9D42\u9D43\u9D3E\u9D37\u9D40\u9D3D\u7FF5\u9D2D\u9E8A\u9E89\u9E8D\u9EB0\u9EC8\u9EDA\u9EFB\u9EFF\u9F24\u9F23\u9F22\u9F54\u9FA0\u5131\u512D\u512E\u5698\u569C\u5697\u569A\u569D\u5699\u5970\u5B3C\u5C69\u5C6A\u5DC0\u5E6D\u5E6E\u61D8\u61DF\u61ED\u61EE\u61F1\u61EA\u61F0\u61EB\u61D6\u61E9\u64FF\u6504\u64FD\u64F8\u6501\u6503\u64FC\u6594\u65DB\u66DA\u66DB\u66D8\u6AC5\u6AB9\u6ABD\u6AE1\u6AC6\u6ABA\u6AB6\u6AB7\u6AC7\u6AB4\u6AAD\u6B5E\u6BC9\u6C0B\u7007\u700C\u700D\u7001\u7005\u7014\u700E\u6FFF\u7000\u6FFB\u7026\u6FFC\u6FF7\u700A\u7201\u71FF\u71F9\u7203\u71FD\u7376\u74B8\u74C0\u74B5\u74C1\u74BE\u74B6\u74BB\u74C2\u7514\u7513\u765C\u7664\u7659\u7650\u7653\u7657\u765A\u76A6\u76BD\u76EC\u77C2\u77BA\u78FF\u790C\u7913\u7914\u7909\u7910\u7912\u7911\u79AD\u79AC\u7A5F\u7C1C\u7C29\u7C19\u7C20\u7C1F\u7C2D\u7C1D\u7C26\u7C28\u7C22\u7C25\u7C30\u7E5C\u7E50\u7E56\u7E63\u7E58\u7E62\u7E5F\u7E51\u7E60\u7E57\u7E53\u7FB5\u7FB3\u7FF7\u7FF8\u8075\u81D1\u81D2\u81D0\u825F\u825E\u85B4\u85C6\u85C0\u85C3\u85C2\u85B3\u85B5\u85BD\u85C7\u85C4\u85BF\u85CB\u85CE\u85C8\u85C5\u85B1\u85B6\u85D2\u8624\u85B8\u85B7\u85BE\u8669\u87E7\u87E6\u87E2\u87DB\u87EB\u87EA\u87E5\u87DF\u87F3\u87E4\u87D4\u87DC\u87D3\u87ED\u87D8\u87E3\u87A4\u87D7\u87D9\u8801\u87F4\u87E8\u87DD\u8953\u894B\u894F\u894C\u8946\u8950\u8951\u8949\u8B2A\u8B27\u8B23\u8B33\u8B30\u8B35\u8B47\u8B2F\u8B3C\u8B3E\u8B31\u8B25\u8B37\u8B26\u8B36\u8B2E\u8B24\u8B3B\u8B3D\u8B3A\u8C42\u8C75\u8C99\u8C98\u8C97\u8CFE\u8D04\u8D02\u8D00\u8E5C\u8E62\u8E60\u8E57\u8E
56\u8E5E\u8E65\u8E67\u8E5B\u8E5A\u8E61\u8E5D\u8E69\u8E54\u8F46\u8F47\u8F48\u8F4B\u9128\u913A\u913B\u913E\u91A8\u91A5\u91A7\u91AF\u91AA\u93B5\u938C\u9392\u93B7\u939B\u939D\u9389\u93A7\u938E\u93AA\u939E\u93A6\u9395\u9388\u9399\u939F\u938D\u93B1\u9391\u93B2\u93A4\u93A8\u93B4\u93A3\u93A5\u95D2\u95D3\u95D1\u96B3\u96D7\u96DA\u5DC2\u96DF\u96D8\u96DD\u9723\u9722\u9725\u97AC\u97AE\u97A8\u97AB\u97A4\u97AA\u97A2\u97A5\u97D7\u97D9\u97D6\u97D8\u97FA\u9850\u9851\u9852\u98B8\u9941\u993C\u993A\u9A0F\u9A0B\u9A09\u9A0D\u9A04\u9A11\u9A0A\u9A05\u9A07\u9A06\u9AC0\u9ADC\u9B08\u9B04\u9B05\u9B29\u9B35\u9B4A\u9B4C\u9B4B\u9BC7\u9BC6\u9BC3\u9BBF\u9BC1\u9BB5\u9BB8\u9BD3\u9BB6\u9BC4\u9BB9\u9BBD\u9D5C\u9D53\u9D4F\u9D4A\u9D5B\u9D4B\u9D59\u9D56\u9D4C\u9D57\u9D52\u9D54\u9D5F\u9D58\u9D5A\u9E8E\u9E8C\u9EDF\u9F01\u9F00\u9F16\u9F25\u9F2B\u9F2A\u9F29\u9F28\u9F4C\u9F55\u5134\u5135\u5296\u52F7\u53B4\u56AB\u56AD\u56A6\u56A7\u56AA\u56AC\u58DA\u58DD\u58DB\u5912\u5B3D\u5B3E\u5B3F\u5DC3\u5E70\u5FBF\u61FB\u6507\u6510\u650D\u6509\u650C\u650E\u6584\u65DE\u65DD\u66DE\u6AE7\u6AE0\u6ACC\u6AD1\u6AD9\u6ACB\u6ADF\u6ADC\u6AD0\u6AEB\u6ACF\u6ACD\u6ADE\u6B60\u6BB0\u6C0C\u7019\u7027\u7020\u7016\u702B\u7021\u7022\u7023\u7029\u7017\u7024\u701C\u702A\u720C\u720A\u7207\u7202\u7205\u72A5\u72A6\u72A4\u72A3\u72A1\u74CB\u74C5\u74B7\u74C3\u7516\u7660\u77C9\u77CA\u77C4\u77F1\u791D\u791B\u7921\u791C\u7917\u791E\u79B0\u7A67\u7A68\u7C33\u7C3C\u7C39\u7C2C\u7C3B\u7CEC\u7CEA\u7E76\u7E75\u7E78\u7E70\u7E77\u7E6F\u7E7A\u7E72\u7E74\u7E68\u7F4B\u7F4A\u7F83\u7F86\u7FB7\u7FFD\u7FFE\u8078\u81D7\u81D5\u8264\u8261\u8263\u85EB\u85F1\u85ED\u85D9\u85E1\u85E8\u85DA\u85D7\u85EC\u85F2\u85F8\u85D8\u85DF\u85E3\u85DC\u85D1\u85F0\u85E6\u85EF\u85DE\u85E2\u8800\u87FA\u8803\u87F6\u87F7\u8809\u880C\u880B\u8806\u87FC\u8808\u87FF\u880A\u8802\u8962\u895A\u895B\u8957\u8961\u895C\u8958\u895D\u8959\u8988\u89B7\u89B6\u89F6\u8B50\u8B48\u8B4A\u8B40\u8B53\u8B56\u8B54\u8B4B\u8B55\u8B51\u8B42\u8B52\u8B57\u8C43\u8C77\u8C76\u8C9A\u8D06\u8D07\u8D09\u8DAC\u8DAA\u8DAD\u8DAB\u8E6D
\u8E78\u8E73\u8E6A\u8E6F\u8E7B\u8EC2\u8F52\u8F51\u8F4F\u8F50\u8F53\u8FB4\u9140\u913F\u91B0\u91AD\u93DE\u93C7\u93CF\u93C2\u93DA\u93D0\u93F9\u93EC\u93CC\u93D9\u93A9\u93E6\u93CA\u93D4\u93EE\u93E3\u93D5\u93C4\u93CE\u93C0\u93D2\u93E7\u957D\u95DA\u95DB\u96E1\u9729\u972B\u972C\u9728\u9726\u97B3\u97B7\u97B6\u97DD\u97DE\u97DF\u985C\u9859\u985D\u9857\u98BF\u98BD\u98BB\u98BE\u9948\u9947\u9943\u99A6\u99A7\u9A1A\u9A15\u9A25\u9A1D\u9A24\u9A1B\u9A22\u9A20\u9A27\u9A23\u9A1E\u9A1C\u9A14\u9AC2\u9B0B\u9B0A\u9B0E\u9B0C\u9B37\u9BEA\u9BEB\u9BE0\u9BDE\u9BE4\u9BE6\u9BE2\u9BF0\u9BD4\u9BD7\u9BEC\u9BDC\u9BD9\u9BE5\u9BD5\u9BE1\u9BDA\u9D77\u9D81\u9D8A\u9D84\u9D88\u9D71\u9D80\u9D78\u9D86\u9D8B\u9D8C\u9D7D\u9D6B\u9D74\u9D75\u9D70\u9D69\u9D85\u9D73\u9D7B\u9D82\u9D6F\u9D79\u9D7F\u9D87\u9D68\u9E94\u9E91\u9EC0\u9EFC\u9F2D\u9F40\u9F41\u9F4D\u9F56\u9F57\u9F58\u5337\u56B2\u56B5\u56B3\u58E3\u5B45\u5DC6\u5DC7\u5EEE\u5EEF\u5FC0\u5FC1\u61F9\u6517\u6516\u6515\u6513\u65DF\u66E8\u66E3\u66E4\u6AF3\u6AF0\u6AEA\u6AE8\u6AF9\u6AF1\u6AEE\u6AEF\u703C\u7035\u702F\u7037\u7034\u7031\u7042\u7038\u703F\u703A\u7039\u7040\u703B\u7033\u7041\u7213\u7214\u72A8\u737D\u737C\u74BA\u76AB\u76AA\u76BE\u76ED\u77CC\u77CE\u77CF\u77CD\u77F2\u7925\u7923\u7927\u7928\u7924\u7929\u79B2\u7A6E\u7A6C\u7A6D\u7AF7\u7C49\u7C48\u7C4A\u7C47\u7C45\u7CEE\u7E7B\u7E7E\u7E81\u7E80\u7FBA\u7FFF\u8079\u81DB\u81D9\u820B\u8268\u8269\u8622\u85FF\u8601\u85FE\u861B\u8600\u85F6\u8604\u8609\u8605\u860C\u85FD\u8819\u8810\u8811\u8817\u8813\u8816\u8963\u8966\u89B9\u89F7\u8B60\u8B6A\u8B5D\u8B68\u8B63\u8B65\u8B67\u8B6D\u8DAE\u8E86\u8E88\u8E84\u8F59\u8F56\u8F57\u8F55\u8F58\u8F5A\u908D\u9143\u9141\u91B7\u91B5\u91B2\u91B3\u940B\u9413\u93FB\u9420\u940F\u9414\u93FE\u9415\u9410\u9428\u9419\u940D\u93F5\u9400\u93F7\u9407\u940E\u9416\u9412\u93FA\u9409\u93F8\u940A\u93FF\u93FC\u940C\u93F6\u9411\u9406\u95DE\u95E0\u95DF\u972E\u972F\u97B9\u97BB\u97FD\u97FE\u9860\u9862\u9863\u985F\u98C1\u98C2\u9950\u994E\u9959\u994C\u994B\u9953\u9A32\u9A34\u9A31\u9A2C\u9A2A\u9A36\u9A29\u9A2E\u9A38\u
9A2D\u9AC7\u9ACA\u9AC6\u9B10\u9B12\u9B11\u9C0B\u9C08\u9BF7\u9C05\u9C12\u9BF8\u9C40\u9C07\u9C0E\u9C06\u9C17\u9C14\u9C09\u9D9F\u9D99\u9DA4\u9D9D\u9D92\u9D98\u9D90\u9D9B\u9DA0\u9D94\u9D9C\u9DAA\u9D97\u9DA1\u9D9A\u9DA2\u9DA8\u9D9E\u9DA3\u9DBF\u9DA9\u9D96\u9DA6\u9DA7\u9E99\u9E9B\u9E9A\u9EE5\u9EE4\u9EE7\u9EE6\u9F30\u9F2E\u9F5B\u9F60\u9F5E\u9F5D\u9F59\u9F91\u513A\u5139\u5298\u5297\u56C3\u56BD\u56BE\u5B48\u5B47\u5DCB\u5DCF\u5EF1\u61FD\u651B\u6B02\u6AFC\u6B03\u6AF8\u6B00\u7043\u7044\u704A\u7048\u7049\u7045\u7046\u721D\u721A\u7219\u737E\u7517\u766A\u77D0\u792D\u7931\u792F\u7C54\u7C53\u7CF2\u7E8A\u7E87\u7E88\u7E8B\u7E86\u7E8D\u7F4D\u7FBB\u8030\u81DD\u8618\u862A\u8626\u861F\u8623\u861C\u8619\u8627\u862E\u8621\u8620\u8629\u861E\u8625\u8829\u881D\u881B\u8820\u8824\u881C\u882B\u884A\u896D\u8969\u896E\u896B\u89FA\u8B79\u8B78\u8B45\u8B7A\u8B7B\u8D10\u8D14\u8DAF\u8E8E\u8E8C\u8F5E\u8F5B\u8F5D\u9146\u9144\u9145\u91B9\u943F\u943B\u9436\u9429\u943D\u943C\u9430\u9439\u942A\u9437\u942C\u9440\u9431\u95E5\u95E4\u95E3\u9735\u973A\u97BF\u97E1\u9864\u98C9\u98C6\u98C0\u9958\u9956\u9A39\u9A3D\u9A46\u9A44\u9A42\u9A41\u9A3A\u9A3F\u9ACD\u9B15\u9B17\u9B18\u9B16\u9B3A\u9B52\u9C2B\u9C1D\u9C1C\u9C2C\u9C23\u9C28\u9C29\u9C24\u9C21\u9DB7\u9DB6\u9DBC\u9DC1\u9DC7\u9DCA\u9DCF\u9DBE\u9DC5\u9DC3\u9DBB\u9DB5\u9DCE\u9DB9\u9DBA\u9DAC\u9DC8\u9DB1\u9DAD\u9DCC\u9DB3\u9DCD\u9DB2\u9E7A\u9E9C\u9EEB\u9EEE\u9EED\u9F1B\u9F18\u9F1A\u9F31\u9F4E\u9F65\u9F64\u9F92\u4EB9\u56C6\u56C5\u56CB\u5971\u5B4B\u5B4C\u5DD5\u5DD1\u5EF2\u6521\u6520\u6526\u6522\u6B0B\u6B08\u6B09\u6C0D\u7055\u7056\u7057\u7052\u721E\u721F\u72A9\u737F\u74D8\u74D5\u74D9\u74D7\u766D\u76AD\u7935\u79B4\u7A70\u7A71\u7C57\u7C5C\u7C59\u7C5B\u7C5A\u7CF4\u7CF1\u7E91\u7F4F\u7F87\u81DE\u826B\u8634\u8635\u8633\u862C\u8632\u8636\u882C\u8828\u8826\u882A\u8825\u8971\u89BF\u89BE\u89FB\u8B7E\u8B84\u8B82\u8B86\u8B85\u8B7F\u8D15\u8E95\u8E94\u8E9A\u8E92\u8E90\u8E96\u8E97\u8F60\u8F62\u9147\u944C\u9450\u944A\u944B\u944F\u9447\u9445\u9448\u9449\u9446\u973F\u97E3\u986A\u9869\u98CB\u99
54\u995B\u9A4E\u9A53\u9A54\u9A4C\u9A4F\u9A48\u9A4A\u9A49\u9A52\u9A50\u9AD0\u9B19\u9B2B\u9B3B\u9B56\u9B55\u9C46\u9C48\u9C3F\u9C44\u9C39\u9C33\u9C41\u9C3C\u9C37\u9C34\u9C32\u9C3D\u9C36\u9DDB\u9DD2\u9DDE\u9DDA\u9DCB\u9DD0\u9DDC\u9DD1\u9DDF\u9DE9\u9DD9\u9DD8\u9DD6\u9DF5\u9DD5\u9DDD\u9EB6\u9EF0\u9F35\u9F33\u9F32\u9F42\u9F6B\u9F95\u9FA2\u513D\u5299\u58E8\u58E7\u5972\u5B4D\u5DD8\u882F\u5F4F\u6201\u6203\u6204\u6529\u6525\u6596\u66EB\u6B11\u6B12\u6B0F\u6BCA\u705B\u705A\u7222\u7382\u7381\u7383\u7670\u77D4\u7C67\u7C66\u7E95\u826C\u863A\u8640\u8639\u863C\u8631\u863B\u863E\u8830\u8832\u882E\u8833\u8976\u8974\u8973\u89FE\u8B8C\u8B8E\u8B8B\u8B88\u8C45\u8D19\u8E98\u8F64\u8F63\u91BC\u9462\u9455\u945D\u9457\u945E\u97C4\u97C5\u9800\u9A56\u9A59\u9B1E\u9B1F\u9B20\u9C52\u9C58\u9C50\u9C4A\u9C4D\u9C4B\u9C55\u9C59\u9C4C\u9C4E\u9DFB\u9DF7\u9DEF\u9DE3\u9DEB\u9DF8\u9DE4\u9DF6\u9DE1\u9DEE\u9DE6\u9DF2\u9DF0\u9DE2\u9DEC\u9DF4\u9DF3\u9DE8\u9DED\u9EC2\u9ED0\u9EF2\u9EF3\u9F06\u9F1C\u9F38\u9F37\u9F36\u9F43\u9F4F\u9F71\u9F70\u9F6E\u9F6F\u56D3\u56CD\u5B4E\u5C6D\u652D\u66ED\u66EE\u6B13\u705F\u7061\u705D\u7060\u7223\u74DB\u74E5\u77D5\u7938\u79B7\u79B6\u7C6A\u7E97\u7F89\u826D\u8643\u8838\u8837\u8835\u884B\u8B94\u8B95\u8E9E\u8E9F\u8EA0\u8E9D\u91BE\u91BD\u91C2\u946B\u9468\u9469\u96E5\u9746\u9743\u9747\u97C7\u97E5\u9A5E\u9AD5\u9B59\u9C63\u9C67\u9C66\u9C62\u9C5E\u9C60\u9E02\u9DFE\u9E07\u9E03\u9E06\u9E05\u9E00\u9E01\u9E09\u9DFF\u9DFD\u9E04\u9EA0\u9F1E\u9F46\u9F74\u9F75\u9F76\u56D4\u652E\u65B8\u6B18\u6B19\u6B17\u6B1A\u7062\u7226\u72AA\u77D8\u77D9\u7939\u7C69\u7C6B\u7CF6\u7E9A\u7E98\u7E9B\u7E99\u81E0\u81E1\u8646\u8647\u8648\u8979\u897A\u897C\u897B\u89FF\u8B98\u8B99\u8EA5\u8EA4\u8EA3\u946E\u946D\u946F\u9471\u9473\u9749\u9872\u995F\u9C68\u9C6E\u9C6D\u9E0B\u9E0D\u9E10\u9E0F\u9E12\u9E11\u9EA1\u9EF5\u9F09\u9F47\u9F78\u9F7B\u9F7A\u9F79\u571E\u7066\u7C6F\u883C\u8DB2\u8EA6\u91C3\u9474\u9478\u9476\u9475\u9A60\u9C74\u9C73\u9C71\u9C75\u9E14\u9E13\u9EF6\u9F0A\u9FA4\u7068\u7065\u7CF7\u866A\u883E\u883D\u883F\u8B9E\u8C9C\u8EA9
\u8EC9\u974B\u9873\u9874\u98CC\u9961\u99AB\u9A64\u9A66\u9A67\u9B24\u9E15\u9E17\u9F48\u6207\u6B1E\u7227\u864C\u8EA8\u9482\u9480\u9481\u9A69\u9A68\u9B2E\u9E19\u7229\u864B\u8B9F\u9483\u9C79\u9EB7\u7675\u9A6B\u9C7A\u9E1D\u7069\u706A\u9EA4\u9F7E\u9F49\u9F98\u7881\u92B9\u88CF\u58BB\u6052\u7CA7\u5AFA\u2554\u2566\u2557\u2560\u256C\u2563\u255A\u2569\u255D\u2552\u2564\u2555\u255E\u256A\u2561\u2558\u2567\u255B\u2553\u2565\u2556\u255F\u256B\u2562\u2559\u2568\u255C\u2551\u2550\u256D\u256E\u2570\u256F\uFFED\u0547\u92DB\u05DF\u3FC5\u854C\u42B5\u73EF\u51B5\u3649\u4942\u89E4\u9344\u19DB\u82EE\u3CC8\u783C\u6744\u62DF\u4933\u89AA\u02A0\u6BB3\u1305\u4FAB\u24ED\u5008\u6D29\u7A84\u3600\u4AB1\u2513\u5029\u037E\u5FA4\u0380\u0347\u6EDB\u041F\u507D\u5101\u347A\u510E\u986C\u3743\u8416\u49A4\u0487\u5160\u33B4\u516A\u0BFF\u20FC\u02E5\u2530\u058E\u3233\u1983\u5B82\u877D\u05B3\u3C99\u51B2\u51B8\u9D34\u51C9\u51CF\u51D1\u3CDC\u51D3\u4AA6\u51B3\u51E2\u5342\u51ED\u83CD\u693E\u372D\u5F7B\u520B\u5226\u523C\u52B5\u5257\u5294\u52B9\u52C5\u7C15\u8542\u52E0\u860D\u6B13\u5305\u8ADE\u5549\u6ED9\u3F80\u0954\u3FEC\u5333\u5344\u0BE2\u6CCB\u1726\u681B\u73D5\u604A\u3EAA\u38CC\u16E8\u71DD\u44A2\u536D\u5374\u86AB\u537E\u537F\u1596\u1613\u77E6\u5393\u8A9B\u53A0\u53AB\u53AE\u73A7\u5772\u3F59\u739C\u53C1\u53C5\u6C49\u4E49\u57FE\u53D9\u3AAB\u0B8F\u53E0\u3FEB\u2DA3\u53F6\u0C77\u5413\u7079\u552B\u6657\u6D5B\u546D\u6B53\u0D74\u555D\u548F\u54A4\u47A6\u170D\u0EDD\u3DB4\u0D4D\u89BC\u2698\u5547\u4CED\u542F\u7417\u5586\u55A9\u5605\u18D7\u403A\u4552\u4435\u66B3\u10B4\u5637\u66CD\u328A\u66A4\u66AD\u564D\u564F\u78F1\u56F1\u9787\u53FE\u5700\u56EF\u56ED\u8B66\u3623\u124F\u5746\u41A5\u6C6E\u708B\u5742\u36B1\u6C7E\u57E6\u1416\u5803\u1454\u4363\u5826\u4BF5\u585C\u58AA\u3561\u58E0\u58DC\u123C\u58FB\u5BFF\u5743\uA150\u4278\u93D3\u35A1\u591F\u68A6\u36C3\u6E59\u163E\u5A24\u5553\u1692\u8505\u59C9\u0D4E\u6C81\u6D2A\u17DC\u59D9\u17FB\u17B2\u6DA6\u6D71\u1828\u16D5\u59F9\u6E45\u5AAB\u5A63\u36E6\u49A9\u5A77\u3708\u5A96\u7465\u5AD3\u6FA1\u2554\u
3D85\u1911\u3732\u16B8\u5E83\u52D0\u5B76\u6588\u5B7C\u7A0E\u4004\u485D\u0204\u5BD5\u6160\u1A34\u59CC\u05A5\u5BF3\u5B9D\u4D10\u5C05\u1B44\u5C13\u73CE\u5C14\u1CA5\u6B28\u5C49\u48DD\u5C85\u5CE9\u5CEF\u5D8B\u1DF9\u1E37\u5D10\u5D18\u5D46\u1EA4\u5CBA\u5DD7\u82FC\u382D\u4901\u2049\u2173\u8287\u3836\u3BC2\u5E2E\u6A8A\u5E75\u5E7A\u44BC\u0CD3\u53A6\u4EB7\u5ED0\u53A8\u1771\u5E09\u5EF4\u8482\u5EF9\u5EFB\u38A0\u5EFC\u683E\u941B\u5F0D\u01C1\uF894\u3ADE\u48AE\u133A\u5F3A\u6888\u23D0\u5F58\u2471\u5F63\u97BD\u6E6E\u5F72\u9340\u8A36\u5FA7\u5DB6\u3D5F\u5250\u1F6A\u70F8\u2668\u91D6\u029E\u8A29\u6031\u6685\u1877\u3963\u3DC7\u3639\u5790\u27B4\u7971\u3E40\u609E\u60A4\u60B3\u4982\u498F\u7A53\u74A4\u50E1\u5AA0\u6164\u8424\u6142\uF8A6\u6ED2\u6181\u51F4\u0656\u6187\u5BAA\u3FB7\u285F\u61D3\u8B9D\u995D\u61D0\u3932\u2980\u28C1\u6023\u615C\u651E\u638B\u0118\u62C5\u1770\u62D5\u2E0D\u636C\u49DF\u3A17\u6438\u63F8\u138E\u17FC\u6490\u6F8A\u2E36\u9814\u408C\u571D\u64E1\u64E5\u947B\u3A66\u643A\u3A57\u654D\u6F16\u4A28\u4A23\u6585\u656D\u655F\u307E\u65B5\u4940\u4B37\u65D1\u40D8\u1829\u65E0\u65E3\u5FDF\u3400\u6618\u31F7\u31F8\u6644\u31A4\u31A5\u664B\u0E75\u6667\u51E6\u6673\u6674\u1E3D\u3231\u85F4\u31C8\u5313\u77C5\u28F7\u99A4\u6702\u439C\u4A21\u3B2B\u69FA\u37C2\u675E\u6767\u6762\u41CD\u90ED\u67D7\u44E9\u6822\u6E50\u923C\u6801\u33E6\u6DA0\u685D\u346F\u69E1\u6A0B\u8ADF\u6973\u68C3\u35CD\u6901\u6900\u3D32\u3A01\u363C\u3B80\u67AC\u6961\u8A4A\u42FC\u6936\u6998\u3BA1\u03C9\u8363\u5090\u69F9\u3659\u212A\u6A45\u3703\u6A9D\u3BF3\u67B1\u6AC8\u919C\u3C0D\u6B1D\u0923\u60DE\u6B35\u6B74\u27CD\u6EB5\u3ADB\u03B5\u1958\u3740\u5421\u3B5A\u6BE1\u3EFC\u6BDC\u6C37\u248B\u48F1\u6B51\u6C5A\u8226\u6C79\u3DBC\u44C5\u3DBD\u41A4\u490C\u4900\u3CC9\u36E5\u3CEB\u0D32\u9B83\u31F9\u2491\u7F8F\u6837\u6D25\u6DA1\u6DEB\u6D96\u6D5C\u6E7C\u6F04\u497F\u4085\u6E72\u8533\u6F74\u51C7\u6C9C\u6E1D\u842E\u8B21\u6E2F\u3E2F\u7453\u3F82\u79CC\u6E4F\u5A91\u304B\u6FF8\u370D\u6F9D\u3E30\u6EFA\u1497\u403D\u4555\u93F0\u6F44\u6F5C\u3D4E\u6F74\u9170\u3D3B\u6F
9F\u4144\u6FD3\u4091\u4155\u4039\u3FF0\u3FB4\u413F\u51DF\u4156\u4157\u4140\u61DD\u704B\u707E\u70A7\u7081\u70CC\u70D5\u70D6\u70DF\u4104\u3DE8\u71B4\u7196\u4277\u712B\u7145\u5A88\u714A\u716E\u5C9C\u4365\u714F\u9362\u42C1\u712C\u445A\u4A27\u4A22\u71BA\u8BE8\u70BD\u720E\u9442\u7215\u5911\u9443\u7224\u9341\u5605\u722E\u7240\u4974\u68BD\u7255\u7257\u3E55\u3044\u680D\u6F3D\u7282\u732A\u732B\u4823\u882B\u48ED\u8804\u7328\u732E\u73CF\u73AA\u0C3A\u6A2E\u73C9\u7449\u41E2\u16E7\u4A24\u6623\u36C5\u49B7\u498D\u49FB\u73F7\u7415\u6903\u4A26\u7439\u05C3\u3ED7\u745C\u28AD\u7460\u8EB2\u7447\u73E4\u7476\u83B9\u746C\u3730\u7474\u93F1\u6A2C\u7482\u4953\u4A8C\u415F\u4A79\u8B8F\u5B46\u8C03\u189E\u74C8\u1988\u750E\u74E9\u751E\u8ED9\u1A4B\u5BD7\u8EAC\u9385\u754D\u754A\u7567\u756E\u4F82\u3F04\u4D13\u758E\u745D\u759E\u75B4\u7602\u762C\u7651\u764F\u766F\u7676\u63F5\u7690\u81EF\u37F8\u6911\u690E\u76A1\u76A5\u76B7\u76CC\u6F9F\u8462\u509D\u517D\u1E1C\u771E\u7726\u7740\u64AF\u5220\u7758\u32AC\u77AF\u8964\u8968\u16C1\u77F4\u7809\u1376\u4A12\u68CA\u78AF\u78C7\u78D3\u96A5\u792E\u55E0\u78D7\u7934\u78B1\u760C\u8FB8\u8884\u8B2B\u6083\u261C\u7986\u8900\u6902\u7980\u5857\u799D\u7B39\u793C\u79A9\u6E2A\u7126\u3EA8\u79C6\u910D\u79D4";
+
+    /**
+     * Tests a single bit of the packed bitmap stored in ASTRALNESS.
+     *
+     * Each char of ASTRALNESS holds 16 flags: bit index i is found in
+     * char number i / 16, at bit position i % 16 counted from the least
+     * significant bit.
+     *
+     * @param i zero-based bit index into the bitmap
+     * @return true if the addressed bit is set
+     */
+    private static boolean readBit(int i) {
+        int word = ASTRALNESS.charAt(i >> 4);
+        int mask = 1 << (i & 0xF);
+        return (word & mask) != 0;
+    }
+
+    /**
+     * Looks up the 16-bit table entry for a Big5 pointer.
+     *
+     * The data is split over five tables, each covering one contiguous
+     * pointer range; pointers in the gaps between the ranges (and below
+     * the first or above the last range) have no entry.
+     *
+     * @param pointer the Big5 pointer to look up
+     * @return the table entry for the pointer, or U+0000 when the pointer
+     *         is not covered by any table
+     */
+    static char lowBits(int pointer) {
+        // Ranges are tested from the highest start downwards, so each
+        // branch only needs to check its own upper bound.
+        if (pointer >= 11254) {
+            return (pointer < 19782) ? TABLE4.charAt(pointer - 11254) : '\u0000';
+        }
+        if (pointer >= 5495) {
+            return (pointer < 11214) ? TABLE3.charAt(pointer - 5495) : '\u0000';
+        }
+        if (pointer >= 1256) {
+            return (pointer < 5466) ? TABLE2.charAt(pointer - 1256) : '\u0000';
+        }
+        if (pointer >= 1099) {
+            return (pointer < 1172) ? TABLE1.charAt(pointer - 1099) : '\u0000';
+        }
+        if (pointer >= 942) {
+            return (pointer < 1068) ? TABLE0.charAt(pointer - 942) : '\u0000';
+        }
+        return '\u0000';
+    }
+
+    /**
+     * Reports whether the astral flag is set for a Big5 pointer.
+     *
+     * The flags are stored as a bitmap that only covers selected pointer
+     * ranges; each range starts at a fixed bit offset within the bitmap.
+     * Pointers outside those ranges are never astral, with the single
+     * exception of pointer 3883, which is always astral.
+     *
+     * @param pointer the Big5 pointer to test
+     * @return true if the pointer is flagged as astral
+     */
+    static boolean isAstral(int pointer) {
+        // Ranges are tested from the highest start downwards, so each
+        // branch only needs to check its own upper bound.
+        if (pointer >= 18997) {
+            return pointer < 19782 && readBit(3549 + (pointer - 18997));
+        }
+        if (pointer >= 11205) {
+            return pointer < 11214 && readBit(3540 + (pointer - 11205));
+        }
+        if (pointer >= 3985) {
+            return pointer < 5024 && readBit(2501 + (pointer - 3985));
+        }
+        if (pointer >= 3883) {
+            // Lone astral pointer with no bitmap bits of its own.
+            return pointer == 3883;
+        }
+        if (pointer >= 2012) {
+            return pointer < 3800 && readBit(712 + (pointer - 2012));
+        }
+        if (pointer >= 1413) {
+            return pointer < 1912 && readBit(213 + (pointer - 1413));
+        }
+        if (pointer >= 1336) {
+            return pointer < 1364 && readBit(185 + (pointer - 1336));
+        }
+        if (pointer >= 1256) {
+            return pointer < 1269 && readBit(172 + (pointer - 1256));
+        }
+        return pointer >= 947 && pointer < 1119 && readBit(0 + (pointer - 947));
+    }
+
+    /**
+     * Finds the Big5 pointer for a character, for use when encoding.
+     *
+     * A handful of BMP characters are answered from a fixed list before
+     * any table is searched (presumably characters with several pointers
+     * where the first match in table order is not the wanted one -- TODO
+     * confirm against the Encoding Standard). Otherwise the tables are
+     * searched in pointer order, starting at index 3768 of TABLE2 (that
+     * is, pointer 3768 + 1256 = 5024), and the first entry whose value
+     * and astral flag both match wins.
+     *
+     * @param lowBits the low 16 bits of the code point
+     * @param isAstral whether the wanted entry must be flagged astral
+     * @return the matching pointer, or 0 if there is none
+     */
+    public static int findPointer(char lowBits, boolean isAstral) {
+        if (!isAstral) {
+            switch (lowBits) {
+                case 0x2550:
+                    return 18991;
+                case 0x255E:
+                    return 18975;
+                case 0x2561:
+                    return 18977;
+                case 0x256A:
+                    return 18976;
+                case 0x5341:
+                    return 5512;
+                case 0x5345:
+                    return 5599;
+                default:
+                    break;
+            }
+        }
+        // Walk every occurrence of lowBits in each table in turn; an
+        // occurrence only counts if its astral flag agrees.
+        for (int at = TABLE2.indexOf(lowBits, 3768); at >= 0; at = TABLE2.indexOf(lowBits, at + 1)) {
+            int candidate = at + 1256;
+            if (isAstral(candidate) == isAstral) {
+                return candidate;
+            }
+        }
+        for (int at = TABLE3.indexOf(lowBits, 0); at >= 0; at = TABLE3.indexOf(lowBits, at + 1)) {
+            int candidate = at + 5495;
+            if (isAstral(candidate) == isAstral) {
+                return candidate;
+            }
+        }
+        for (int at = TABLE4.indexOf(lowBits, 0); at >= 0; at = TABLE4.indexOf(lowBits, at + 1)) {
+            int candidate = at + 11254;
+            if (isAstral(candidate) == isAstral) {
+                return candidate;
+            }
+        }
+        return 0;
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Decoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Decoder.java
new file mode 100644
index 000000000..cc56b892f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Decoder.java
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CoderResult;
+
+public class Big5Decoder extends Decoder {
+
+ private int big5Lead = 0;
+
+ private char pendingTrail = '\u0000';
+
+ protected Big5Decoder(Charset cs) {
+ super(cs, 0.5f, 1.0f);
+ }
+
+ @Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+ assert !(this.report && (big5Lead != 0)):
+ "When reporting, this method should never return with big5Lead set.";
+ if (pendingTrail != '\u0000') {
+ if (!out.hasRemaining()) {
+ return CoderResult.OVERFLOW;
+ }
+ out.put(pendingTrail);
+ pendingTrail = '\u0000';
+ }
+ for (;;) {
+ if (!in.hasRemaining()) {
+ return CoderResult.UNDERFLOW;
+ }
+ if (!out.hasRemaining()) {
+ return CoderResult.OVERFLOW;
+ }
+ int b = ((int) in.get() & 0xFF);
+ if (big5Lead == 0) {
+ if (b <= 0x7F) {
+ out.put((char) b);
+ continue;
+ }
+ if (b >= 0x81 && b <= 0xFE) {
+ if (this.report && !in.hasRemaining()) {
+ // The Java API is badly documented. Need to do this
+ // crazy thing and hope the caller knows about the
+ // undocumented aspects of the API!
+ in.position(in.position() - 1);
+ return CoderResult.UNDERFLOW;
+ }
+ big5Lead = b;
+ continue;
+ }
+ if (this.report) {
+ in.position(in.position() - 1);
+ return CoderResult.malformedForLength(1);
+ }
+ out.put('\uFFFD');
+ continue;
+ }
+ int lead = big5Lead;
+ big5Lead = 0;
+ int offset = (b < 0x7F) ? 0x40 : 0x62;
+ if ((b >= 0x40 && b <= 0x7E) || (b >= 0xA1 && b <= 0xFE)) {
+ int pointer = (lead - 0x81) * 157 + (b - offset);
+ char outTrail;
+ switch (pointer) {
+ case 1133:
+ out.put('\u00CA');
+ outTrail = '\u0304';
+ break;
+ case 1135:
+ out.put('\u00CA');
+ outTrail = '\u030C';
+ break;
+ case 1164:
+ out.put('\u00EA');
+ outTrail = '\u0304';
+ break;
+ case 1166:
+ out.put('\u00EA');
+ outTrail = '\u030C';
+ break;
+ default:
+ char lowBits = Big5Data.lowBits(pointer);
+ if (lowBits == '\u0000') {
+ // The following |if| block fixes
+ // https://github.com/whatwg/encoding/issues/5
+ if (b <= 0x7F) {
+ // prepend byte to stream
+ // Always legal, since we've always just read a byte
+ // if we come here.
+ in.position(in.position() - 1);
+ }
+ if (this.report) {
+ // This can go past the start of the buffer
+ // if the caller does not conform to the
+ // undocumented aspects of the API.
+ in.position(in.position() - 1);
+ return CoderResult.malformedForLength(b <= 0x7F ? 1 : 2);
+ }
+ out.put('\uFFFD');
+ continue;
+ }
+ if (Big5Data.isAstral(pointer)) {
+ int codePoint = lowBits | 0x20000;
+ out.put((char) (0xD7C0 + (codePoint >> 10)));
+ outTrail = (char) (0xDC00 + (codePoint & 0x3FF));
+ break;
+ }
+ out.put(lowBits);
+ continue;
+ }
+ if (!out.hasRemaining()) {
+ pendingTrail = outTrail;
+ return CoderResult.OVERFLOW;
+ }
+ out.put(outTrail);
+ continue;
+ }
+ // pointer is null
+ if (b <= 0x7F) {
+ // prepend byte to stream
+ // Always legal, since we've always just read a byte
+ // if we come here.
+ in.position(in.position() - 1);
+ }
+ if (this.report) {
+ // if position() == 0, the caller is not using the
+ // undocumented part of the API right and the line
+ // below will throw!
+ in.position(in.position() - 1);
+ return CoderResult.malformedForLength(b <= 0x7F ? 1 : 2);
+ }
+ out.put('\uFFFD');
+ continue;
+ }
+ }
+
+ @Override protected CoderResult implFlush(CharBuffer out) {
+ if (pendingTrail != '\u0000') {
+ if (!out.hasRemaining()) {
+ return CoderResult.OVERFLOW;
+ }
+ out.put(pendingTrail);
+ pendingTrail = '\u0000';
+ }
+ if (big5Lead != 0) {
+ assert !this.report: "How come big5Lead got to be non-zero when decodeLoop() returned in the reporting mode?";
+ if (!out.hasRemaining()) {
+ return CoderResult.OVERFLOW;
+ }
+ out.put('\uFFFD');
+ big5Lead = 0;
+ }
+ return CoderResult.UNDERFLOW;
+ }
+
+ @Override protected void implReset() {
+ big5Lead = 0;
+ pendingTrail = '\u0000';
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Encoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Encoder.java
new file mode 100644
index 000000000..de5132151
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Encoder.java
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CoderResult;
+
+/**
+ * UTF-16-to-Big5 encoder built on the pointer lookup in Big5Data.
+ *
+ * ASCII is written as a single byte; every other encodable character is
+ * written as a lead byte and a trail byte. If only the lead byte fits in
+ * the output buffer, the trail byte is parked in pendingTrail and written
+ * on the next call.
+ *
+ * In reporting mode (reportMalformed or reportUnmappable is true) a high
+ * surrogate is never carried over between calls, so that error results
+ * can always rewind the input position to the start of the offending
+ * sequence. When an error kind is not reported, the offending character
+ * is replaced with '?'.
+ */
+public class Big5Encoder extends Encoder {
+
+    /** High surrogate waiting for its low surrogate, or U+0000 if none. */
+    private char utf16Lead = '\u0000';
+
+    /**
+     * Trail byte that did not fit in the output buffer, or 0 if nothing
+     * is pending.
+     */
+    private byte pendingTrail = 0;
+
+    protected Big5Encoder(Charset cs) {
+        super(cs, 1.5f, 2.0f);
+    }
+
+    /**
+     * Encodes as many code units as the input and output buffers allow.
+     *
+     * @param in the UTF-16 code units to encode
+     * @param out receives the Big5 bytes
+     * @return UNDERFLOW when the input is used up (in reporting mode a
+     *         trailing high surrogate is left unconsumed), OVERFLOW when
+     *         the output is full, or a malformed-input or
+     *         unmappable-character result with the input position at the
+     *         start of the offending sequence
+     */
+    @Override protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
+        assert !((this.reportMalformed || this.reportUnmappable) && (utf16Lead != '\u0000')):
+            "When reporting, this method should never return with utf16Lead set.";
+        if (pendingTrail != 0) {
+            if (!out.hasRemaining()) {
+                return CoderResult.OVERFLOW;
+            }
+            out.put(pendingTrail);
+            pendingTrail = 0;
+        }
+        for (;;) {
+            if (!in.hasRemaining()) {
+                return CoderResult.UNDERFLOW;
+            }
+            if (!out.hasRemaining()) {
+                return CoderResult.OVERFLOW;
+            }
+            boolean isAstral; // true means Plane 2, false means BMP
+            char lowBits; // The low 16 bits of the code point
+            char codeUnit = in.get();
+            int highBits = (codeUnit & 0xFC00);
+            if (highBits == 0xD800) {
+                // high surrogate
+                if (utf16Lead != '\u0000') {
+                    // High surrogate follows another high surrogate. The
+                    // *previous* code unit is in error.
+                    utf16Lead = '\u0000';
+                    if (this.reportMalformed) {
+                        // The caller had better adhere to the API contract.
+                        // Otherwise, this may throw.
+                        in.position(in.position() - 2);
+                        return CoderResult.malformedForLength(1);
+                    }
+                    out.put((byte) '?');
+                    // Unconsume the current high surrogate and reloop so
+                    // that the output space is re-checked before it
+                    // becomes the pending lead.
+                    in.position(in.position() - 1);
+                    continue;
+                }
+                if ((this.reportMalformed || this.reportUnmappable)
+                        && !in.hasRemaining()) {
+                    // When reporting, the lead must not be carried across
+                    // calls (error results rewind the buffer to it), so
+                    // leave it unconsumed to be presented again together
+                    // with the code unit that follows it.
+                    in.position(in.position() - 1);
+                    return CoderResult.UNDERFLOW;
+                }
+                utf16Lead = codeUnit;
+                continue;
+            }
+            if (highBits == 0xDC00) {
+                // low surrogate
+                if (utf16Lead == '\u0000') {
+                    // Got low surrogate without a previous high surrogate
+                    if (this.reportMalformed) {
+                        in.position(in.position() - 1);
+                        return CoderResult.malformedForLength(1);
+                    }
+                    out.put((byte) '?');
+                    continue;
+                }
+                // 56613888 == (0xD800 << 10) + 0xDC00 - 0x10000, i.e. the
+                // usual surrogate-pair-to-code-point adjustment.
+                int codePoint = (utf16Lead << 10) + codeUnit - 56613888;
+                utf16Lead = '\u0000';
+                // Plane 2 is the only astral plane that has potentially
+                // Big5-encodable characters.
+                if ((0xFF0000 & codePoint) != 0x20000) {
+                    if (this.reportUnmappable) {
+                        in.position(in.position() - 2);
+                        return CoderResult.unmappableForLength(2);
+                    }
+                    out.put((byte) '?');
+                    continue;
+                }
+                isAstral = true;
+                lowBits = (char)(codePoint & 0xFFFF);
+            } else {
+                // not a surrogate
+                if (utf16Lead != '\u0000') {
+                    // Non-surrogate follows a high surrogate. The *previous*
+                    // code unit is in error.
+                    utf16Lead = '\u0000';
+                    if (this.reportMalformed) {
+                        // The caller had better adhere to the API contract.
+                        // Otherwise, this may throw.
+                        in.position(in.position() - 2);
+                        return CoderResult.malformedForLength(1);
+                    }
+                    out.put((byte) '?');
+                    // Let's unconsume this code unit and reloop in order to
+                    // re-check if the output buffer still has space.
+                    in.position(in.position() - 1);
+                    continue;
+                }
+                isAstral = false;
+                lowBits = codeUnit;
+            }
+            // isAstral now tells us if we have a Plane 2 or a BMP character.
+            // lowBits tells us the low 16 bits.
+            // After all the above setup to deal with UTF-16, we are now
+            // finally ready to follow the spec.
+            if (!isAstral && lowBits <= 0x7F) {
+                out.put((byte)lowBits);
+                continue;
+            }
+            int pointer = Big5Data.findPointer(lowBits, isAstral);
+            if (pointer == 0) {
+                // No Big5 mapping for this character.
+                if (this.reportUnmappable) {
+                    if (isAstral) {
+                        in.position(in.position() - 2);
+                        return CoderResult.unmappableForLength(2);
+                    }
+                    in.position(in.position() - 1);
+                    return CoderResult.unmappableForLength(1);
+                }
+                out.put((byte)'?');
+                continue;
+            }
+            int lead = pointer / 157 + 0x81;
+            int trail = pointer % 157;
+            // Trail bytes occupy 0x40..0x7E and 0xA1..0xFE.
+            if (trail < 0x3F) {
+                trail += 0x40;
+            } else {
+                trail += 0x62;
+            }
+            out.put((byte)lead);
+            if (!out.hasRemaining()) {
+                pendingTrail = (byte)trail;
+                return CoderResult.OVERFLOW;
+            }
+            out.put((byte)trail);
+            continue;
+        }
+    }
+
+    /**
+     * Writes whatever is still pending at the end of the input: a parked
+     * trail byte, and '?' for a high surrogate that never received its
+     * low surrogate.
+     *
+     * @param out receives the remaining output
+     * @return OVERFLOW if out lacks room, otherwise UNDERFLOW
+     */
+    @Override protected CoderResult implFlush(ByteBuffer out) {
+        if (pendingTrail != 0) {
+            if (!out.hasRemaining()) {
+                return CoderResult.OVERFLOW;
+            }
+            out.put(pendingTrail);
+            pendingTrail = 0;
+        }
+        if (utf16Lead != '\u0000') {
+            assert !this.reportMalformed: "How come utf16Lead got to be non-zero when encodeLoop() returned in the reporting mode?";
+            if (!out.hasRemaining()) {
+                return CoderResult.OVERFLOW;
+            }
+            out.put((byte)'?');
+            utf16Lead = '\u0000';
+        }
+        return CoderResult.UNDERFLOW;
+    }
+
+    /** Discards any pending high surrogate and parked trail byte. */
+    @Override protected void implReset() {
+        utf16Lead = '\u0000';
+        pendingTrail = 0;
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Decoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Decoder.java
new file mode 100644
index 000000000..41e06c63a
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Decoder.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
+
+/**
+ * Common base class for the decoders in this package. It restricts the
+ * error-handling configuration of {@link CharsetDecoder}: errors can be
+ * reported or replaced but never ignored, and the only accepted
+ * replacement string is U+FFFD.
+ */
+public abstract class Decoder extends CharsetDecoder {
+
+    /**
+     * <code>true</code> (the initial value) when malformed input is to be
+     * reported, <code>false</code> when it is to be replaced. Maintained by
+     * {@link #implOnMalformedInput(CodingErrorAction)}.
+     */
+    protected boolean report = true;
+
+    /**
+     * Forwards all arguments unchanged to
+     * {@link CharsetDecoder#CharsetDecoder(Charset, float, float)}.
+     *
+     * @param cs the charset that created this decoder
+     * @param averageCharsPerByte expected number of chars per input byte
+     * @param maxCharsPerByte maximum number of chars per input byte
+     */
+    protected Decoder(Charset cs, float averageCharsPerByte, float maxCharsPerByte) {
+        super(cs, averageCharsPerByte, maxCharsPerByte);
+    }
+
+    /**
+     * Rejects the two argument values that no error-action setter in this
+     * class accepts.
+     *
+     * @param action the action passed to the setter
+     * @throws IllegalArgumentException if <code>action</code> is
+     *         <code>null</code> or {@link CodingErrorAction#IGNORE}
+     */
+    private static void rejectNullAndIgnore(CodingErrorAction action) {
+        if (action == null) {
+            throw new IllegalArgumentException("The argument must not be null.");
+        }
+        if (action == CodingErrorAction.IGNORE) {
+            throw new IllegalArgumentException("The Encoding Standard does not allow errors to be ignored.");
+        }
+    }
+
+    /**
+     * Records whether malformed input is reported or replaced.
+     *
+     * @param newAction {@link CodingErrorAction#REPORT} or
+     *        {@link CodingErrorAction#REPLACE}
+     * @throws IllegalArgumentException for <code>null</code>,
+     *         {@link CodingErrorAction#IGNORE} or any other value
+     */
+    @Override protected final void implOnMalformedInput(CodingErrorAction newAction) {
+        rejectNullAndIgnore(newAction);
+        boolean wantsReport = (newAction == CodingErrorAction.REPORT);
+        if (wantsReport || newAction == CodingErrorAction.REPLACE) {
+            this.report = wantsReport;
+            return;
+        }
+        assert false: "Unreachable.";
+        throw new IllegalArgumentException("Unknown CodingErrorAction.");
+    }
+
+    /**
+     * Validates the action but stores nothing.
+     *
+     * @param newAction {@link CodingErrorAction#REPORT} or
+     *        {@link CodingErrorAction#REPLACE}
+     * @throws IllegalArgumentException for <code>null</code>,
+     *         {@link CodingErrorAction#IGNORE} or any other value
+     */
+    @Override protected final void implOnUnmappableCharacter(
+            CodingErrorAction newAction) {
+        rejectNullAndIgnore(newAction);
+        if (newAction == CodingErrorAction.REPLACE
+                || newAction == CodingErrorAction.REPORT) {
+            // We don't actually care, since there are no unmappables.
+            return;
+        }
+        assert false: "Unreachable.";
+        throw new IllegalArgumentException("Unknown CodingErrorAction.");
+    }
+
+    /**
+     * Accepts U+FFFD as the replacement and nothing else.
+     *
+     * @param newReplacement the replacement string being installed
+     * @throws IllegalArgumentException if the string is anything other than
+     *         the single character U+FFFD
+     */
+    @Override protected final void implReplaceWith(String newReplacement) {
+        if ("\uFFFD".equals(newReplacement)) {
+            return;
+        }
+        throw new IllegalArgumentException("Only U+FFFD is allowed as the replacement.");
+    }
+
+    // TODO: Check if the JDK decoders reset the reporting state on reset()
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Encoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Encoder.java
new file mode 100644
index 000000000..6fc011ed2
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Encoder.java
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CodingErrorAction;
+
+/**
+ * Common base class for the encoders in this package. It restricts the
+ * error-handling configuration of {@link CharsetEncoder}: errors can be
+ * reported or replaced but never ignored, and the only legal replacement is
+ * the single byte '?'.
+ */
+public abstract class Encoder extends CharsetEncoder {
+
+    /**
+     * <code>true</code> (the initial value) when malformed input is to be
+     * reported, <code>false</code> when it is to be replaced. Maintained
+     * only by {@link #implOnMalformedInput(CodingErrorAction)}.
+     */
+    boolean reportMalformed = true;
+
+    /**
+     * <code>true</code> (the initial value) when unmappable characters are
+     * to be reported, <code>false</code> when they are to be replaced.
+     * Maintained only by
+     * {@link #implOnUnmappableCharacter(CodingErrorAction)}.
+     */
+    boolean reportUnmappable = true;
+
+    /**
+     * Forwards all arguments unchanged to
+     * {@link CharsetEncoder#CharsetEncoder(Charset, float, float)}.
+     *
+     * @param cs the charset that created this encoder
+     * @param averageBytesPerChar expected number of bytes per input char
+     * @param maxBytesPerChar maximum number of bytes per input char
+     */
+    protected Encoder(Charset cs, float averageBytesPerChar,
+            float maxBytesPerChar) {
+        super(cs, averageBytesPerChar, maxBytesPerChar);
+    }
+
+    /**
+     * Records whether malformed input is reported or replaced.
+     *
+     * @param newAction {@link CodingErrorAction#REPORT} or
+     *        {@link CodingErrorAction#REPLACE}
+     * @throws IllegalArgumentException for <code>null</code>,
+     *         {@link CodingErrorAction#IGNORE} or any other value
+     */
+    @Override protected final void implOnMalformedInput(CodingErrorAction newAction) {
+        if (newAction == null) {
+            throw new IllegalArgumentException("The argument must not be null.");
+        }
+        if (newAction == CodingErrorAction.IGNORE) {
+            throw new IllegalArgumentException("The Encoding Standard does not allow errors to be ignored.");
+        }
+        if (newAction == CodingErrorAction.REPLACE) {
+            this.reportMalformed = false;
+            return;
+        }
+        if (newAction == CodingErrorAction.REPORT) {
+            // This branch used to set reportUnmappable, so REPORT could
+            // never re-enable malformed-input reporting after REPLACE.
+            this.reportMalformed = true;
+            return;
+        }
+        assert false: "Unreachable.";
+        throw new IllegalArgumentException("Unknown CodingErrorAction.");
+    }
+
+    /**
+     * Records whether unmappable characters are reported or replaced.
+     *
+     * @param newAction {@link CodingErrorAction#REPORT} or
+     *        {@link CodingErrorAction#REPLACE}
+     * @throws IllegalArgumentException for <code>null</code>,
+     *         {@link CodingErrorAction#IGNORE} or any other value
+     */
+    @Override protected final void implOnUnmappableCharacter(
+            CodingErrorAction newAction) {
+        if (newAction == null) {
+            throw new IllegalArgumentException("The argument must not be null.");
+        }
+        if (newAction == CodingErrorAction.IGNORE) {
+            throw new IllegalArgumentException("The Encoding Standard does not allow errors to be ignored.");
+        }
+        if (newAction == CodingErrorAction.REPLACE) {
+            this.reportUnmappable = false;
+            return;
+        }
+        if (newAction == CodingErrorAction.REPORT) {
+            // This branch used to set reportMalformed, so REPORT could
+            // never re-enable unmappable-character reporting after REPLACE.
+            this.reportUnmappable = true;
+            return;
+        }
+        assert false: "Unreachable.";
+        throw new IllegalArgumentException("Unknown CodingErrorAction.");
+    }
+
+    /**
+     * Tells whether the given bytes may be used as the replacement.
+     *
+     * @param repl the candidate replacement, possibly <code>null</code>
+     * @return <code>true</code> only for a one-element array holding '?'
+     */
+    @Override public boolean isLegalReplacement(byte[] repl) {
+        return repl != null && repl.length == 1 && repl[0] == '?';
+    }
+
+    /**
+     * Does nothing: this class keeps no copy of the replacement, and the
+     * only value {@link #isLegalReplacement(byte[])} admits is a single
+     * '?'.
+     *
+     * @param newReplacement the replacement being installed; not used
+     */
+    @Override protected final void implReplaceWith(byte[] newReplacement) {
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Encoding.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Encoding.java
new file mode 100644
index 000000000..6e59ef7c7
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Encoding.java
@@ -0,0 +1,886 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
+import java.nio.charset.spi.CharsetProvider;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/**
+ * Represents an <a href="https://encoding.spec.whatwg.org/#encoding">encoding</a>
+ * as defined in the <a href="https://encoding.spec.whatwg.org/">Encoding
+ * Standard</a>, provides access to each encoding defined in the Encoding
+ * Standard via a static constant and provides the
+ * "<a href="https://encoding.spec.whatwg.org/#concept-encoding-get">get an
+ * encoding</a>" algorithm defined in the Encoding Standard.
+ *
+ * <p>This class inherits from {@link Charset} to allow the Encoding
+ * Standard-compliant encodings to be used in contexts that support
+ * <code>Charset</code> instances. However, by design, the Encoding
+ * Standard-compliant encodings are not supplied via a {@link CharsetProvider}
+ * and, therefore, are not available via and do not interfere with the static
+ * methods provided by <code>Charset</code>. (This class provides methods of
+ * the same name to hide each static method of <code>Charset</code> to help
+ * avoid accidental calls to the static methods of the superclass when working
+ * with Encoding Standard-compliant encodings.)
+ *
+ * <p>When an application needs to use a particular encoding, such as utf-8
+ * or windows-1252, the corresponding constant, i.e.
+ * {@link #UTF_8 Encoding.UTF_8} and {@link #WINDOWS_1252 Encoding.WINDOWS_1252}
+ * respectively, should be used. However, when the application receives an
+ * encoding label from external input, the method {@link #forName(String)
+ * forName()} should be used to obtain the object representing the encoding
+ * identified by the label. In contexts where labels that map to the
+ * <a href="https://encoding.spec.whatwg.org/#replacement">replacement
+ * encoding</a> should be treated as unknown, the method {@link
+ * #forNameNoReplacement(String) forNameNoReplacement()} should be used instead.
+ *
+ *
+ * @author hsivonen
+ */
+public abstract class Encoding extends Charset {
+
+ /**
+ * Every label that {@link #forName(String)} recognizes, in lowercase.
+ * <code>forName</code> looks labels up with
+ * {@link Arrays#binarySearch(Object[], Object)}, so the entries must stay
+ * in ascending natural <code>String</code> order. The encoding for the
+ * label at index <i>i</i> is <code>ENCODINGS_FOR_LABELS[i]</code>, so the
+ * two arrays have to be edited together.
+ */
+ private static final String[] LABELS = {
+ "866",
+ "ansi_x3.4-1968",
+ "arabic",
+ "ascii",
+ "asmo-708",
+ "big5",
+ "big5-hkscs",
+ "chinese",
+ "cn-big5",
+ "cp1250",
+ "cp1251",
+ "cp1252",
+ "cp1253",
+ "cp1254",
+ "cp1255",
+ "cp1256",
+ "cp1257",
+ "cp1258",
+ "cp819",
+ "cp866",
+ "csbig5",
+ "cseuckr",
+ "cseucpkdfmtjapanese",
+ "csgb2312",
+ "csibm866",
+ "csiso2022jp",
+ "csiso2022kr",
+ "csiso58gb231280",
+ "csiso88596e",
+ "csiso88596i",
+ "csiso88598e",
+ "csiso88598i",
+ "csisolatin1",
+ "csisolatin2",
+ "csisolatin3",
+ "csisolatin4",
+ "csisolatin5",
+ "csisolatin6",
+ "csisolatin9",
+ "csisolatinarabic",
+ "csisolatincyrillic",
+ "csisolatingreek",
+ "csisolatinhebrew",
+ "cskoi8r",
+ "csksc56011987",
+ "csmacintosh",
+ "csshiftjis",
+ "cyrillic",
+ "dos-874",
+ "ecma-114",
+ "ecma-118",
+ "elot_928",
+ "euc-jp",
+ "euc-kr",
+ "gb18030",
+ "gb2312",
+ "gb_2312",
+ "gb_2312-80",
+ "gbk",
+ "greek",
+ "greek8",
+ "hebrew",
+ "hz-gb-2312",
+ "ibm819",
+ "ibm866",
+ "iso-2022-cn",
+ "iso-2022-cn-ext",
+ "iso-2022-jp",
+ "iso-2022-kr",
+ "iso-8859-1",
+ "iso-8859-10",
+ "iso-8859-11",
+ "iso-8859-13",
+ "iso-8859-14",
+ "iso-8859-15",
+ "iso-8859-16",
+ "iso-8859-2",
+ "iso-8859-3",
+ "iso-8859-4",
+ "iso-8859-5",
+ "iso-8859-6",
+ "iso-8859-6-e",
+ "iso-8859-6-i",
+ "iso-8859-7",
+ "iso-8859-8",
+ "iso-8859-8-e",
+ "iso-8859-8-i",
+ "iso-8859-9",
+ "iso-ir-100",
+ "iso-ir-101",
+ "iso-ir-109",
+ "iso-ir-110",
+ "iso-ir-126",
+ "iso-ir-127",
+ "iso-ir-138",
+ "iso-ir-144",
+ "iso-ir-148",
+ "iso-ir-149",
+ "iso-ir-157",
+ "iso-ir-58",
+ "iso8859-1",
+ "iso8859-10",
+ "iso8859-11",
+ "iso8859-13",
+ "iso8859-14",
+ "iso8859-15",
+ "iso8859-2",
+ "iso8859-3",
+ "iso8859-4",
+ "iso8859-5",
+ "iso8859-6",
+ "iso8859-7",
+ "iso8859-8",
+ "iso8859-9",
+ "iso88591",
+ "iso885910",
+ "iso885911",
+ "iso885913",
+ "iso885914",
+ "iso885915",
+ "iso88592",
+ "iso88593",
+ "iso88594",
+ "iso88595",
+ "iso88596",
+ "iso88597",
+ "iso88598",
+ "iso88599",
+ "iso_8859-1",
+ "iso_8859-15",
+ "iso_8859-1:1987",
+ "iso_8859-2",
+ "iso_8859-2:1987",
+ "iso_8859-3",
+ "iso_8859-3:1988",
+ "iso_8859-4",
+ "iso_8859-4:1988",
+ "iso_8859-5",
+ "iso_8859-5:1988",
+ "iso_8859-6",
+ "iso_8859-6:1987",
+ "iso_8859-7",
+ "iso_8859-7:1987",
+ "iso_8859-8",
+ "iso_8859-8:1988",
+ "iso_8859-9",
+ "iso_8859-9:1989",
+ "koi",
+ "koi8",
+ "koi8-r",
+ "koi8-ru",
+ "koi8-u",
+ "koi8_r",
+ "korean",
+ "ks_c_5601-1987",
+ "ks_c_5601-1989",
+ "ksc5601",
+ "ksc_5601",
+ "l1",
+ "l2",
+ "l3",
+ "l4",
+ "l5",
+ "l6",
+ "l9",
+ "latin1",
+ "latin2",
+ "latin3",
+ "latin4",
+ "latin5",
+ "latin6",
+ "logical",
+ "mac",
+ "macintosh",
+ "ms932",
+ "ms_kanji",
+ "shift-jis",
+ "shift_jis",
+ "sjis",
+ "sun_eu_greek",
+ "tis-620",
+ "unicode-1-1-utf-8",
+ "us-ascii",
+ "utf-16",
+ "utf-16be",
+ "utf-16le",
+ "utf-8",
+ "utf8",
+ "visual",
+ "windows-1250",
+ "windows-1251",
+ "windows-1252",
+ "windows-1253",
+ "windows-1254",
+ "windows-1255",
+ "windows-1256",
+ "windows-1257",
+ "windows-1258",
+ "windows-31j",
+ "windows-874",
+ "windows-949",
+ "x-cp1250",
+ "x-cp1251",
+ "x-cp1252",
+ "x-cp1253",
+ "x-cp1254",
+ "x-cp1255",
+ "x-cp1256",
+ "x-cp1257",
+ "x-cp1258",
+ "x-euc-jp",
+ "x-gbk",
+ "x-mac-cyrillic",
+ "x-mac-roman",
+ "x-mac-ukrainian",
+ "x-sjis",
+ "x-user-defined",
+ "x-x-big5",
+ };
+
+ /**
+ * The encoding for each entry of <code>LABELS</code>: the element at index
+ * <i>i</i> is what {@link #forName(String)} returns when its binary search
+ * finds the label at <code>LABELS[i]</code>. The two arrays must therefore
+ * have the same length and be edited together.
+ */
+ private static final Encoding[] ENCODINGS_FOR_LABELS = {
+ Ibm866.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso6.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso6.INSTANCE,
+ Big5.INSTANCE,
+ Big5.INSTANCE,
+ Gbk.INSTANCE,
+ Big5.INSTANCE,
+ Windows1250.INSTANCE,
+ Windows1251.INSTANCE,
+ Windows1252.INSTANCE,
+ Windows1253.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1255.INSTANCE,
+ Windows1256.INSTANCE,
+ Windows1257.INSTANCE,
+ Windows1258.INSTANCE,
+ Windows1252.INSTANCE,
+ Ibm866.INSTANCE,
+ Big5.INSTANCE,
+ EucKr.INSTANCE,
+ EucJp.INSTANCE,
+ Gbk.INSTANCE,
+ Ibm866.INSTANCE,
+ Iso2022Jp.INSTANCE,
+ Replacement.INSTANCE,
+ Gbk.INSTANCE,
+ Iso6.INSTANCE,
+ Iso6.INSTANCE,
+ Iso8.INSTANCE,
+ Iso8I.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Windows1254.INSTANCE,
+ Iso10.INSTANCE,
+ Iso15.INSTANCE,
+ Iso6.INSTANCE,
+ Iso5.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Koi8R.INSTANCE,
+ EucKr.INSTANCE,
+ Macintosh.INSTANCE,
+ ShiftJis.INSTANCE,
+ Iso5.INSTANCE,
+ Windows874.INSTANCE,
+ Iso6.INSTANCE,
+ Iso7.INSTANCE,
+ Iso7.INSTANCE,
+ EucJp.INSTANCE,
+ EucKr.INSTANCE,
+ Gb18030.INSTANCE,
+ Gbk.INSTANCE,
+ Gbk.INSTANCE,
+ Gbk.INSTANCE,
+ Gbk.INSTANCE,
+ Iso7.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Replacement.INSTANCE,
+ Windows1252.INSTANCE,
+ Ibm866.INSTANCE,
+ Replacement.INSTANCE,
+ Replacement.INSTANCE,
+ Iso2022Jp.INSTANCE,
+ Replacement.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso10.INSTANCE,
+ Windows874.INSTANCE,
+ Iso13.INSTANCE,
+ Iso14.INSTANCE,
+ Iso15.INSTANCE,
+ Iso16.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Iso5.INSTANCE,
+ Iso6.INSTANCE,
+ Iso6.INSTANCE,
+ Iso6.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Iso8.INSTANCE,
+ Iso8I.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Iso7.INSTANCE,
+ Iso6.INSTANCE,
+ Iso8.INSTANCE,
+ Iso5.INSTANCE,
+ Windows1254.INSTANCE,
+ EucKr.INSTANCE,
+ Iso10.INSTANCE,
+ Gbk.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso10.INSTANCE,
+ Windows874.INSTANCE,
+ Iso13.INSTANCE,
+ Iso14.INSTANCE,
+ Iso15.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Iso5.INSTANCE,
+ Iso6.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso10.INSTANCE,
+ Windows874.INSTANCE,
+ Iso13.INSTANCE,
+ Iso14.INSTANCE,
+ Iso15.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Iso5.INSTANCE,
+ Iso6.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso15.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso2.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Iso4.INSTANCE,
+ Iso5.INSTANCE,
+ Iso5.INSTANCE,
+ Iso6.INSTANCE,
+ Iso6.INSTANCE,
+ Iso7.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Iso8.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1254.INSTANCE,
+ Koi8R.INSTANCE,
+ Koi8R.INSTANCE,
+ Koi8R.INSTANCE,
+ Koi8U.INSTANCE,
+ Koi8U.INSTANCE,
+ Koi8R.INSTANCE,
+ EucKr.INSTANCE,
+ EucKr.INSTANCE,
+ EucKr.INSTANCE,
+ EucKr.INSTANCE,
+ EucKr.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Windows1254.INSTANCE,
+ Iso10.INSTANCE,
+ Iso15.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Windows1254.INSTANCE,
+ Iso10.INSTANCE,
+ Iso8I.INSTANCE,
+ Macintosh.INSTANCE,
+ Macintosh.INSTANCE,
+ ShiftJis.INSTANCE,
+ ShiftJis.INSTANCE,
+ ShiftJis.INSTANCE,
+ ShiftJis.INSTANCE,
+ ShiftJis.INSTANCE,
+ Iso7.INSTANCE,
+ Windows874.INSTANCE,
+ Utf8.INSTANCE,
+ Windows1252.INSTANCE,
+ Utf16Le.INSTANCE,
+ Utf16Be.INSTANCE,
+ Utf16Le.INSTANCE,
+ Utf8.INSTANCE,
+ Utf8.INSTANCE,
+ Iso8.INSTANCE,
+ Windows1250.INSTANCE,
+ Windows1251.INSTANCE,
+ Windows1252.INSTANCE,
+ Windows1253.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1255.INSTANCE,
+ Windows1256.INSTANCE,
+ Windows1257.INSTANCE,
+ Windows1258.INSTANCE,
+ ShiftJis.INSTANCE,
+ Windows874.INSTANCE,
+ EucKr.INSTANCE,
+ Windows1250.INSTANCE,
+ Windows1251.INSTANCE,
+ Windows1252.INSTANCE,
+ Windows1253.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1255.INSTANCE,
+ Windows1256.INSTANCE,
+ Windows1257.INSTANCE,
+ Windows1258.INSTANCE,
+ EucJp.INSTANCE,
+ Gbk.INSTANCE,
+ MacCyrillic.INSTANCE,
+ Macintosh.INSTANCE,
+ MacCyrillic.INSTANCE,
+ ShiftJis.INSTANCE,
+ UserDefined.INSTANCE,
+ Big5.INSTANCE,
+ };
+
+ /**
+ * One entry per encoding object, without duplicates. This is the list
+ * {@link #availableCharsets()} walks to build its map; that method keys
+ * the entries by {@link Charset#name()} in a sorted map, so the order of
+ * this array does not affect its result.
+ */
+ private static final Encoding[] ENCODINGS = {
+ Big5.INSTANCE,
+ EucJp.INSTANCE,
+ EucKr.INSTANCE,
+ Gb18030.INSTANCE,
+ Gbk.INSTANCE,
+ Ibm866.INSTANCE,
+ Iso2022Jp.INSTANCE,
+ Iso10.INSTANCE,
+ Iso13.INSTANCE,
+ Iso14.INSTANCE,
+ Iso15.INSTANCE,
+ Iso16.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Iso5.INSTANCE,
+ Iso6.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Iso8I.INSTANCE,
+ Koi8R.INSTANCE,
+ Koi8U.INSTANCE,
+ Macintosh.INSTANCE,
+ Replacement.INSTANCE,
+ ShiftJis.INSTANCE,
+ Utf16Be.INSTANCE,
+ Utf16Le.INSTANCE,
+ Utf8.INSTANCE,
+ Windows1250.INSTANCE,
+ Windows1251.INSTANCE,
+ Windows1252.INSTANCE,
+ Windows1253.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1255.INSTANCE,
+ Windows1256.INSTANCE,
+ Windows1257.INSTANCE,
+ Windows1258.INSTANCE,
+ Windows874.INSTANCE,
+ MacCyrillic.INSTANCE,
+ UserDefined.INSTANCE,
+ };
+
+ /**
+ * The big5 encoding.
+ */
+ public static final Encoding BIG5 = Big5.INSTANCE;
+
+ /**
+ * The euc-jp encoding.
+ */
+ public static final Encoding EUC_JP = EucJp.INSTANCE;
+
+ /**
+ * The euc-kr encoding.
+ */
+ public static final Encoding EUC_KR = EucKr.INSTANCE;
+
+ /**
+ * The gb18030 encoding.
+ */
+ public static final Encoding GB18030 = Gb18030.INSTANCE;
+
+ /**
+ * The gbk encoding.
+ */
+ public static final Encoding GBK = Gbk.INSTANCE;
+
+ /**
+ * The ibm866 encoding.
+ */
+ public static final Encoding IBM866 = Ibm866.INSTANCE;
+
+ /**
+ * The iso-2022-jp encoding.
+ */
+ public static final Encoding ISO_2022_JP = Iso2022Jp.INSTANCE;
+
+ /**
+ * The iso-8859-10 encoding.
+ */
+ public static final Encoding ISO_8859_10 = Iso10.INSTANCE;
+
+ /**
+ * The iso-8859-13 encoding.
+ */
+ public static final Encoding ISO_8859_13 = Iso13.INSTANCE;
+
+ /**
+ * The iso-8859-14 encoding.
+ */
+ public static final Encoding ISO_8859_14 = Iso14.INSTANCE;
+
+ /**
+ * The iso-8859-15 encoding.
+ */
+ public static final Encoding ISO_8859_15 = Iso15.INSTANCE;
+
+ /**
+ * The iso-8859-16 encoding.
+ */
+ public static final Encoding ISO_8859_16 = Iso16.INSTANCE;
+
+ /**
+ * The iso-8859-2 encoding.
+ */
+ public static final Encoding ISO_8859_2 = Iso2.INSTANCE;
+
+ /**
+ * The iso-8859-3 encoding.
+ */
+ public static final Encoding ISO_8859_3 = Iso3.INSTANCE;
+
+ /**
+ * The iso-8859-4 encoding.
+ */
+ public static final Encoding ISO_8859_4 = Iso4.INSTANCE;
+
+ /**
+ * The iso-8859-5 encoding.
+ */
+ public static final Encoding ISO_8859_5 = Iso5.INSTANCE;
+
+ /**
+ * The iso-8859-6 encoding.
+ */
+ public static final Encoding ISO_8859_6 = Iso6.INSTANCE;
+
+ /**
+ * The iso-8859-7 encoding.
+ */
+ public static final Encoding ISO_8859_7 = Iso7.INSTANCE;
+
+ /**
+ * The iso-8859-8 encoding.
+ */
+ public static final Encoding ISO_8859_8 = Iso8.INSTANCE;
+
+ /**
+ * The iso-8859-8-i encoding.
+ */
+ public static final Encoding ISO_8859_8_I = Iso8I.INSTANCE;
+
+ /**
+ * The koi8-r encoding.
+ */
+ public static final Encoding KOI8_R = Koi8R.INSTANCE;
+
+ /**
+ * The koi8-u encoding.
+ */
+ public static final Encoding KOI8_U = Koi8U.INSTANCE;
+
+ /**
+ * The macintosh encoding.
+ */
+ public static final Encoding MACINTOSH = Macintosh.INSTANCE;
+
+ /**
+ * The replacement encoding.
+ */
+ public static final Encoding REPLACEMENT = Replacement.INSTANCE;
+
+ /**
+ * The shift_jis encoding.
+ */
+ public static final Encoding SHIFT_JIS = ShiftJis.INSTANCE;
+
+ /**
+ * The utf-16be encoding.
+ */
+ public static final Encoding UTF_16BE = Utf16Be.INSTANCE;
+
+ /**
+ * The utf-16le encoding.
+ */
+ public static final Encoding UTF_16LE = Utf16Le.INSTANCE;
+
+ /**
+ * The utf-8 encoding.
+ */
+ public static final Encoding UTF_8 = Utf8.INSTANCE;
+
+ /**
+ * The windows-1250 encoding.
+ */
+ public static final Encoding WINDOWS_1250 = Windows1250.INSTANCE;
+
+ /**
+ * The windows-1251 encoding.
+ */
+ public static final Encoding WINDOWS_1251 = Windows1251.INSTANCE;
+
+ /**
+ * The windows-1252 encoding.
+ */
+ public static final Encoding WINDOWS_1252 = Windows1252.INSTANCE;
+
+ /**
+ * The windows-1253 encoding.
+ */
+ public static final Encoding WINDOWS_1253 = Windows1253.INSTANCE;
+
+ /**
+ * The windows-1254 encoding.
+ */
+ public static final Encoding WINDOWS_1254 = Windows1254.INSTANCE;
+
+ /**
+ * The windows-1255 encoding.
+ */
+ public static final Encoding WINDOWS_1255 = Windows1255.INSTANCE;
+
+ /**
+ * The windows-1256 encoding.
+ */
+ public static final Encoding WINDOWS_1256 = Windows1256.INSTANCE;
+
+ /**
+ * The windows-1257 encoding.
+ */
+ public static final Encoding WINDOWS_1257 = Windows1257.INSTANCE;
+
+ /**
+ * The windows-1258 encoding.
+ */
+ public static final Encoding WINDOWS_1258 = Windows1258.INSTANCE;
+
+ /**
+ * The windows-874 encoding.
+ */
+ public static final Encoding WINDOWS_874 = Windows874.INSTANCE;
+
+ /**
+ * The x-mac-cyrillic encoding.
+ */
+ public static final Encoding X_MAC_CYRILLIC = MacCyrillic.INSTANCE;
+
+ /**
+ * The x-user-defined encoding.
+ */
+ public static final Encoding X_USER_DEFINED = UserDefined.INSTANCE;
+
+
+ // Cache for availableCharsets(): stays null until that method is first
+ // called, then holds the unmodifiable name-to-encoding map it built.
+private static SortedMap<String, Charset> encodings = null;
+
+ /**
+ * Passes both arguments unchanged to
+ * {@link Charset#Charset(String, String[])}.
+ *
+ * @param canonicalName the canonical name handed to <code>Charset</code>
+ * @param aliases the aliases handed to <code>Charset</code>
+ */
+ protected Encoding(String canonicalName, String[] aliases) {
+ super(canonicalName, aliases);
+ }
+
+ /**
+ * Position of the scan that {@link #forName(String)} runs over a label:
+ * HEAD until the first label character is seen, LABEL while label
+ * characters are being collected, TAIL once whitespace has followed the
+ * label.
+ */
+ private enum State {
+ HEAD, LABEL, TAIL
+ };
+
+    /**
+     * Returns the encoding that a label identifies.
+     *
+     * <p>The label is first looked up exactly as given. If that fails, it
+     * is normalized and looked up again: ASCII whitespace (space, tab, LF,
+     * CR, FF) around the label is dropped and ASCII upper-case letters are
+     * folded to lower case.
+     *
+     * @param label the label to resolve
+     * @return the encoding registered for the label
+     * @throws IllegalArgumentException if <code>label</code> is
+     *         <code>null</code>
+     * @throws IllegalCharsetNameException if the label is empty, contains a
+     *         character other than ASCII letters, digits, '-', '+', '.',
+     *         ':', '_' and ASCII whitespace, starts with one of those
+     *         punctuation characters, or has whitespace in its middle
+     * @throws UnsupportedCharsetException if the normalized label is not a
+     *         known label
+     */
+    public static Encoding forName(String label) {
+        if (label == null) {
+            throw new IllegalArgumentException("Label must not be null.");
+        }
+        if (label.length() == 0) {
+            throw new IllegalCharsetNameException(label);
+        }
+        // Fast path: the label is already in its table form.
+        int exact = Arrays.binarySearch(LABELS, label);
+        if (exact >= 0) {
+            return ENCODINGS_FOR_LABELS[exact];
+        }
+        // Slow path: build the normalized label one character at a time.
+        StringBuilder normalized = new StringBuilder();
+        State state = State.HEAD;
+        final int length = label.length();
+        for (int i = 0; i < length; i++) {
+            char c = label.charAt(i);
+            boolean whitespace = c == ' ' || c == '\n' || c == '\r'
+                    || c == '\t' || c == '\u000C';
+            if (whitespace) {
+                // Whitespace after label characters ends the label.
+                if (state == State.LABEL) {
+                    state = State.TAIL;
+                }
+                continue;
+            }
+            boolean upper = c >= 'A' && c <= 'Z';
+            boolean alphanumeric = upper || (c >= 'a' && c <= 'z')
+                    || (c >= '0' && c <= '9');
+            boolean punctuation = c == '-' || c == '+' || c == '.'
+                    || c == ':' || c == '_';
+            boolean allowedHere = (alphanumeric || punctuation)
+                    && state != State.TAIL
+                    && !(punctuation && state == State.HEAD);
+            if (!allowedHere) {
+                throw new IllegalCharsetNameException(label);
+            }
+            if (upper) {
+                c += 0x20;
+            }
+            state = State.LABEL;
+            normalized.append(c);
+        }
+        int folded = Arrays.binarySearch(LABELS, normalized.toString());
+        if (folded >= 0) {
+            return ENCODINGS_FOR_LABELS[folded];
+        }
+        throw new UnsupportedCharsetException(label);
+    }
+
+    /**
+     * Resolves a label like {@link #forName(String)} but treats labels of
+     * the replacement encoding as unknown.
+     *
+     * @param label the label to resolve
+     * @return the encoding for the label, never {@link #REPLACEMENT}
+     * @throws UnsupportedCharsetException if the label is unknown or maps
+     *         to {@link #REPLACEMENT}
+     */
+    public static Encoding forNameNoReplacement(String label) {
+        Encoding resolved = Encoding.forName(label);
+        if (resolved != Encoding.REPLACEMENT) {
+            return resolved;
+        }
+        throw new UnsupportedCharsetException(label);
+    }
+
+    /**
+     * Tells whether {@link #forName(String)} can resolve the label.
+     *
+     * <p>Only {@link UnsupportedCharsetException} is turned into
+     * <code>false</code>; the other exceptions of <code>forName</code>
+     * (null or syntactically illegal label) propagate to the caller.
+     *
+     * @param label the label to test
+     * @return <code>true</code> if the label resolves to an encoding
+     */
+    public static boolean isSupported(String label) {
+        try {
+            Encoding.forName(label);
+            return true;
+        } catch (UnsupportedCharsetException e) {
+            return false;
+        }
+    }
+
+    /**
+     * Tells whether {@link #forNameNoReplacement(String)} can resolve the
+     * label.
+     *
+     * <p>Only {@link UnsupportedCharsetException} is turned into
+     * <code>false</code>; other exceptions propagate to the caller.
+     *
+     * @param label the label to test
+     * @return <code>true</code> if the label resolves to an encoding other
+     *         than the replacement encoding
+     */
+    public static boolean isSupportedNoReplacement(String label) {
+        try {
+            Encoding.forNameNoReplacement(label);
+            return true;
+        } catch (UnsupportedCharsetException e) {
+            return false;
+        }
+    }
+
+    /**
+     * Returns every encoding of this class keyed by its
+     * {@link Charset#name() name}.
+     *
+     * <p>The map is built on the first call and the same unmodifiable
+     * instance is handed out afterwards.
+     *
+     * @return an unmodifiable sorted map from encoding name to encoding
+     */
+    public static SortedMap<String, Charset> availableCharsets() {
+        if (encodings != null) {
+            return encodings;
+        }
+        SortedMap<String, Charset> byName = new TreeMap<String, Charset>();
+        for (int i = 0; i < ENCODINGS.length; i++) {
+            Encoding encoding = ENCODINGS[i];
+            byName.put(encoding.name(), encoding);
+        }
+        encodings = Collections.unmodifiableSortedMap(byName);
+        return encodings;
+    }
+
+ /**
+ * Returns the encoding this class uses as its default. The value is fixed
+ * and does not depend on the platform.
+ *
+ * @return {@link #WINDOWS_1252}
+ */
+ public static Encoding defaultCharset() {
+ return WINDOWS_1252;
+ }
+
+ /**
+ * Reports that encoding is not supported; this implementation always
+ * answers <code>false</code>.
+ *
+ * <p>NOTE(review): the subclasses EucJp and EucKr override
+ * {@link #newEncoder()} without overriding this method, so it keeps
+ * answering <code>false</code> for them. Confirm that this is intended.
+ *
+ * @return <code>false</code>
+ */
+ @Override public boolean canEncode() {
+ return false;
+ }
+
+ /**
+ * Always answers <code>false</code>, whichever charset is passed in.
+ *
+ * @param cs the charset to compare against; not inspected
+ * @return <code>false</code>
+ */
+ @Override public boolean contains(Charset cs) {
+ return false;
+ }
+
+ /**
+ * Default for encodings that provide no encoder: always throws.
+ * Subclasses that can encode override this method.
+ *
+ * @return never returns normally
+ * @throws UnsupportedOperationException always
+ */
+ @Override public CharsetEncoder newEncoder() {
+ throw new UnsupportedOperationException("Encoder not implemented.");
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/EucJp.java b/parser/html/java/htmlparser/src/nu/validator/encoding/EucJp.java
new file mode 100644
index 000000000..05fbef810
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/EucJp.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+/**
+ * The singleton behind the euc-jp labels. This class contains no
+ * conversion code of its own: decoders and encoders are obtained from
+ * whatever charset {@link Charset#forName(String)} returns for "euc-jp".
+ */
+class EucJp extends Encoding {
+
+ // Labels of this encoding; passed to the Encoding constructor as aliases.
+ private static final String[] LABELS = {
+ "cseucpkdfmtjapanese",
+ "euc-jp",
+ "x-euc-jp"
+ };
+
+ // Canonical name, also used to look up the delegate charset.
+ private static final String NAME = "euc-jp";
+
+ // The only instance; the constructor is private.
+ static final EucJp INSTANCE = new EucJp();
+
+ private EucJp() {
+ super(NAME, LABELS);
+ }
+
+ // Delegates to the charset registered under NAME; Charset.forName throws
+ // UnsupportedCharsetException if the running JVM does not provide it.
+ @Override public CharsetDecoder newDecoder() {
+ return Charset.forName(NAME).newDecoder();
+ }
+
+ // Delegates to the charset registered under NAME, like newDecoder().
+ @Override public CharsetEncoder newEncoder() {
+ return Charset.forName(NAME).newEncoder();
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/EucKr.java b/parser/html/java/htmlparser/src/nu/validator/encoding/EucKr.java
new file mode 100644
index 000000000..a3923e224
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/EucKr.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+/**
+ * The singleton behind the euc-kr labels. This class contains no
+ * conversion code of its own: decoders and encoders are obtained from
+ * whatever charset {@link Charset#forName(String)} returns for "euc-kr".
+ */
+class EucKr extends Encoding {
+
+ // Labels of this encoding; passed to the Encoding constructor as aliases.
+ private static final String[] LABELS = {
+ "cseuckr",
+ "csksc56011987",
+ "euc-kr",
+ "iso-ir-149",
+ "korean",
+ "ks_c_5601-1987",
+ "ks_c_5601-1989",
+ "ksc5601",
+ "ksc_5601",
+ "windows-949"
+ };
+
+ // Canonical name, also used to look up the delegate charset.
+ private static final String NAME = "euc-kr";
+
+ // The only instance; the constructor is private.
+ static final EucKr INSTANCE = new EucKr();
+
+ private EucKr() {
+ super(NAME, LABELS);
+ }
+
+ // Delegates to the charset registered under NAME; Charset.forName throws
+ // UnsupportedCharsetException if the running JVM does not provide it.
+ @Override public CharsetDecoder newDecoder() {
+ return Charset.forName(NAME).newDecoder();
+ }
+
+ // Delegates to the charset registered under NAME, like newDecoder().
+ @Override public CharsetEncoder newEncoder() {
+ return Charset.forName(NAME).newEncoder();
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/FallibleSingleByteDecoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/FallibleSingleByteDecoder.java
new file mode 100644
index 000000000..34a1f36b5
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/FallibleSingleByteDecoder.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CoderResult;
+
+public final class FallibleSingleByteDecoder extends InfallibleSingleByteDecoder {
+ // Single-byte decoder that, when this.report is set, stops with a malformed-input result at any byte whose table entry is U+FFFD.
+ public FallibleSingleByteDecoder(Encoding cs, char[] upperHalf) {
+ super(cs, upperHalf);
+ }
+ // Returns UNDERFLOW when input is exhausted, OVERFLOW when output is full, or malformedForLength(1) in report mode as described above.
+ @Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+ if (!this.report) { // report is inherited, not declared here (presumably an error-reporting switch; confirm in Decoder)
+ return super.decodeLoop(in, out);
+ } else {
+ for (;;) {
+ if (!in.hasRemaining()) { // input is checked first, so UNDERFLOW wins when both buffers are exhausted
+ return CoderResult.UNDERFLOW;
+ }
+ if (!out.hasRemaining()) {
+ return CoderResult.OVERFLOW;
+ }
+ int b = (int) in.get(); // signed byte: 0x00-0x7F stay non-negative, 0x80-0xFF become -128..-1
+ if (b >= 0) {
+ out.put((char) b); // non-negative bytes are emitted as the same code unit
+ } else {
+ char mapped = this.upperHalf[b + 128]; // b + 128 turns -128..-1 into table index 0..127
+ if (mapped == '\uFFFD') {
+ in.position(in.position() - 1); // step back so the offending byte is left unconsumed
+ return CoderResult.malformedForLength(1);
+ }
+ out.put(mapped);
+ }
+ }
+ }
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Gb18030.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Gb18030.java
new file mode 100644
index 000000000..fcb090dde
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Gb18030.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+class Gb18030 extends Encoding {
+ // Encoding named "gb18030"; both coder factories below delegate to the JDK charset looked up by NAME.
+ private static final String[] LABELS = { // passed to the Encoding constructor (presumably the lookup aliases; confirm in Encoding)
+ "gb18030"
+ };
+ // Name handed to the superclass and reused for the Charset.forName lookups below.
+ private static final String NAME = "gb18030";
+ // Kept below LABELS: static initializers run in textual order and the constructor reads LABELS.
+ static final Gb18030 INSTANCE = new Gb18030();
+
+ private Gb18030() { // private: the only instantiation in this class is INSTANCE
+ super(NAME, LABELS);
+ }
+ // Returns a new decoder on every call; Charset.forName throws an unchecked exception if the runtime lacks this charset.
+ @Override public CharsetDecoder newDecoder() {
+ return Charset.forName(NAME).newDecoder();
+ }
+ // Returns a new encoder on every call, obtained through the same charset lookup.
+ @Override public CharsetEncoder newEncoder() {
+ return Charset.forName(NAME).newEncoder();
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Gbk.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Gbk.java
new file mode 100644
index 000000000..2dc3694ed
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Gbk.java
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+class Gbk extends Encoding {
+ // Encoding named "gbk". Note the asymmetry below: decoding uses the JDK "gb18030" charset, encoding uses NAME ("gbk").
+ private static final String[] LABELS = { // passed to the Encoding constructor (presumably the lookup aliases; confirm in Encoding)
+ "chinese",
+ "csgb2312",
+ "csiso58gb231280",
+ "gb2312",
+ "gb_2312",
+ "gb_2312-80",
+ "gbk",
+ "iso-ir-58",
+ "x-gbk"
+ };
+ // Name handed to the superclass; used for the encoder lookup only.
+ private static final String NAME = "gbk";
+ // Kept below LABELS: static initializers run in textual order and the constructor reads LABELS.
+ static final Gbk INSTANCE = new Gbk();
+
+ private Gbk() { // private: the only instantiation in this class is INSTANCE
+ super(NAME, LABELS);
+ }
+ // NOTE(review): the literal "gb18030" (not NAME) presumably follows the WHATWG Encoding Standard's gbk decoder; confirm in generate-encoding-data.py.
+ @Override public CharsetDecoder newDecoder() {
+ return Charset.forName("gb18030").newDecoder();
+ }
+ // Returns a new encoder on every call from the JDK charset registered under NAME.
+ @Override public CharsetEncoder newEncoder() {
+ return Charset.forName(NAME).newEncoder();
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Ibm866.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Ibm866.java
new file mode 100644
index 000000000..037e62835
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Ibm866.java
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Ibm866 extends Encoding {
+ // Encoding named "ibm866", decoded with a table-driven single-byte decoder.
+ private static final char[] TABLE = { // 128 entries; entry i is the char for byte 0x80 + i (see InfallibleSingleByteDecoder.decodeLoop)
+ '\u0410',
+ '\u0411',
+ '\u0412',
+ '\u0413',
+ '\u0414',
+ '\u0415',
+ '\u0416',
+ '\u0417',
+ '\u0418',
+ '\u0419',
+ '\u041a',
+ '\u041b',
+ '\u041c',
+ '\u041d',
+ '\u041e',
+ '\u041f',
+ '\u0420',
+ '\u0421',
+ '\u0422',
+ '\u0423',
+ '\u0424',
+ '\u0425',
+ '\u0426',
+ '\u0427',
+ '\u0428',
+ '\u0429',
+ '\u042a',
+ '\u042b',
+ '\u042c',
+ '\u042d',
+ '\u042e',
+ '\u042f',
+ '\u0430',
+ '\u0431',
+ '\u0432',
+ '\u0433',
+ '\u0434',
+ '\u0435',
+ '\u0436',
+ '\u0437',
+ '\u0438',
+ '\u0439',
+ '\u043a',
+ '\u043b',
+ '\u043c',
+ '\u043d',
+ '\u043e',
+ '\u043f',
+ '\u2591',
+ '\u2592',
+ '\u2593',
+ '\u2502',
+ '\u2524',
+ '\u2561',
+ '\u2562',
+ '\u2556',
+ '\u2555',
+ '\u2563',
+ '\u2551',
+ '\u2557',
+ '\u255d',
+ '\u255c',
+ '\u255b',
+ '\u2510',
+ '\u2514',
+ '\u2534',
+ '\u252c',
+ '\u251c',
+ '\u2500',
+ '\u253c',
+ '\u255e',
+ '\u255f',
+ '\u255a',
+ '\u2554',
+ '\u2569',
+ '\u2566',
+ '\u2560',
+ '\u2550',
+ '\u256c',
+ '\u2567',
+ '\u2568',
+ '\u2564',
+ '\u2565',
+ '\u2559',
+ '\u2558',
+ '\u2552',
+ '\u2553',
+ '\u256b',
+ '\u256a',
+ '\u2518',
+ '\u250c',
+ '\u2588',
+ '\u2584',
+ '\u258c',
+ '\u2590',
+ '\u2580',
+ '\u0440',
+ '\u0441',
+ '\u0442',
+ '\u0443',
+ '\u0444',
+ '\u0445',
+ '\u0446',
+ '\u0447',
+ '\u0448',
+ '\u0449',
+ '\u044a',
+ '\u044b',
+ '\u044c',
+ '\u044d',
+ '\u044e',
+ '\u044f',
+ '\u0401',
+ '\u0451',
+ '\u0404',
+ '\u0454',
+ '\u0407',
+ '\u0457',
+ '\u040e',
+ '\u045e',
+ '\u00b0',
+ '\u2219',
+ '\u00b7',
+ '\u221a',
+ '\u2116',
+ '\u00a4',
+ '\u25a0',
+ '\u00a0'
+ };
+
+ private static final String[] LABELS = { // passed to the Encoding constructor (presumably the lookup aliases; confirm in Encoding)
+ "866",
+ "cp866",
+ "csibm866",
+ "ibm866"
+ };
+
+ private static final String NAME = "ibm866";
+ // Kept below LABELS: static initializers run in textual order and the constructor reads LABELS.
+ static final Encoding INSTANCE = new Ibm866();
+
+ private Ibm866() { // private: the only instantiation in this class is INSTANCE
+ super(NAME, LABELS);
+ }
+ // Returns a new decoder on every call; all decoders share the same TABLE array.
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/InfallibleSingleByteDecoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/InfallibleSingleByteDecoder.java
new file mode 100644
index 000000000..7cc63072c
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/InfallibleSingleByteDecoder.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CoderResult;
+
+public class InfallibleSingleByteDecoder extends Decoder {
+ // Decoder that maps every byte to exactly one char: 0x00-0x7F to the same code unit, 0x80-0xFF through upperHalf.
+ protected final char[] upperHalf; // entry i is the char for byte 0x80 + i
+ // Stores the table reference without copying it; the two 1.0f values go to the Decoder constructor (presumably chars-per-byte ratios; confirm).
+ protected InfallibleSingleByteDecoder(Encoding cs, char[] upperHalf) {
+ super(cs, 1.0f, 1.0f);
+ this.upperHalf = upperHalf;
+ }
+ // Returns UNDERFLOW when input is exhausted or OVERFLOW when output is full; no malformed or unmappable result is ever returned.
+ @Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+ // TODO figure out if it's worthwhile to optimize the case where both
+ // buffers are array-backed.
+ for (;;) {
+ if (!in.hasRemaining()) { // input is checked first, so UNDERFLOW wins when both buffers are exhausted
+ return CoderResult.UNDERFLOW;
+ }
+ if (!out.hasRemaining()) {
+ return CoderResult.OVERFLOW;
+ }
+ int b = (int) in.get(); // signed byte: 0x00-0x7F stay non-negative, 0x80-0xFF become -128..-1
+ if (b >= 0) {
+ out.put((char) b);
+ } else {
+ out.put(this.upperHalf[b + 128]); // b + 128 turns -128..-1 into table index 0..127
+ }
+ }
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso10.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso10.java
new file mode 100644
index 000000000..895cb5eed
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso10.java
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso10 extends Encoding {
+ // Encoding named "iso-8859-10", decoded with a table-driven single-byte decoder.
+ private static final char[] TABLE = { // 128 entries; entry i is the char for byte 0x80 + i (see InfallibleSingleByteDecoder.decodeLoop)
+ '\u0080', // the first 32 entries map bytes 0x80-0x9F to U+0080-U+009F unchanged
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u0104',
+ '\u0112',
+ '\u0122',
+ '\u012a',
+ '\u0128',
+ '\u0136',
+ '\u00a7',
+ '\u013b',
+ '\u0110',
+ '\u0160',
+ '\u0166',
+ '\u017d',
+ '\u00ad',
+ '\u016a',
+ '\u014a',
+ '\u00b0',
+ '\u0105',
+ '\u0113',
+ '\u0123',
+ '\u012b',
+ '\u0129',
+ '\u0137',
+ '\u00b7',
+ '\u013c',
+ '\u0111',
+ '\u0161',
+ '\u0167',
+ '\u017e',
+ '\u2015',
+ '\u016b',
+ '\u014b',
+ '\u0100',
+ '\u00c1',
+ '\u00c2',
+ '\u00c3',
+ '\u00c4',
+ '\u00c5',
+ '\u00c6',
+ '\u012e',
+ '\u010c',
+ '\u00c9',
+ '\u0118',
+ '\u00cb',
+ '\u0116',
+ '\u00cd',
+ '\u00ce',
+ '\u00cf',
+ '\u00d0',
+ '\u0145',
+ '\u014c',
+ '\u00d3',
+ '\u00d4',
+ '\u00d5',
+ '\u00d6',
+ '\u0168',
+ '\u00d8',
+ '\u0172',
+ '\u00da',
+ '\u00db',
+ '\u00dc',
+ '\u00dd',
+ '\u00de',
+ '\u00df',
+ '\u0101',
+ '\u00e1',
+ '\u00e2',
+ '\u00e3',
+ '\u00e4',
+ '\u00e5',
+ '\u00e6',
+ '\u012f',
+ '\u010d',
+ '\u00e9',
+ '\u0119',
+ '\u00eb',
+ '\u0117',
+ '\u00ed',
+ '\u00ee',
+ '\u00ef',
+ '\u00f0',
+ '\u0146',
+ '\u014d',
+ '\u00f3',
+ '\u00f4',
+ '\u00f5',
+ '\u00f6',
+ '\u0169',
+ '\u00f8',
+ '\u0173',
+ '\u00fa',
+ '\u00fb',
+ '\u00fc',
+ '\u00fd',
+ '\u00fe',
+ '\u0138'
+ };
+
+ private static final String[] LABELS = { // passed to the Encoding constructor (presumably the lookup aliases; confirm in Encoding)
+ "csisolatin6",
+ "iso-8859-10",
+ "iso-ir-157",
+ "iso8859-10",
+ "iso885910",
+ "l6",
+ "latin6"
+ };
+
+ private static final String NAME = "iso-8859-10";
+ // Kept below LABELS: static initializers run in textual order and the constructor reads LABELS.
+ static final Encoding INSTANCE = new Iso10();
+
+ private Iso10() { // private: the only instantiation in this class is INSTANCE
+ super(NAME, LABELS);
+ }
+ // Returns a new decoder on every call; all decoders share the same TABLE array.
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso13.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso13.java
new file mode 100644
index 000000000..60e6f5339
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso13.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso13 extends Encoding {
+ // Encoding named "iso-8859-13", decoded with a table-driven single-byte decoder.
+ private static final char[] TABLE = { // 128 entries; entry i is the char for byte 0x80 + i (see InfallibleSingleByteDecoder.decodeLoop)
+ '\u0080', // the first 32 entries map bytes 0x80-0x9F to U+0080-U+009F unchanged
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u201d',
+ '\u00a2',
+ '\u00a3',
+ '\u00a4',
+ '\u201e',
+ '\u00a6',
+ '\u00a7',
+ '\u00d8',
+ '\u00a9',
+ '\u0156',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\u00ae',
+ '\u00c6',
+ '\u00b0',
+ '\u00b1',
+ '\u00b2',
+ '\u00b3',
+ '\u201c',
+ '\u00b5',
+ '\u00b6',
+ '\u00b7',
+ '\u00f8',
+ '\u00b9',
+ '\u0157',
+ '\u00bb',
+ '\u00bc',
+ '\u00bd',
+ '\u00be',
+ '\u00e6',
+ '\u0104',
+ '\u012e',
+ '\u0100',
+ '\u0106',
+ '\u00c4',
+ '\u00c5',
+ '\u0118',
+ '\u0112',
+ '\u010c',
+ '\u00c9',
+ '\u0179',
+ '\u0116',
+ '\u0122',
+ '\u0136',
+ '\u012a',
+ '\u013b',
+ '\u0160',
+ '\u0143',
+ '\u0145',
+ '\u00d3',
+ '\u014c',
+ '\u00d5',
+ '\u00d6',
+ '\u00d7',
+ '\u0172',
+ '\u0141',
+ '\u015a',
+ '\u016a',
+ '\u00dc',
+ '\u017b',
+ '\u017d',
+ '\u00df',
+ '\u0105',
+ '\u012f',
+ '\u0101',
+ '\u0107',
+ '\u00e4',
+ '\u00e5',
+ '\u0119',
+ '\u0113',
+ '\u010d',
+ '\u00e9',
+ '\u017a',
+ '\u0117',
+ '\u0123',
+ '\u0137',
+ '\u012b',
+ '\u013c',
+ '\u0161',
+ '\u0144',
+ '\u0146',
+ '\u00f3',
+ '\u014d',
+ '\u00f5',
+ '\u00f6',
+ '\u00f7',
+ '\u0173',
+ '\u0142',
+ '\u015b',
+ '\u016b',
+ '\u00fc',
+ '\u017c',
+ '\u017e',
+ '\u2019'
+ };
+
+ private static final String[] LABELS = { // passed to the Encoding constructor (presumably the lookup aliases; confirm in Encoding)
+ "iso-8859-13",
+ "iso8859-13",
+ "iso885913"
+ };
+
+ private static final String NAME = "iso-8859-13";
+ // Kept below LABELS: static initializers run in textual order and the constructor reads LABELS.
+ static final Encoding INSTANCE = new Iso13();
+
+ private Iso13() { // private: the only instantiation in this class is INSTANCE
+ super(NAME, LABELS);
+ }
+ // Returns a new decoder on every call; all decoders share the same TABLE array.
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso14.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso14.java
new file mode 100644
index 000000000..d4a180e6e
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso14.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso14 extends Encoding {
+ // Encoding named "iso-8859-14", decoded with a table-driven single-byte decoder.
+ private static final char[] TABLE = { // 128 entries; entry i is the char for byte 0x80 + i (see InfallibleSingleByteDecoder.decodeLoop)
+ '\u0080', // the first 32 entries map bytes 0x80-0x9F to U+0080-U+009F unchanged
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u1e02',
+ '\u1e03',
+ '\u00a3',
+ '\u010a',
+ '\u010b',
+ '\u1e0a',
+ '\u00a7',
+ '\u1e80',
+ '\u00a9',
+ '\u1e82',
+ '\u1e0b',
+ '\u1ef2',
+ '\u00ad',
+ '\u00ae',
+ '\u0178',
+ '\u1e1e',
+ '\u1e1f',
+ '\u0120',
+ '\u0121',
+ '\u1e40',
+ '\u1e41',
+ '\u00b6',
+ '\u1e56',
+ '\u1e81',
+ '\u1e57',
+ '\u1e83',
+ '\u1e60',
+ '\u1ef3',
+ '\u1e84',
+ '\u1e85',
+ '\u1e61',
+ '\u00c0',
+ '\u00c1',
+ '\u00c2',
+ '\u00c3',
+ '\u00c4',
+ '\u00c5',
+ '\u00c6',
+ '\u00c7',
+ '\u00c8',
+ '\u00c9',
+ '\u00ca',
+ '\u00cb',
+ '\u00cc',
+ '\u00cd',
+ '\u00ce',
+ '\u00cf',
+ '\u0174',
+ '\u00d1',
+ '\u00d2',
+ '\u00d3',
+ '\u00d4',
+ '\u00d5',
+ '\u00d6',
+ '\u1e6a',
+ '\u00d8',
+ '\u00d9',
+ '\u00da',
+ '\u00db',
+ '\u00dc',
+ '\u00dd',
+ '\u0176',
+ '\u00df',
+ '\u00e0',
+ '\u00e1',
+ '\u00e2',
+ '\u00e3',
+ '\u00e4',
+ '\u00e5',
+ '\u00e6',
+ '\u00e7',
+ '\u00e8',
+ '\u00e9',
+ '\u00ea',
+ '\u00eb',
+ '\u00ec',
+ '\u00ed',
+ '\u00ee',
+ '\u00ef',
+ '\u0175',
+ '\u00f1',
+ '\u00f2',
+ '\u00f3',
+ '\u00f4',
+ '\u00f5',
+ '\u00f6',
+ '\u1e6b',
+ '\u00f8',
+ '\u00f9',
+ '\u00fa',
+ '\u00fb',
+ '\u00fc',
+ '\u00fd',
+ '\u0177',
+ '\u00ff'
+ };
+
+ private static final String[] LABELS = { // passed to the Encoding constructor (presumably the lookup aliases; confirm in Encoding)
+ "iso-8859-14",
+ "iso8859-14",
+ "iso885914"
+ };
+
+ private static final String NAME = "iso-8859-14";
+ // Kept below LABELS: static initializers run in textual order and the constructor reads LABELS.
+ static final Encoding INSTANCE = new Iso14();
+
+ private Iso14() { // private: the only instantiation in this class is INSTANCE
+ super(NAME, LABELS);
+ }
+ // Returns a new decoder on every call; all decoders share the same TABLE array.
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso15.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso15.java
new file mode 100644
index 000000000..a60e4b6ef
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso15.java
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso15 extends Encoding {
+ // Encoding named "iso-8859-15", decoded with a table-driven single-byte decoder.
+ private static final char[] TABLE = { // 128 entries; entry i is the char for byte 0x80 + i (see InfallibleSingleByteDecoder.decodeLoop)
+ '\u0080', // the first 32 entries map bytes 0x80-0x9F to U+0080-U+009F unchanged
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u00a1',
+ '\u00a2',
+ '\u00a3',
+ '\u20ac',
+ '\u00a5',
+ '\u0160',
+ '\u00a7',
+ '\u0161',
+ '\u00a9',
+ '\u00aa',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\u00ae',
+ '\u00af',
+ '\u00b0',
+ '\u00b1',
+ '\u00b2',
+ '\u00b3',
+ '\u017d',
+ '\u00b5',
+ '\u00b6',
+ '\u00b7',
+ '\u017e',
+ '\u00b9',
+ '\u00ba',
+ '\u00bb',
+ '\u0152',
+ '\u0153',
+ '\u0178',
+ '\u00bf',
+ '\u00c0',
+ '\u00c1',
+ '\u00c2',
+ '\u00c3',
+ '\u00c4',
+ '\u00c5',
+ '\u00c6',
+ '\u00c7',
+ '\u00c8',
+ '\u00c9',
+ '\u00ca',
+ '\u00cb',
+ '\u00cc',
+ '\u00cd',
+ '\u00ce',
+ '\u00cf',
+ '\u00d0',
+ '\u00d1',
+ '\u00d2',
+ '\u00d3',
+ '\u00d4',
+ '\u00d5',
+ '\u00d6',
+ '\u00d7',
+ '\u00d8',
+ '\u00d9',
+ '\u00da',
+ '\u00db',
+ '\u00dc',
+ '\u00dd',
+ '\u00de',
+ '\u00df',
+ '\u00e0',
+ '\u00e1',
+ '\u00e2',
+ '\u00e3',
+ '\u00e4',
+ '\u00e5',
+ '\u00e6',
+ '\u00e7',
+ '\u00e8',
+ '\u00e9',
+ '\u00ea',
+ '\u00eb',
+ '\u00ec',
+ '\u00ed',
+ '\u00ee',
+ '\u00ef',
+ '\u00f0',
+ '\u00f1',
+ '\u00f2',
+ '\u00f3',
+ '\u00f4',
+ '\u00f5',
+ '\u00f6',
+ '\u00f7',
+ '\u00f8',
+ '\u00f9',
+ '\u00fa',
+ '\u00fb',
+ '\u00fc',
+ '\u00fd',
+ '\u00fe',
+ '\u00ff'
+ };
+
+ private static final String[] LABELS = { // passed to the Encoding constructor (presumably the lookup aliases; confirm in Encoding)
+ "csisolatin9",
+ "iso-8859-15",
+ "iso8859-15",
+ "iso885915",
+ "iso_8859-15",
+ "l9"
+ };
+
+ private static final String NAME = "iso-8859-15";
+ // Kept below LABELS: static initializers run in textual order and the constructor reads LABELS.
+ static final Encoding INSTANCE = new Iso15();
+
+ private Iso15() { // private: the only instantiation in this class is INSTANCE
+ super(NAME, LABELS);
+ }
+ // Returns a new decoder on every call; all decoders share the same TABLE array.
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso16.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso16.java
new file mode 100644
index 000000000..5eb1926db
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso16.java
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso16 extends Encoding {
+ // Encoding named "iso-8859-16", decoded with a table-driven single-byte decoder.
+ private static final char[] TABLE = { // 128 entries; entry i is the char for byte 0x80 + i (see InfallibleSingleByteDecoder.decodeLoop)
+ '\u0080', // the first 32 entries map bytes 0x80-0x9F to U+0080-U+009F unchanged
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u0104',
+ '\u0105',
+ '\u0141',
+ '\u20ac',
+ '\u201e',
+ '\u0160',
+ '\u00a7',
+ '\u0161',
+ '\u00a9',
+ '\u0218',
+ '\u00ab',
+ '\u0179',
+ '\u00ad',
+ '\u017a',
+ '\u017b',
+ '\u00b0',
+ '\u00b1',
+ '\u010c',
+ '\u0142',
+ '\u017d',
+ '\u201d',
+ '\u00b6',
+ '\u00b7',
+ '\u017e',
+ '\u010d',
+ '\u0219',
+ '\u00bb',
+ '\u0152',
+ '\u0153',
+ '\u0178',
+ '\u017c',
+ '\u00c0',
+ '\u00c1',
+ '\u00c2',
+ '\u0102',
+ '\u00c4',
+ '\u0106',
+ '\u00c6',
+ '\u00c7',
+ '\u00c8',
+ '\u00c9',
+ '\u00ca',
+ '\u00cb',
+ '\u00cc',
+ '\u00cd',
+ '\u00ce',
+ '\u00cf',
+ '\u0110',
+ '\u0143',
+ '\u00d2',
+ '\u00d3',
+ '\u00d4',
+ '\u0150',
+ '\u00d6',
+ '\u015a',
+ '\u0170',
+ '\u00d9',
+ '\u00da',
+ '\u00db',
+ '\u00dc',
+ '\u0118',
+ '\u021a',
+ '\u00df',
+ '\u00e0',
+ '\u00e1',
+ '\u00e2',
+ '\u0103',
+ '\u00e4',
+ '\u0107',
+ '\u00e6',
+ '\u00e7',
+ '\u00e8',
+ '\u00e9',
+ '\u00ea',
+ '\u00eb',
+ '\u00ec',
+ '\u00ed',
+ '\u00ee',
+ '\u00ef',
+ '\u0111',
+ '\u0144',
+ '\u00f2',
+ '\u00f3',
+ '\u00f4',
+ '\u0151',
+ '\u00f6',
+ '\u015b',
+ '\u0171',
+ '\u00f9',
+ '\u00fa',
+ '\u00fb',
+ '\u00fc',
+ '\u0119',
+ '\u021b',
+ '\u00ff'
+ };
+
+ private static final String[] LABELS = { // passed to the Encoding constructor (presumably the lookup aliases; confirm in Encoding)
+ "iso-8859-16"
+ };
+
+ private static final String NAME = "iso-8859-16";
+ // Kept below LABELS: static initializers run in textual order and the constructor reads LABELS.
+ static final Encoding INSTANCE = new Iso16();
+
+ private Iso16() { // private: the only instantiation in this class is INSTANCE
+ super(NAME, LABELS);
+ }
+ // Returns a new decoder on every call; all decoders share the same TABLE array.
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2.java
new file mode 100644
index 000000000..7a5f6322a
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2.java
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso2 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u0104',
+ '\u02d8',
+ '\u0141',
+ '\u00a4',
+ '\u013d',
+ '\u015a',
+ '\u00a7',
+ '\u00a8',
+ '\u0160',
+ '\u015e',
+ '\u0164',
+ '\u0179',
+ '\u00ad',
+ '\u017d',
+ '\u017b',
+ '\u00b0',
+ '\u0105',
+ '\u02db',
+ '\u0142',
+ '\u00b4',
+ '\u013e',
+ '\u015b',
+ '\u02c7',
+ '\u00b8',
+ '\u0161',
+ '\u015f',
+ '\u0165',
+ '\u017a',
+ '\u02dd',
+ '\u017e',
+ '\u017c',
+ '\u0154',
+ '\u00c1',
+ '\u00c2',
+ '\u0102',
+ '\u00c4',
+ '\u0139',
+ '\u0106',
+ '\u00c7',
+ '\u010c',
+ '\u00c9',
+ '\u0118',
+ '\u00cb',
+ '\u011a',
+ '\u00cd',
+ '\u00ce',
+ '\u010e',
+ '\u0110',
+ '\u0143',
+ '\u0147',
+ '\u00d3',
+ '\u00d4',
+ '\u0150',
+ '\u00d6',
+ '\u00d7',
+ '\u0158',
+ '\u016e',
+ '\u00da',
+ '\u0170',
+ '\u00dc',
+ '\u00dd',
+ '\u0162',
+ '\u00df',
+ '\u0155',
+ '\u00e1',
+ '\u00e2',
+ '\u0103',
+ '\u00e4',
+ '\u013a',
+ '\u0107',
+ '\u00e7',
+ '\u010d',
+ '\u00e9',
+ '\u0119',
+ '\u00eb',
+ '\u011b',
+ '\u00ed',
+ '\u00ee',
+ '\u010f',
+ '\u0111',
+ '\u0144',
+ '\u0148',
+ '\u00f3',
+ '\u00f4',
+ '\u0151',
+ '\u00f6',
+ '\u00f7',
+ '\u0159',
+ '\u016f',
+ '\u00fa',
+ '\u0171',
+ '\u00fc',
+ '\u00fd',
+ '\u0163',
+ '\u02d9'
+ };
+
+ private static final String[] LABELS = {
+ "csisolatin2",
+ "iso-8859-2",
+ "iso-ir-101",
+ "iso8859-2",
+ "iso88592",
+ "iso_8859-2",
+ "iso_8859-2:1987",
+ "l2",
+ "latin2"
+ };
+
+ private static final String NAME = "iso-8859-2";
+
+ static final Encoding INSTANCE = new Iso2();
+
+ private Iso2() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2022Jp.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2022Jp.java
new file mode 100644
index 000000000..6ebadc947
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2022Jp.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+class Iso2022Jp extends Encoding {
+
+ private static final String[] LABELS = {
+ "csiso2022jp",
+ "iso-2022-jp"
+ };
+
+ private static final String NAME = "iso-2022-jp";
+
+ static final Iso2022Jp INSTANCE = new Iso2022Jp();
+
+ private Iso2022Jp() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return Charset.forName(NAME).newDecoder();
+ }
+
+ @Override public CharsetEncoder newEncoder() {
+ return Charset.forName(NAME).newEncoder();
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso3.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso3.java
new file mode 100644
index 000000000..0667a160c
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso3.java
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso3 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u0126',
+ '\u02d8',
+ '\u00a3',
+ '\u00a4',
+ '\ufffd',
+ '\u0124',
+ '\u00a7',
+ '\u00a8',
+ '\u0130',
+ '\u015e',
+ '\u011e',
+ '\u0134',
+ '\u00ad',
+ '\ufffd',
+ '\u017b',
+ '\u00b0',
+ '\u0127',
+ '\u00b2',
+ '\u00b3',
+ '\u00b4',
+ '\u00b5',
+ '\u0125',
+ '\u00b7',
+ '\u00b8',
+ '\u0131',
+ '\u015f',
+ '\u011f',
+ '\u0135',
+ '\u00bd',
+ '\ufffd',
+ '\u017c',
+ '\u00c0',
+ '\u00c1',
+ '\u00c2',
+ '\ufffd',
+ '\u00c4',
+ '\u010a',
+ '\u0108',
+ '\u00c7',
+ '\u00c8',
+ '\u00c9',
+ '\u00ca',
+ '\u00cb',
+ '\u00cc',
+ '\u00cd',
+ '\u00ce',
+ '\u00cf',
+ '\ufffd',
+ '\u00d1',
+ '\u00d2',
+ '\u00d3',
+ '\u00d4',
+ '\u0120',
+ '\u00d6',
+ '\u00d7',
+ '\u011c',
+ '\u00d9',
+ '\u00da',
+ '\u00db',
+ '\u00dc',
+ '\u016c',
+ '\u015c',
+ '\u00df',
+ '\u00e0',
+ '\u00e1',
+ '\u00e2',
+ '\ufffd',
+ '\u00e4',
+ '\u010b',
+ '\u0109',
+ '\u00e7',
+ '\u00e8',
+ '\u00e9',
+ '\u00ea',
+ '\u00eb',
+ '\u00ec',
+ '\u00ed',
+ '\u00ee',
+ '\u00ef',
+ '\ufffd',
+ '\u00f1',
+ '\u00f2',
+ '\u00f3',
+ '\u00f4',
+ '\u0121',
+ '\u00f6',
+ '\u00f7',
+ '\u011d',
+ '\u00f9',
+ '\u00fa',
+ '\u00fb',
+ '\u00fc',
+ '\u016d',
+ '\u015d',
+ '\u02d9'
+ };
+
+ private static final String[] LABELS = {
+ "csisolatin3",
+ "iso-8859-3",
+ "iso-ir-109",
+ "iso8859-3",
+ "iso88593",
+ "iso_8859-3",
+ "iso_8859-3:1988",
+ "l3",
+ "latin3"
+ };
+
+ private static final String NAME = "iso-8859-3";
+
+ static final Encoding INSTANCE = new Iso3();
+
+ private Iso3() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new FallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso4.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso4.java
new file mode 100644
index 000000000..b954869ab
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso4.java
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso4 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u0104',
+ '\u0138',
+ '\u0156',
+ '\u00a4',
+ '\u0128',
+ '\u013b',
+ '\u00a7',
+ '\u00a8',
+ '\u0160',
+ '\u0112',
+ '\u0122',
+ '\u0166',
+ '\u00ad',
+ '\u017d',
+ '\u00af',
+ '\u00b0',
+ '\u0105',
+ '\u02db',
+ '\u0157',
+ '\u00b4',
+ '\u0129',
+ '\u013c',
+ '\u02c7',
+ '\u00b8',
+ '\u0161',
+ '\u0113',
+ '\u0123',
+ '\u0167',
+ '\u014a',
+ '\u017e',
+ '\u014b',
+ '\u0100',
+ '\u00c1',
+ '\u00c2',
+ '\u00c3',
+ '\u00c4',
+ '\u00c5',
+ '\u00c6',
+ '\u012e',
+ '\u010c',
+ '\u00c9',
+ '\u0118',
+ '\u00cb',
+ '\u0116',
+ '\u00cd',
+ '\u00ce',
+ '\u012a',
+ '\u0110',
+ '\u0145',
+ '\u014c',
+ '\u0136',
+ '\u00d4',
+ '\u00d5',
+ '\u00d6',
+ '\u00d7',
+ '\u00d8',
+ '\u0172',
+ '\u00da',
+ '\u00db',
+ '\u00dc',
+ '\u0168',
+ '\u016a',
+ '\u00df',
+ '\u0101',
+ '\u00e1',
+ '\u00e2',
+ '\u00e3',
+ '\u00e4',
+ '\u00e5',
+ '\u00e6',
+ '\u012f',
+ '\u010d',
+ '\u00e9',
+ '\u0119',
+ '\u00eb',
+ '\u0117',
+ '\u00ed',
+ '\u00ee',
+ '\u012b',
+ '\u0111',
+ '\u0146',
+ '\u014d',
+ '\u0137',
+ '\u00f4',
+ '\u00f5',
+ '\u00f6',
+ '\u00f7',
+ '\u00f8',
+ '\u0173',
+ '\u00fa',
+ '\u00fb',
+ '\u00fc',
+ '\u0169',
+ '\u016b',
+ '\u02d9'
+ };
+
+ private static final String[] LABELS = {
+ "csisolatin4",
+ "iso-8859-4",
+ "iso-ir-110",
+ "iso8859-4",
+ "iso88594",
+ "iso_8859-4",
+ "iso_8859-4:1988",
+ "l4",
+ "latin4"
+ };
+
+ private static final String NAME = "iso-8859-4";
+
+ static final Encoding INSTANCE = new Iso4();
+
+ private Iso4() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso5.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso5.java
new file mode 100644
index 000000000..13946cdbb
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso5.java
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso5 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u0401',
+ '\u0402',
+ '\u0403',
+ '\u0404',
+ '\u0405',
+ '\u0406',
+ '\u0407',
+ '\u0408',
+ '\u0409',
+ '\u040a',
+ '\u040b',
+ '\u040c',
+ '\u00ad',
+ '\u040e',
+ '\u040f',
+ '\u0410',
+ '\u0411',
+ '\u0412',
+ '\u0413',
+ '\u0414',
+ '\u0415',
+ '\u0416',
+ '\u0417',
+ '\u0418',
+ '\u0419',
+ '\u041a',
+ '\u041b',
+ '\u041c',
+ '\u041d',
+ '\u041e',
+ '\u041f',
+ '\u0420',
+ '\u0421',
+ '\u0422',
+ '\u0423',
+ '\u0424',
+ '\u0425',
+ '\u0426',
+ '\u0427',
+ '\u0428',
+ '\u0429',
+ '\u042a',
+ '\u042b',
+ '\u042c',
+ '\u042d',
+ '\u042e',
+ '\u042f',
+ '\u0430',
+ '\u0431',
+ '\u0432',
+ '\u0433',
+ '\u0434',
+ '\u0435',
+ '\u0436',
+ '\u0437',
+ '\u0438',
+ '\u0439',
+ '\u043a',
+ '\u043b',
+ '\u043c',
+ '\u043d',
+ '\u043e',
+ '\u043f',
+ '\u0440',
+ '\u0441',
+ '\u0442',
+ '\u0443',
+ '\u0444',
+ '\u0445',
+ '\u0446',
+ '\u0447',
+ '\u0448',
+ '\u0449',
+ '\u044a',
+ '\u044b',
+ '\u044c',
+ '\u044d',
+ '\u044e',
+ '\u044f',
+ '\u2116',
+ '\u0451',
+ '\u0452',
+ '\u0453',
+ '\u0454',
+ '\u0455',
+ '\u0456',
+ '\u0457',
+ '\u0458',
+ '\u0459',
+ '\u045a',
+ '\u045b',
+ '\u045c',
+ '\u00a7',
+ '\u045e',
+ '\u045f'
+ };
+
+ private static final String[] LABELS = {
+ "csisolatincyrillic",
+ "cyrillic",
+ "iso-8859-5",
+ "iso-ir-144",
+ "iso8859-5",
+ "iso88595",
+ "iso_8859-5",
+ "iso_8859-5:1988"
+ };
+
+ private static final String NAME = "iso-8859-5";
+
+ static final Encoding INSTANCE = new Iso5();
+
+ private Iso5() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso6.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso6.java
new file mode 100644
index 000000000..02e6df8ba
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso6.java
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso6 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\u00a4',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\u060c',
+ '\u00ad',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\u061b',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\u061f',
+ '\ufffd',
+ '\u0621',
+ '\u0622',
+ '\u0623',
+ '\u0624',
+ '\u0625',
+ '\u0626',
+ '\u0627',
+ '\u0628',
+ '\u0629',
+ '\u062a',
+ '\u062b',
+ '\u062c',
+ '\u062d',
+ '\u062e',
+ '\u062f',
+ '\u0630',
+ '\u0631',
+ '\u0632',
+ '\u0633',
+ '\u0634',
+ '\u0635',
+ '\u0636',
+ '\u0637',
+ '\u0638',
+ '\u0639',
+ '\u063a',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\u0640',
+ '\u0641',
+ '\u0642',
+ '\u0643',
+ '\u0644',
+ '\u0645',
+ '\u0646',
+ '\u0647',
+ '\u0648',
+ '\u0649',
+ '\u064a',
+ '\u064b',
+ '\u064c',
+ '\u064d',
+ '\u064e',
+ '\u064f',
+ '\u0650',
+ '\u0651',
+ '\u0652',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd'
+ };
+
+ private static final String[] LABELS = {
+ "arabic",
+ "asmo-708",
+ "csiso88596e",
+ "csiso88596i",
+ "csisolatinarabic",
+ "ecma-114",
+ "iso-8859-6",
+ "iso-8859-6-e",
+ "iso-8859-6-i",
+ "iso-ir-127",
+ "iso8859-6",
+ "iso88596",
+ "iso_8859-6",
+ "iso_8859-6:1987"
+ };
+
+ private static final String NAME = "iso-8859-6";
+
+ static final Encoding INSTANCE = new Iso6();
+
+ private Iso6() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new FallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso7.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso7.java
new file mode 100644
index 000000000..630e702de
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso7.java
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso7 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u2018',
+ '\u2019',
+ '\u00a3',
+ '\u20ac',
+ '\u20af',
+ '\u00a6',
+ '\u00a7',
+ '\u00a8',
+ '\u00a9',
+ '\u037a',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\ufffd',
+ '\u2015',
+ '\u00b0',
+ '\u00b1',
+ '\u00b2',
+ '\u00b3',
+ '\u0384',
+ '\u0385',
+ '\u0386',
+ '\u00b7',
+ '\u0388',
+ '\u0389',
+ '\u038a',
+ '\u00bb',
+ '\u038c',
+ '\u00bd',
+ '\u038e',
+ '\u038f',
+ '\u0390',
+ '\u0391',
+ '\u0392',
+ '\u0393',
+ '\u0394',
+ '\u0395',
+ '\u0396',
+ '\u0397',
+ '\u0398',
+ '\u0399',
+ '\u039a',
+ '\u039b',
+ '\u039c',
+ '\u039d',
+ '\u039e',
+ '\u039f',
+ '\u03a0',
+ '\u03a1',
+ '\ufffd',
+ '\u03a3',
+ '\u03a4',
+ '\u03a5',
+ '\u03a6',
+ '\u03a7',
+ '\u03a8',
+ '\u03a9',
+ '\u03aa',
+ '\u03ab',
+ '\u03ac',
+ '\u03ad',
+ '\u03ae',
+ '\u03af',
+ '\u03b0',
+ '\u03b1',
+ '\u03b2',
+ '\u03b3',
+ '\u03b4',
+ '\u03b5',
+ '\u03b6',
+ '\u03b7',
+ '\u03b8',
+ '\u03b9',
+ '\u03ba',
+ '\u03bb',
+ '\u03bc',
+ '\u03bd',
+ '\u03be',
+ '\u03bf',
+ '\u03c0',
+ '\u03c1',
+ '\u03c2',
+ '\u03c3',
+ '\u03c4',
+ '\u03c5',
+ '\u03c6',
+ '\u03c7',
+ '\u03c8',
+ '\u03c9',
+ '\u03ca',
+ '\u03cb',
+ '\u03cc',
+ '\u03cd',
+ '\u03ce',
+ '\ufffd'
+ };
+
+ private static final String[] LABELS = {
+ "csisolatingreek",
+ "ecma-118",
+ "elot_928",
+ "greek",
+ "greek8",
+ "iso-8859-7",
+ "iso-ir-126",
+ "iso8859-7",
+ "iso88597",
+ "iso_8859-7",
+ "iso_8859-7:1987",
+ "sun_eu_greek"
+ };
+
+ private static final String NAME = "iso-8859-7";
+
+ static final Encoding INSTANCE = new Iso7();
+
+ private Iso7() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new FallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8.java
new file mode 100644
index 000000000..10ee33486
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8.java
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso8 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\ufffd',
+ '\u00a2',
+ '\u00a3',
+ '\u00a4',
+ '\u00a5',
+ '\u00a6',
+ '\u00a7',
+ '\u00a8',
+ '\u00a9',
+ '\u00d7',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\u00ae',
+ '\u00af',
+ '\u00b0',
+ '\u00b1',
+ '\u00b2',
+ '\u00b3',
+ '\u00b4',
+ '\u00b5',
+ '\u00b6',
+ '\u00b7',
+ '\u00b8',
+ '\u00b9',
+ '\u00f7',
+ '\u00bb',
+ '\u00bc',
+ '\u00bd',
+ '\u00be',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\u2017',
+ '\u05d0',
+ '\u05d1',
+ '\u05d2',
+ '\u05d3',
+ '\u05d4',
+ '\u05d5',
+ '\u05d6',
+ '\u05d7',
+ '\u05d8',
+ '\u05d9',
+ '\u05da',
+ '\u05db',
+ '\u05dc',
+ '\u05dd',
+ '\u05de',
+ '\u05df',
+ '\u05e0',
+ '\u05e1',
+ '\u05e2',
+ '\u05e3',
+ '\u05e4',
+ '\u05e5',
+ '\u05e6',
+ '\u05e7',
+ '\u05e8',
+ '\u05e9',
+ '\u05ea',
+ '\ufffd',
+ '\ufffd',
+ '\u200e',
+ '\u200f',
+ '\ufffd'
+ };
+
+ private static final String[] LABELS = {
+ "csiso88598e",
+ "csisolatinhebrew",
+ "hebrew",
+ "iso-8859-8",
+ "iso-8859-8-e",
+ "iso-ir-138",
+ "iso8859-8",
+ "iso88598",
+ "iso_8859-8",
+ "iso_8859-8:1988",
+ "visual"
+ };
+
+ private static final String NAME = "iso-8859-8";
+
+ static final Encoding INSTANCE = new Iso8();
+
+ private Iso8() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new FallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8I.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8I.java
new file mode 100644
index 000000000..732e1c952
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8I.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Iso8I extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u0080',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u0085',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u0091',
+ '\u0092',
+ '\u0093',
+ '\u0094',
+ '\u0095',
+ '\u0096',
+ '\u0097',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\ufffd',
+ '\u00a2',
+ '\u00a3',
+ '\u00a4',
+ '\u00a5',
+ '\u00a6',
+ '\u00a7',
+ '\u00a8',
+ '\u00a9',
+ '\u00d7',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\u00ae',
+ '\u00af',
+ '\u00b0',
+ '\u00b1',
+ '\u00b2',
+ '\u00b3',
+ '\u00b4',
+ '\u00b5',
+ '\u00b6',
+ '\u00b7',
+ '\u00b8',
+ '\u00b9',
+ '\u00f7',
+ '\u00bb',
+ '\u00bc',
+ '\u00bd',
+ '\u00be',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\u2017',
+ '\u05d0',
+ '\u05d1',
+ '\u05d2',
+ '\u05d3',
+ '\u05d4',
+ '\u05d5',
+ '\u05d6',
+ '\u05d7',
+ '\u05d8',
+ '\u05d9',
+ '\u05da',
+ '\u05db',
+ '\u05dc',
+ '\u05dd',
+ '\u05de',
+ '\u05df',
+ '\u05e0',
+ '\u05e1',
+ '\u05e2',
+ '\u05e3',
+ '\u05e4',
+ '\u05e5',
+ '\u05e6',
+ '\u05e7',
+ '\u05e8',
+ '\u05e9',
+ '\u05ea',
+ '\ufffd',
+ '\ufffd',
+ '\u200e',
+ '\u200f',
+ '\ufffd'
+ };
+
+ private static final String[] LABELS = {
+ "csiso88598i",
+ "iso-8859-8-i",
+ "logical"
+ };
+
+ private static final String NAME = "iso-8859-8-i";
+
+ static final Encoding INSTANCE = new Iso8I();
+
+ private Iso8I() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new FallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8R.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8R.java
new file mode 100644
index 000000000..b6157bd8e
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8R.java
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+ class Koi8R extends Encoding { // Encoding descriptor named "koi8-r"; decoding is delegated to a table-driven single-byte decoder.
+
+ private static final char[] TABLE = { // 128 entries handed to the decoder; presumably entry i decodes byte 0x80 + i -- TODO confirm in InfallibleSingleByteDecoder.
+ '\u2500',
+ '\u2502',
+ '\u250c',
+ '\u2510',
+ '\u2514',
+ '\u2518',
+ '\u251c',
+ '\u2524',
+ '\u252c',
+ '\u2534',
+ '\u253c',
+ '\u2580',
+ '\u2584',
+ '\u2588',
+ '\u258c',
+ '\u2590',
+ '\u2591',
+ '\u2592',
+ '\u2593',
+ '\u2320',
+ '\u25a0',
+ '\u2219',
+ '\u221a',
+ '\u2248',
+ '\u2264',
+ '\u2265',
+ '\u00a0',
+ '\u2321',
+ '\u00b0',
+ '\u00b2',
+ '\u00b7',
+ '\u00f7',
+ '\u2550',
+ '\u2551',
+ '\u2552',
+ '\u0451',
+ '\u2553',
+ '\u2554',
+ '\u2555',
+ '\u2556',
+ '\u2557',
+ '\u2558',
+ '\u2559',
+ '\u255a',
+ '\u255b',
+ '\u255c',
+ '\u255d',
+ '\u255e',
+ '\u255f',
+ '\u2560',
+ '\u2561',
+ '\u0401',
+ '\u2562',
+ '\u2563',
+ '\u2564',
+ '\u2565',
+ '\u2566',
+ '\u2567',
+ '\u2568',
+ '\u2569',
+ '\u256a',
+ '\u256b',
+ '\u256c',
+ '\u00a9',
+ '\u044e',
+ '\u0430',
+ '\u0431',
+ '\u0446',
+ '\u0434',
+ '\u0435',
+ '\u0444',
+ '\u0433',
+ '\u0445',
+ '\u0438',
+ '\u0439',
+ '\u043a',
+ '\u043b',
+ '\u043c',
+ '\u043d',
+ '\u043e',
+ '\u043f',
+ '\u044f',
+ '\u0440',
+ '\u0441',
+ '\u0442',
+ '\u0443',
+ '\u0436',
+ '\u0432',
+ '\u044c',
+ '\u044b',
+ '\u0437',
+ '\u0448',
+ '\u044d',
+ '\u0449',
+ '\u0447',
+ '\u044a',
+ '\u042e',
+ '\u0410',
+ '\u0411',
+ '\u0426',
+ '\u0414',
+ '\u0415',
+ '\u0424',
+ '\u0413',
+ '\u0425',
+ '\u0418',
+ '\u0419',
+ '\u041a',
+ '\u041b',
+ '\u041c',
+ '\u041d',
+ '\u041e',
+ '\u041f',
+ '\u042f',
+ '\u0420',
+ '\u0421',
+ '\u0422',
+ '\u0423',
+ '\u0416',
+ '\u0412',
+ '\u042c',
+ '\u042b',
+ '\u0417',
+ '\u0428',
+ '\u042d',
+ '\u0429',
+ '\u0427',
+ '\u042a'
+ };
+
+ private static final String[] LABELS = { // Labels passed to the Encoding constructor together with NAME.
+ "cskoi8r",
+ "koi",
+ "koi8",
+ "koi8-r",
+ "koi8_r"
+ };
+
+ private static final String NAME = "koi8-r"; // Name passed to the Encoding constructor.
+
+ static final Encoding INSTANCE = new Koi8R(); // Only instance created here; the constructor is private.
+
+ private Koi8R() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() { // Builds a new decoder over TABLE on every call.
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8U.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8U.java
new file mode 100644
index 000000000..8150838d3
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8U.java
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+ class Koi8U extends Encoding { // Encoding descriptor named "koi8-u"; decoding is delegated to a table-driven single-byte decoder.
+
+ private static final char[] TABLE = { // 128 entries handed to the decoder; presumably entry i decodes byte 0x80 + i -- TODO confirm in InfallibleSingleByteDecoder.
+ '\u2500',
+ '\u2502',
+ '\u250c',
+ '\u2510',
+ '\u2514',
+ '\u2518',
+ '\u251c',
+ '\u2524',
+ '\u252c',
+ '\u2534',
+ '\u253c',
+ '\u2580',
+ '\u2584',
+ '\u2588',
+ '\u258c',
+ '\u2590',
+ '\u2591',
+ '\u2592',
+ '\u2593',
+ '\u2320',
+ '\u25a0',
+ '\u2219',
+ '\u221a',
+ '\u2248',
+ '\u2264',
+ '\u2265',
+ '\u00a0',
+ '\u2321',
+ '\u00b0',
+ '\u00b2',
+ '\u00b7',
+ '\u00f7',
+ '\u2550',
+ '\u2551',
+ '\u2552',
+ '\u0451',
+ '\u0454',
+ '\u2554',
+ '\u0456',
+ '\u0457',
+ '\u2557',
+ '\u2558',
+ '\u2559',
+ '\u255a',
+ '\u255b',
+ '\u0491',
+ '\u045e',
+ '\u255e',
+ '\u255f',
+ '\u2560',
+ '\u2561',
+ '\u0401',
+ '\u0404',
+ '\u2563',
+ '\u0406',
+ '\u0407',
+ '\u2566',
+ '\u2567',
+ '\u2568',
+ '\u2569',
+ '\u256a',
+ '\u0490',
+ '\u040e',
+ '\u00a9',
+ '\u044e',
+ '\u0430',
+ '\u0431',
+ '\u0446',
+ '\u0434',
+ '\u0435',
+ '\u0444',
+ '\u0433',
+ '\u0445',
+ '\u0438',
+ '\u0439',
+ '\u043a',
+ '\u043b',
+ '\u043c',
+ '\u043d',
+ '\u043e',
+ '\u043f',
+ '\u044f',
+ '\u0440',
+ '\u0441',
+ '\u0442',
+ '\u0443',
+ '\u0436',
+ '\u0432',
+ '\u044c',
+ '\u044b',
+ '\u0437',
+ '\u0448',
+ '\u044d',
+ '\u0449',
+ '\u0447',
+ '\u044a',
+ '\u042e',
+ '\u0410',
+ '\u0411',
+ '\u0426',
+ '\u0414',
+ '\u0415',
+ '\u0424',
+ '\u0413',
+ '\u0425',
+ '\u0418',
+ '\u0419',
+ '\u041a',
+ '\u041b',
+ '\u041c',
+ '\u041d',
+ '\u041e',
+ '\u041f',
+ '\u042f',
+ '\u0420',
+ '\u0421',
+ '\u0422',
+ '\u0423',
+ '\u0416',
+ '\u0412',
+ '\u042c',
+ '\u042b',
+ '\u0417',
+ '\u0428',
+ '\u042d',
+ '\u0429',
+ '\u0427',
+ '\u042a'
+ };
+
+ private static final String[] LABELS = { // Labels passed to the Encoding constructor together with NAME.
+ "koi8-ru",
+ "koi8-u"
+ };
+
+ private static final String NAME = "koi8-u"; // Name passed to the Encoding constructor.
+
+ static final Encoding INSTANCE = new Koi8U(); // Only instance created here; the constructor is private.
+
+ private Koi8U() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() { // Builds a new decoder over TABLE on every call.
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/MacCyrillic.java b/parser/html/java/htmlparser/src/nu/validator/encoding/MacCyrillic.java
new file mode 100644
index 000000000..f46546ce2
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/MacCyrillic.java
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+ class MacCyrillic extends Encoding { // Encoding descriptor named "x-mac-cyrillic"; decoding is delegated to a table-driven single-byte decoder.
+
+ private static final char[] TABLE = { // 128 entries handed to the decoder; presumably entry i decodes byte 0x80 + i -- TODO confirm in InfallibleSingleByteDecoder.
+ '\u0410',
+ '\u0411',
+ '\u0412',
+ '\u0413',
+ '\u0414',
+ '\u0415',
+ '\u0416',
+ '\u0417',
+ '\u0418',
+ '\u0419',
+ '\u041a',
+ '\u041b',
+ '\u041c',
+ '\u041d',
+ '\u041e',
+ '\u041f',
+ '\u0420',
+ '\u0421',
+ '\u0422',
+ '\u0423',
+ '\u0424',
+ '\u0425',
+ '\u0426',
+ '\u0427',
+ '\u0428',
+ '\u0429',
+ '\u042a',
+ '\u042b',
+ '\u042c',
+ '\u042d',
+ '\u042e',
+ '\u042f',
+ '\u2020',
+ '\u00b0',
+ '\u0490',
+ '\u00a3',
+ '\u00a7',
+ '\u2022',
+ '\u00b6',
+ '\u0406',
+ '\u00ae',
+ '\u00a9',
+ '\u2122',
+ '\u0402',
+ '\u0452',
+ '\u2260',
+ '\u0403',
+ '\u0453',
+ '\u221e',
+ '\u00b1',
+ '\u2264',
+ '\u2265',
+ '\u0456',
+ '\u00b5',
+ '\u0491',
+ '\u0408',
+ '\u0404',
+ '\u0454',
+ '\u0407',
+ '\u0457',
+ '\u0409',
+ '\u0459',
+ '\u040a',
+ '\u045a',
+ '\u0458',
+ '\u0405',
+ '\u00ac',
+ '\u221a',
+ '\u0192',
+ '\u2248',
+ '\u2206',
+ '\u00ab',
+ '\u00bb',
+ '\u2026',
+ '\u00a0',
+ '\u040b',
+ '\u045b',
+ '\u040c',
+ '\u045c',
+ '\u0455',
+ '\u2013',
+ '\u2014',
+ '\u201c',
+ '\u201d',
+ '\u2018',
+ '\u2019',
+ '\u00f7',
+ '\u201e',
+ '\u040e',
+ '\u045e',
+ '\u040f',
+ '\u045f',
+ '\u2116',
+ '\u0401',
+ '\u0451',
+ '\u044f',
+ '\u0430',
+ '\u0431',
+ '\u0432',
+ '\u0433',
+ '\u0434',
+ '\u0435',
+ '\u0436',
+ '\u0437',
+ '\u0438',
+ '\u0439',
+ '\u043a',
+ '\u043b',
+ '\u043c',
+ '\u043d',
+ '\u043e',
+ '\u043f',
+ '\u0440',
+ '\u0441',
+ '\u0442',
+ '\u0443',
+ '\u0444',
+ '\u0445',
+ '\u0446',
+ '\u0447',
+ '\u0448',
+ '\u0449',
+ '\u044a',
+ '\u044b',
+ '\u044c',
+ '\u044d',
+ '\u044e',
+ '\u20ac'
+ };
+
+ private static final String[] LABELS = { // Labels passed to the Encoding constructor together with NAME.
+ "x-mac-cyrillic",
+ "x-mac-ukrainian"
+ };
+
+ private static final String NAME = "x-mac-cyrillic"; // Name passed to the Encoding constructor.
+
+ static final Encoding INSTANCE = new MacCyrillic(); // Only instance created here; the constructor is private.
+
+ private MacCyrillic() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() { // Builds a new decoder over TABLE on every call.
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Macintosh.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Macintosh.java
new file mode 100644
index 000000000..70e356f23
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Macintosh.java
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+ class Macintosh extends Encoding { // Encoding descriptor named "macintosh"; decoding is delegated to a table-driven single-byte decoder.
+
+ private static final char[] TABLE = { // 128 entries handed to the decoder; presumably entry i decodes byte 0x80 + i -- TODO confirm in InfallibleSingleByteDecoder.
+ '\u00c4',
+ '\u00c5',
+ '\u00c7',
+ '\u00c9',
+ '\u00d1',
+ '\u00d6',
+ '\u00dc',
+ '\u00e1',
+ '\u00e0',
+ '\u00e2',
+ '\u00e4',
+ '\u00e3',
+ '\u00e5',
+ '\u00e7',
+ '\u00e9',
+ '\u00e8',
+ '\u00ea',
+ '\u00eb',
+ '\u00ed',
+ '\u00ec',
+ '\u00ee',
+ '\u00ef',
+ '\u00f1',
+ '\u00f3',
+ '\u00f2',
+ '\u00f4',
+ '\u00f6',
+ '\u00f5',
+ '\u00fa',
+ '\u00f9',
+ '\u00fb',
+ '\u00fc',
+ '\u2020',
+ '\u00b0',
+ '\u00a2',
+ '\u00a3',
+ '\u00a7',
+ '\u2022',
+ '\u00b6',
+ '\u00df',
+ '\u00ae',
+ '\u00a9',
+ '\u2122',
+ '\u00b4',
+ '\u00a8',
+ '\u2260',
+ '\u00c6',
+ '\u00d8',
+ '\u221e',
+ '\u00b1',
+ '\u2264',
+ '\u2265',
+ '\u00a5',
+ '\u00b5',
+ '\u2202',
+ '\u2211',
+ '\u220f',
+ '\u03c0',
+ '\u222b',
+ '\u00aa',
+ '\u00ba',
+ '\u03a9',
+ '\u00e6',
+ '\u00f8',
+ '\u00bf',
+ '\u00a1',
+ '\u00ac',
+ '\u221a',
+ '\u0192',
+ '\u2248',
+ '\u2206',
+ '\u00ab',
+ '\u00bb',
+ '\u2026',
+ '\u00a0',
+ '\u00c0',
+ '\u00c3',
+ '\u00d5',
+ '\u0152',
+ '\u0153',
+ '\u2013',
+ '\u2014',
+ '\u201c',
+ '\u201d',
+ '\u2018',
+ '\u2019',
+ '\u00f7',
+ '\u25ca',
+ '\u00ff',
+ '\u0178',
+ '\u2044',
+ '\u20ac',
+ '\u2039',
+ '\u203a',
+ '\ufb01',
+ '\ufb02',
+ '\u2021',
+ '\u00b7',
+ '\u201a',
+ '\u201e',
+ '\u2030',
+ '\u00c2',
+ '\u00ca',
+ '\u00c1',
+ '\u00cb',
+ '\u00c8',
+ '\u00cd',
+ '\u00ce',
+ '\u00cf',
+ '\u00cc',
+ '\u00d3',
+ '\u00d4',
+ '\uf8ff',
+ '\u00d2',
+ '\u00da',
+ '\u00db',
+ '\u00d9',
+ '\u0131',
+ '\u02c6',
+ '\u02dc',
+ '\u00af',
+ '\u02d8',
+ '\u02d9',
+ '\u02da',
+ '\u00b8',
+ '\u02dd',
+ '\u02db',
+ '\u02c7'
+ };
+
+ private static final String[] LABELS = { // Labels passed to the Encoding constructor together with NAME.
+ "csmacintosh",
+ "mac",
+ "macintosh",
+ "x-mac-roman"
+ };
+
+ private static final String NAME = "macintosh"; // Name passed to the Encoding constructor.
+
+ static final Encoding INSTANCE = new Macintosh(); // Only instance created here; the constructor is private.
+
+ private Macintosh() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() { // Builds a new decoder over TABLE on every call.
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Replacement.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Replacement.java
new file mode 100644
index 000000000..abb6e24e7
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Replacement.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+ /**
+ * Encoding descriptor named "replacement". The legacy labels listed in
+ * LABELS are all registered under this one name, and decoding is handled
+ * by {@link ReplacementDecoder}.
+ */
+ class Replacement extends Encoding {
+
+ private static final String[] LABELS = {
+ "csiso2022kr",
+ "hz-gb-2312",
+ "iso-2022-cn",
+ "iso-2022-cn-ext",
+ "iso-2022-kr"
+ };
+
+ private static final String NAME = "replacement";
+
+ static final Replacement INSTANCE = new Replacement();
+
+ private Replacement() {
+ super(NAME, LABELS);
+ }
+
+ /**
+ * Returns a new {@link ReplacementDecoder} bound to this encoding.
+ */
+ @Override public CharsetDecoder newDecoder() {
+ return new ReplacementDecoder(this);
+ }
+
+ /**
+ * Returns a new UTF-8 encoder.
+ *
+ * The Java platform has no charset named "replacement", so looking up
+ * NAME through Charset.forName cannot yield an encoder. The Encoding
+ * Standard defines UTF-8 as the output encoding for the replacement
+ * encoding, so that is what is returned here.
+ */
+ @Override public CharsetEncoder newEncoder() {
+ return Charset.forName("UTF-8").newEncoder();
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/ReplacementDecoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/ReplacementDecoder.java
new file mode 100644
index 000000000..f6f2448f6
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/ReplacementDecoder.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CoderResult;
+
+ /**
+ * Decoder that never interprets its input: the first non-empty chunk of
+ * bytes yields exactly one U+FFFD (or one malformed-input result when
+ * errors are being reported) and every later byte is consumed silently.
+ */
+ class ReplacementDecoder extends Decoder {
+
+ /**
+ * True once the single replacement character or error result has been
+ * produced for the current decoding operation; cleared by implReset().
+ */
+ private boolean haveEmitted = false;
+
+ ReplacementDecoder(Charset cs) {
+ super(cs, 1.0f, 1.0f);
+ }
+
+ /**
+ * Consumes all available input. On the first call that sees input, emits
+ * one U+FFFD into out, or returns a malformed-input result of length 1
+ * when the inherited report flag is set. Later calls only drain in.
+ *
+ * @param in source bytes; its position is moved to its limit
+ * @param out destination for the single replacement character
+ * @return UNDERFLOW when the input is drained, OVERFLOW when out has no
+ * room for the replacement character, or a malformed-input result
+ */
+ @Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+ if (!in.hasRemaining()) {
+ return CoderResult.UNDERFLOW;
+ }
+ if (haveEmitted) {
+ // Already signalled once; swallow the rest of the stream.
+ in.position(in.limit());
+ return CoderResult.UNDERFLOW;
+ }
+ if (!out.hasRemaining()) {
+ return CoderResult.OVERFLOW;
+ }
+ in.position(in.limit());
+ haveEmitted = true;
+ // NOTE(review): report is inherited from Decoder; presumably true when
+ // malformed input must be reported instead of replaced -- confirm.
+ if (this.report) {
+ return CoderResult.malformedForLength(1);
+ }
+ out.put('\uFFFD');
+ // Input is fully drained at this point, so this pass is complete.
+ return CoderResult.UNDERFLOW;
+ }
+
+ /**
+ * Nothing is buffered by this decoder, so flushing defers to the superclass.
+ *
+ * @see java.nio.charset.CharsetDecoder#implFlush(java.nio.CharBuffer)
+ */
+ @Override protected CoderResult implFlush(CharBuffer out) {
+ return super.implFlush(out);
+ }
+
+ /**
+ * Clears the emitted flag so that a reused decoder signals the
+ * replacement again for the next input stream.
+ *
+ * @see java.nio.charset.CharsetDecoder#implReset()
+ */
+ @Override protected void implReset() {
+ haveEmitted = false;
+ super.implReset();
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/ShiftJis.java b/parser/html/java/htmlparser/src/nu/validator/encoding/ShiftJis.java
new file mode 100644
index 000000000..6638eab39
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/ShiftJis.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+class ShiftJis extends Encoding {
+
+ private static final String[] LABELS = {
+ "csshiftjis",
+ "ms932",
+ "ms_kanji",
+ "shift-jis",
+ "shift_jis",
+ "sjis",
+ "windows-31j",
+ "x-sjis"
+ };
+
+ private static final String NAME = "shift_jis";
+
+ static final ShiftJis INSTANCE = new ShiftJis();
+
+ private ShiftJis() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return Charset.forName(NAME).newDecoder();
+ }
+
+ @Override public CharsetEncoder newEncoder() {
+ return Charset.forName(NAME).newEncoder();
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefined.java b/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefined.java
new file mode 100644
index 000000000..61534cb28
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefined.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+class UserDefined extends Encoding {
+
+ private static final String[] LABELS = {
+ "x-user-defined"
+ };
+
+ private static final String NAME = "x-user-defined";
+
+ static final UserDefined INSTANCE = new UserDefined();
+
+ private UserDefined() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new UserDefinedDecoder(this);
+ }
+
+ @Override public CharsetEncoder newEncoder() {
+ return Charset.forName(NAME).newEncoder();
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefinedDecoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefinedDecoder.java
new file mode 100644
index 000000000..c14ca8627
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefinedDecoder.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+
+ /**
+ * Decoder that maps each input byte to exactly one char: bytes 0x00-0x7F
+ * map to the same code unit, and bytes 0x80-0xFF map to U+F780-U+F7FF.
+ */
+ class UserDefinedDecoder extends Decoder {
+
+ UserDefinedDecoder(Charset cs) {
+ super(cs, 1.0f, 1.0f);
+ }
+
+ /**
+ * Decodes bytes one at a time until the input is exhausted or the output
+ * is full.
+ *
+ * @param in source bytes
+ * @param out destination chars
+ * @return UNDERFLOW when in is drained, OVERFLOW when out has no room
+ */
+ @Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+ // TODO figure out if it's worthwhile to optimize the case where both
+ // buffers are array-backed.
+ while (in.hasRemaining()) {
+ if (!out.hasRemaining()) {
+ return CoderResult.OVERFLOW;
+ }
+ byte raw = in.get();
+ // A negative byte is 0x80-0xFF unsigned; its low seven bits give
+ // the offset into the U+F780 block.
+ char decoded = (raw >= 0) ? (char) raw : (char) (0xF780 + (raw & 0x7F));
+ out.put(decoded);
+ }
+ return CoderResult.UNDERFLOW;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Be.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Be.java
new file mode 100644
index 000000000..16c0d2fd5
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Be.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+class Utf16Be extends Encoding {
+
+ private static final String[] LABELS = {
+ "utf-16be"
+ };
+
+ private static final String NAME = "utf-16be";
+
+ static final Utf16Be INSTANCE = new Utf16Be();
+
+ private Utf16Be() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return Charset.forName(NAME).newDecoder();
+ }
+
+ @Override public CharsetEncoder newEncoder() {
+ return Charset.forName(NAME).newEncoder();
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Le.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Le.java
new file mode 100644
index 000000000..7381235b5
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Le.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+class Utf16Le extends Encoding {
+
+ private static final String[] LABELS = {
+ "utf-16",
+ "utf-16le"
+ };
+
+ private static final String NAME = "utf-16le";
+
+ static final Utf16Le INSTANCE = new Utf16Le();
+
+ private Utf16Le() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return Charset.forName(NAME).newDecoder();
+ }
+
+ @Override public CharsetEncoder newEncoder() {
+ return Charset.forName(NAME).newEncoder();
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Utf8.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf8.java
new file mode 100644
index 000000000..d6ea7b514
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf8.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+
+class Utf8 extends Encoding {
+
+ private static final String[] LABELS = {
+ "unicode-1-1-utf-8",
+ "utf-8",
+ "utf8"
+ };
+
+ private static final String NAME = "utf-8";
+
+ static final Utf8 INSTANCE = new Utf8();
+
+ private Utf8() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return Charset.forName(NAME).newDecoder();
+ }
+
+ @Override public CharsetEncoder newEncoder() {
+ return Charset.forName(NAME).newEncoder();
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1250.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1250.java
new file mode 100644
index 000000000..0b3f50875
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1250.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Windows1250 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u20ac',
+ '\u0081',
+ '\u201a',
+ '\u0083',
+ '\u201e',
+ '\u2026',
+ '\u2020',
+ '\u2021',
+ '\u0088',
+ '\u2030',
+ '\u0160',
+ '\u2039',
+ '\u015a',
+ '\u0164',
+ '\u017d',
+ '\u0179',
+ '\u0090',
+ '\u2018',
+ '\u2019',
+ '\u201c',
+ '\u201d',
+ '\u2022',
+ '\u2013',
+ '\u2014',
+ '\u0098',
+ '\u2122',
+ '\u0161',
+ '\u203a',
+ '\u015b',
+ '\u0165',
+ '\u017e',
+ '\u017a',
+ '\u00a0',
+ '\u02c7',
+ '\u02d8',
+ '\u0141',
+ '\u00a4',
+ '\u0104',
+ '\u00a6',
+ '\u00a7',
+ '\u00a8',
+ '\u00a9',
+ '\u015e',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\u00ae',
+ '\u017b',
+ '\u00b0',
+ '\u00b1',
+ '\u02db',
+ '\u0142',
+ '\u00b4',
+ '\u00b5',
+ '\u00b6',
+ '\u00b7',
+ '\u00b8',
+ '\u0105',
+ '\u015f',
+ '\u00bb',
+ '\u013d',
+ '\u02dd',
+ '\u013e',
+ '\u017c',
+ '\u0154',
+ '\u00c1',
+ '\u00c2',
+ '\u0102',
+ '\u00c4',
+ '\u0139',
+ '\u0106',
+ '\u00c7',
+ '\u010c',
+ '\u00c9',
+ '\u0118',
+ '\u00cb',
+ '\u011a',
+ '\u00cd',
+ '\u00ce',
+ '\u010e',
+ '\u0110',
+ '\u0143',
+ '\u0147',
+ '\u00d3',
+ '\u00d4',
+ '\u0150',
+ '\u00d6',
+ '\u00d7',
+ '\u0158',
+ '\u016e',
+ '\u00da',
+ '\u0170',
+ '\u00dc',
+ '\u00dd',
+ '\u0162',
+ '\u00df',
+ '\u0155',
+ '\u00e1',
+ '\u00e2',
+ '\u0103',
+ '\u00e4',
+ '\u013a',
+ '\u0107',
+ '\u00e7',
+ '\u010d',
+ '\u00e9',
+ '\u0119',
+ '\u00eb',
+ '\u011b',
+ '\u00ed',
+ '\u00ee',
+ '\u010f',
+ '\u0111',
+ '\u0144',
+ '\u0148',
+ '\u00f3',
+ '\u00f4',
+ '\u0151',
+ '\u00f6',
+ '\u00f7',
+ '\u0159',
+ '\u016f',
+ '\u00fa',
+ '\u0171',
+ '\u00fc',
+ '\u00fd',
+ '\u0163',
+ '\u02d9'
+ };
+
+ private static final String[] LABELS = {
+ "cp1250",
+ "windows-1250",
+ "x-cp1250"
+ };
+
+ private static final String NAME = "windows-1250";
+
+ static final Encoding INSTANCE = new Windows1250();
+
+ private Windows1250() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1251.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1251.java
new file mode 100644
index 000000000..def5cf11e
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1251.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Windows1251 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u0402',
+ '\u0403',
+ '\u201a',
+ '\u0453',
+ '\u201e',
+ '\u2026',
+ '\u2020',
+ '\u2021',
+ '\u20ac',
+ '\u2030',
+ '\u0409',
+ '\u2039',
+ '\u040a',
+ '\u040c',
+ '\u040b',
+ '\u040f',
+ '\u0452',
+ '\u2018',
+ '\u2019',
+ '\u201c',
+ '\u201d',
+ '\u2022',
+ '\u2013',
+ '\u2014',
+ '\u0098',
+ '\u2122',
+ '\u0459',
+ '\u203a',
+ '\u045a',
+ '\u045c',
+ '\u045b',
+ '\u045f',
+ '\u00a0',
+ '\u040e',
+ '\u045e',
+ '\u0408',
+ '\u00a4',
+ '\u0490',
+ '\u00a6',
+ '\u00a7',
+ '\u0401',
+ '\u00a9',
+ '\u0404',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\u00ae',
+ '\u0407',
+ '\u00b0',
+ '\u00b1',
+ '\u0406',
+ '\u0456',
+ '\u0491',
+ '\u00b5',
+ '\u00b6',
+ '\u00b7',
+ '\u0451',
+ '\u2116',
+ '\u0454',
+ '\u00bb',
+ '\u0458',
+ '\u0405',
+ '\u0455',
+ '\u0457',
+ '\u0410',
+ '\u0411',
+ '\u0412',
+ '\u0413',
+ '\u0414',
+ '\u0415',
+ '\u0416',
+ '\u0417',
+ '\u0418',
+ '\u0419',
+ '\u041a',
+ '\u041b',
+ '\u041c',
+ '\u041d',
+ '\u041e',
+ '\u041f',
+ '\u0420',
+ '\u0421',
+ '\u0422',
+ '\u0423',
+ '\u0424',
+ '\u0425',
+ '\u0426',
+ '\u0427',
+ '\u0428',
+ '\u0429',
+ '\u042a',
+ '\u042b',
+ '\u042c',
+ '\u042d',
+ '\u042e',
+ '\u042f',
+ '\u0430',
+ '\u0431',
+ '\u0432',
+ '\u0433',
+ '\u0434',
+ '\u0435',
+ '\u0436',
+ '\u0437',
+ '\u0438',
+ '\u0439',
+ '\u043a',
+ '\u043b',
+ '\u043c',
+ '\u043d',
+ '\u043e',
+ '\u043f',
+ '\u0440',
+ '\u0441',
+ '\u0442',
+ '\u0443',
+ '\u0444',
+ '\u0445',
+ '\u0446',
+ '\u0447',
+ '\u0448',
+ '\u0449',
+ '\u044a',
+ '\u044b',
+ '\u044c',
+ '\u044d',
+ '\u044e',
+ '\u044f'
+ };
+
+ private static final String[] LABELS = {
+ "cp1251",
+ "windows-1251",
+ "x-cp1251"
+ };
+
+ private static final String NAME = "windows-1251";
+
+ static final Encoding INSTANCE = new Windows1251();
+
+ private Windows1251() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1252.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1252.java
new file mode 100644
index 000000000..4b3fa1ffa
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1252.java
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Windows1252 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u20ac',
+ '\u0081',
+ '\u201a',
+ '\u0192',
+ '\u201e',
+ '\u2026',
+ '\u2020',
+ '\u2021',
+ '\u02c6',
+ '\u2030',
+ '\u0160',
+ '\u2039',
+ '\u0152',
+ '\u008d',
+ '\u017d',
+ '\u008f',
+ '\u0090',
+ '\u2018',
+ '\u2019',
+ '\u201c',
+ '\u201d',
+ '\u2022',
+ '\u2013',
+ '\u2014',
+ '\u02dc',
+ '\u2122',
+ '\u0161',
+ '\u203a',
+ '\u0153',
+ '\u009d',
+ '\u017e',
+ '\u0178',
+ '\u00a0',
+ '\u00a1',
+ '\u00a2',
+ '\u00a3',
+ '\u00a4',
+ '\u00a5',
+ '\u00a6',
+ '\u00a7',
+ '\u00a8',
+ '\u00a9',
+ '\u00aa',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\u00ae',
+ '\u00af',
+ '\u00b0',
+ '\u00b1',
+ '\u00b2',
+ '\u00b3',
+ '\u00b4',
+ '\u00b5',
+ '\u00b6',
+ '\u00b7',
+ '\u00b8',
+ '\u00b9',
+ '\u00ba',
+ '\u00bb',
+ '\u00bc',
+ '\u00bd',
+ '\u00be',
+ '\u00bf',
+ '\u00c0',
+ '\u00c1',
+ '\u00c2',
+ '\u00c3',
+ '\u00c4',
+ '\u00c5',
+ '\u00c6',
+ '\u00c7',
+ '\u00c8',
+ '\u00c9',
+ '\u00ca',
+ '\u00cb',
+ '\u00cc',
+ '\u00cd',
+ '\u00ce',
+ '\u00cf',
+ '\u00d0',
+ '\u00d1',
+ '\u00d2',
+ '\u00d3',
+ '\u00d4',
+ '\u00d5',
+ '\u00d6',
+ '\u00d7',
+ '\u00d8',
+ '\u00d9',
+ '\u00da',
+ '\u00db',
+ '\u00dc',
+ '\u00dd',
+ '\u00de',
+ '\u00df',
+ '\u00e0',
+ '\u00e1',
+ '\u00e2',
+ '\u00e3',
+ '\u00e4',
+ '\u00e5',
+ '\u00e6',
+ '\u00e7',
+ '\u00e8',
+ '\u00e9',
+ '\u00ea',
+ '\u00eb',
+ '\u00ec',
+ '\u00ed',
+ '\u00ee',
+ '\u00ef',
+ '\u00f0',
+ '\u00f1',
+ '\u00f2',
+ '\u00f3',
+ '\u00f4',
+ '\u00f5',
+ '\u00f6',
+ '\u00f7',
+ '\u00f8',
+ '\u00f9',
+ '\u00fa',
+ '\u00fb',
+ '\u00fc',
+ '\u00fd',
+ '\u00fe',
+ '\u00ff'
+ };
+
+ private static final String[] LABELS = {
+ "ansi_x3.4-1968",
+ "ascii",
+ "cp1252",
+ "cp819",
+ "csisolatin1",
+ "ibm819",
+ "iso-8859-1",
+ "iso-ir-100",
+ "iso8859-1",
+ "iso88591",
+ "iso_8859-1",
+ "iso_8859-1:1987",
+ "l1",
+ "latin1",
+ "us-ascii",
+ "windows-1252",
+ "x-cp1252"
+ };
+
+ private static final String NAME = "windows-1252";
+
+ static final Encoding INSTANCE = new Windows1252();
+
+ private Windows1252() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1253.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1253.java
new file mode 100644
index 000000000..c96e8630c
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1253.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Windows1253 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u20ac',
+ '\u0081',
+ '\u201a',
+ '\u0192',
+ '\u201e',
+ '\u2026',
+ '\u2020',
+ '\u2021',
+ '\u0088',
+ '\u2030',
+ '\u008a',
+ '\u2039',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u2018',
+ '\u2019',
+ '\u201c',
+ '\u201d',
+ '\u2022',
+ '\u2013',
+ '\u2014',
+ '\u0098',
+ '\u2122',
+ '\u009a',
+ '\u203a',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u0385',
+ '\u0386',
+ '\u00a3',
+ '\u00a4',
+ '\u00a5',
+ '\u00a6',
+ '\u00a7',
+ '\u00a8',
+ '\u00a9',
+ '\ufffd',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\u00ae',
+ '\u2015',
+ '\u00b0',
+ '\u00b1',
+ '\u00b2',
+ '\u00b3',
+ '\u0384',
+ '\u00b5',
+ '\u00b6',
+ '\u00b7',
+ '\u0388',
+ '\u0389',
+ '\u038a',
+ '\u00bb',
+ '\u038c',
+ '\u00bd',
+ '\u038e',
+ '\u038f',
+ '\u0390',
+ '\u0391',
+ '\u0392',
+ '\u0393',
+ '\u0394',
+ '\u0395',
+ '\u0396',
+ '\u0397',
+ '\u0398',
+ '\u0399',
+ '\u039a',
+ '\u039b',
+ '\u039c',
+ '\u039d',
+ '\u039e',
+ '\u039f',
+ '\u03a0',
+ '\u03a1',
+ '\ufffd',
+ '\u03a3',
+ '\u03a4',
+ '\u03a5',
+ '\u03a6',
+ '\u03a7',
+ '\u03a8',
+ '\u03a9',
+ '\u03aa',
+ '\u03ab',
+ '\u03ac',
+ '\u03ad',
+ '\u03ae',
+ '\u03af',
+ '\u03b0',
+ '\u03b1',
+ '\u03b2',
+ '\u03b3',
+ '\u03b4',
+ '\u03b5',
+ '\u03b6',
+ '\u03b7',
+ '\u03b8',
+ '\u03b9',
+ '\u03ba',
+ '\u03bb',
+ '\u03bc',
+ '\u03bd',
+ '\u03be',
+ '\u03bf',
+ '\u03c0',
+ '\u03c1',
+ '\u03c2',
+ '\u03c3',
+ '\u03c4',
+ '\u03c5',
+ '\u03c6',
+ '\u03c7',
+ '\u03c8',
+ '\u03c9',
+ '\u03ca',
+ '\u03cb',
+ '\u03cc',
+ '\u03cd',
+ '\u03ce',
+ '\ufffd'
+ };
+
+ private static final String[] LABELS = {
+ "cp1253",
+ "windows-1253",
+ "x-cp1253"
+ };
+
+ private static final String NAME = "windows-1253";
+
+ static final Encoding INSTANCE = new Windows1253();
+
+ private Windows1253() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new FallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1254.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1254.java
new file mode 100644
index 000000000..fc3aa9839
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1254.java
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Windows1254 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u20ac',
+ '\u0081',
+ '\u201a',
+ '\u0192',
+ '\u201e',
+ '\u2026',
+ '\u2020',
+ '\u2021',
+ '\u02c6',
+ '\u2030',
+ '\u0160',
+ '\u2039',
+ '\u0152',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u2018',
+ '\u2019',
+ '\u201c',
+ '\u201d',
+ '\u2022',
+ '\u2013',
+ '\u2014',
+ '\u02dc',
+ '\u2122',
+ '\u0161',
+ '\u203a',
+ '\u0153',
+ '\u009d',
+ '\u009e',
+ '\u0178',
+ '\u00a0',
+ '\u00a1',
+ '\u00a2',
+ '\u00a3',
+ '\u00a4',
+ '\u00a5',
+ '\u00a6',
+ '\u00a7',
+ '\u00a8',
+ '\u00a9',
+ '\u00aa',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\u00ae',
+ '\u00af',
+ '\u00b0',
+ '\u00b1',
+ '\u00b2',
+ '\u00b3',
+ '\u00b4',
+ '\u00b5',
+ '\u00b6',
+ '\u00b7',
+ '\u00b8',
+ '\u00b9',
+ '\u00ba',
+ '\u00bb',
+ '\u00bc',
+ '\u00bd',
+ '\u00be',
+ '\u00bf',
+ '\u00c0',
+ '\u00c1',
+ '\u00c2',
+ '\u00c3',
+ '\u00c4',
+ '\u00c5',
+ '\u00c6',
+ '\u00c7',
+ '\u00c8',
+ '\u00c9',
+ '\u00ca',
+ '\u00cb',
+ '\u00cc',
+ '\u00cd',
+ '\u00ce',
+ '\u00cf',
+ '\u011e',
+ '\u00d1',
+ '\u00d2',
+ '\u00d3',
+ '\u00d4',
+ '\u00d5',
+ '\u00d6',
+ '\u00d7',
+ '\u00d8',
+ '\u00d9',
+ '\u00da',
+ '\u00db',
+ '\u00dc',
+ '\u0130',
+ '\u015e',
+ '\u00df',
+ '\u00e0',
+ '\u00e1',
+ '\u00e2',
+ '\u00e3',
+ '\u00e4',
+ '\u00e5',
+ '\u00e6',
+ '\u00e7',
+ '\u00e8',
+ '\u00e9',
+ '\u00ea',
+ '\u00eb',
+ '\u00ec',
+ '\u00ed',
+ '\u00ee',
+ '\u00ef',
+ '\u011f',
+ '\u00f1',
+ '\u00f2',
+ '\u00f3',
+ '\u00f4',
+ '\u00f5',
+ '\u00f6',
+ '\u00f7',
+ '\u00f8',
+ '\u00f9',
+ '\u00fa',
+ '\u00fb',
+ '\u00fc',
+ '\u0131',
+ '\u015f',
+ '\u00ff'
+ };
+
+ private static final String[] LABELS = {
+ "cp1254",
+ "csisolatin5",
+ "iso-8859-9",
+ "iso-ir-148",
+ "iso8859-9",
+ "iso88599",
+ "iso_8859-9",
+ "iso_8859-9:1989",
+ "l5",
+ "latin5",
+ "windows-1254",
+ "x-cp1254"
+ };
+
+ private static final String NAME = "windows-1254";
+
+ static final Encoding INSTANCE = new Windows1254();
+
+ private Windows1254() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1255.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1255.java
new file mode 100644
index 000000000..957203d80
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1255.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Windows1255 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u20ac',
+ '\u0081',
+ '\u201a',
+ '\u0192',
+ '\u201e',
+ '\u2026',
+ '\u2020',
+ '\u2021',
+ '\u02c6',
+ '\u2030',
+ '\u008a',
+ '\u2039',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u2018',
+ '\u2019',
+ '\u201c',
+ '\u201d',
+ '\u2022',
+ '\u2013',
+ '\u2014',
+ '\u02dc',
+ '\u2122',
+ '\u009a',
+ '\u203a',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u00a1',
+ '\u00a2',
+ '\u00a3',
+ '\u20aa',
+ '\u00a5',
+ '\u00a6',
+ '\u00a7',
+ '\u00a8',
+ '\u00a9',
+ '\u00d7',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\u00ae',
+ '\u00af',
+ '\u00b0',
+ '\u00b1',
+ '\u00b2',
+ '\u00b3',
+ '\u00b4',
+ '\u00b5',
+ '\u00b6',
+ '\u00b7',
+ '\u00b8',
+ '\u00b9',
+ '\u00f7',
+ '\u00bb',
+ '\u00bc',
+ '\u00bd',
+ '\u00be',
+ '\u00bf',
+ '\u05b0',
+ '\u05b1',
+ '\u05b2',
+ '\u05b3',
+ '\u05b4',
+ '\u05b5',
+ '\u05b6',
+ '\u05b7',
+ '\u05b8',
+ '\u05b9',
+ '\ufffd',
+ '\u05bb',
+ '\u05bc',
+ '\u05bd',
+ '\u05be',
+ '\u05bf',
+ '\u05c0',
+ '\u05c1',
+ '\u05c2',
+ '\u05c3',
+ '\u05f0',
+ '\u05f1',
+ '\u05f2',
+ '\u05f3',
+ '\u05f4',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\u05d0',
+ '\u05d1',
+ '\u05d2',
+ '\u05d3',
+ '\u05d4',
+ '\u05d5',
+ '\u05d6',
+ '\u05d7',
+ '\u05d8',
+ '\u05d9',
+ '\u05da',
+ '\u05db',
+ '\u05dc',
+ '\u05dd',
+ '\u05de',
+ '\u05df',
+ '\u05e0',
+ '\u05e1',
+ '\u05e2',
+ '\u05e3',
+ '\u05e4',
+ '\u05e5',
+ '\u05e6',
+ '\u05e7',
+ '\u05e8',
+ '\u05e9',
+ '\u05ea',
+ '\ufffd',
+ '\ufffd',
+ '\u200e',
+ '\u200f',
+ '\ufffd'
+ };
+
+ private static final String[] LABELS = {
+ "cp1255",
+ "windows-1255",
+ "x-cp1255"
+ };
+
+ private static final String NAME = "windows-1255";
+
+ static final Encoding INSTANCE = new Windows1255();
+
+ private Windows1255() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new FallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1256.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1256.java
new file mode 100644
index 000000000..87d805e1e
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1256.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Windows1256 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u20ac',
+ '\u067e',
+ '\u201a',
+ '\u0192',
+ '\u201e',
+ '\u2026',
+ '\u2020',
+ '\u2021',
+ '\u02c6',
+ '\u2030',
+ '\u0679',
+ '\u2039',
+ '\u0152',
+ '\u0686',
+ '\u0698',
+ '\u0688',
+ '\u06af',
+ '\u2018',
+ '\u2019',
+ '\u201c',
+ '\u201d',
+ '\u2022',
+ '\u2013',
+ '\u2014',
+ '\u06a9',
+ '\u2122',
+ '\u0691',
+ '\u203a',
+ '\u0153',
+ '\u200c',
+ '\u200d',
+ '\u06ba',
+ '\u00a0',
+ '\u060c',
+ '\u00a2',
+ '\u00a3',
+ '\u00a4',
+ '\u00a5',
+ '\u00a6',
+ '\u00a7',
+ '\u00a8',
+ '\u00a9',
+ '\u06be',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\u00ae',
+ '\u00af',
+ '\u00b0',
+ '\u00b1',
+ '\u00b2',
+ '\u00b3',
+ '\u00b4',
+ '\u00b5',
+ '\u00b6',
+ '\u00b7',
+ '\u00b8',
+ '\u00b9',
+ '\u061b',
+ '\u00bb',
+ '\u00bc',
+ '\u00bd',
+ '\u00be',
+ '\u061f',
+ '\u06c1',
+ '\u0621',
+ '\u0622',
+ '\u0623',
+ '\u0624',
+ '\u0625',
+ '\u0626',
+ '\u0627',
+ '\u0628',
+ '\u0629',
+ '\u062a',
+ '\u062b',
+ '\u062c',
+ '\u062d',
+ '\u062e',
+ '\u062f',
+ '\u0630',
+ '\u0631',
+ '\u0632',
+ '\u0633',
+ '\u0634',
+ '\u0635',
+ '\u0636',
+ '\u00d7',
+ '\u0637',
+ '\u0638',
+ '\u0639',
+ '\u063a',
+ '\u0640',
+ '\u0641',
+ '\u0642',
+ '\u0643',
+ '\u00e0',
+ '\u0644',
+ '\u00e2',
+ '\u0645',
+ '\u0646',
+ '\u0647',
+ '\u0648',
+ '\u00e7',
+ '\u00e8',
+ '\u00e9',
+ '\u00ea',
+ '\u00eb',
+ '\u0649',
+ '\u064a',
+ '\u00ee',
+ '\u00ef',
+ '\u064b',
+ '\u064c',
+ '\u064d',
+ '\u064e',
+ '\u00f4',
+ '\u064f',
+ '\u0650',
+ '\u00f7',
+ '\u0651',
+ '\u00f9',
+ '\u0652',
+ '\u00fb',
+ '\u00fc',
+ '\u200e',
+ '\u200f',
+ '\u06d2'
+ };
+
+ private static final String[] LABELS = {
+ "cp1256",
+ "windows-1256",
+ "x-cp1256"
+ };
+
+ private static final String NAME = "windows-1256";
+
+ static final Encoding INSTANCE = new Windows1256();
+
+ private Windows1256() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1257.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1257.java
new file mode 100644
index 000000000..140e9b458
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1257.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Windows1257 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u20ac',
+ '\u0081',
+ '\u201a',
+ '\u0083',
+ '\u201e',
+ '\u2026',
+ '\u2020',
+ '\u2021',
+ '\u0088',
+ '\u2030',
+ '\u008a',
+ '\u2039',
+ '\u008c',
+ '\u00a8',
+ '\u02c7',
+ '\u00b8',
+ '\u0090',
+ '\u2018',
+ '\u2019',
+ '\u201c',
+ '\u201d',
+ '\u2022',
+ '\u2013',
+ '\u2014',
+ '\u0098',
+ '\u2122',
+ '\u009a',
+ '\u203a',
+ '\u009c',
+ '\u00af',
+ '\u02db',
+ '\u009f',
+ '\u00a0',
+ '\ufffd',
+ '\u00a2',
+ '\u00a3',
+ '\u00a4',
+ '\ufffd',
+ '\u00a6',
+ '\u00a7',
+ '\u00d8',
+ '\u00a9',
+ '\u0156',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\u00ae',
+ '\u00c6',
+ '\u00b0',
+ '\u00b1',
+ '\u00b2',
+ '\u00b3',
+ '\u00b4',
+ '\u00b5',
+ '\u00b6',
+ '\u00b7',
+ '\u00f8',
+ '\u00b9',
+ '\u0157',
+ '\u00bb',
+ '\u00bc',
+ '\u00bd',
+ '\u00be',
+ '\u00e6',
+ '\u0104',
+ '\u012e',
+ '\u0100',
+ '\u0106',
+ '\u00c4',
+ '\u00c5',
+ '\u0118',
+ '\u0112',
+ '\u010c',
+ '\u00c9',
+ '\u0179',
+ '\u0116',
+ '\u0122',
+ '\u0136',
+ '\u012a',
+ '\u013b',
+ '\u0160',
+ '\u0143',
+ '\u0145',
+ '\u00d3',
+ '\u014c',
+ '\u00d5',
+ '\u00d6',
+ '\u00d7',
+ '\u0172',
+ '\u0141',
+ '\u015a',
+ '\u016a',
+ '\u00dc',
+ '\u017b',
+ '\u017d',
+ '\u00df',
+ '\u0105',
+ '\u012f',
+ '\u0101',
+ '\u0107',
+ '\u00e4',
+ '\u00e5',
+ '\u0119',
+ '\u0113',
+ '\u010d',
+ '\u00e9',
+ '\u017a',
+ '\u0117',
+ '\u0123',
+ '\u0137',
+ '\u012b',
+ '\u013c',
+ '\u0161',
+ '\u0144',
+ '\u0146',
+ '\u00f3',
+ '\u014d',
+ '\u00f5',
+ '\u00f6',
+ '\u00f7',
+ '\u0173',
+ '\u0142',
+ '\u015b',
+ '\u016b',
+ '\u00fc',
+ '\u017c',
+ '\u017e',
+ '\u02d9'
+ };
+
+ private static final String[] LABELS = {
+ "cp1257",
+ "windows-1257",
+ "x-cp1257"
+ };
+
+ private static final String NAME = "windows-1257";
+
+ static final Encoding INSTANCE = new Windows1257();
+
+ private Windows1257() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new FallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1258.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1258.java
new file mode 100644
index 000000000..130107789
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1258.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Windows1258 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u20ac',
+ '\u0081',
+ '\u201a',
+ '\u0192',
+ '\u201e',
+ '\u2026',
+ '\u2020',
+ '\u2021',
+ '\u02c6',
+ '\u2030',
+ '\u008a',
+ '\u2039',
+ '\u0152',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u2018',
+ '\u2019',
+ '\u201c',
+ '\u201d',
+ '\u2022',
+ '\u2013',
+ '\u2014',
+ '\u02dc',
+ '\u2122',
+ '\u009a',
+ '\u203a',
+ '\u0153',
+ '\u009d',
+ '\u009e',
+ '\u0178',
+ '\u00a0',
+ '\u00a1',
+ '\u00a2',
+ '\u00a3',
+ '\u00a4',
+ '\u00a5',
+ '\u00a6',
+ '\u00a7',
+ '\u00a8',
+ '\u00a9',
+ '\u00aa',
+ '\u00ab',
+ '\u00ac',
+ '\u00ad',
+ '\u00ae',
+ '\u00af',
+ '\u00b0',
+ '\u00b1',
+ '\u00b2',
+ '\u00b3',
+ '\u00b4',
+ '\u00b5',
+ '\u00b6',
+ '\u00b7',
+ '\u00b8',
+ '\u00b9',
+ '\u00ba',
+ '\u00bb',
+ '\u00bc',
+ '\u00bd',
+ '\u00be',
+ '\u00bf',
+ '\u00c0',
+ '\u00c1',
+ '\u00c2',
+ '\u0102',
+ '\u00c4',
+ '\u00c5',
+ '\u00c6',
+ '\u00c7',
+ '\u00c8',
+ '\u00c9',
+ '\u00ca',
+ '\u00cb',
+ '\u0300',
+ '\u00cd',
+ '\u00ce',
+ '\u00cf',
+ '\u0110',
+ '\u00d1',
+ '\u0309',
+ '\u00d3',
+ '\u00d4',
+ '\u01a0',
+ '\u00d6',
+ '\u00d7',
+ '\u00d8',
+ '\u00d9',
+ '\u00da',
+ '\u00db',
+ '\u00dc',
+ '\u01af',
+ '\u0303',
+ '\u00df',
+ '\u00e0',
+ '\u00e1',
+ '\u00e2',
+ '\u0103',
+ '\u00e4',
+ '\u00e5',
+ '\u00e6',
+ '\u00e7',
+ '\u00e8',
+ '\u00e9',
+ '\u00ea',
+ '\u00eb',
+ '\u0301',
+ '\u00ed',
+ '\u00ee',
+ '\u00ef',
+ '\u0111',
+ '\u00f1',
+ '\u0323',
+ '\u00f3',
+ '\u00f4',
+ '\u01a1',
+ '\u00f6',
+ '\u00f7',
+ '\u00f8',
+ '\u00f9',
+ '\u00fa',
+ '\u00fb',
+ '\u00fc',
+ '\u01b0',
+ '\u20ab',
+ '\u00ff'
+ };
+
+ private static final String[] LABELS = {
+ "cp1258",
+ "windows-1258",
+ "x-cp1258"
+ };
+
+ private static final String NAME = "windows-1258";
+
+ static final Encoding INSTANCE = new Windows1258();
+
+ private Windows1258() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new InfallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows874.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows874.java
new file mode 100644
index 000000000..f93be0175
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows874.java
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2013-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+ * Instead, please regenerate using generate-encoding-data.py
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.CharsetDecoder;
+
+class Windows874 extends Encoding {
+
+ private static final char[] TABLE = {
+ '\u20ac',
+ '\u0081',
+ '\u0082',
+ '\u0083',
+ '\u0084',
+ '\u2026',
+ '\u0086',
+ '\u0087',
+ '\u0088',
+ '\u0089',
+ '\u008a',
+ '\u008b',
+ '\u008c',
+ '\u008d',
+ '\u008e',
+ '\u008f',
+ '\u0090',
+ '\u2018',
+ '\u2019',
+ '\u201c',
+ '\u201d',
+ '\u2022',
+ '\u2013',
+ '\u2014',
+ '\u0098',
+ '\u0099',
+ '\u009a',
+ '\u009b',
+ '\u009c',
+ '\u009d',
+ '\u009e',
+ '\u009f',
+ '\u00a0',
+ '\u0e01',
+ '\u0e02',
+ '\u0e03',
+ '\u0e04',
+ '\u0e05',
+ '\u0e06',
+ '\u0e07',
+ '\u0e08',
+ '\u0e09',
+ '\u0e0a',
+ '\u0e0b',
+ '\u0e0c',
+ '\u0e0d',
+ '\u0e0e',
+ '\u0e0f',
+ '\u0e10',
+ '\u0e11',
+ '\u0e12',
+ '\u0e13',
+ '\u0e14',
+ '\u0e15',
+ '\u0e16',
+ '\u0e17',
+ '\u0e18',
+ '\u0e19',
+ '\u0e1a',
+ '\u0e1b',
+ '\u0e1c',
+ '\u0e1d',
+ '\u0e1e',
+ '\u0e1f',
+ '\u0e20',
+ '\u0e21',
+ '\u0e22',
+ '\u0e23',
+ '\u0e24',
+ '\u0e25',
+ '\u0e26',
+ '\u0e27',
+ '\u0e28',
+ '\u0e29',
+ '\u0e2a',
+ '\u0e2b',
+ '\u0e2c',
+ '\u0e2d',
+ '\u0e2e',
+ '\u0e2f',
+ '\u0e30',
+ '\u0e31',
+ '\u0e32',
+ '\u0e33',
+ '\u0e34',
+ '\u0e35',
+ '\u0e36',
+ '\u0e37',
+ '\u0e38',
+ '\u0e39',
+ '\u0e3a',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\u0e3f',
+ '\u0e40',
+ '\u0e41',
+ '\u0e42',
+ '\u0e43',
+ '\u0e44',
+ '\u0e45',
+ '\u0e46',
+ '\u0e47',
+ '\u0e48',
+ '\u0e49',
+ '\u0e4a',
+ '\u0e4b',
+ '\u0e4c',
+ '\u0e4d',
+ '\u0e4e',
+ '\u0e4f',
+ '\u0e50',
+ '\u0e51',
+ '\u0e52',
+ '\u0e53',
+ '\u0e54',
+ '\u0e55',
+ '\u0e56',
+ '\u0e57',
+ '\u0e58',
+ '\u0e59',
+ '\u0e5a',
+ '\u0e5b',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd',
+ '\ufffd'
+ };
+
+ private static final String[] LABELS = {
+ "dos-874",
+ "iso-8859-11",
+ "iso8859-11",
+ "iso885911",
+ "tis-620",
+ "windows-874"
+ };
+
+ private static final String NAME = "windows-874";
+
+ static final Encoding INSTANCE = new Windows874();
+
+ private Windows874() {
+ super(NAME, LABELS);
+ }
+
+ @Override public CharsetDecoder newDecoder() {
+ return new FallibleSingleByteDecoder(this, TABLE);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Auto.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Auto.java
new file mode 100644
index 000000000..0967a5814
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Auto.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+public @interface Auto {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/CharacterName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/CharacterName.java
new file mode 100644
index 000000000..bcb8a2b00
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/CharacterName.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+public @interface CharacterName {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Const.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Const.java
new file mode 100644
index 000000000..2ba7f418a
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Const.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * Marker for translating into the C++ const keyword on the declaration in
+ * question.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface Const {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/IdType.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/IdType.java
new file mode 100644
index 000000000..117da8d3c
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/IdType.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * The type for attribute IDness. (In Java, an interned string
+ * <code>"CDATA"</code> or <code>"ID"</code>.)
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface IdType {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Inline.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Inline.java
new file mode 100644
index 000000000..cc0728b1b
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Inline.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2009-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * Translates into the C++ inline keyword.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface Inline {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Literal.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Literal.java
new file mode 100644
index 000000000..44444d525
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Literal.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2009-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * Marks a string type as being the literal string type (typically const char*)
+ * in C++.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface Literal {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Local.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Local.java
new file mode 100644
index 000000000..1f91ba93b
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Local.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * The local name of an element or attribute. Must be comparable with
+ * <code>==</code> (interned <code>String</code> in Java).
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface Local {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NoLength.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NoLength.java
new file mode 100644
index 000000000..cf011d33e
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NoLength.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * The array type marked with this annotation won't have its
+ * <code>.length</code> read.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface NoLength {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NsUri.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NsUri.java
new file mode 100644
index 000000000..03baa75f5
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NsUri.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * The namespace URI type. (In Java, an interned <code>String</code>.)
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface NsUri {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Prefix.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Prefix.java
new file mode 100644
index 000000000..268e531a3
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Prefix.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * The type for namespace prefixes. (In Java, an interned <code>String</code>.)
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface Prefix {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/QName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/QName.java
new file mode 100644
index 000000000..e6d4807b6
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/QName.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * The type for qualified names. (In Java, an interned <code>String</code>.)
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface QName {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Virtual.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Virtual.java
new file mode 100644
index 000000000..e293e1af5
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Virtual.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.annotation;
+
+/**
+ * Marks a method as virtual in C++.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public @interface Virtual {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/package.html
new file mode 100644
index 000000000..af15d3827
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/package.html
@@ -0,0 +1,30 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2008 Mozilla Foundation
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>This package provides annotations for facilitating automated translation
+of the source code into other programming languages.</p>
+</body>
+</html> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/ByteReadable.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/ByteReadable.java
new file mode 100644
index 000000000..f3b3e74ca
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/ByteReadable.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+import java.io.IOException;
+
+/**
+ * An interface for providing a method for reading a stream of bytes one byte at
+ * a time.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface ByteReadable {
+ /**
+ * Returns the value of the next byte as an integer from 0 to 0xFF or -1 if
+ * the stream has ended.
+ *
+ * @return integer from 0 to 0xFF or -1 on EOF
+ * @throws IOException
+ */
+ public int readByte() throws IOException;
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/CharacterHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/CharacterHandler.java
new file mode 100644
index 000000000..4a5769f54
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/CharacterHandler.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2007-2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+import org.xml.sax.SAXException;
+
+/**
+ * An interface for receiving notifications of UTF-16 code units read from a character stream.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface CharacterHandler {
+
+ /**
+ * Receive notification of a run of UTF-16 code units.
+ * @param ch the buffer
+ * @param start start index in the buffer
+ * @param length the number of characters to process starting from <code>start</code>
+ * @throws SAXException if things go wrong
+ */
+ public void characters(char[] ch, int start, int length)
+ throws SAXException;
+
+ /**
+ * Signals the end of the stream. Can be used for cleanup. Doesn't mean that the stream ended successfully.
+ *
+ * @throws SAXException if things go wrong
+ */
+ public void end() throws SAXException;
+
+ /**
+ * Signals the start of the stream. Can be used for setup.
+ *
+ * @throws SAXException if things go wrong
+ */
+ public void start() throws SAXException;
+
+} \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DoctypeExpectation.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DoctypeExpectation.java
new file mode 100644
index 000000000..a34af51fa
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DoctypeExpectation.java
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+/**
+ * Used for indicating desired behavior with legacy doctypes.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public enum DoctypeExpectation {
+ /**
+ * Be a pure HTML5 parser.
+ */
+ HTML,
+
+ /**
+ * Require the HTML 4.01 Transitional public id. Turn on HTML4-specific
+ * additional errors regardless of doctype.
+ */
+ HTML401_TRANSITIONAL,
+
+ /**
+ * Require the HTML 4.01 Strict public id and a system id. Turn on
+ * HTML4-specific additional errors regardless of doctype.
+ */
+ HTML401_STRICT,
+
+ /**
+ * Treat the doctype required by HTML 5, doctypes with the HTML 4.01 Strict
+ * public id and doctypes with the HTML 4.01 Transitional public id and a
+ * system id as non-errors. Turn on HTML4-specific additional errors if the
+ * public id is the HTML 4.01 Strict or Transitional public id.
+ */
+ AUTO,
+
+ /**
+ * Never enable HTML4-specific error checks. Never report any doctype
+ * condition as an error. (Doctype tokens in wrong places will be
+ * reported as errors, though.) The application may decide what to log
+ * in response to calls to <code>DocumentModeHandler</code>. This mode
+ * is meant for doing surveys on existing content.
+ */
+ NO_DOCTYPE_ERRORS
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentMode.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentMode.java
new file mode 100644
index 000000000..e30eddd87
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentMode.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+/**
+ * Represents the HTML document compatibility mode reported via {@link DocumentModeHandler}.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public enum DocumentMode {
+ /**
+ * The Standards Mode.
+ */
+ STANDARDS_MODE,
+
+ /**
+ * The Limited Quirks Mode, a.k.a. the Almost Standards Mode.
+ */
+ ALMOST_STANDARDS_MODE,
+
+ /**
+ * The Quirks Mode.
+ */
+ QUIRKS_MODE
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentModeHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentModeHandler.java
new file mode 100644
index 000000000..55377e0e4
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentModeHandler.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+
+import org.xml.sax.SAXException;
+
+/**
+ * A callback interface for receiving notification about the document mode.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface DocumentModeHandler {
+
+ /**
+ * Receive notification of the document mode, along with the doctype identifiers.
+ *
+ * @param mode the document mode (standards, almost standards or quirks; see {@link DocumentMode})
+ * @param publicIdentifier the public id of the doctype or <code>null</code> if unavailable
+ * @param systemIdentifier the system id of the doctype or <code>null</code> if unavailable
+ * @param html4SpecificAdditionalErrorChecks <code>true</code> if HTML 4-specific checks were enabled, <code>false</code> otherwise
+ * @throws SAXException if something went wrong
+ */
+ public void documentMode(DocumentMode mode, String publicIdentifier, String systemIdentifier, boolean html4SpecificAdditionalErrorChecks) throws SAXException;
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java
new file mode 100644
index 000000000..6f185aeaf
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2008-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+import org.xml.sax.SAXException;
+
+/**
+ * An interface for communicating about character encoding names with the
+ * environment of the parser.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface EncodingDeclarationHandler {
+
+ /**
+ * Indicates that the parser has found an internal encoding declaration with
+ * the charset value <code>charset</code>.
+ *
+ * @param charset
+ * the charset name found in the declaration.
+ * @return <code>true</code> if the value of <code>charset</code> was an
+ * encoding name for a supported ASCII-superset encoding; <code>false</code> otherwise.
+ * @throws SAXException
+ * if something went wrong
+ */
+ public boolean internalEncodingDeclaration(String charset) throws SAXException;
+
+ /**
+ * Queries the environment for the encoding in use (for error reporting).
+ *
+ * @return the name of the encoding in use
+ * @throws SAXException
+ * if something went wrong
+ */
+ public String getCharacterEncoding() throws SAXException;
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Heuristics.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Heuristics.java
new file mode 100644
index 000000000..40f15ce7d
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Heuristics.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+/**
+ * Selects which heuristic character encoding sniffer(s), if any, are requested.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public enum Heuristics {
+
+ /**
+ * Perform no heuristic sniffing.
+ */
+ NONE,
+
+ /**
+ * Use both jchardet and ICU4J.
+ */
+ ALL,
+
+ /**
+ * Use jchardet only.
+ */
+ CHARDET,
+
+ /**
+ * Use ICU4J only.
+ */
+ ICU
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Interner.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Interner.java
new file mode 100644
index 000000000..deab4c60f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Interner.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2009-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+/**
+ * A placeholder type that translates into the type of the C++ class that
+ * implements an interning service for local names (<code>@Local</code> in
+ * Java). The interface itself declares no members.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface Interner {
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TokenHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TokenHandler.java
new file mode 100644
index 000000000..18f49e99d
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TokenHandler.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+import nu.validator.htmlparser.annotation.Const;
+import nu.validator.htmlparser.annotation.NoLength;
+import nu.validator.htmlparser.impl.ElementName;
+import nu.validator.htmlparser.impl.HtmlAttributes;
+import nu.validator.htmlparser.impl.Tokenizer;
+
+import org.xml.sax.SAXException;
+
+/**
+ * <code>Tokenizer</code> reports tokens through this interface.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface TokenHandler {
+
+ /**
+ * This method is called at the start of tokenization before any other
+ * methods on this interface are called. Implementations should hold the
+ * reference to the <code>Tokenizer</code> in order to set the content
+ * model flag and in order to be able to query for <code>Locator</code>
+ * data.
+ *
+ * @param self
+ * the <code>Tokenizer</code>.
+ * @throws SAXException
+ * if something went wrong
+ */
+ public void startTokenization(Tokenizer self) throws SAXException;
+
+ /**
+ * If this handler implementation cares about comments, return
+ * <code>true</code>. If not, return <code>false</code>.
+ *
+ * @return whether this handler wants comments
+ * @throws SAXException
+ * if something went wrong
+ */
+ public boolean wantsComments() throws SAXException;
+
+ /**
+ * Receive a doctype token.
+ *
+ * @param name
+ * the name
+ * @param publicIdentifier
+ * the public id
+ * @param systemIdentifier
+ * the system id
+ * @param forceQuirks
+ * the force-quirks flag; presumably <code>true</code> when the doctype is malformed (TODO confirm)
+ * @throws SAXException
+ * if something went wrong
+ */
+ public void doctype(String name, String publicIdentifier,
+ String systemIdentifier, boolean forceQuirks) throws SAXException;
+
+ /**
+ * Receive a start tag token.
+ *
+ * @param eltName
+ * the tag name
+ * @param attributes
+ * the attributes
+ * @param selfClosing
+ * presumably <code>true</code> if the tag was written as self-closing (TODO confirm)
+ * @throws SAXException
+ * if something went wrong
+ */
+ public void startTag(ElementName eltName, HtmlAttributes attributes,
+ boolean selfClosing) throws SAXException;
+
+ /**
+ * Receive an end tag token.
+ *
+ * @param eltName
+ * the tag name
+ * @throws SAXException
+ * if something went wrong
+ */
+ public void endTag(ElementName eltName) throws SAXException;
+
+ /**
+ * Receive a comment token. The data is junk if
+ * <code>wantsComments()</code> returned <code>false</code>.
+ *
+ * @param buf
+ * a buffer holding the data
+ * @param start the offset into the buffer
+ * @param length
+ * the number of code units to read
+ * @throws SAXException
+ * if something went wrong
+ */
+ public void comment(@NoLength char[] buf, int start, int length) throws SAXException;
+
+ /**
+ * Receive character tokens. This method has the same semantics as the SAX
+ * method of the same name.
+ *
+ * @param buf
+ * a buffer holding the data
+ * @param start
+ * offset into the buffer
+ * @param length
+ * the number of code units to read
+ * @throws SAXException
+ * if something went wrong
+ * @see org.xml.sax.ContentHandler#characters(char[], int, int)
+ */
+ public void characters(@Const @NoLength char[] buf, int start, int length)
+ throws SAXException;
+
+ /**
+ * Reports a U+0000 that's being turned into a U+FFFD.
+ *
+ * @throws SAXException
+ * if something went wrong
+ */
+ public void zeroOriginatingReplacementCharacter() throws SAXException;
+
+ /**
+ * Receive the end-of-file token.
+ *
+ * @throws SAXException
+ * if something went wrong
+ */
+ public void eof() throws SAXException;
+
+ /**
+ * Perform final cleanup.
+ *
+ * @throws SAXException
+ * if something went wrong
+ */
+ public void endTokenization() throws SAXException;
+
+ /**
+ * Checks if the CDATA sections are allowed.
+ *
+ * @return <code>true</code> if CDATA sections are allowed
+ * @throws SAXException
+ * if something went wrong
+ */
+ public boolean cdataSectionAllowed() throws SAXException;
+
+ /**
+ * Notifies the token handler of the worst case amount of data to be
+ * reported via <code>characters()</code> and
+ * <code>zeroOriginatingReplacementCharacter()</code>.
+ *
+ * @param inputLength the maximum number of chars that can be reported via
+ * <code>characters()</code> and <code>zeroOriginatingReplacementCharacter()</code>
+ * before a new call to this method.
+ * @throws SAXException if something went wrong
+ */
+ public void ensureBufferSpace(int inputLength) throws SAXException;
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TransitionHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TransitionHandler.java
new file mode 100644
index 000000000..eec23c71c
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TransitionHandler.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+import org.xml.sax.SAXException;
+
+/**
+ * An interface for intercepting information about the state transitions that
+ * the tokenizer is making.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface TransitionHandler {
+
+ /**
+ * This method is called for every tokenizer state transition.
+ *
+ * @param from
+ * the state the tokenizer is transitioning from (an <code>int</code> state code)
+ * @param to
+ * the state being transitioned to (an <code>int</code> state code)
+ * @param reconsume
+ * <code>true</code> if the current input character is going to
+ * be reconsumed in the new state
+ * @param pos
+ * the current index into the input stream
+ * @throws SAXException
+ * if something went wrong
+ */
+ void transition(int from, int to, boolean reconsume, int pos)
+ throws SAXException;
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/XmlViolationPolicy.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/XmlViolationPolicy.java
new file mode 100644
index 000000000..c959df655
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/XmlViolationPolicy.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.common;
+
+/**
+ * Policy for handling content that cannot be mapped to XML 1.0.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public enum XmlViolationPolicy {
+ /**
+ * Conform to HTML 5, allow XML 1.0 to be violated.
+ */
+ ALLOW,
+
+ /**
+ * Halt when something cannot be mapped to XML 1.0.
+ */
+ FATAL,
+
+ /**
+ * Be non-conforming and alter the infoset to fit
+ * XML 1.0 when something would otherwise not be
+ * mappable to XML 1.0.
+ */
+ ALTER_INFOSET
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/package.html
new file mode 100644
index 000000000..43f141cd8
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/package.html
@@ -0,0 +1,29 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>This package provides common interfaces and enumerations.</p>
+</body>
+</html> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/DOMTreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/DOMTreeBuilder.java
new file mode 100644
index 000000000..2b8eff230
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/DOMTreeBuilder.java
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.dom;
+
+import nu.validator.htmlparser.common.DocumentMode;
+import nu.validator.htmlparser.impl.CoalescingTreeBuilder;
+import nu.validator.htmlparser.impl.HtmlAttributes;
+
+import org.w3c.dom.DOMException;
+import org.w3c.dom.DOMImplementation;
+import org.w3c.dom.Document;
+import org.w3c.dom.DocumentFragment;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.Text;
+import org.xml.sax.SAXException;
+
+/**
+ * The tree builder glue for building a tree through the public DOM APIs.
+ * Most methods route a caught <code>DOMException</code> to <code>fatal()</code>.
+ * @version $Id$
+ * @author hsivonen
+ */
+class DOMTreeBuilder extends CoalescingTreeBuilder<Element> {
+
+ /**
+ * The DOM implementation used by {@link #start(boolean)} to create the document.
+ */
+ private DOMImplementation implementation;
+
+ /**
+ * The document being built; reset to <code>null</code> once handed out.
+ */
+ private Document document;
+
+ /**
+ * Creates a tree builder that builds documents with the given DOM implementation.
+ *
+ * @param implementation
+ * the DOM implementation to create documents with.
+ */
+ protected DOMTreeBuilder(DOMImplementation implementation) {
+ super();
+ this.implementation = implementation;
+ }
+
+ /**
+ * Sets each attribute on the element unless it already has that attribute.
+ * @see nu.validator.htmlparser.impl.TreeBuilder#addAttributesToElement(java.lang.Object,
+ * nu.validator.htmlparser.impl.HtmlAttributes)
+ */
+ @Override protected void addAttributesToElement(Element element,
+ HtmlAttributes attributes) throws SAXException {
+ try {
+ for (int i = 0; i < attributes.getLength(); i++) {
+ String localName = attributes.getLocalNameNoBoundsCheck(i);
+ String uri = attributes.getURINoBoundsCheck(i);
+ if (!element.hasAttributeNS(uri, localName)) {
+ element.setAttributeNS(uri, localName,
+ attributes.getValueNoBoundsCheck(i));
+ }
+ }
+ } catch (DOMException e) {
+ fatal(e);
+ }
+ }
+
+ /**
+ * Appends text to the parent, extending a trailing text node if there is one.
+ * @see nu.validator.htmlparser.impl.CoalescingTreeBuilder#appendCharacters(java.lang.Object,
+ * java.lang.String)
+ */
+ @Override protected void appendCharacters(Element parent, String text)
+ throws SAXException {
+ try {
+ Node lastChild = parent.getLastChild();
+ if (lastChild != null && lastChild.getNodeType() == Node.TEXT_NODE) {
+ Text lastAsText = (Text) lastChild;
+ lastAsText.setData(lastAsText.getData() + text);
+ return;
+ }
+ parent.appendChild(document.createTextNode(text));
+ } catch (DOMException e) {
+ fatal(e);
+ }
+ }
+
+ /**
+ * Moves every child of <code>oldParent</code> to the end of <code>newParent</code>.
+ * @see nu.validator.htmlparser.impl.TreeBuilder#appendChildrenToNewParent(java.lang.Object,
+ * java.lang.Object)
+ */
+ @Override protected void appendChildrenToNewParent(Element oldParent,
+ Element newParent) throws SAXException {
+ try {
+ while (oldParent.hasChildNodes()) {
+ newParent.appendChild(oldParent.getFirstChild());
+ }
+ } catch (DOMException e) {
+ fatal(e);
+ }
+ }
+
+ /**
+ * Appends a new comment node with the given text to the parent.
+ * @see nu.validator.htmlparser.impl.CoalescingTreeBuilder#appendComment(java.lang.Object,
+ * java.lang.String)
+ */
+ @Override protected void appendComment(Element parent, String comment)
+ throws SAXException {
+ try {
+ parent.appendChild(document.createComment(comment));
+ } catch (DOMException e) {
+ fatal(e);
+ }
+ }
+
+ /**
+ * Appends a new comment node with the given text directly to the document.
+ * @see nu.validator.htmlparser.impl.CoalescingTreeBuilder#appendCommentToDocument(java.lang.String)
+ */
+ @Override protected void appendCommentToDocument(String comment)
+ throws SAXException {
+ try {
+ document.appendChild(document.createComment(comment));
+ } catch (DOMException e) {
+ fatal(e);
+ }
+ }
+
+ /**
+ * Creates an element carrying the given attributes; the element is not inserted here.
+ * @see nu.validator.htmlparser.impl.TreeBuilder#createElement(String, String, nu.validator.htmlparser.impl.HtmlAttributes, Object)
+ */
+ @Override protected Element createElement(String ns, String name,
+ HtmlAttributes attributes, Element intendedParent) throws SAXException {
+ try {
+ Element rv = document.createElementNS(ns, name);
+ for (int i = 0; i < attributes.getLength(); i++) {
+ rv.setAttributeNS(attributes.getURINoBoundsCheck(i),
+ attributes.getLocalNameNoBoundsCheck(i),
+ attributes.getValueNoBoundsCheck(i));
+ if (attributes.getTypeNoBoundsCheck(i) == "ID") { // NOTE(review): reference comparison; presumably the type strings are interned constants - confirm
+ rv.setIdAttributeNS(null, attributes.getLocalName(i), true); // NOTE(review): null namespace although the attribute was set with its own URI - confirm
+ }
+ }
+ return rv;
+ } catch (DOMException e) {
+ fatal(e);
+ throw new RuntimeException("Unreachable");
+ }
+ }
+
+ /**
+ * Creates the root <code>html</code> element with the given attributes and appends it to the document.
+ * @see nu.validator.htmlparser.impl.TreeBuilder#createHtmlElementSetAsRoot(nu.validator.htmlparser.impl.HtmlAttributes)
+ */
+ @Override protected Element createHtmlElementSetAsRoot(
+ HtmlAttributes attributes) throws SAXException {
+ try {
+ Element rv = document.createElementNS(
+ "http://www.w3.org/1999/xhtml", "html");
+ for (int i = 0; i < attributes.getLength(); i++) {
+ rv.setAttributeNS(attributes.getURINoBoundsCheck(i),
+ attributes.getLocalNameNoBoundsCheck(i),
+ attributes.getValueNoBoundsCheck(i));
+ }
+ document.appendChild(rv);
+ return rv;
+ } catch (DOMException e) {
+ fatal(e);
+ throw new RuntimeException("Unreachable");
+ }
+ }
+
+ /**
+ * Appends <code>child</code> as the last child of <code>newParent</code>.
+ * @see nu.validator.htmlparser.impl.TreeBuilder#appendElement(java.lang.Object,
+ * java.lang.Object)
+ */
+ @Override protected void appendElement(Element child, Element newParent)
+ throws SAXException {
+ try {
+ newParent.appendChild(child);
+ } catch (DOMException e) {
+ fatal(e);
+ }
+ }
+
+ /**
+ * Returns whether the element has any child nodes.
+ * @see nu.validator.htmlparser.impl.TreeBuilder#hasChildren(java.lang.Object)
+ */
+ @Override protected boolean hasChildren(Element element)
+ throws SAXException {
+ try {
+ return element.hasChildNodes();
+ } catch (DOMException e) {
+ fatal(e);
+ throw new RuntimeException("Unreachable");
+ }
+ }
+
+ /**
+ * Creates an element and stores <code>form</code> on it as DOM user data ("nu.validator.form-pointer").
+ * @see nu.validator.htmlparser.impl.TreeBuilder#createElement(String, java.lang.String, org.xml.sax.Attributes, java.lang.Object)
+ */
+ @Override protected Element createElement(String ns, String name,
+ HtmlAttributes attributes, Element form, Element intendedParent) throws SAXException {
+ try {
+ Element rv = createElement(ns, name, attributes, intendedParent);
+ rv.setUserData("nu.validator.form-pointer", form, null);
+ return rv;
+ } catch (DOMException e) {
+ fatal(e);
+ return null; // NOTE(review): sibling methods throw "Unreachable" here; presumably fatal() always throws - confirm
+ }
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#start()
+ */
+ @Override protected void start(boolean fragment) throws SAXException {
+ document = implementation.createDocument(null, null, null); // new document: no namespace, no root element, no doctype
+ }
+
+ /**
+ * Records the document mode on the document as DOM user data; the identifiers and flag are not used here.
+ * @see nu.validator.htmlparser.impl.TreeBuilder#documentMode(nu.validator.htmlparser.common.DocumentMode,
+ * java.lang.String, java.lang.String, boolean)
+ */
+ protected void documentMode(DocumentMode mode, String publicIdentifier,
+ String systemIdentifier, boolean html4SpecificAdditionalErrorChecks)
+ throws SAXException {
+ document.setUserData("nu.validator.document-mode", mode, null);
+ }
+
+ /**
+ * Returns the document and drops this builder's reference to it.
+ *
+ * @return the document
+ */
+ Document getDocument() {
+ Document rv = document;
+ document = null;
+ return rv;
+ }
+
+ /**
+ * Moves the children of the document's first child into a new fragment and drops the document reference.
+ *
+ * @return the document fragment
+ */
+ DocumentFragment getDocumentFragment() {
+ DocumentFragment rv = document.createDocumentFragment();
+ Node rootElt = document.getFirstChild(); // NOTE(review): null if the document has no children - confirm a root always exists here
+ while (rootElt.hasChildNodes()) {
+ rv.appendChild(rootElt.getFirstChild());
+ }
+ document = null;
+ return rv;
+ }
+
+ @Override // Creates an element and inserts it before the table, or appends it to stackParent if the table has no parent.
+ protected Element createAndInsertFosterParentedElement(String ns, String name,
+ HtmlAttributes attributes, Element table, Element stackParent) throws SAXException {
+ try {
+ Node parent = table.getParentNode();
+ Element child = createElement(ns, name, attributes, parent != null ? (Element) parent : stackParent);
+
+ if (parent != null) { // always an element if not null
+ parent.insertBefore(child, table);
+ } else {
+ stackParent.appendChild(child);
+ }
+
+ return child;
+ } catch (DOMException e) {
+ fatal(e);
+ throw new RuntimeException("Unreachable");
+ }
+ }
+
+ @Override protected void insertFosterParentedCharacters(String text,
+ Element table, Element stackParent) throws SAXException {
+ try {
+ Node parent = table.getParentNode(); // text goes before the table when the table has a parent
+ if (parent != null) { // always an element if not null
+ Node previousSibling = table.getPreviousSibling();
+ if (previousSibling != null
+ && previousSibling.getNodeType() == Node.TEXT_NODE) {
+ Text lastAsText = (Text) previousSibling;
+ lastAsText.setData(lastAsText.getData() + text); // extend the existing text node instead of adding a new one
+ return;
+ }
+ parent.insertBefore(document.createTextNode(text), table);
+ return;
+ }
+ Node lastChild = stackParent.getLastChild(); // table has no parent: append to stackParent instead
+ if (lastChild != null && lastChild.getNodeType() == Node.TEXT_NODE) {
+ Text lastAsText = (Text) lastChild;
+ lastAsText.setData(lastAsText.getData() + text);
+ return;
+ }
+ stackParent.appendChild(document.createTextNode(text));
+ } catch (DOMException e) {
+ fatal(e);
+ }
+ }
+
+ @Override protected void insertFosterParentedChild(Element child,
+ Element table, Element stackParent) throws SAXException {
+ try {
+ Node parent = table.getParentNode(); // child goes before the table, or under stackParent if the table has no parent
+ if (parent != null) { // always an element if not null
+ parent.insertBefore(child, table);
+ } else {
+ stackParent.appendChild(child);
+ }
+ } catch (DOMException e) {
+ fatal(e);
+ }
+ }
+
+ @Override protected void detachFromParent(Element element)
+ throws SAXException {
+ try {
+ Node parent = element.getParentNode();
+ if (parent != null) { // nothing to do when the element has no parent
+ parent.removeChild(element);
+ }
+ } catch (DOMException e) {
+ fatal(e);
+ }
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/Dom2Sax.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/Dom2Sax.java
new file mode 100644
index 000000000..5e366be7b
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/Dom2Sax.java
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.dom;
+
+import org.w3c.dom.DocumentType;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+ * Replays a DOM subtree as SAX events.
+ *
+ * <p>The tree is walked iteratively in document order. Element, text and
+ * document events go to the <code>ContentHandler</code>; comments, CDATA
+ * section boundaries and the doctype go to the <code>LexicalHandler</code>
+ * when one was supplied.
+ *
+ * <p>A single <code>Attributes</code> object is reused for every element and
+ * is emptied as soon as <code>startElement</code> returns, so handlers must
+ * copy any attribute data they want to keep.
+ */
+public class Dom2Sax {
+
+    private static String emptyIfNull(String namespaceURI) {
+        return namespaceURI == null ? "" : namespaceURI;
+    }
+
+    /**
+     * Returns the local name of <code>node</code>, or its node name when the
+     * local name is <code>null</code>. This works around DOM implementations
+     * that report a <code>null</code> local name.
+     *
+     * @param node the node to name
+     * @return the local name, or the node name as a fallback
+     */
+    private static String localNameOf(Node node) {
+        String localName = node.getLocalName();
+        return localName == null ? node.getNodeName() : localName;
+    }
+
+    private final NamedNodeMapAttributes attributes = new NamedNodeMapAttributes();
+
+    private final ContentHandler contentHandler;
+
+    private final LexicalHandler lexicalHandler;
+
+    /**
+     * @param contentHandler
+     *            the handler that receives content events; must not be
+     *            <code>null</code>
+     * @param lexicalHandler
+     *            the handler that receives comment, CDATA and DTD events, or
+     *            <code>null</code> to drop those events
+     * @throws IllegalArgumentException
+     *             if <code>contentHandler</code> is <code>null</code>
+     */
+    public Dom2Sax(ContentHandler contentHandler, LexicalHandler lexicalHandler) {
+        if (contentHandler == null) {
+            throw new IllegalArgumentException("ContentHandler must not be null.");
+        }
+        this.contentHandler = contentHandler;
+        this.lexicalHandler = lexicalHandler;
+    }
+
+    /**
+     * Reports the subtree rooted at <code>node</code> as SAX events.
+     * <code>startDocument</code> and <code>endDocument</code> are emitted
+     * only when a document node is encountered. The qualified name passed
+     * to <code>startElement</code> and <code>endElement</code> is always
+     * <code>null</code>.
+     *
+     * @param node the root of the subtree to report
+     * @throws SAXException if a handler throws
+     */
+    public void parse(Node node) throws SAXException {
+        Node current = node;
+        Node next;
+        char[] buf;
+        for (;;) {
+            // Report the "start" side of the current node.
+            switch (current.getNodeType()) {
+                case Node.ELEMENT_NODE:
+                    attributes.setNamedNodeMap(current.getAttributes());
+                    contentHandler.startElement(
+                            emptyIfNull(current.getNamespaceURI()),
+                            localNameOf(current), null, attributes);
+                    attributes.clear();
+                    break;
+                case Node.TEXT_NODE:
+                    buf = current.getNodeValue().toCharArray();
+                    contentHandler.characters(buf, 0, buf.length);
+                    break;
+                case Node.CDATA_SECTION_NODE:
+                    if (lexicalHandler != null) {
+                        lexicalHandler.startCDATA();
+                    }
+                    buf = current.getNodeValue().toCharArray();
+                    contentHandler.characters(buf, 0, buf.length);
+                    if (lexicalHandler != null) {
+                        lexicalHandler.endCDATA();
+                    }
+                    break;
+                case Node.COMMENT_NODE:
+                    if (lexicalHandler != null) {
+                        buf = current.getNodeValue().toCharArray();
+                        lexicalHandler.comment(buf, 0, buf.length);
+                    }
+                    break;
+                case Node.DOCUMENT_NODE:
+                    contentHandler.startDocument();
+                    break;
+                case Node.DOCUMENT_TYPE_NODE:
+                    if (lexicalHandler != null) {
+                        DocumentType doctype = (DocumentType) current;
+                        lexicalHandler.startDTD(doctype.getName(),
+                                doctype.getPublicId(), doctype.getSystemId());
+                        lexicalHandler.endDTD();
+                    }
+                    break;
+                case Node.PROCESSING_INSTRUCTION_NODE:
+                    contentHandler.processingInstruction(current.getNodeName(), current.getNodeValue());
+                    break;
+                case Node.ENTITY_REFERENCE_NODE:
+                    contentHandler.skippedEntity(current.getNodeName());
+                    break;
+            }
+            // Descend first.
+            if ((next = current.getFirstChild()) != null) {
+                current = next;
+                continue;
+            }
+            // No children left: close nodes while climbing until a next
+            // sibling is found or the starting node has been closed.
+            for (;;) {
+                switch (current.getNodeType()) {
+                    case Node.ELEMENT_NODE:
+                        contentHandler.endElement(
+                                emptyIfNull(current.getNamespaceURI()),
+                                localNameOf(current), null);
+                        break;
+                    case Node.DOCUMENT_NODE:
+                        contentHandler.endDocument();
+                        break;
+                }
+                if (current == node) {
+                    return;
+                }
+                if ((next = current.getNextSibling()) != null) {
+                    current = next;
+                    break;
+                }
+                current = current.getParentNode();
+            }
+        }
+    }
+
+    /**
+     * Read-only SAX view over a DOM <code>NamedNodeMap</code>. After
+     * <code>clear()</code> the view behaves as an empty attribute list.
+     */
+    private static final class NamedNodeMapAttributes implements Attributes {
+
+        private NamedNodeMap map;
+
+        private int length;
+
+        public void setNamedNodeMap(NamedNodeMap attributes) {
+            this.map = attributes;
+            this.length = attributes == null ? 0 : attributes.getLength();
+        }
+
+        public void clear() {
+            this.map = null;
+            // Reset the length too, so later lookups see an empty list
+            // instead of dereferencing the released map.
+            this.length = 0;
+        }
+
+        public int getIndex(String qName) {
+            for (int i = 0; i < length; i++) {
+                Node n = map.item(i);
+                if (n.getNodeName().equals(qName)) {
+                    return i;
+                }
+            }
+            return -1;
+        }
+
+        public int getIndex(String uri, String localName) {
+            for (int i = 0; i < length; i++) {
+                Node n = map.item(i);
+                // localNameOf() never returns null for an attribute, so this
+                // is safe for attributes without a DOM local name.
+                if (localNameOf(n).equals(localName) && emptyIfNull(n.getNamespaceURI()).equals(uri)) {
+                    return i;
+                }
+            }
+            return -1;
+        }
+
+        public int getLength() {
+            return length;
+        }
+
+        public String getLocalName(int index) {
+            if (index < length && index >= 0) {
+                return localNameOf(map.item(index));
+            } else {
+                return null;
+            }
+        }
+
+        public String getQName(int index) {
+            if (index < length && index >= 0) {
+                return map.item(index).getNodeName();
+            } else {
+                return null;
+            }
+        }
+
+        public String getType(int index) {
+            if (index < length && index >= 0) {
+                return "id".equals(localNameOf(map.item(index))) ? "ID" : "CDATA";
+            } else {
+                return null;
+            }
+        }
+
+        public String getType(String qName) {
+            int index = getIndex(qName);
+            if (index == -1) {
+                return null;
+            } else {
+                return getType(index);
+            }
+        }
+
+        public String getType(String uri, String localName) {
+            int index = getIndex(uri, localName);
+            if (index == -1) {
+                return null;
+            } else {
+                return getType(index);
+            }
+        }
+
+        public String getURI(int index) {
+            if (index < length && index >= 0) {
+                return emptyIfNull(map.item(index).getNamespaceURI());
+            } else {
+                return null;
+            }
+        }
+
+        public String getValue(int index) {
+            if (index < length && index >= 0) {
+                return map.item(index).getNodeValue();
+            } else {
+                return null;
+            }
+        }
+
+        public String getValue(String qName) {
+            int index = getIndex(qName);
+            if (index == -1) {
+                return null;
+            } else {
+                return getValue(index);
+            }
+        }
+
+        public String getValue(String uri, String localName) {
+            int index = getIndex(uri, localName);
+            if (index == -1) {
+                return null;
+            } else {
+                return getValue(index);
+            }
+        }
+
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java
new file mode 100644
index 000000000..f4a307c9f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java
@@ -0,0 +1,736 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007-2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.dom;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.LinkedList;
+import java.util.List;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import nu.validator.htmlparser.common.CharacterHandler;
+import nu.validator.htmlparser.common.DoctypeExpectation;
+import nu.validator.htmlparser.common.DocumentModeHandler;
+import nu.validator.htmlparser.common.Heuristics;
+import nu.validator.htmlparser.common.TokenHandler;
+import nu.validator.htmlparser.common.TransitionHandler;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
+import nu.validator.htmlparser.impl.Tokenizer;
+import nu.validator.htmlparser.io.Driver;
+
+import org.w3c.dom.DOMImplementation;
+import org.w3c.dom.Document;
+import org.w3c.dom.DocumentFragment;
+import org.xml.sax.EntityResolver;
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * This class implements an HTML5 parser that exposes data through the DOM
+ * interface.
+ *
+ * <p>By default, when using the constructor without arguments,
+ * this parser coerces XML 1.0-incompatible infosets into XML 1.0-compatible
+ * infosets. This corresponds to <code>ALTER_INFOSET</code> as the general
+ * XML violation policy. To make the parser support non-conforming HTML fully
+ * per the HTML 5 spec while on the other hand potentially violating the SAX2
+ * API contract, set the general XML violation policy to <code>ALLOW</code>.
+ * This does not work with a standard DOM implementation.
+ * It is possible to treat XML 1.0 infoset violations as fatal by setting
+ * the general XML violation policy to <code>FATAL</code>.
+ *
+ * <p>The doctype is not represented in the tree.
+ *
+ * <p>The document mode is represented as user data <code>DocumentMode</code>
+ * object with the key <code>nu.validator.document-mode</code> on the document
+ * node.
+ *
+ * <p>The form pointer is also stored as user data with the key
+ * <code>nu.validator.form-pointer</code>.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class HtmlDocumentBuilder extends DocumentBuilder {
+
+ /**
+ * Returns the JAXP DOM implementation.
+ *
+ * @return the JAXP DOM implementation
+ */
+ private static DOMImplementation jaxpDOMImplementation() {
+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+ factory.setNamespaceAware(true);
+ DocumentBuilder builder;
+ try {
+ builder = factory.newDocumentBuilder();
+ } catch (ParserConfigurationException e) {
+ throw new RuntimeException(e);
+ }
+ return builder.getDOMImplementation();
+ }
+
+ /**
+ * The tokenizer.
+ */
+ private Driver driver;
+
+ /**
+ * The tree builder.
+ */
+ private final DOMTreeBuilder treeBuilder;
+
+ /**
+ * The DOM impl.
+ */
+ private final DOMImplementation implementation;
+
+ /**
+ * The entity resolver.
+ */
+ private EntityResolver entityResolver;
+
+ private ErrorHandler errorHandler = null;
+
+ private DocumentModeHandler documentModeHandler = null;
+
+ private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;
+
+ private boolean checkingNormalization = false;
+
+ private boolean scriptingEnabled = false;
+
+ private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>();
+
+ private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW;
+
+ private boolean html4ModeCompatibleWithXhtml1Schemata = false;
+
+ private boolean mappingLangToXmlLang = false;
+
+ private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL;
+
+ private boolean reportingDoctype = true;
+
+ private ErrorHandler treeBuilderErrorHandler = null;
+
+ private Heuristics heuristics = Heuristics.NONE;
+
+ private TransitionHandler transitionHandler = null;
+
+ /**
+ * Instantiates the document builder with a specific DOM
+ * implementation and XML violation policy.
+ *
+ * @param implementation
+ * the DOM implementation
+ * @param xmlPolicy the policy
+ */
+ public HtmlDocumentBuilder(DOMImplementation implementation,
+ XmlViolationPolicy xmlPolicy) {
+ this.implementation = implementation;
+ this.treeBuilder = new DOMTreeBuilder(implementation);
+ this.driver = null;
+ setXmlPolicy(xmlPolicy);
+ }
+
+ /**
+ * Instantiates the document builder with a specific DOM implementation
+ * and the infoset-altering XML violation policy.
+ *
+ * @param implementation
+ * the DOM implementation
+ */
+ public HtmlDocumentBuilder(DOMImplementation implementation) {
+ this(implementation, XmlViolationPolicy.ALTER_INFOSET);
+ }
+
+ /**
+ * Instantiates the document builder with the JAXP DOM implementation
+ * and the infoset-altering XML violation policy.
+ */
+ public HtmlDocumentBuilder() {
+ this(XmlViolationPolicy.ALTER_INFOSET);
+ }
+
+ /**
+ * Instantiates the document builder with the JAXP DOM implementation
+ * and a specific XML violation policy.
+ * @param xmlPolicy the policy
+ */
+ public HtmlDocumentBuilder(XmlViolationPolicy xmlPolicy) {
+ this(jaxpDOMImplementation(), xmlPolicy);
+ }
+
+
+ /**
+  * Creates the tokenizer for a new driver. A plain <code>Tokenizer</code>
+  * is used only when no error handler and no transition handler are set
+  * and the non-XML character policy is <code>ALLOW</code>; otherwise an
+  * <code>ErrorReportingTokenizer</code> is created.
+  *
+  * @param handler the token handler to pass to the tokenizer
+  * @param newAttributesEachTime passed through to the tokenizer constructor
+  * @return the new tokenizer
+  */
+ private Tokenizer newTokenizer(TokenHandler handler,
+         boolean newAttributesEachTime) {
+     boolean needsErrorReporting = errorHandler != null
+             || transitionHandler != null
+             || contentNonXmlCharPolicy != XmlViolationPolicy.ALLOW;
+     if (needsErrorReporting) {
+         return new ErrorReportingTokenizer(handler, newAttributesEachTime);
+     }
+     return new Tokenizer(handler, newAttributesEachTime);
+ }
+
+ /**
+ * This class wraps different tree builders depending on configuration. This
+ * method does the work of hiding this from the user of the class.
+ */
+ private void lazyInit() {
+     if (driver == null) {
+         // Build a fresh driver and replay every stored setting onto it.
+         // Setters that find no driver only record their value in a field,
+         // so each of them needs a matching line here.
+         this.driver = new Driver(newTokenizer(treeBuilder, false));
+         this.driver.setErrorHandler(errorHandler);
+         this.driver.setTransitionHandler(transitionHandler);
+         this.treeBuilder.setErrorHandler(treeBuilderErrorHandler);
+         this.driver.setCheckingNormalization(checkingNormalization);
+         this.driver.setCommentPolicy(commentPolicy);
+         this.driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
+         this.driver.setContentSpacePolicy(contentSpacePolicy);
+         this.driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+         this.driver.setMappingLangToXmlLang(mappingLangToXmlLang);
+         this.driver.setXmlnsPolicy(xmlnsPolicy);
+         // setNamePolicy() forwards to the driver only when one exists, so
+         // a policy chosen earlier has to be applied here as well.
+         this.driver.setNamePolicy(namePolicy);
+         this.driver.setHeuristics(heuristics);
+         for (CharacterHandler characterHandler : characterHandlers) {
+             this.driver.addCharacterHandler(characterHandler);
+         }
+         this.treeBuilder.setDoctypeExpectation(doctypeExpectation);
+         this.treeBuilder.setDocumentModeHandler(documentModeHandler);
+         this.treeBuilder.setScriptingEnabled(scriptingEnabled);
+         this.treeBuilder.setReportingDoctype(reportingDoctype);
+         this.treeBuilder.setNamePolicy(namePolicy);
+     }
+ }
+
+ /**
+ * Tokenizes the input source.
+ *
+ * @param is the source
+ * @throws SAXException if stuff goes wrong
+ * @throws IOException if IO goes wrong
+ * @throws MalformedURLException if the system ID is malformed and the entity resolver is <code>null</code>
+ */
+ private void tokenize(InputSource is) throws SAXException, IOException,
+         MalformedURLException {
+     if (is == null) {
+         throw new IllegalArgumentException("Null input.");
+     }
+     if (is.getByteStream() == null && is.getCharacterStream() == null) {
+         // URI-only input: ask the entity resolver first, then fall back
+         // to opening the system ID as a URL.
+         String systemId = is.getSystemId();
+         if (systemId == null) {
+             throw new IllegalArgumentException(
+                     "No byte stream, no character stream nor URI.");
+         }
+         InputSource resolved = null;
+         if (entityResolver != null) {
+             // The resolver may return null to request default handling.
+             resolved = entityResolver.resolveEntity(is.getPublicId(), systemId);
+         }
+         // Accept the resolver's answer when it supplies either kind of
+         // stream; open the URL only when it supplies neither.
+         if (resolved == null
+                 || (resolved.getByteStream() == null
+                         && resolved.getCharacterStream() == null)) {
+             resolved = new InputSource();
+             resolved.setSystemId(systemId);
+             resolved.setByteStream(new URL(systemId).openStream());
+         }
+         is = resolved;
+     }
+     if (driver == null) lazyInit();
+     driver.tokenize(is);
+ }
+
+ /**
+ * Returns the DOM implementation
+ * @return the DOM implementation
+ * @see javax.xml.parsers.DocumentBuilder#getDOMImplementation()
+ */
+ @Override public DOMImplementation getDOMImplementation() {
+ return implementation;
+ }
+
+ /**
+ * Returns <code>true</code>.
+ * @return <code>true</code>
+ * @see javax.xml.parsers.DocumentBuilder#isNamespaceAware()
+ */
+ @Override public boolean isNamespaceAware() {
+ return true;
+ }
+
+ /**
+ * Returns <code>false</code>
+ * @return <code>false</code>
+ * @see javax.xml.parsers.DocumentBuilder#isValidating()
+ */
+ @Override public boolean isValidating() {
+ return false;
+ }
+
+ /**
+ * For API compatibility.
+ * @see javax.xml.parsers.DocumentBuilder#newDocument()
+ */
+ @Override public Document newDocument() {
+ return implementation.createDocument(null, null, null);
+ }
+
+ /**
+ * Parses a document from a SAX <code>InputSource</code>.
+ * @param is the source
+ * @return the doc
+ * @throws SAXException if stuff goes wrong
+ * @throws IOException if IO goes wrong
+ * @see javax.xml.parsers.DocumentBuilder#parse(org.xml.sax.InputSource)
+ */
+ @Override public Document parse(InputSource is) throws SAXException,
+ IOException {
+ treeBuilder.setFragmentContext(null); // drop any context left by an earlier parseFragment() call
+ tokenize(is); // runs the driver, whose tokenizer feeds treeBuilder
+ return treeBuilder.getDocument();
+ }
+
+ /**
+ * Parses a document fragment from a SAX <code>InputSource</code> with
+ * an HTML element as the fragment context.
+ * @param is the source
+ * @param context the context element name (HTML namespace assumed)
+ * @return the document fragment
+ * @throws SAXException if stuff goes wrong
+ * @throws IOException if IO goes wrong
+ */
+ public DocumentFragment parseFragment(InputSource is, String context)
+ throws IOException, SAXException {
+ treeBuilder.setFragmentContext(context.intern()); // throws NullPointerException if context is null
+ tokenize(is); // runs the driver, whose tokenizer feeds treeBuilder
+ return treeBuilder.getDocumentFragment();
+ }
+
+ /**
+ * Parses a document fragment from a SAX <code>InputSource</code>.
+ * @param is the source
+ * @param contextLocal the local name of the context element
+ * @param contextNamespace the namespace of the context element
+ * @return the document fragment
+ * @throws SAXException if stuff goes wrong
+ * @throws IOException if IO goes wrong
+ */
+ public DocumentFragment parseFragment(InputSource is, String contextLocal,
+ String contextNamespace) throws IOException, SAXException {
+ treeBuilder.setFragmentContext(contextLocal.intern(), // both names must be non-null; intern() is called on each
+ contextNamespace.intern(), null, false); // NOTE(review): meaning of the trailing null/false arguments is defined by the tree builder - confirm there
+ tokenize(is); // runs the driver, whose tokenizer feeds treeBuilder
+ return treeBuilder.getDocumentFragment();
+ }
+
+ /**
+ * Sets the entity resolver for URI-only inputs.
+ * @param resolver the resolver
+ * @see javax.xml.parsers.DocumentBuilder#setEntityResolver(org.xml.sax.EntityResolver)
+ */
+ @Override public void setEntityResolver(EntityResolver resolver) {
+ this.entityResolver = resolver;
+ }
+
+ /**
+ * Sets the error handler.
+ * @param errorHandler the handler
+ * @see javax.xml.parsers.DocumentBuilder#setErrorHandler(org.xml.sax.ErrorHandler)
+ */
+ @Override public void setErrorHandler(ErrorHandler errorHandler) {
+     // Record the handler in both fields that lazyInit() replays onto a
+     // new driver and onto the tree builder; without this, the next
+     // lazyInit() would reset both to null.
+     this.errorHandler = errorHandler;
+     this.treeBuilderErrorHandler = errorHandler;
+     treeBuilder.setErrorHandler(errorHandler);
+     // newTokenizer() chooses the tokenizer class based on whether an
+     // error handler is set, so discard the driver and let lazyInit()
+     // rebuild it with the right tokenizer.
+     driver = null;
+ }
+
+ public void setTransitionHander(TransitionHandler handler) {
+ transitionHandler = handler; // consulted by newTokenizer() and applied to the driver in lazyInit()
+ driver = null; // discard the driver so lazyInit() builds a new one with this handler
+ }
+
+ /**
+ * Indicates whether NFC normalization of source is being checked.
+ * @return <code>true</code> if NFC normalization of source is being checked.
+ * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization()
+ */
+ public boolean isCheckingNormalization() {
+ return checkingNormalization;
+ }
+
+ /**
+ * Toggles the checking of the NFC normalization of source.
+ * @param enable <code>true</code> to check normalization
+ * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean)
+ */
+ public void setCheckingNormalization(boolean enable) {
+     checkingNormalization = enable;
+     if (driver == null) {
+         // No driver yet; lazyInit() applies the stored flag later.
+         return;
+     }
+     driver.setCheckingNormalization(enable);
+ }
+
+ /**
+ * Sets the policy for consecutive hyphens in comments.
+ * @param commentPolicy the policy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
+     this.commentPolicy = commentPolicy;
+     if (driver == null) {
+         // No driver yet; lazyInit() applies the stored policy later.
+         return;
+     }
+     driver.setCommentPolicy(commentPolicy);
+ }
+
+ /**
+ * Sets the policy for non-XML characters except white space.
+ * @param contentNonXmlCharPolicy the policy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setContentNonXmlCharPolicy(
+ XmlViolationPolicy contentNonXmlCharPolicy) {
+ this.contentNonXmlCharPolicy = contentNonXmlCharPolicy; // consulted by newTokenizer() and applied to the driver in lazyInit()
+ driver = null; // discard the driver so lazyInit() builds a new one with this policy
+ }
+
+ /**
+ * Sets the policy for non-XML white space.
+ * @param contentSpacePolicy the policy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
+     this.contentSpacePolicy = contentSpacePolicy;
+     if (driver == null) {
+         // No driver yet; lazyInit() applies the stored policy later.
+         return;
+     }
+     driver.setContentSpacePolicy(contentSpacePolicy);
+ }
+
+ /**
+ * Whether the parser considers scripting to be enabled for noscript treatment.
+ *
+ * @return <code>true</code> if enabled
+ * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled()
+ */
+ public boolean isScriptingEnabled() {
+ return scriptingEnabled;
+ }
+
+ /**
+ * Sets whether the parser considers scripting to be enabled for noscript treatment.
+ * @param scriptingEnabled <code>true</code> to enable
+ * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean)
+ */
+ public void setScriptingEnabled(boolean scriptingEnabled) {
+     this.scriptingEnabled = scriptingEnabled;
+     if (treeBuilder == null) {
+         return;
+     }
+     // Forward at once; lazyInit() also replays the stored value.
+     treeBuilder.setScriptingEnabled(scriptingEnabled);
+ }
+
+ /**
+ * Returns the doctype expectation.
+ *
+ * @return the doctypeExpectation
+ */
+ public DoctypeExpectation getDoctypeExpectation() {
+ return doctypeExpectation;
+ }
+
+ /**
+ * Sets the doctype expectation.
+ *
+ * @param doctypeExpectation
+ * the doctypeExpectation to set
+ * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation)
+ */
+ public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
+ this.doctypeExpectation = doctypeExpectation;
+ if (treeBuilder != null) {
+ treeBuilder.setDoctypeExpectation(doctypeExpectation);
+ }
+ }
+
+ /**
+ * Returns the document mode handler.
+ *
+ * @return the documentModeHandler
+ */
+ public DocumentModeHandler getDocumentModeHandler() {
+ return documentModeHandler;
+ }
+
+ /**
+ * Sets the document mode handler.
+ *
+ * @param documentModeHandler
+ * the documentModeHandler to set
+ * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler)
+ */
+ public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
+     this.documentModeHandler = documentModeHandler;
+     // Forward immediately, as setDoctypeExpectation() does. lazyInit()
+     // only replays the stored handler when it creates a new driver, so
+     // a handler set afterwards would otherwise never reach the tree
+     // builder.
+     treeBuilder.setDocumentModeHandler(documentModeHandler);
+ }
+
+ /**
+ * Returns the streamabilityViolationPolicy.
+ *
+ * @return the streamabilityViolationPolicy
+ */
+ public XmlViolationPolicy getStreamabilityViolationPolicy() {
+ return streamabilityViolationPolicy;
+ }
+
+ /**
+ * Sets the streamabilityViolationPolicy.
+ *
+ * @param streamabilityViolationPolicy
+ * the streamabilityViolationPolicy to set
+ */
+ public void setStreamabilityViolationPolicy(
+ XmlViolationPolicy streamabilityViolationPolicy) {
+ this.streamabilityViolationPolicy = streamabilityViolationPolicy;
+ driver = null;
+ }
+
+ /**
+ * Whether the HTML 4 mode reports boolean attributes in a way that repeats
+ * the name in the value.
+ * @param html4ModeCompatibleWithXhtml1Schemata
+ */
+ public void setHtml4ModeCompatibleWithXhtml1Schemata(
+ boolean html4ModeCompatibleWithXhtml1Schemata) {
+ this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
+ if (driver != null) {
+ driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+ }
+ }
+
+ /**
+ * Returns the <code>Locator</code> during parse.
+ * @return the <code>Locator</code>
+ */
+ public Locator getDocumentLocator() {
+     // The driver is created lazily and several setters discard it, so it
+     // may not exist yet; return null in that case instead of throwing a
+     // NullPointerException.
+     return driver == null ? null : driver.getDocumentLocator();
+ }
+
+ /**
+ * Whether the HTML 4 mode reports boolean attributes in a way that repeats
+ * the name in the value.
+ *
+ * @return the html4ModeCompatibleWithXhtml1Schemata
+ */
+ public boolean isHtml4ModeCompatibleWithXhtml1Schemata() {
+ return html4ModeCompatibleWithXhtml1Schemata;
+ }
+
+ /**
+ * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
+ * @param mappingLangToXmlLang
+ * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
+ */
+ public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
+ this.mappingLangToXmlLang = mappingLangToXmlLang;
+ if (driver != null) {
+ driver.setMappingLangToXmlLang(mappingLangToXmlLang);
+ }
+ }
+
+ /**
+ * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
+ *
+ * @return the mappingLangToXmlLang
+ */
+ public boolean isMappingLangToXmlLang() {
+ return mappingLangToXmlLang;
+ }
+
+ /**
+ * Sets the policy for whether the <code>xmlns</code> attribute on the root
+ * element is passed through. (FATAL not allowed.)
+ * @param xmlnsPolicy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
+     // Reject FATAL before any state is touched.
+     if (XmlViolationPolicy.FATAL == xmlnsPolicy) {
+         throw new IllegalArgumentException("Can't use FATAL here.");
+     }
+     this.xmlnsPolicy = xmlnsPolicy;
+     if (driver == null) {
+         // No driver yet; lazyInit() applies the stored policy later.
+         return;
+     }
+     driver.setXmlnsPolicy(xmlnsPolicy);
+ }
+
+ /**
+ * Returns the xmlnsPolicy.
+ *
+ * @return the xmlnsPolicy
+ */
+ public XmlViolationPolicy getXmlnsPolicy() {
+ return xmlnsPolicy;
+ }
+
+ /**
+ * Returns the commentPolicy.
+ *
+ * @return the commentPolicy
+ */
+ public XmlViolationPolicy getCommentPolicy() {
+ return commentPolicy;
+ }
+
+ /**
+ * Returns the contentNonXmlCharPolicy.
+ *
+ * @return the contentNonXmlCharPolicy
+ */
+ public XmlViolationPolicy getContentNonXmlCharPolicy() {
+ return contentNonXmlCharPolicy;
+ }
+
+ /**
+ * Returns the contentSpacePolicy.
+ *
+ * @return the contentSpacePolicy
+ */
+ public XmlViolationPolicy getContentSpacePolicy() {
+ return contentSpacePolicy;
+ }
+
+ /**
+ * @param reportingDoctype
+ * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean)
+ */
+ public void setReportingDoctype(boolean reportingDoctype) {
+ this.reportingDoctype = reportingDoctype;
+ if (treeBuilder != null) {
+ treeBuilder.setReportingDoctype(reportingDoctype);
+ }
+ }
+
+ /**
+ * Returns the reportingDoctype.
+ *
+ * @return the reportingDoctype
+ */
+ public boolean isReportingDoctype() {
+ return reportingDoctype;
+ }
+
+ /**
+ * The policy for non-NCName element and attribute names.
+ * @param namePolicy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setNamePolicy(XmlViolationPolicy namePolicy) {
+     this.namePolicy = namePolicy;
+     if (driver == null) {
+         // Nothing is forwarded until a driver exists.
+         return;
+     }
+     driver.setNamePolicy(namePolicy);
+     treeBuilder.setNamePolicy(namePolicy);
+ }
+
+ /**
+ * Sets the encoding sniffing heuristics.
+ *
+ * @param heuristics the heuristics to set
+ * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics)
+ */
+ public void setHeuristics(Heuristics heuristics) {
+     this.heuristics = heuristics;
+     if (driver == null) {
+         // No driver yet; lazyInit() applies the stored value later.
+         return;
+     }
+     driver.setHeuristics(heuristics);
+ }
+
+ public Heuristics getHeuristics() {
+ return this.heuristics;
+ }
+
+ /**
+ * This is a catch-all convenience method for setting name, xmlns, content space,
+ * content non-XML char and comment policies in one go. This does not affect the
+ * streamability policy or doctype reporting.
+ *
+ * @param xmlPolicy
+ */
+ public void setXmlPolicy(XmlViolationPolicy xmlPolicy) {
+ setNamePolicy(xmlPolicy);
+ setXmlnsPolicy(xmlPolicy == XmlViolationPolicy.FATAL ? XmlViolationPolicy.ALTER_INFOSET : xmlPolicy);
+ setContentSpacePolicy(xmlPolicy);
+ setContentNonXmlCharPolicy(xmlPolicy);
+ setCommentPolicy(xmlPolicy);
+ }
+
+ /**
+ * The policy for non-NCName element and attribute names.
+ *
+ * @return the namePolicy
+ */
+ public XmlViolationPolicy getNamePolicy() {
+ return namePolicy;
+ }
+
+ /**
+ * Does nothing.
+ * @deprecated
+ */
+ public void setBogusXmlnsPolicy(
+ XmlViolationPolicy bogusXmlnsPolicy) {
+ }
+
+ /**
+ * Returns <code>XmlViolationPolicy.ALTER_INFOSET</code>.
+ * @deprecated
+ * @return <code>XmlViolationPolicy.ALTER_INFOSET</code>
+ */
+ public XmlViolationPolicy getBogusXmlnsPolicy() {
+ return XmlViolationPolicy.ALTER_INFOSET;
+ }
+
+ /**
+  * Registers a character handler. The handler is remembered so that
+  * <code>lazyInit()</code> can add it to a newly created driver, and it is
+  * also added to the current driver when one exists.
+  *
+  * @param characterHandler the handler to add
+  */
+ public void addCharacterHandler(CharacterHandler characterHandler) {
+     characterHandlers.add(characterHandler);
+     if (driver == null) {
+         return;
+     }
+     driver.addCharacterHandler(characterHandler);
+ }
+
+
+ /**
+ * Sets whether comment nodes appear in the tree.
+ * @param ignoreComments <code>true</code> to ignore comments
+ * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean)
+ */
+ public void setIgnoringComments(boolean ignoreComments) {
+ treeBuilder.setIgnoringComments(ignoreComments);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/package.html
new file mode 100644
index 000000000..d793bcf86
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/package.html
@@ -0,0 +1,29 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>This package provides an HTML5 parser that exposes the document using the DOM API.</p>
+</body>
+</html> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/ChardetSniffer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/ChardetSniffer.java
new file mode 100644
index 000000000..a75750398
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/ChardetSniffer.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.extra;
+
+import java.io.IOException;
+import java.nio.charset.UnsupportedCharsetException;
+
+import nu.validator.htmlparser.io.Encoding;
+
+import org.mozilla.intl.chardet.nsDetector;
+import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
+import org.mozilla.intl.chardet.nsPSMDetector;
+
+import com.ibm.icu.text.CharsetDetector;
+
+public class ChardetSniffer implements nsICharsetDetectionObserver {
+
+ private final byte[] source;
+
+ private final int length;
+
+ private Encoding returnValue = null;
+
+ /**
+ * @param source
+ */
+ public ChardetSniffer(final byte[] source, final int length) {
+ this.source = source;
+ this.length = length;
+ }
+
+ public Encoding sniff() throws IOException {
+ nsDetector detector = new nsDetector(nsPSMDetector.ALL);
+ detector.Init(this);
+ detector.DoIt(source, length, false);
+ detector.DataEnd();
+ if (returnValue != null && returnValue != Encoding.WINDOWS1252 && returnValue.isAsciiSuperset()) {
+ return returnValue;
+ } else {
+ return null;
+ }
+ }
+
+ public static void main(String[] args) {
+ String[] detectable = CharsetDetector.getAllDetectableCharsets();
+ for (int i = 0; i < detectable.length; i++) {
+ String charset = detectable[i];
+ System.out.println(charset);
+ }
+ }
+
+ public void Notify(String charsetName) {
+ try {
+ Encoding enc = Encoding.forName(charsetName);
+ Encoding actual = enc.getActualHtmlEncoding();
+ if (actual != null) {
+ enc = actual;
+ }
+ returnValue = enc;
+ } catch (UnsupportedCharsetException e) {
+ returnValue = null;
+ }
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java
new file mode 100644
index 000000000..f3caab5c4
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.extra;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import nu.validator.htmlparser.common.ByteReadable;
+import nu.validator.htmlparser.io.Encoding;
+
+import com.ibm.icu.text.CharsetDetector;
+import com.ibm.icu.text.CharsetMatch;
+
+public class IcuDetectorSniffer extends InputStream {
+
+ private final ByteReadable source;
+
+ /**
+ * @param source
+ */
+ public IcuDetectorSniffer(final ByteReadable source) {
+ this.source = source;
+ }
+
+ @Override
+ public int read() throws IOException {
+ return source.readByte();
+ }
+
+ public Encoding sniff() throws IOException {
+ try {
+ CharsetDetector detector = new CharsetDetector();
+ detector.setText(this);
+ CharsetMatch match = detector.detect();
+ Encoding enc = Encoding.forName(match.getName());
+ Encoding actual = enc.getActualHtmlEncoding();
+ if (actual != null) {
+ enc = actual;
+ }
+ if (enc != Encoding.WINDOWS1252 && enc.isAsciiSuperset()) {
+ return enc;
+ } else {
+ return null;
+ }
+ } catch (Exception e) {
+ return null;
+ }
+ }
+
+ public static void main(String[] args) {
+ String[] detectable = CharsetDetector.getAllDetectableCharsets();
+ for (int i = 0; i < detectable.length; i++) {
+ String charset = detectable[i];
+ System.out.println(charset);
+ }
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/NormalizationChecker.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/NormalizationChecker.java
new file mode 100644
index 000000000..45df62fb7
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/NormalizationChecker.java
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2006, 2007 Henri Sivonen
+ * Copyright (c) 2007 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.extra;
+
+import nu.validator.htmlparser.common.CharacterHandler;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.Normalizer;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class NormalizationChecker implements CharacterHandler {
+
+ private ErrorHandler errorHandler;
+
+ private Locator locator;
+
+ /**
+ * A thread-safe set of composing characters as per Charmod Norm.
+ */
+ @SuppressWarnings("deprecation")
+ private static final UnicodeSet COMPOSING_CHARACTERS = (UnicodeSet) new UnicodeSet(
+ "[[:nfc_qc=maybe:][:^ccc=0:]]").freeze();
+
+ // see http://sourceforge.net/mailarchive/message.php?msg_id=37279908
+
+ /**
+ * A buffer for holding sequences that overlap the SAX buffer boundary.
+ */
+ private char[] buf = new char[128];
+
+ /**
+ * A holder for the original buffer (for the memory leak prevention
+ * mechanism).
+ */
+ private char[] bufHolder = null;
+
+ /**
+ * The current used length of the buffer, i.e. the index of the first slot
+ * that does not hold current data.
+ */
+ private int pos;
+
+ /**
+ * Indicates whether the next call to <code>characters()</code>
+ * is the first call in a run.
+ */
+ private boolean atStartOfRun;
+
+ /**
+ * Indicates whether the current run has already caused an error.
+ */
+ private boolean alreadyComplainedAboutThisRun;
+
+ /**
+ * Emit an error. The locator is used.
+ *
+ * @param message the error message
+ * @throws SAXException if something goes wrong
+ */
+ public void err(String message) throws SAXException {
+ if (errorHandler != null) {
+ SAXParseException spe = new SAXParseException(message, locator);
+ errorHandler.error(spe);
+ }
+ }
+
+ /**
+ * Returns <code>true</code> if the argument is a composing BMP character
+ * or a surrogate and <code>false</code> otherwise.
+ *
+ * @param c a UTF-16 code unit
+ * @return <code>true</code> if the argument is a composing BMP character
+ * or a surrogate and <code>false</code> otherwise
+ */
+ private static boolean isComposingCharOrSurrogate(char c) {
+ if (UCharacter.isHighSurrogate(c) || UCharacter.isLowSurrogate(c)) {
+ return true;
+ }
+ return isComposingChar(c);
+ }
+
+ /**
+ * Returns <code>true</code> if the argument is a composing character
+ * and <code>false</code> otherwise.
+ *
+ * @param c a Unicode code point
+ * @return <code>true</code> if the argument is a composing character
+ * <code>false</code> otherwise
+ */
+ private static boolean isComposingChar(int c) {
+ return COMPOSING_CHARACTERS.contains(c);
+ }
+
+ /**
+ * Constructor that stores the locator used for error reporting.
+ *
+ * @param locator the locator handed to every <code>SAXParseException</code>
+ * created by <code>err()</code>
+ */
+ public NormalizationChecker(Locator locator) {
+ this.locator = locator;
+ start();
+ }
+
+ /**
+ * @see nu.validator.htmlparser.common.CharacterHandler#start()
+ */
+ public void start() {
+ atStartOfRun = true;
+ alreadyComplainedAboutThisRun = false;
+ pos = 0;
+ }
+
+ /**
+ * Checks one slice of the current text run for NFC violations; a zero-length slice is ignored. Implements {@link nu.validator.htmlparser.common.CharacterHandler#characters(char[], int, int)}.
+ */
+ public void characters(char[] ch, int start, int length)
+ throws SAXException {
+ if (alreadyComplainedAboutThisRun || length == 0) {
+ return; // already reported, or an empty slice for which ch[start] is not part of the data
+ }
+ if (atStartOfRun) {
+ char c = ch[start];
+ if (pos == 1) {
+ // there's a single high surrogate in buf (parked by the previous call)
+ if (isComposingChar(UCharacter.getCodePoint(buf[0], c))) {
+ err("Text run starts with a composing character.");
+ }
+ atStartOfRun = false;
+ } else {
+ if (length == 1 && UCharacter.isHighSurrogate(c)) {
+ buf[0] = c; // lone high surrogate: park it until the next call supplies the rest
+ pos = 1;
+ return;
+ } else {
+ if (UCharacter.isHighSurrogate(c)) {
+ if (isComposingChar(UCharacter.getCodePoint(c,
+ ch[start + 1]))) {
+ err("Text run starts with a composing character.");
+ }
+ } else {
+ if (isComposingCharOrSurrogate(c)) {
+ err("Text run starts with a composing character.");
+ }
+ }
+ atStartOfRun = false;
+ }
+ }
+ }
+ int i = start;
+ int stop = start + length;
+ if (pos > 0) {
+ // there's stuff in buf: extend it with the leading composing/surrogate code units
+ while (i < stop && isComposingCharOrSurrogate(ch[i])) {
+ i++;
+ }
+ appendToBuf(ch, start, i);
+ if (i == stop) {
+ return;
+ } else {
+ if (!Normalizer.isNormalized(buf, 0, pos, Normalizer.NFC, 0)) {
+ errAboutTextRun();
+ }
+ pos = 0;
+ }
+ }
+ if (i < stop) {
+ start = i;
+ i = stop - 1;
+ while (i > start && isComposingCharOrSurrogate(ch[i])) {
+ i--;
+ }
+ if (i > start) {
+ if (!Normalizer.isNormalized(ch, start, i, Normalizer.NFC, 0)) {
+ errAboutTextRun();
+ }
+ }
+ appendToBuf(ch, i, stop); // carry the unchecked tail over to the next call or end()
+ }
+ }
+
+ /**
+ * Emits an error stating that the current text run or the source
+ * text is not in NFC.
+ *
+ * @throws SAXException if the <code>ErrorHandler</code> throws
+ */
+ private void errAboutTextRun() throws SAXException {
+ err("Source text is not in Unicode Normalization Form C.");
+ alreadyComplainedAboutThisRun = true;
+ }
+
+ /**
+ * Appends a slice of a UTF-16 code unit array to the internal
+ * buffer.
+ *
+ * @param ch the array from which to copy
+ * @param start the index of the first element that is copied
+ * @param end the index of the first element that is not copied
+ */
+ private void appendToBuf(char[] ch, int start, int end) {
+ if (start == end) {
+ return;
+ }
+ int neededBufLen = pos + (end - start);
+ if (neededBufLen > buf.length) {
+ char[] newBuf = new char[neededBufLen];
+ System.arraycopy(buf, 0, newBuf, 0, pos);
+ if (bufHolder == null) {
+ bufHolder = buf; // keep the original around
+ }
+ buf = newBuf;
+ }
+ System.arraycopy(ch, start, buf, pos, end - start);
+ pos += (end - start);
+ }
+
+ /**
+ * @see nu.validator.htmlparser.common.CharacterHandler#end()
+ */
+ public void end() throws SAXException {
+ if (!alreadyComplainedAboutThisRun
+ && !Normalizer.isNormalized(buf, 0, pos, Normalizer.NFC, 0)) {
+ errAboutTextRun();
+ }
+ if (bufHolder != null) {
+ // restore the original small buffer to avoid leaking
+ // memory if this checker is recycled
+ buf = bufHolder;
+ bufHolder = null;
+ }
+ }
+
+ public void setErrorHandler(ErrorHandler errorHandler) {
+ this.errorHandler = errorHandler;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java
new file mode 100644
index 000000000..7b889e71e
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java
@@ -0,0 +1,2473 @@
+/*
+ * Copyright (c) 2008-2011 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import java.util.Arrays;
+
+import nu.validator.htmlparser.annotation.Local;
+import nu.validator.htmlparser.annotation.NoLength;
+import nu.validator.htmlparser.annotation.NsUri;
+import nu.validator.htmlparser.annotation.Prefix;
+import nu.validator.htmlparser.annotation.QName;
+import nu.validator.htmlparser.annotation.Virtual;
+import nu.validator.htmlparser.common.Interner;
+
+public final class AttributeName
+// Uncomment to regenerate
+// implements Comparable<AttributeName>
+{
+ // [NOCPP[
+
+ public static final int NCNAME_HTML = 1;
+
+ public static final int NCNAME_FOREIGN = (1 << 1) | (1 << 2);
+
+ public static final int NCNAME_LANG = (1 << 3);
+
+ public static final int IS_XMLNS = (1 << 4);
+
+ public static final int CASE_FOLDED = (1 << 5);
+
+ public static final int BOOLEAN = (1 << 6);
+
+ // ]NOCPP]
+
+ /**
+ * An array representing no namespace regardless of namespace mode (HTML,
+ * SVG, MathML, lang-mapping HTML) used.
+ */
+ static final @NoLength @NsUri String[] ALL_NO_NS = { "", "", "",
+ // [NOCPP[
+ ""
+ // ]NOCPP]
+ };
+
+ /**
+ * An array that has no namespace for the HTML mode but the XMLNS namespace
+ * for the SVG and MathML modes.
+ */
+ private static final @NoLength @NsUri String[] XMLNS_NS = { "",
+ "http://www.w3.org/2000/xmlns/", "http://www.w3.org/2000/xmlns/",
+ // [NOCPP[
+ ""
+ // ]NOCPP]
+ };
+
+ /**
+ * An array that has no namespace for the HTML mode but the XML namespace
+ * for the SVG and MathML modes.
+ */
+ private static final @NoLength @NsUri String[] XML_NS = { "",
+ "http://www.w3.org/XML/1998/namespace",
+ "http://www.w3.org/XML/1998/namespace",
+ // [NOCPP[
+ ""
+ // ]NOCPP]
+ };
+
+ /**
+ * An array that has no namespace for the HTML mode but the XLink namespace
+ * for the SVG and MathML modes.
+ */
+ private static final @NoLength @NsUri String[] XLINK_NS = { "",
+ "http://www.w3.org/1999/xlink", "http://www.w3.org/1999/xlink",
+ // [NOCPP[
+ ""
+ // ]NOCPP]
+ };
+
+ // [NOCPP[
+ /**
+ * An array that has no namespace for the HTML, SVG and MathML modes but has
+ * the XML namespace for the lang-mapping HTML mode.
+ */
+ private static final @NoLength @NsUri String[] LANG_NS = { "", "", "",
+ "http://www.w3.org/XML/1998/namespace" };
+
+ // ]NOCPP]
+
+ /**
+ * An array for no prefixes in any mode.
+ */
+ static final @NoLength @Prefix String[] ALL_NO_PREFIX = { null, null, null,
+ // [NOCPP[
+ null
+ // ]NOCPP]
+ };
+
+ /**
+ * An array for no prefix in the HTML mode and the <code>xmlns</code>
+ * prefix in the SVG and MathML modes.
+ */
+ private static final @NoLength @Prefix String[] XMLNS_PREFIX = { null,
+ "xmlns", "xmlns",
+ // [NOCPP[
+ null
+ // ]NOCPP]
+ };
+
+ /**
+ * An array for no prefix in the HTML mode and the <code>xlink</code>
+ * prefix in the SVG and MathML modes.
+ */
+ private static final @NoLength @Prefix String[] XLINK_PREFIX = { null,
+ "xlink", "xlink",
+ // [NOCPP[
+ null
+ // ]NOCPP]
+ };
+
+ /**
+ * An array for no prefix in the HTML mode and the <code>xml</code> prefix
+ * in the SVG and MathML modes.
+ */
+ private static final @NoLength @Prefix String[] XML_PREFIX = { null, "xml",
+ "xml",
+ // [NOCPP[
+ null
+ // ]NOCPP]
+ };
+
+ // [NOCPP[
+
+ private static final @NoLength @Prefix String[] LANG_PREFIX = { null, null,
+ null, "xml" };
+
+ private static @QName String[] COMPUTE_QNAME(String[] local, String[] prefix) {
+ @QName String[] arr = new String[4];
+ for (int i = 0; i < arr.length; i++) {
+ if (prefix[i] == null) {
+ arr[i] = local[i];
+ } else {
+ arr[i] = (prefix[i] + ':' + local[i]).intern();
+ }
+ }
+ return arr;
+ }
+
+ // ]NOCPP]
+
+ /**
+ * An initialization helper for having one name in the SVG mode and
+ * another name in the other modes.
+ *
+ * @param name
+ * the name for the non-SVG modes
+ * @param camel
+ * the name for the SVG mode
+ * @return the initialized name array
+ */
+ private static @NoLength @Local String[] SVG_DIFFERENT(@Local String name,
+ @Local String camel) {
+ @NoLength @Local String[] arr = new String[4];
+ arr[0] = name;
+ arr[1] = name;
+ arr[2] = camel;
+ // [NOCPP[
+ arr[3] = name;
+ // ]NOCPP]
+ return arr;
+ }
+
+ /**
+ * An initialization helper for having one name in the MathML mode and
+ * another name in the other modes.
+ *
+ * @param name
+ * the name for the non-MathML modes
+ * @param camel
+ * the name for the MathML mode
+ * @return the initialized name array
+ */
+ private static @NoLength @Local String[] MATH_DIFFERENT(@Local String name,
+ @Local String camel) {
+ @NoLength @Local String[] arr = new String[4];
+ arr[0] = name;
+ arr[1] = camel;
+ arr[2] = name;
+ // [NOCPP[
+ arr[3] = name;
+ // ]NOCPP]
+ return arr;
+ }
+
+ /**
+ * An initialization helper for having a different local name in the HTML
+ * mode and the SVG and MathML modes.
+ *
+ * @param name
+ * the name for the HTML mode
+ * @param suffix
+ * the name for the SVG and MathML modes
+ * @return the initialized name array
+ */
+ private static @NoLength @Local String[] COLONIFIED_LOCAL(
+ @Local String name, @Local String suffix) {
+ @NoLength @Local String[] arr = new String[4];
+ arr[0] = name;
+ arr[1] = suffix;
+ arr[2] = suffix;
+ // [NOCPP[
+ arr[3] = name;
+ // ]NOCPP]
+ return arr;
+ }
+
+ /**
+ * An initialization helper for having the same local name in all modes.
+ *
+ * @param name
+ * the name
+ * @return the initialized name array
+ */
+ static @NoLength @Local String[] SAME_LOCAL(@Local String name) {
+ @NoLength @Local String[] arr = new String[4];
+ arr[0] = name;
+ arr[1] = name;
+ arr[2] = name;
+ // [NOCPP[
+ arr[3] = name;
+ // ]NOCPP]
+ return arr;
+ }
+
+ /**
+ * Returns an attribute name by buffer.
+ *
+ * <p>
+ * C++ ownership: The return value is either released by the caller if the
+ * attribute is a duplicate or the ownership is transferred to
+ * HtmlAttributes and released upon clearing or destroying that object.
+ *
+ * @param buf
+ * the buffer
+ * @param offset
+ * ignored by the hash lookup, but passed on to the <code>Portability</code> helpers
+ * @param length
+ * length of data
+ * @param checkNcName
+ * whether to check ncnameness
+ * @return an <code>AttributeName</code> corresponding to the argument data
+ */
+ static AttributeName nameByBuffer(@NoLength char[] buf, int offset,
+ int length
+ // [NOCPP[
+ , boolean checkNcName
+ // ]NOCPP]
+ , Interner interner) {
+ // XXX deal with offset
+ int hash = AttributeName.bufToHash(buf, length);
+ int index = Arrays.binarySearch(AttributeName.ATTRIBUTE_HASHES, hash);
+ if (index < 0) {
+ return AttributeName.createAttributeName(
+ Portability.newLocalNameFromBuffer(buf, offset, length,
+ interner)
+ // [NOCPP[
+ , checkNcName
+ // ]NOCPP]
+ );
+ } else {
+ AttributeName attributeName = AttributeName.ATTRIBUTE_NAMES[index];
+ @Local String name = attributeName.getLocal(AttributeName.HTML);
+ if (!Portability.localEqualsBuffer(name, buf, offset, length)) {
+ return AttributeName.createAttributeName(
+ Portability.newLocalNameFromBuffer(buf, offset, length,
+ interner)
+ // [NOCPP[
+ , checkNcName
+ // ]NOCPP]
+ );
+ }
+ return attributeName;
+ }
+ }
+
+ /**
+ * This method has to return a unique integer for each well-known
+ * lower-cased attribute name.
+ *
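+ * @param buf the buffer holding the attribute name, starting at index 0
+ * @param len the number of UTF-16 code units in the name
+ * @return the hash value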
+ */
+ private static int bufToHash(@NoLength char[] buf, int len) {
+ int hash2 = 0;
+ int hash = len;
+ hash <<= 5;
+ hash += buf[0] - 0x60;
+ int j = len;
+ for (int i = 0; i < 4 && j > 0; i++) {
+ j--;
+ hash <<= 5;
+ hash += buf[j] - 0x60;
+ hash2 <<= 6;
+ hash2 += buf[i] - 0x5F;
+ }
+ return hash ^ hash2;
+ }
+
+ /**
+ * The mode value for HTML.
+ */
+ public static final int HTML = 0;
+
+ /**
+ * The mode value for MathML.
+ */
+ public static final int MATHML = 1;
+
+ /**
+ * The mode value for SVG.
+ */
+ public static final int SVG = 2;
+
+ // [NOCPP[
+
+ /**
+ * The mode value for lang-mapping HTML.
+ */
+ public static final int HTML_LANG = 3;
+
+ // ]NOCPP]
+
+ /**
+ * The namespaces indexable by mode.
+ */
+ private final @NsUri @NoLength String[] uri;
+
+ /**
+ * The local names indexable by mode.
+ */
+ private final @Local @NoLength String[] local;
+
+ /**
+ * The prefixes indexable by mode.
+ */
+ private final @Prefix @NoLength String[] prefix;
+
+ // [NOCPP[
+
+ private final int flags;
+
+ /**
+ * The qnames indexable by mode.
+ */
+ private final @QName @NoLength String[] qName;
+
+ // ]NOCPP]
+
+ /**
+ * The startup-time constructor.
+ *
+ * @param uri
+ * the namespace
+ * @param local
+ * the local name
+ * @param prefix
+ * the prefix
+ * @param flags
+ * a bit mask combining the <code>NCNAME_*</code>,
+ * <code>IS_XMLNS</code>, <code>CASE_FOLDED</code> and
+ * <code>BOOLEAN</code> constants of this class
+ */
+ protected AttributeName(@NsUri @NoLength String[] uri,
+ @Local @NoLength String[] local, @Prefix @NoLength String[] prefix
+ // [NOCPP[
+ , int flags
+ // ]NOCPP]
+ ) {
+ this.uri = uri;
+ this.local = local;
+ this.prefix = prefix;
+ // [NOCPP[
+ this.qName = COMPUTE_QNAME(local, prefix);
+ this.flags = flags;
+ // ]NOCPP]
+ }
+
+ /**
+ * Creates an <code>AttributeName</code> for a local name.
+ *
+ * @param name
+ * the name
+ * @param checkNcName
+ * whether to check ncnameness
+ * @return an <code>AttributeName</code>
+ */
+ private static AttributeName createAttributeName(@Local String name
+ // [NOCPP[
+ , boolean checkNcName
+ // ]NOCPP]
+ ) {
+ // [NOCPP[
+ int flags = NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG;
+ if (name.startsWith("xmlns:")) {
+ flags = IS_XMLNS;
+ } else if (checkNcName && !NCName.isNCName(name)) {
+ flags = 0;
+ }
+ // ]NOCPP]
+ return new AttributeName(AttributeName.ALL_NO_NS,
+ AttributeName.SAME_LOCAL(name), ALL_NO_PREFIX, flags);
+ }
+
+ /**
+ * Deletes runtime-allocated instances in C++.
+ */
+ @Virtual void release() {
+ // No-op in Java.
+ // Implement as |delete this;| in subclass.
+ }
+
+ /**
+ * The C++ destructor.
+ */
+ @SuppressWarnings("unused") @Virtual private void destructor() {
+ Portability.deleteArray(local);
+ }
+
+ /**
+ * Clones the attribute using an interner. Returns <code>this</code> in Java
+ * and for non-dynamic instances in C++.
+ *
+ * @param interner
+ * an interner
+ * @return a clone
+ */
+ @Virtual public AttributeName cloneAttributeName(Interner interner) {
+ return this;
+ }
+
+ // [NOCPP[
+ /**
+ * Creator for use when the XML violation policy requires an attribute name
+ * to be changed.
+ *
+ * @param name
+ * the name of the attribute to create
+ */
+ static AttributeName create(@Local String name) {
+ return new AttributeName(AttributeName.ALL_NO_NS,
+ AttributeName.SAME_LOCAL(name), ALL_NO_PREFIX,
+ NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ }
+
+ /**
+ * Queries whether this name is an XML 1.0 4th ed. NCName.
+ *
+ * @param mode
+ * the SVG/MathML/HTML mode
+ * @return <code>true</code> if this is an NCName in the given mode
+ */
+ public boolean isNcName(int mode) {
+ return (flags & (1 << mode)) != 0;
+ }
+
+ /**
+ * Queries whether this is an <code>xmlns</code> attribute.
+ *
+ * @return <code>true</code> if this is an <code>xmlns</code> attribute
+ */
+ public boolean isXmlns() {
+ return (flags & IS_XMLNS) != 0;
+ }
+
+ /**
+ * Queries whether this attribute has a case-folded value in the HTML4 mode
+ * of the parser.
+ *
+ * @return <code>true</code> if the value is case-folded
+ */
+ boolean isCaseFolded() {
+ return (flags & CASE_FOLDED) != 0;
+ }
+
+ boolean isBoolean() {
+ return (flags & BOOLEAN) != 0;
+ }
+
+ public @QName String getQName(int mode) {
+ return qName[mode];
+ }
+
+ // ]NOCPP]
+
+ public @NsUri String getUri(int mode) {
+ return uri[mode];
+ }
+
+ public @Local String getLocal(int mode) {
+ return local[mode];
+ }
+
+ public @Prefix String getPrefix(int mode) {
+ return prefix[mode];
+ }
+
+ boolean equalsAnother(AttributeName another) {
+ return this.getLocal(AttributeName.HTML) == another.getLocal(AttributeName.HTML);
+ }
+
+ // START CODE ONLY USED FOR GENERATING CODE uncomment to regenerate
+
+// /**
+// * @see java.lang.Object#toString()
+// */
+// @Override public String toString() {
+// return "(" + formatNs() + ", " + formatLocal() + ", " + formatPrefix()
+// + ", " + formatFlags() + ")";
+// }
+//
+// private String formatFlags() {
+// StringBuilder builder = new StringBuilder();
+// if ((flags & NCNAME_HTML) != 0) {
+// if (builder.length() != 0) {
+// builder.append(" | ");
+// }
+// builder.append("NCNAME_HTML");
+// }
+// if ((flags & NCNAME_FOREIGN) != 0) {
+// if (builder.length() != 0) {
+// builder.append(" | ");
+// }
+// builder.append("NCNAME_FOREIGN");
+// }
+// if ((flags & NCNAME_LANG) != 0) {
+// if (builder.length() != 0) {
+// builder.append(" | ");
+// }
+// builder.append("NCNAME_LANG");
+// }
+// if (isXmlns()) {
+// if (builder.length() != 0) {
+// builder.append(" | ");
+// }
+// builder.append("IS_XMLNS");
+// }
+// if (isCaseFolded()) {
+// if (builder.length() != 0) {
+// builder.append(" | ");
+// }
+// builder.append("CASE_FOLDED");
+// }
+// if (isBoolean()) {
+// if (builder.length() != 0) {
+// builder.append(" | ");
+// }
+// builder.append("BOOLEAN");
+// }
+// if (builder.length() == 0) {
+// return "0";
+// }
+// return builder.toString();
+// }
+//
+// public int compareTo(AttributeName other) {
+// int thisHash = this.hash();
+// int otherHash = other.hash();
+// if (thisHash < otherHash) {
+// return -1;
+// } else if (thisHash == otherHash) {
+// return 0;
+// } else {
+// return 1;
+// }
+// }
+//
+// private String formatPrefix() {
+// if (prefix[0] == null && prefix[1] == null && prefix[2] == null
+// && prefix[3] == null) {
+// return "ALL_NO_PREFIX";
+// } else if (prefix[0] == null && prefix[1] == prefix[2]
+// && prefix[3] == null) {
+// if ("xmlns".equals(prefix[1])) {
+// return "XMLNS_PREFIX";
+// } else if ("xml".equals(prefix[1])) {
+// return "XML_PREFIX";
+// } else if ("xlink".equals(prefix[1])) {
+// return "XLINK_PREFIX";
+// } else {
+// throw new IllegalStateException();
+// }
+// } else if (prefix[0] == null && prefix[1] == null && prefix[2] == null
+// && prefix[3] == "xml") {
+// return "LANG_PREFIX";
+// } else {
+// throw new IllegalStateException();
+// }
+// }
+//
+// private String formatLocal() {
+// if (local[0] == local[1] && local[0] == local[3]
+// && local[0] != local[2]) {
+// return "SVG_DIFFERENT(\"" + local[0] + "\", \"" + local[2] + "\")";
+// }
+// if (local[0] == local[2] && local[0] == local[3]
+// && local[0] != local[1]) {
+// return "MATH_DIFFERENT(\"" + local[0] + "\", \"" + local[1] + "\")";
+// }
+// if (local[0] == local[3] && local[1] == local[2]
+// && local[0] != local[1]) {
+// return "COLONIFIED_LOCAL(\"" + local[0] + "\", \"" + local[1]
+// + "\")";
+// }
+// for (int i = 1; i < local.length; i++) {
+// if (local[0] != local[i]) {
+// throw new IllegalStateException();
+// }
+// }
+// return "SAME_LOCAL(\"" + local[0] + "\")";
+// }
+//
+// private String formatNs() {
+// if (uri[0] == "" && uri[1] == "" && uri[2] == "" && uri[3] == "") {
+// return "ALL_NO_NS";
+// } else if (uri[0] == "" && uri[1] == uri[2] && uri[3] == "") {
+// if ("http://www.w3.org/2000/xmlns/".equals(uri[1])) {
+// return "XMLNS_NS";
+// } else if ("http://www.w3.org/XML/1998/namespace".equals(uri[1])) {
+// return "XML_NS";
+// } else if ("http://www.w3.org/1999/xlink".equals(uri[1])) {
+// return "XLINK_NS";
+// } else {
+// throw new IllegalStateException();
+// }
+// } else if (uri[0] == "" && uri[1] == "" && uri[2] == ""
+// && uri[3] == "http://www.w3.org/XML/1998/namespace") {
+// return "LANG_NS";
+// } else {
+// throw new IllegalStateException();
+// }
+// }
+//
+// private String constName() {
+// String name = getLocal(HTML);
+// char[] buf = new char[name.length()];
+// for (int i = 0; i < name.length(); i++) {
+// char c = name.charAt(i);
+// if (c == '-' || c == ':') {
+// buf[i] = '_';
+// } else if (c >= 'a' && c <= 'z') {
+// buf[i] = (char) (c - 0x20);
+// } else {
+// buf[i] = c;
+// }
+// }
+// return new String(buf);
+// }
+//
+// private int hash() {
+// String name = getLocal(HTML);
+// return bufToHash(name.toCharArray(), name.length());
+// }
+//
+// /**
+// * Regenerate self
+// *
+// * @param args
+// */
+// public static void main(String[] args) {
+// Arrays.sort(ATTRIBUTE_NAMES);
+// for (int i = 1; i < ATTRIBUTE_NAMES.length; i++) {
+// if (ATTRIBUTE_NAMES[i].hash() == ATTRIBUTE_NAMES[i - 1].hash()) {
+// System.err.println("Hash collision: "
+// + ATTRIBUTE_NAMES[i].getLocal(HTML) + ", "
+// + ATTRIBUTE_NAMES[i - 1].getLocal(HTML));
+// return;
+// }
+// }
+// for (int i = 0; i < ATTRIBUTE_NAMES.length; i++) {
+// AttributeName att = ATTRIBUTE_NAMES[i];
+// System.out.println("public static final AttributeName "
+// + att.constName() + " = new AttributeName" + att.toString()
+// + ";");
+// }
+// System.out.println("private final static @NoLength AttributeName[] ATTRIBUTE_NAMES = {");
+// for (int i = 0; i < ATTRIBUTE_NAMES.length; i++) {
+// AttributeName att = ATTRIBUTE_NAMES[i];
+// System.out.println(att.constName() + ",");
+// }
+// System.out.println("};");
+// System.out.println("private final static int[] ATTRIBUTE_HASHES = {");
+// for (int i = 0; i < ATTRIBUTE_NAMES.length; i++) {
+// AttributeName att = ATTRIBUTE_NAMES[i];
+// System.out.println(Integer.toString(att.hash()) + ",");
+// }
+// System.out.println("};");
+// }
+
+ // START GENERATED CODE
+ public static final AttributeName D = new AttributeName(ALL_NO_NS, SAME_LOCAL("d"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Generated constants for the one-character attribute names (d, k, r, x, y, z); all six pass identical namespace, prefix and flag arguments.
+ public static final AttributeName K = new AttributeName(ALL_NO_NS, SAME_LOCAL("k"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName R = new AttributeName(ALL_NO_NS, SAME_LOCAL("r"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName X = new AttributeName(ALL_NO_NS, SAME_LOCAL("x"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName Y = new AttributeName(ALL_NO_NS, SAME_LOCAL("y"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName Z = new AttributeName(ALL_NO_NS, SAME_LOCAL("z"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BY = new AttributeName(ALL_NO_NS, SAME_LOCAL("by"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Two-character attribute names start here; every entry in this run passes the same namespace, prefix and flag arguments.
+ public static final AttributeName CX = new AttributeName(ALL_NO_NS, SAME_LOCAL("cx"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CY = new AttributeName(ALL_NO_NS, SAME_LOCAL("cy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DX = new AttributeName(ALL_NO_NS, SAME_LOCAL("dx"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DY = new AttributeName(ALL_NO_NS, SAME_LOCAL("dy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName G2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("g2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // NOTE(review): G2 precedes G1 - the order is not alphabetical; presumably it is the generator's sort order, so do not hand-sort (confirm against the generator).
+ public static final AttributeName G1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("g1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FX = new AttributeName(ALL_NO_NS, SAME_LOCAL("fx"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FY = new AttributeName(ALL_NO_NS, SAME_LOCAL("fy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName K4 = new AttributeName(ALL_NO_NS, SAME_LOCAL("k4"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName K2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("k2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName K3 = new AttributeName(ALL_NO_NS, SAME_LOCAL("k3"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName K1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("k1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ID = new AttributeName(ALL_NO_NS, SAME_LOCAL("id"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName IN = new AttributeName(ALL_NO_NS, SAME_LOCAL("in"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName U2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("u2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName U1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("u1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RT = new AttributeName(ALL_NO_NS, SAME_LOCAL("rt"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RX = new AttributeName(ALL_NO_NS, SAME_LOCAL("rx"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RY = new AttributeName(ALL_NO_NS, SAME_LOCAL("ry"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TO = new AttributeName(ALL_NO_NS, SAME_LOCAL("to"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName Y2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("y2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName Y1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("y1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName X1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("x1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName X2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("x2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ALT = new AttributeName(ALL_NO_NS, SAME_LOCAL("alt"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Three-character attribute names start here.
+ public static final AttributeName DIR = new AttributeName(ALL_NO_NS, SAME_LOCAL("dir"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); // The only three-character entry that also sets CASE_FOLDED.
+ public static final AttributeName DUR = new AttributeName(ALL_NO_NS, SAME_LOCAL("dur"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName END = new AttributeName(ALL_NO_NS, SAME_LOCAL("end"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("for"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName IN2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("in2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MAX = new AttributeName(ALL_NO_NS, SAME_LOCAL("max"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("min"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LOW = new AttributeName(ALL_NO_NS, SAME_LOCAL("low"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REL = new AttributeName(ALL_NO_NS, SAME_LOCAL("rel"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REV = new AttributeName(ALL_NO_NS, SAME_LOCAL("rev"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SRC = new AttributeName(ALL_NO_NS, SAME_LOCAL("src"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName AXIS = new AttributeName(ALL_NO_NS, SAME_LOCAL("axis"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Four-character attribute names start here.
+ public static final AttributeName ABBR = new AttributeName(ALL_NO_NS, SAME_LOCAL("abbr"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BBOX = new AttributeName(ALL_NO_NS, SAME_LOCAL("bbox"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CITE = new AttributeName(ALL_NO_NS, SAME_LOCAL("cite"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("code"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BIAS = new AttributeName(ALL_NO_NS, SAME_LOCAL("bias"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLS = new AttributeName(ALL_NO_NS, SAME_LOCAL("cols"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CLIP = new AttributeName(ALL_NO_NS, SAME_LOCAL("clip"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CHAR = new AttributeName(ALL_NO_NS, SAME_LOCAL("char"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BASE = new AttributeName(ALL_NO_NS, SAME_LOCAL("base"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName EDGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("edge"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DATA = new AttributeName(ALL_NO_NS, SAME_LOCAL("data"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FILL = new AttributeName(ALL_NO_NS, SAME_LOCAL("fill"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FROM = new AttributeName(ALL_NO_NS, SAME_LOCAL("from"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FORM = new AttributeName(ALL_NO_NS, SAME_LOCAL("form"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("face"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HIGH = new AttributeName(ALL_NO_NS, SAME_LOCAL("high"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HREF = new AttributeName(ALL_NO_NS, SAME_LOCAL("href"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OPEN = new AttributeName(ALL_NO_NS, SAME_LOCAL("open"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ICON = new AttributeName(ALL_NO_NS, SAME_LOCAL("icon"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName NAME = new AttributeName(ALL_NO_NS, SAME_LOCAL("name"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("mode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MASK = new AttributeName(ALL_NO_NS, SAME_LOCAL("mask"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LINK = new AttributeName(ALL_NO_NS, SAME_LOCAL("link"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LANG = new AttributeName(LANG_NS, SAME_LOCAL("lang"), LANG_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Unlike its neighbours, passes LANG_NS and LANG_PREFIX instead of ALL_NO_NS and ALL_NO_PREFIX.
+ public static final AttributeName LOOP = new AttributeName(ALL_NO_NS, SAME_LOCAL("loop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LIST = new AttributeName(ALL_NO_NS, SAME_LOCAL("list"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("type"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName WHEN = new AttributeName(ALL_NO_NS, SAME_LOCAL("when"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName WRAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("wrap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TEXT = new AttributeName(ALL_NO_NS, SAME_LOCAL("text"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PATH = new AttributeName(ALL_NO_NS, SAME_LOCAL("path"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PING = new AttributeName(ALL_NO_NS, SAME_LOCAL("ping"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REFX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("refx", "refX"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Uses SVG_DIFFERENT with a lowercase and a camel-case spelling; presumably "refX" is the local name used for SVG - confirm against SVG_DIFFERENT.
+ public static final AttributeName REFY = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("refy", "refY"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Same SVG_DIFFERENT pattern as REFX ("refy" / "refY").
+ public static final AttributeName SIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("size"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SEED = new AttributeName(ALL_NO_NS, SAME_LOCAL("seed"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ROWS = new AttributeName(ALL_NO_NS, SAME_LOCAL("rows"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SPAN = new AttributeName(ALL_NO_NS, SAME_LOCAL("span"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STEP = new AttributeName(ALL_NO_NS, SAME_LOCAL("step"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName ROLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("role"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName XREF = new AttributeName(ALL_NO_NS, SAME_LOCAL("xref"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ASYNC = new AttributeName(ALL_NO_NS, SAME_LOCAL("async"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); // Five-character attribute names start here. ASYNC also sets CASE_FOLDED and BOOLEAN; in this run BOOLEAN never appears without CASE_FOLDED.
+ public static final AttributeName ALINK = new AttributeName(ALL_NO_NS, SAME_LOCAL("alink"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("align"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName CLOSE = new AttributeName(ALL_NO_NS, SAME_LOCAL("close"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("color"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CLASS = new AttributeName(ALL_NO_NS, SAME_LOCAL("class"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CLEAR = new AttributeName(ALL_NO_NS, SAME_LOCAL("clear"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName BEGIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("begin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DEPTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("depth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DEFER = new AttributeName(ALL_NO_NS, SAME_LOCAL("defer"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName FENCE = new AttributeName(ALL_NO_NS, SAME_LOCAL("fence"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FRAME = new AttributeName(ALL_NO_NS, SAME_LOCAL("frame"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName ISMAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("ismap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName ONEND = new AttributeName(ALL_NO_NS, SAME_LOCAL("onend"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName INDEX = new AttributeName(ALL_NO_NS, SAME_LOCAL("index"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ORDER = new AttributeName(ALL_NO_NS, SAME_LOCAL("order"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OTHER = new AttributeName(ALL_NO_NS, SAME_LOCAL("other"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONCUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("oncut"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName NARGS = new AttributeName(ALL_NO_NS, SAME_LOCAL("nargs"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MEDIA = new AttributeName(ALL_NO_NS, SAME_LOCAL("media"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LABEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("label"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LOCAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("local"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName WIDTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("width"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TITLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("title"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VLINK = new AttributeName(ALL_NO_NS, SAME_LOCAL("vlink"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VALUE = new AttributeName(ALL_NO_NS, SAME_LOCAL("value"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SLOPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("slope"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SHAPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("shape"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName SCOPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("scope"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName SCALE = new AttributeName(ALL_NO_NS, SAME_LOCAL("scale"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SPEED = new AttributeName(ALL_NO_NS, SAME_LOCAL("speed"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STYLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("style"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RULES = new AttributeName(ALL_NO_NS, SAME_LOCAL("rules"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName STEMH = new AttributeName(ALL_NO_NS, SAME_LOCAL("stemh"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SIZES = new AttributeName(ALL_NO_NS, SAME_LOCAL("sizes"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STEMV = new AttributeName(ALL_NO_NS, SAME_LOCAL("stemv"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName START = new AttributeName(ALL_NO_NS, SAME_LOCAL("start"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName XMLNS = new AttributeName(XMLNS_NS, SAME_LOCAL("xmlns"), ALL_NO_PREFIX, IS_XMLNS); // Unlike its neighbours, passes XMLNS_NS as the namespace argument and IS_XMLNS as its only flag (no NCNAME_* flags).
+ public static final AttributeName ACCEPT = new AttributeName(ALL_NO_NS, SAME_LOCAL("accept"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Six-character attribute names start here; every entry in this run uses ALL_NO_NS, SAME_LOCAL and ALL_NO_PREFIX and differs only in its flags.
+ public static final AttributeName ACCENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("accent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ASCENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("ascent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ACTIVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("active"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName ALTIMG = new AttributeName(ALL_NO_NS, SAME_LOCAL("altimg"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ACTION = new AttributeName(ALL_NO_NS, SAME_LOCAL("action"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BORDER = new AttributeName(ALL_NO_NS, SAME_LOCAL("border"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CURSOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("cursor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COORDS = new AttributeName(ALL_NO_NS, SAME_LOCAL("coords"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FILTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("filter"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FORMAT = new AttributeName(ALL_NO_NS, SAME_LOCAL("format"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HIDDEN = new AttributeName(ALL_NO_NS, SAME_LOCAL("hidden"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("hspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("height"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmove"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONLOAD = new AttributeName(ALL_NO_NS, SAME_LOCAL("onload"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDRAG = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondrag"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ORIGIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("origin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONZOOM = new AttributeName(ALL_NO_NS, SAME_LOCAL("onzoom"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONHELP = new AttributeName(ALL_NO_NS, SAME_LOCAL("onhelp"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONSTOP = new AttributeName(ALL_NO_NS, SAME_LOCAL("onstop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDROP = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondrop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBLUR = new AttributeName(ALL_NO_NS, SAME_LOCAL("onblur"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OBJECT = new AttributeName(ALL_NO_NS, SAME_LOCAL("object"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OFFSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("offset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ORIENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("orient"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONCOPY = new AttributeName(ALL_NO_NS, SAME_LOCAL("oncopy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName NOWRAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("nowrap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName NOHREF = new AttributeName(ALL_NO_NS, SAME_LOCAL("nohref"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName MACROS = new AttributeName(ALL_NO_NS, SAME_LOCAL("macros"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName METHOD = new AttributeName(ALL_NO_NS, SAME_LOCAL("method"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName LOWSRC = new AttributeName(ALL_NO_NS, SAME_LOCAL("lowsrc"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("lspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LQUOTE = new AttributeName(ALL_NO_NS, SAME_LOCAL("lquote"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName USEMAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("usemap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName WIDTHS = new AttributeName(ALL_NO_NS, SAME_LOCAL("widths"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TARGET = new AttributeName(ALL_NO_NS, SAME_LOCAL("target"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VALUES = new AttributeName(ALL_NO_NS, SAME_LOCAL("values"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("valign"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName VSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("vspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName POSTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("poster"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName POINTS = new AttributeName(ALL_NO_NS, SAME_LOCAL("points"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PROMPT = new AttributeName(ALL_NO_NS, SAME_LOCAL("prompt"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SRCDOC = new AttributeName(ALL_NO_NS, SAME_LOCAL("srcdoc"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SCOPED = new AttributeName(ALL_NO_NS, SAME_LOCAL("scoped"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STRING = new AttributeName(ALL_NO_NS, SAME_LOCAL("string"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SCHEME = new AttributeName(ALL_NO_NS, SAME_LOCAL("scheme"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STROKE = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RADIUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("radius"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RESULT = new AttributeName(ALL_NO_NS, SAME_LOCAL("result"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REPEAT = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SRCSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("srcset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("rspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ROTATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("rotate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RQUOTE = new AttributeName(ALL_NO_NS, SAME_LOCAL("rquote"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ALTTEXT = new AttributeName(ALL_NO_NS, SAME_LOCAL("alttext"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Seven-character attribute names start here.
+ public static final AttributeName ARCHIVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("archive"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName AZIMUTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("azimuth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CLOSURE = new AttributeName(ALL_NO_NS, SAME_LOCAL("closure"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CHECKED = new AttributeName(ALL_NO_NS, SAME_LOCAL("checked"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName CLASSID = new AttributeName(ALL_NO_NS, SAME_LOCAL("classid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CHAROFF = new AttributeName(ALL_NO_NS, SAME_LOCAL("charoff"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BGCOLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("bgcolor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLSPAN = new AttributeName(ALL_NO_NS, SAME_LOCAL("colspan"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CHARSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("charset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COMPACT = new AttributeName(ALL_NO_NS, SAME_LOCAL("compact"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName CONTENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("content"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ENCTYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("enctype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName DATASRC = new AttributeName(ALL_NO_NS, SAME_LOCAL("datasrc"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DATAFLD = new AttributeName(ALL_NO_NS, SAME_LOCAL("datafld"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DECLARE = new AttributeName(ALL_NO_NS, SAME_LOCAL("declare"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName DISPLAY = new AttributeName(ALL_NO_NS, SAME_LOCAL("display"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DIVISOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("divisor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DEFAULT = new AttributeName(ALL_NO_NS, SAME_LOCAL("default"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName DESCENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("descent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName KERNING = new AttributeName(ALL_NO_NS, SAME_LOCAL("kerning"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HANGING = new AttributeName(ALL_NO_NS, SAME_LOCAL("hanging"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HEADERS = new AttributeName(ALL_NO_NS, SAME_LOCAL("headers"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONPASTE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onpaste"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONCLICK = new AttributeName(ALL_NO_NS, SAME_LOCAL("onclick"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OPTIMUM = new AttributeName(ALL_NO_NS, SAME_LOCAL("optimum"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBEGIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbegin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONKEYUP = new AttributeName(ALL_NO_NS, SAME_LOCAL("onkeyup"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONFOCUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("onfocus"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONERROR = new AttributeName(ALL_NO_NS, SAME_LOCAL("onerror"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONINPUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("oninput"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONABORT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onabort"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONSTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("onstart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONRESET = new AttributeName(ALL_NO_NS, SAME_LOCAL("onreset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName NOSHADE = new AttributeName(ALL_NO_NS, SAME_LOCAL("noshade"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName MINSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("minsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MAXSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("maxsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LARGEOP = new AttributeName(ALL_NO_NS, SAME_LOCAL("largeop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName UNICODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("unicode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TARGETX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("targetx", "targetX"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Uses SVG_DIFFERENT with a lowercase and a camel-case spelling; presumably "targetX" is the local name used for SVG - confirm against SVG_DIFFERENT.
+ public static final AttributeName TARGETY = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("targety", "targetY"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Same SVG_DIFFERENT pattern as TARGETX ("targety" / "targetY").
+ public static final AttributeName VIEWBOX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("viewbox", "viewBox"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VERSION = new AttributeName(ALL_NO_NS, SAME_LOCAL("version"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PATTERN = new AttributeName(ALL_NO_NS, SAME_LOCAL("pattern"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PROFILE = new AttributeName(ALL_NO_NS, SAME_LOCAL("profile"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("spacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RESTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("restart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ROWSPAN = new AttributeName(ALL_NO_NS, SAME_LOCAL("rowspan"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SANDBOX = new AttributeName(ALL_NO_NS, SAME_LOCAL("sandbox"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SUMMARY = new AttributeName(ALL_NO_NS, SAME_LOCAL("summary"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STANDBY = new AttributeName(ALL_NO_NS, SAME_LOCAL("standby"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REPLACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("replace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName AUTOPLAY = new AttributeName(ALL_NO_NS, SAME_LOCAL("autoplay"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // From here through X_HEIGHT, every name literal is exactly 8 characters long (prefixed names such as "xml:base" included).
+ public static final AttributeName ADDITIVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("additive"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CALCMODE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("calcmode", "calcMode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // SVG_DIFFERENT receives the all-lowercase spelling plus a camelCase spelling (presumably the SVG-side name; confirm in SVG_DIFFERENT).
+ public static final AttributeName CODETYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("codetype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CODEBASE = new AttributeName(ALL_NO_NS, SAME_LOCAL("codebase"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CONTROLS = new AttributeName(ALL_NO_NS, SAME_LOCAL("controls"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BEVELLED = new AttributeName(ALL_NO_NS, SAME_LOCAL("bevelled"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BASELINE = new AttributeName(ALL_NO_NS, SAME_LOCAL("baseline"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName EXPONENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("exponent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName EDGEMODE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("edgemode", "edgeMode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ENCODING = new AttributeName(ALL_NO_NS, SAME_LOCAL("encoding"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName GLYPHREF = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("glyphref", "glyphRef"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DATETIME = new AttributeName(ALL_NO_NS, SAME_LOCAL("datetime"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DISABLED = new AttributeName(ALL_NO_NS, SAME_LOCAL("disabled"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); // CASE_FOLDED | BOOLEAN are OR-ed in on top of the three NCNAME_* flags that the neighbouring entries pass.
+ public static final AttributeName FONTSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("fontsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName KEYTIMES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("keytimes", "keyTimes"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PANOSE_1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("panose-1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HREFLANG = new AttributeName(ALL_NO_NS, SAME_LOCAL("hreflang"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONRESIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onresize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONCHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onchange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBOUNCE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbounce"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONUNLOAD = new AttributeName(ALL_NO_NS, SAME_LOCAL("onunload"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONFINISH = new AttributeName(ALL_NO_NS, SAME_LOCAL("onfinish"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONSCROLL = new AttributeName(ALL_NO_NS, SAME_LOCAL("onscroll"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OPERATOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("operator"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OVERFLOW = new AttributeName(ALL_NO_NS, SAME_LOCAL("overflow"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONSUBMIT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onsubmit"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONREPEAT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrepeat"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONSELECT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onselect"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName NOTATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("notation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName NORESIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("noresize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName MANIFEST = new AttributeName(ALL_NO_NS, SAME_LOCAL("manifest"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MATHSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MULTIPLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("multiple"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName LONGDESC = new AttributeName(ALL_NO_NS, SAME_LOCAL("longdesc"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LANGUAGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("language"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TEMPLATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("template"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TABINDEX = new AttributeName(ALL_NO_NS, SAME_LOCAL("tabindex"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PROPERTY = new AttributeName(ALL_NO_NS, SAME_LOCAL("property"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName READONLY = new AttributeName(ALL_NO_NS, SAME_LOCAL("readonly"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName SELECTED = new AttributeName(ALL_NO_NS, SAME_LOCAL("selected"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName ROWLINES = new AttributeName(ALL_NO_NS, SAME_LOCAL("rowlines"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SEAMLESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("seamless"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ROWALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("rowalign"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STRETCHY = new AttributeName(ALL_NO_NS, SAME_LOCAL("stretchy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REQUIRED = new AttributeName(ALL_NO_NS, SAME_LOCAL("required"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN);
+ public static final AttributeName XML_BASE = new AttributeName(XML_NS, COLONIFIED_LOCAL("xml:base", "base"), XML_PREFIX, NCNAME_FOREIGN); // Namespaced entry: XML_NS / XML_PREFIX replace the ALL_NO_* arguments, COLONIFIED_LOCAL receives the prefixed name and the part after the colon, and NCNAME_FOREIGN is the only flag.
+ public static final AttributeName XML_LANG = new AttributeName(XML_NS, COLONIFIED_LOCAL("xml:lang", "lang"), XML_PREFIX, NCNAME_FOREIGN);
+ public static final AttributeName X_HEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("x-height"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_OWNS = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-owns"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // From here through XML_SPACE, every name literal is exactly 9 characters long.
+ public static final AttributeName AUTOFOCUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("autofocus"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); // CASE_FOLDED | BOOLEAN are OR-ed in on top of the three NCNAME_* flags that the neighbouring entries pass.
+ public static final AttributeName ARIA_SORT = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-sort"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ACCESSKEY = new AttributeName(ALL_NO_NS, SAME_LOCAL("accesskey"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_BUSY = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-busy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_GRAB = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-grab"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName AMPLITUDE = new AttributeName(ALL_NO_NS, SAME_LOCAL("amplitude"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_LIVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-live"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CLIP_RULE = new AttributeName(ALL_NO_NS, SAME_LOCAL("clip-rule"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CLIP_PATH = new AttributeName(ALL_NO_NS, SAME_LOCAL("clip-path"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName EQUALROWS = new AttributeName(ALL_NO_NS, SAME_LOCAL("equalrows"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ELEVATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("elevation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DIRECTION = new AttributeName(ALL_NO_NS, SAME_LOCAL("direction"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DRAGGABLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("draggable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FILL_RULE = new AttributeName(ALL_NO_NS, SAME_LOCAL("fill-rule"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FONTSTYLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("fontstyle"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FONT_SIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-size"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName KEYSYSTEM = new AttributeName(ALL_NO_NS, SAME_LOCAL("keysystem"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName KEYPOINTS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("keypoints", "keyPoints"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // SVG_DIFFERENT receives the all-lowercase spelling plus a camelCase spelling (presumably the SVG-side name; confirm in SVG_DIFFERENT).
+ public static final AttributeName HIDEFOCUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("hidefocus"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMESSAGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmessage"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName INTERCEPT = new AttributeName(ALL_NO_NS, SAME_LOCAL("intercept"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDRAGEND = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragend"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOVEEND = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmoveend"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONINVALID = new AttributeName(ALL_NO_NS, SAME_LOCAL("oninvalid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName INTEGRITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("integrity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONKEYDOWN = new AttributeName(ALL_NO_NS, SAME_LOCAL("onkeydown"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONFOCUSIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("onfocusin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOUSEUP = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseup"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName INPUTMODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("inputmode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONROWEXIT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrowexit"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MATHCOLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathcolor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MASKUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("maskunits", "maskUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MAXLENGTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("maxlength"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LINEBREAK = new AttributeName(ALL_NO_NS, SAME_LOCAL("linebreak"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TRANSFORM = new AttributeName(ALL_NO_NS, SAME_LOCAL("transform"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName V_HANGING = new AttributeName(ALL_NO_NS, SAME_LOCAL("v-hanging"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VALUETYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("valuetype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); // CASE_FOLDED is set here without BOOLEAN.
+ public static final AttributeName POINTSATZ = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pointsatz", "pointsAtZ"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName POINTSATX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pointsatx", "pointsAtX"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName POINTSATY = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pointsaty", "pointsAtY"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SYMMETRIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("symmetric"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SCROLLING = new AttributeName(ALL_NO_NS, SAME_LOCAL("scrolling"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED);
+ public static final AttributeName REPEATDUR = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("repeatdur", "repeatDur"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SELECTION = new AttributeName(ALL_NO_NS, SAME_LOCAL("selection"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SEPARATOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("separator"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName XML_SPACE = new AttributeName(XML_NS, COLONIFIED_LOCAL("xml:space", "space"), XML_PREFIX, NCNAME_FOREIGN); // Namespaced entry: XML_NS / XML_PREFIX replace the ALL_NO_* arguments, COLONIFIED_LOCAL receives the prefixed name and the part after the colon, and NCNAME_FOREIGN is the only flag.
+ public static final AttributeName AUTOSUBMIT = new AttributeName(ALL_NO_NS, SAME_LOCAL("autosubmit"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); // From here through XLINK_SHOW, every name literal is exactly 10 characters long; this entry also ORs in CASE_FOLDED | BOOLEAN.
+ public static final AttributeName ALPHABETIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("alphabetic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ACTIONTYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("actiontype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ACCUMULATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("accumulate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_LEVEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-level"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLUMNSPAN = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnspan"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CAP_HEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("cap-height"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BACKGROUND = new AttributeName(ALL_NO_NS, SAME_LOCAL("background"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName GLYPH_NAME = new AttributeName(ALL_NO_NS, SAME_LOCAL("glyph-name"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName GROUPALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("groupalign"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FONTFAMILY = new AttributeName(ALL_NO_NS, SAME_LOCAL("fontfamily"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FONTWEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("fontweight"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FONT_STYLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-style"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName KEYSPLINES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("keysplines", "keySplines"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // SVG_DIFFERENT receives the all-lowercase spelling plus a camelCase spelling (presumably the SVG-side name; confirm in SVG_DIFFERENT).
+ public static final AttributeName HTTP_EQUIV = new AttributeName(ALL_NO_NS, SAME_LOCAL("http-equiv"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONACTIVATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onactivate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OCCURRENCE = new AttributeName(ALL_NO_NS, SAME_LOCAL("occurrence"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName IRRELEVANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("irrelevant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDBLCLICK = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondblclick"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDRAGDROP = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragdrop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONKEYPRESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("onkeypress"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONROWENTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrowenter"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDRAGOVER = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragover"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONFOCUSOUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onfocusout"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOUSEOUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseout"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName NUMOCTAVES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("numoctaves", "numOctaves"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MARKER_MID = new AttributeName(ALL_NO_NS, SAME_LOCAL("marker-mid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MARKER_END = new AttributeName(ALL_NO_NS, SAME_LOCAL("marker-end"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TEXTLENGTH = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("textlength", "textLength"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VISIBILITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("visibility"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VIEWTARGET = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("viewtarget", "viewTarget"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VERT_ADV_Y = new AttributeName(ALL_NO_NS, SAME_LOCAL("vert-adv-y"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PATHLENGTH = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pathlength", "pathLength"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REPEAT_MAX = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat-max"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RADIOGROUP = new AttributeName(ALL_NO_NS, SAME_LOCAL("radiogroup"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STOP_COLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("stop-color"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SEPARATORS = new AttributeName(ALL_NO_NS, SAME_LOCAL("separators"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REPEAT_MIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat-min"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ROWSPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("rowspacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ZOOMANDPAN = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("zoomandpan", "zoomAndPan"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName XLINK_TYPE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:type", "type"), XLINK_PREFIX, NCNAME_FOREIGN); // Namespaced entry: XLINK_NS / XLINK_PREFIX replace the ALL_NO_* arguments, COLONIFIED_LOCAL receives the prefixed name and the part after the colon, and NCNAME_FOREIGN is the only flag.
+ public static final AttributeName XLINK_ROLE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:role", "role"), XLINK_PREFIX, NCNAME_FOREIGN);
+ public static final AttributeName XLINK_HREF = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:href", "href"), XLINK_PREFIX, NCNAME_FOREIGN);
+ public static final AttributeName XLINK_SHOW = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:show", "show"), XLINK_PREFIX, NCNAME_FOREIGN);
+ public static final AttributeName ACCENTUNDER = new AttributeName(ALL_NO_NS, SAME_LOCAL("accentunder"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // From here through XMLNS_XLINK, every name literal is exactly 11 characters long.
+ public static final AttributeName ARIA_SECRET = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-secret"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_ATOMIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-atomic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_HIDDEN = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-hidden"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_FLOWTO = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-flowto"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARABIC_FORM = new AttributeName(ALL_NO_NS, SAME_LOCAL("arabic-form"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CELLPADDING = new AttributeName(ALL_NO_NS, SAME_LOCAL("cellpadding"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CELLSPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("cellspacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLUMNWIDTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnwidth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CROSSORIGIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("crossorigin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLUMNALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnalign"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLUMNLINES = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnlines"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CONTEXTMENU = new AttributeName(ALL_NO_NS, SAME_LOCAL("contextmenu"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BASEPROFILE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("baseprofile", "baseProfile"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // SVG_DIFFERENT receives the all-lowercase spelling plus a camelCase spelling (presumably the SVG-side name; confirm in SVG_DIFFERENT).
+ public static final AttributeName FONT_FAMILY = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-family"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FRAMEBORDER = new AttributeName(ALL_NO_NS, SAME_LOCAL("frameborder"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FILTERUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("filterunits", "filterUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FLOOD_COLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("flood-color"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FONT_WEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-weight"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HORIZ_ADV_X = new AttributeName(ALL_NO_NS, SAME_LOCAL("horiz-adv-x"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDRAGLEAVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragleave"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOUSEMOVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmousemove"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ORIENTATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("orientation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOUSEDOWN = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmousedown"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOUSEOVER = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseover"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDRAGENTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragenter"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName IDEOGRAPHIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("ideographic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBEFORECUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforecut"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONFORMINPUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onforminput"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDRAGSTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragstart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOVESTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmovestart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MARKERUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("markerunits", "markerUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MATHVARIANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathvariant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MARGINWIDTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("marginwidth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MARKERWIDTH = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("markerwidth", "markerWidth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TEXT_ANCHOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("text-anchor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TABLEVALUES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("tablevalues", "tableValues"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SCRIPTLEVEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("scriptlevel"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REPEATCOUNT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("repeatcount", "repeatCount"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STITCHTILES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("stitchtiles", "stitchTiles"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STARTOFFSET = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("startoffset", "startOffset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SCROLLDELAY = new AttributeName(ALL_NO_NS, SAME_LOCAL("scrolldelay"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName XMLNS_XLINK = new AttributeName(XMLNS_NS, COLONIFIED_LOCAL("xmlns:xlink", "xlink"), XMLNS_PREFIX, IS_XMLNS); // XMLNS_NS / XMLNS_PREFIX entry; its fourth argument is IS_XMLNS alone, with none of the NCNAME_* flags.
+ public static final AttributeName XLINK_TITLE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:title", "title"), XLINK_PREFIX, NCNAME_FOREIGN);
+ public static final AttributeName ARIA_INVALID = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-invalid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Run of 12-character identifiers starts here (through STOP_OPACITY). NOTE(review): the length grouping looks like generator ordering - confirm before hand-editing or reordering.
+ public static final AttributeName ARIA_PRESSED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-pressed"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_CHECKED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-checked"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName AUTOCOMPLETE = new AttributeName(ALL_NO_NS, SAME_LOCAL("autocomplete"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); // Adds CASE_FOLDED to the usual three flags; within this run only DATAFORMATAS does the same.
+ public static final AttributeName ARIA_SETSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-setsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_CHANNEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-channel"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName EQUALCOLUMNS = new AttributeName(ALL_NO_NS, SAME_LOCAL("equalcolumns"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DISPLAYSTYLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("displaystyle"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DATAFORMATAS = new AttributeName(ALL_NO_NS, SAME_LOCAL("dataformatas"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); // Also carries CASE_FOLDED, like AUTOCOMPLETE above.
+ public static final AttributeName FILL_OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("fill-opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FONT_VARIANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-variant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FONT_STRETCH = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-stretch"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FRAMESPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("framespacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName KERNELMATRIX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("kernelmatrix", "kernelMatrix"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDEACTIVATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondeactivate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONROWSDELETE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrowsdelete"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOUSELEAVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseleave"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONFORMCHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onformchange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONCELLCHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("oncellchange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOUSEWHEEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmousewheel"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONMOUSEENTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseenter"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONAFTERPRINT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onafterprint"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBEFORECOPY = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforecopy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MARGINHEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("marginheight"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MARKERHEIGHT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("markerheight", "markerHeight"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MARKER_START = new AttributeName(ALL_NO_NS, SAME_LOCAL("marker-start"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MATHEMATICAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathematical"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LENGTHADJUST = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("lengthadjust", "lengthAdjust"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName UNSELECTABLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("unselectable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName UNICODE_BIDI = new AttributeName(ALL_NO_NS, SAME_LOCAL("unicode-bidi"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName UNITS_PER_EM = new AttributeName(ALL_NO_NS, SAME_LOCAL("units-per-em"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName WORD_SPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("word-spacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName WRITING_MODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("writing-mode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName V_ALPHABETIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("v-alphabetic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PATTERNUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("patternunits", "patternUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SPREADMETHOD = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("spreadmethod", "spreadMethod"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SURFACESCALE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("surfacescale", "surfaceScale"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STROKE_WIDTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-width"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REPEAT_START = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat-start"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STDDEVIATION = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("stddeviation", "stdDeviation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STOP_OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("stop-opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_CONTROLS = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-controls"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Run of 13-character identifiers starts here (through XLINK_ARCROLE).
+ public static final AttributeName ARIA_HASPOPUP = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-haspopup"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ACCENT_HEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("accent-height"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_VALUENOW = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-valuenow"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_RELEVANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-relevant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_POSINSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-posinset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_VALUEMAX = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-valuemax"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_READONLY = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-readonly"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_SELECTED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-selected"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_REQUIRED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-required"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_EXPANDED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-expanded"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_DISABLED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-disabled"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ATTRIBUTETYPE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("attributetype", "attributeType"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ATTRIBUTENAME = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("attributename", "attributeName"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_DATATYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-datatype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_VALUEMIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-valuemin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BASEFREQUENCY = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("basefrequency", "baseFrequency"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLUMNSPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnspacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLOR_PROFILE = new AttributeName(ALL_NO_NS, SAME_LOCAL("color-profile"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CLIPPATHUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("clippathunits", "clipPathUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DEFINITIONURL = new AttributeName(ALL_NO_NS, MATH_DIFFERENT("definitionurl", "definitionURL"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Only MATH_DIFFERENT entry in this run: lowercase "definitionurl" paired with "definitionURL"; presumably the MathML counterpart of SVG_DIFFERENT - confirm in the helper.
+ public static final AttributeName GRADIENTUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("gradientunits", "gradientUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName FLOOD_OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("flood-opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONAFTERUPDATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onafterupdate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONERRORUPDATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onerrorupdate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBEFOREPASTE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforepaste"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONLOSECAPTURE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onlosecapture"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONCONTEXTMENU = new AttributeName(ALL_NO_NS, SAME_LOCAL("oncontextmenu"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONSELECTSTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("onselectstart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBEFOREPRINT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeprint"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MOVABLELIMITS = new AttributeName(ALL_NO_NS, SAME_LOCAL("movablelimits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LINETHICKNESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("linethickness"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName UNICODE_RANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("unicode-range"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName THINMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("thinmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VERT_ORIGIN_X = new AttributeName(ALL_NO_NS, SAME_LOCAL("vert-origin-x"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VERT_ORIGIN_Y = new AttributeName(ALL_NO_NS, SAME_LOCAL("vert-origin-y"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName V_IDEOGRAPHIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("v-ideographic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PRESERVEALPHA = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("preservealpha", "preserveAlpha"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SCRIPTMINSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("scriptminsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SPECIFICATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("specification"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName XLINK_ACTUATE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:actuate", "actuate"), XLINK_PREFIX, NCNAME_FOREIGN); // xlink:* entries swap in XLINK_NS / XLINK_PREFIX, pass (qualified name, part after the colon) to COLONIFIED_LOCAL, and set only NCNAME_FOREIGN.
+ public static final AttributeName XLINK_ARCROLE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:arcrole", "arcrole"), XLINK_PREFIX, NCNAME_FOREIGN);
+ public static final AttributeName ACCEPT_CHARSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("accept-charset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Run of 14-character identifiers starts here (through STROKE_OPACITY); all use ALL_NO_NS / ALL_NO_PREFIX and the same three flags.
+ public static final AttributeName ALIGNMENTSCOPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("alignmentscope"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_MULTILINE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-multiline"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName BASELINE_SHIFT = new AttributeName(ALL_NO_NS, SAME_LOCAL("baseline-shift"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HORIZ_ORIGIN_X = new AttributeName(ALL_NO_NS, SAME_LOCAL("horiz-origin-x"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName HORIZ_ORIGIN_Y = new AttributeName(ALL_NO_NS, SAME_LOCAL("horiz-origin-y"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBEFOREUPDATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeupdate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONFILTERCHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onfilterchange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONROWSINSERTED = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrowsinserted"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBEFOREUNLOAD = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeunload"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MATHBACKGROUND = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathbackground"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LETTER_SPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("letter-spacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LIGHTING_COLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("lighting-color"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName THICKMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("thickmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TEXT_RENDERING = new AttributeName(ALL_NO_NS, SAME_LOCAL("text-rendering"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName V_MATHEMATICAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("v-mathematical"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName POINTER_EVENTS = new AttributeName(ALL_NO_NS, SAME_LOCAL("pointer-events"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PRIMITIVEUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("primitiveunits", "primitiveUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // SVG_DIFFERENT: lowercase spelling first, camelCase spelling second; the two must differ only in letter case as they do here.
+ public static final AttributeName REFERRERPOLICY = new AttributeName(ALL_NO_NS, SAME_LOCAL("referrerpolicy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SYSTEMLANGUAGE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("systemlanguage", "systemLanguage"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STROKE_LINECAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-linecap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SUBSCRIPTSHIFT = new AttributeName(ALL_NO_NS, SAME_LOCAL("subscriptshift"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STROKE_OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_DROPEFFECT = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-dropeffect"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Run of 15-character identifiers starts here (through REPEAT_TEMPLATE).
+ public static final AttributeName ARIA_LABELLEDBY = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-labelledby"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_TEMPLATEID = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-templateid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLOR_RENDERING = new AttributeName(ALL_NO_NS, SAME_LOCAL("color-rendering"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName CONTENTEDITABLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("contenteditable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DIFFUSECONSTANT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("diffuseconstant", "diffuseConstant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Only SVG_DIFFERENT entry in this run; every other entry here uses SAME_LOCAL with a single spelling.
+ public static final AttributeName ONDATAAVAILABLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondataavailable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONCONTROLSELECT = new AttributeName(ALL_NO_NS, SAME_LOCAL("oncontrolselect"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName IMAGE_RENDERING = new AttributeName(ALL_NO_NS, SAME_LOCAL("image-rendering"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MEDIUMMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("mediummathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName TEXT_DECORATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("text-decoration"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SHAPE_RENDERING = new AttributeName(ALL_NO_NS, SAME_LOCAL("shape-rendering"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STROKE_LINEJOIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-linejoin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REPEAT_TEMPLATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat-template"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_DESCRIBEDBY = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-describedby"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Run of 16-character identifiers starts here (through YCHANNELSELECTOR).
+ public static final AttributeName FONT_SIZE_ADJUST = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-size-adjust"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName KERNELUNITLENGTH = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("kernelunitlength", "kernelUnitLength"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBEFOREACTIVATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeactivate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONPROPERTYCHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onpropertychange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONDATASETCHANGED = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondatasetchanged"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName MASKCONTENTUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("maskcontentunits", "maskContentUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PATTERNTRANSFORM = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("patterntransform", "patternTransform"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REQUIREDFEATURES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("requiredfeatures", "requiredFeatures"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName RENDERING_INTENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("rendering-intent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SPECULAREXPONENT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("specularexponent", "specularExponent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SPECULARCONSTANT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("specularconstant", "specularConstant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SUPERSCRIPTSHIFT = new AttributeName(ALL_NO_NS, SAME_LOCAL("superscriptshift"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STROKE_DASHARRAY = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-dasharray"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName XCHANNELSELECTOR = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("xchannelselector", "xChannelSelector"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // camelCase form keeps the leading "x" lowercase and capitalises two inner letters ("xChannelSelector").
+ public static final AttributeName YCHANNELSELECTOR = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("ychannelselector", "yChannelSelector"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_AUTOCOMPLETE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-autocomplete"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Run of 17-character identifiers starts here (through STROKE_MITERLIMIT).
+ public static final AttributeName ENABLE_BACKGROUND = new AttributeName(ALL_NO_NS, SAME_LOCAL("enable-background"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName DOMINANT_BASELINE = new AttributeName(ALL_NO_NS, SAME_LOCAL("dominant-baseline"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName GRADIENTTRANSFORM = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("gradienttransform", "gradientTransform"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBEFORDEACTIVATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbefordeactivate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // NOTE(review): identifier and string both lack the final "e" of "before" (compare ONBEFOREACTIVATE / ONDEACTIVATE earlier in this list). The corrected name would be 18 characters and no longer fit this run; presumably the lookup tables depend on the current spelling - confirm and regenerate rather than hand-editing.
+ public static final AttributeName ONDATASETCOMPLETE = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondatasetcomplete"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OVERLINE_POSITION = new AttributeName(ALL_NO_NS, SAME_LOCAL("overline-position"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ONBEFOREEDITFOCUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeeditfocus"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName LIMITINGCONEANGLE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("limitingconeangle", "limitingConeAngle"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VERYTHINMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("verythinmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STROKE_DASHOFFSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-dashoffset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STROKE_MITERLIMIT = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-miterlimit"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ALIGNMENT_BASELINE = new AttributeName(ALL_NO_NS, SAME_LOCAL("alignment-baseline"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // 18-character identifiers start here; from this point the identifier length never decreases, up to the 28-character GLYPH_ORIENTATION_HORIZONTAL.
+ public static final AttributeName ONREADYSTATECHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onreadystatechange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName OVERLINE_THICKNESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("overline-thickness"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName UNDERLINE_POSITION = new AttributeName(ALL_NO_NS, SAME_LOCAL("underline-position"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VERYTHICKMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("verythickmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName REQUIREDEXTENSIONS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("requiredextensions", "requiredExtensions"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLOR_INTERPOLATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("color-interpolation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName UNDERLINE_THICKNESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("underline-thickness"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PRESERVEASPECTRATIO = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("preserveaspectratio", "preserveAspectRatio"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName PATTERNCONTENTUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("patterncontentunits", "patternContentUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Last SVG_DIFFERENT entry; everything after it uses SAME_LOCAL.
+ public static final AttributeName ARIA_MULTISELECTABLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-multiselectable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName SCRIPTSIZEMULTIPLIER = new AttributeName(ALL_NO_NS, SAME_LOCAL("scriptsizemultiplier"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName ARIA_ACTIVEDESCENDANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-activedescendant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VERYVERYTHINMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("veryverythinmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName VERYVERYTHICKMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("veryverythickmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STRIKETHROUGH_POSITION = new AttributeName(ALL_NO_NS, SAME_LOCAL("strikethrough-position"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName STRIKETHROUGH_THICKNESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("strikethrough-thickness"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName GLYPH_ORIENTATION_VERTICAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("glyph-orientation-vertical"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName COLOR_INTERPOLATION_FILTERS = new AttributeName(ALL_NO_NS, SAME_LOCAL("color-interpolation-filters"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG);
+ public static final AttributeName GLYPH_ORIENTATION_HORIZONTAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("glyph-orientation-horizontal"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); // Last constant declaration; the ATTRIBUTE_NAMES array that lists these constants begins on the next line.
+ // Table of AttributeName constants; entries appear grouped by name length,
+ // shortest names first.
+ // NOTE(review): presumably generated output that must stay index-aligned
+ // with ATTRIBUTE_HASHES (both tables have the same number of entries), in
+ // the same way ElementName pairs ELEMENT_NAMES with ELEMENT_HASHES --
+ // confirm against the lookup code before editing by hand.
+ private final static @NoLength AttributeName[] ATTRIBUTE_NAMES = {
+ D,
+ K,
+ R,
+ X,
+ Y,
+ Z,
+ BY,
+ CX,
+ CY,
+ DX,
+ DY,
+ G2,
+ G1,
+ FX,
+ FY,
+ K4,
+ K2,
+ K3,
+ K1,
+ ID,
+ IN,
+ U2,
+ U1,
+ RT,
+ RX,
+ RY,
+ TO,
+ Y2,
+ Y1,
+ X1,
+ X2,
+ ALT,
+ DIR,
+ DUR,
+ END,
+ FOR,
+ IN2,
+ MAX,
+ MIN,
+ LOW,
+ REL,
+ REV,
+ SRC,
+ AXIS,
+ ABBR,
+ BBOX,
+ CITE,
+ CODE,
+ BIAS,
+ COLS,
+ CLIP,
+ CHAR,
+ BASE,
+ EDGE,
+ DATA,
+ FILL,
+ FROM,
+ FORM,
+ FACE,
+ HIGH,
+ HREF,
+ OPEN,
+ ICON,
+ NAME,
+ MODE,
+ MASK,
+ LINK,
+ LANG,
+ LOOP,
+ LIST,
+ TYPE,
+ WHEN,
+ WRAP,
+ TEXT,
+ PATH,
+ PING,
+ REFX,
+ REFY,
+ SIZE,
+ SEED,
+ ROWS,
+ SPAN,
+ STEP,
+ ROLE,
+ XREF,
+ ASYNC,
+ ALINK,
+ ALIGN,
+ CLOSE,
+ COLOR,
+ CLASS,
+ CLEAR,
+ BEGIN,
+ DEPTH,
+ DEFER,
+ FENCE,
+ FRAME,
+ ISMAP,
+ ONEND,
+ INDEX,
+ ORDER,
+ OTHER,
+ ONCUT,
+ NARGS,
+ MEDIA,
+ LABEL,
+ LOCAL,
+ WIDTH,
+ TITLE,
+ VLINK,
+ VALUE,
+ SLOPE,
+ SHAPE,
+ SCOPE,
+ SCALE,
+ SPEED,
+ STYLE,
+ RULES,
+ STEMH,
+ SIZES,
+ STEMV,
+ START,
+ XMLNS,
+ ACCEPT,
+ ACCENT,
+ ASCENT,
+ ACTIVE,
+ ALTIMG,
+ ACTION,
+ BORDER,
+ CURSOR,
+ COORDS,
+ FILTER,
+ FORMAT,
+ HIDDEN,
+ HSPACE,
+ HEIGHT,
+ ONMOVE,
+ ONLOAD,
+ ONDRAG,
+ ORIGIN,
+ ONZOOM,
+ ONHELP,
+ ONSTOP,
+ ONDROP,
+ ONBLUR,
+ OBJECT,
+ OFFSET,
+ ORIENT,
+ ONCOPY,
+ NOWRAP,
+ NOHREF,
+ MACROS,
+ METHOD,
+ LOWSRC,
+ LSPACE,
+ LQUOTE,
+ USEMAP,
+ WIDTHS,
+ TARGET,
+ VALUES,
+ VALIGN,
+ VSPACE,
+ POSTER,
+ POINTS,
+ PROMPT,
+ SRCDOC,
+ SCOPED,
+ STRING,
+ SCHEME,
+ STROKE,
+ RADIUS,
+ RESULT,
+ REPEAT,
+ SRCSET,
+ RSPACE,
+ ROTATE,
+ RQUOTE,
+ ALTTEXT,
+ ARCHIVE,
+ AZIMUTH,
+ CLOSURE,
+ CHECKED,
+ CLASSID,
+ CHAROFF,
+ BGCOLOR,
+ COLSPAN,
+ CHARSET,
+ COMPACT,
+ CONTENT,
+ ENCTYPE,
+ DATASRC,
+ DATAFLD,
+ DECLARE,
+ DISPLAY,
+ DIVISOR,
+ DEFAULT,
+ DESCENT,
+ KERNING,
+ HANGING,
+ HEADERS,
+ ONPASTE,
+ ONCLICK,
+ OPTIMUM,
+ ONBEGIN,
+ ONKEYUP,
+ ONFOCUS,
+ ONERROR,
+ ONINPUT,
+ ONABORT,
+ ONSTART,
+ ONRESET,
+ OPACITY,
+ NOSHADE,
+ MINSIZE,
+ MAXSIZE,
+ LARGEOP,
+ UNICODE,
+ TARGETX,
+ TARGETY,
+ VIEWBOX,
+ VERSION,
+ PATTERN,
+ PROFILE,
+ SPACING,
+ RESTART,
+ ROWSPAN,
+ SANDBOX,
+ SUMMARY,
+ STANDBY,
+ REPLACE,
+ AUTOPLAY,
+ ADDITIVE,
+ CALCMODE,
+ CODETYPE,
+ CODEBASE,
+ CONTROLS,
+ BEVELLED,
+ BASELINE,
+ EXPONENT,
+ EDGEMODE,
+ ENCODING,
+ GLYPHREF,
+ DATETIME,
+ DISABLED,
+ FONTSIZE,
+ KEYTIMES,
+ PANOSE_1,
+ HREFLANG,
+ ONRESIZE,
+ ONCHANGE,
+ ONBOUNCE,
+ ONUNLOAD,
+ ONFINISH,
+ ONSCROLL,
+ OPERATOR,
+ OVERFLOW,
+ ONSUBMIT,
+ ONREPEAT,
+ ONSELECT,
+ NOTATION,
+ NORESIZE,
+ MANIFEST,
+ MATHSIZE,
+ MULTIPLE,
+ LONGDESC,
+ LANGUAGE,
+ TEMPLATE,
+ TABINDEX,
+ PROPERTY,
+ READONLY,
+ SELECTED,
+ ROWLINES,
+ SEAMLESS,
+ ROWALIGN,
+ STRETCHY,
+ REQUIRED,
+ XML_BASE,
+ XML_LANG,
+ X_HEIGHT,
+ ARIA_OWNS,
+ AUTOFOCUS,
+ ARIA_SORT,
+ ACCESSKEY,
+ ARIA_BUSY,
+ ARIA_GRAB,
+ AMPLITUDE,
+ ARIA_LIVE,
+ CLIP_RULE,
+ CLIP_PATH,
+ EQUALROWS,
+ ELEVATION,
+ DIRECTION,
+ DRAGGABLE,
+ FILL_RULE,
+ FONTSTYLE,
+ FONT_SIZE,
+ KEYSYSTEM,
+ KEYPOINTS,
+ HIDEFOCUS,
+ ONMESSAGE,
+ INTERCEPT,
+ ONDRAGEND,
+ ONMOVEEND,
+ ONINVALID,
+ INTEGRITY,
+ ONKEYDOWN,
+ ONFOCUSIN,
+ ONMOUSEUP,
+ INPUTMODE,
+ ONROWEXIT,
+ MATHCOLOR,
+ MASKUNITS,
+ MAXLENGTH,
+ LINEBREAK,
+ TRANSFORM,
+ V_HANGING,
+ VALUETYPE,
+ POINTSATZ,
+ POINTSATX,
+ POINTSATY,
+ SYMMETRIC,
+ SCROLLING,
+ REPEATDUR,
+ SELECTION,
+ SEPARATOR,
+ XML_SPACE,
+ AUTOSUBMIT,
+ ALPHABETIC,
+ ACTIONTYPE,
+ ACCUMULATE,
+ ARIA_LEVEL,
+ COLUMNSPAN,
+ CAP_HEIGHT,
+ BACKGROUND,
+ GLYPH_NAME,
+ GROUPALIGN,
+ FONTFAMILY,
+ FONTWEIGHT,
+ FONT_STYLE,
+ KEYSPLINES,
+ HTTP_EQUIV,
+ ONACTIVATE,
+ OCCURRENCE,
+ IRRELEVANT,
+ ONDBLCLICK,
+ ONDRAGDROP,
+ ONKEYPRESS,
+ ONROWENTER,
+ ONDRAGOVER,
+ ONFOCUSOUT,
+ ONMOUSEOUT,
+ NUMOCTAVES,
+ MARKER_MID,
+ MARKER_END,
+ TEXTLENGTH,
+ VISIBILITY,
+ VIEWTARGET,
+ VERT_ADV_Y,
+ PATHLENGTH,
+ REPEAT_MAX,
+ RADIOGROUP,
+ STOP_COLOR,
+ SEPARATORS,
+ REPEAT_MIN,
+ ROWSPACING,
+ ZOOMANDPAN,
+ XLINK_TYPE,
+ XLINK_ROLE,
+ XLINK_HREF,
+ XLINK_SHOW,
+ ACCENTUNDER,
+ ARIA_SECRET,
+ ARIA_ATOMIC,
+ ARIA_HIDDEN,
+ ARIA_FLOWTO,
+ ARABIC_FORM,
+ CELLPADDING,
+ CELLSPACING,
+ COLUMNWIDTH,
+ CROSSORIGIN,
+ COLUMNALIGN,
+ COLUMNLINES,
+ CONTEXTMENU,
+ BASEPROFILE,
+ FONT_FAMILY,
+ FRAMEBORDER,
+ FILTERUNITS,
+ FLOOD_COLOR,
+ FONT_WEIGHT,
+ HORIZ_ADV_X,
+ ONDRAGLEAVE,
+ ONMOUSEMOVE,
+ ORIENTATION,
+ ONMOUSEDOWN,
+ ONMOUSEOVER,
+ ONDRAGENTER,
+ IDEOGRAPHIC,
+ ONBEFORECUT,
+ ONFORMINPUT,
+ ONDRAGSTART,
+ ONMOVESTART,
+ MARKERUNITS,
+ MATHVARIANT,
+ MARGINWIDTH,
+ MARKERWIDTH,
+ TEXT_ANCHOR,
+ TABLEVALUES,
+ SCRIPTLEVEL,
+ REPEATCOUNT,
+ STITCHTILES,
+ STARTOFFSET,
+ SCROLLDELAY,
+ XMLNS_XLINK,
+ XLINK_TITLE,
+ ARIA_INVALID,
+ ARIA_PRESSED,
+ ARIA_CHECKED,
+ AUTOCOMPLETE,
+ ARIA_SETSIZE,
+ ARIA_CHANNEL,
+ EQUALCOLUMNS,
+ DISPLAYSTYLE,
+ DATAFORMATAS,
+ FILL_OPACITY,
+ FONT_VARIANT,
+ FONT_STRETCH,
+ FRAMESPACING,
+ KERNELMATRIX,
+ ONDEACTIVATE,
+ ONROWSDELETE,
+ ONMOUSELEAVE,
+ ONFORMCHANGE,
+ ONCELLCHANGE,
+ ONMOUSEWHEEL,
+ ONMOUSEENTER,
+ ONAFTERPRINT,
+ ONBEFORECOPY,
+ MARGINHEIGHT,
+ MARKERHEIGHT,
+ MARKER_START,
+ MATHEMATICAL,
+ LENGTHADJUST,
+ UNSELECTABLE,
+ UNICODE_BIDI,
+ UNITS_PER_EM,
+ WORD_SPACING,
+ WRITING_MODE,
+ V_ALPHABETIC,
+ PATTERNUNITS,
+ SPREADMETHOD,
+ SURFACESCALE,
+ STROKE_WIDTH,
+ REPEAT_START,
+ STDDEVIATION,
+ STOP_OPACITY,
+ ARIA_CONTROLS,
+ ARIA_HASPOPUP,
+ ACCENT_HEIGHT,
+ ARIA_VALUENOW,
+ ARIA_RELEVANT,
+ ARIA_POSINSET,
+ ARIA_VALUEMAX,
+ ARIA_READONLY,
+ ARIA_SELECTED,
+ ARIA_REQUIRED,
+ ARIA_EXPANDED,
+ ARIA_DISABLED,
+ ATTRIBUTETYPE,
+ ATTRIBUTENAME,
+ ARIA_DATATYPE,
+ ARIA_VALUEMIN,
+ BASEFREQUENCY,
+ COLUMNSPACING,
+ COLOR_PROFILE,
+ CLIPPATHUNITS,
+ DEFINITIONURL,
+ GRADIENTUNITS,
+ FLOOD_OPACITY,
+ ONAFTERUPDATE,
+ ONERRORUPDATE,
+ ONBEFOREPASTE,
+ ONLOSECAPTURE,
+ ONCONTEXTMENU,
+ ONSELECTSTART,
+ ONBEFOREPRINT,
+ MOVABLELIMITS,
+ LINETHICKNESS,
+ UNICODE_RANGE,
+ THINMATHSPACE,
+ VERT_ORIGIN_X,
+ VERT_ORIGIN_Y,
+ V_IDEOGRAPHIC,
+ PRESERVEALPHA,
+ SCRIPTMINSIZE,
+ SPECIFICATION,
+ XLINK_ACTUATE,
+ XLINK_ARCROLE,
+ ACCEPT_CHARSET,
+ ALIGNMENTSCOPE,
+ ARIA_MULTILINE,
+ BASELINE_SHIFT,
+ HORIZ_ORIGIN_X,
+ HORIZ_ORIGIN_Y,
+ ONBEFOREUPDATE,
+ ONFILTERCHANGE,
+ ONROWSINSERTED,
+ ONBEFOREUNLOAD,
+ MATHBACKGROUND,
+ LETTER_SPACING,
+ LIGHTING_COLOR,
+ THICKMATHSPACE,
+ TEXT_RENDERING,
+ V_MATHEMATICAL,
+ POINTER_EVENTS,
+ PRIMITIVEUNITS,
+ REFERRERPOLICY,
+ SYSTEMLANGUAGE,
+ STROKE_LINECAP,
+ SUBSCRIPTSHIFT,
+ STROKE_OPACITY,
+ ARIA_DROPEFFECT,
+ ARIA_LABELLEDBY,
+ ARIA_TEMPLATEID,
+ COLOR_RENDERING,
+ CONTENTEDITABLE,
+ DIFFUSECONSTANT,
+ ONDATAAVAILABLE,
+ ONCONTROLSELECT,
+ IMAGE_RENDERING,
+ MEDIUMMATHSPACE,
+ TEXT_DECORATION,
+ SHAPE_RENDERING,
+ STROKE_LINEJOIN,
+ REPEAT_TEMPLATE,
+ ARIA_DESCRIBEDBY,
+ FONT_SIZE_ADJUST,
+ KERNELUNITLENGTH,
+ ONBEFOREACTIVATE,
+ ONPROPERTYCHANGE,
+ ONDATASETCHANGED,
+ MASKCONTENTUNITS,
+ PATTERNTRANSFORM,
+ REQUIREDFEATURES,
+ RENDERING_INTENT,
+ SPECULAREXPONENT,
+ SPECULARCONSTANT,
+ SUPERSCRIPTSHIFT,
+ STROKE_DASHARRAY,
+ XCHANNELSELECTOR,
+ YCHANNELSELECTOR,
+ ARIA_AUTOCOMPLETE,
+ ENABLE_BACKGROUND,
+ DOMINANT_BASELINE,
+ GRADIENTTRANSFORM,
+ ONBEFORDEACTIVATE,
+ ONDATASETCOMPLETE,
+ OVERLINE_POSITION,
+ ONBEFOREEDITFOCUS,
+ LIMITINGCONEANGLE,
+ VERYTHINMATHSPACE,
+ STROKE_DASHOFFSET,
+ STROKE_MITERLIMIT,
+ ALIGNMENT_BASELINE,
+ ONREADYSTATECHANGE,
+ OVERLINE_THICKNESS,
+ UNDERLINE_POSITION,
+ VERYTHICKMATHSPACE,
+ REQUIREDEXTENSIONS,
+ COLOR_INTERPOLATION,
+ UNDERLINE_THICKNESS,
+ PRESERVEASPECTRATIO,
+ PATTERNCONTENTUNITS,
+ ARIA_MULTISELECTABLE,
+ SCRIPTSIZEMULTIPLIER,
+ ARIA_ACTIVEDESCENDANT,
+ VERYVERYTHINMATHSPACE,
+ VERYVERYTHICKMATHSPACE,
+ STRIKETHROUGH_POSITION,
+ STRIKETHROUGH_THICKNESS,
+ GLYPH_ORIENTATION_VERTICAL,
+ COLOR_INTERPOLATION_FILTERS,
+ GLYPH_ORIENTATION_HORIZONTAL,
+ };
+ // Integer keys, one per ATTRIBUTE_NAMES entry (the two tables have the same
+ // number of elements). The values appear in ascending order.
+ // NOTE(review): presumably each value is the hash of the attribute name at
+ // the same index and the table is binary-searched, as ElementName does with
+ // ELEMENT_HASHES -- confirm against the lookup code; if so, the ordering
+ // must be preserved and the table regenerated rather than edited by hand.
+ private final static int[] ATTRIBUTE_HASHES = {
+ 1153,
+ 1383,
+ 1601,
+ 1793,
+ 1827,
+ 1857,
+ 68600,
+ 69146,
+ 69177,
+ 70237,
+ 70270,
+ 71572,
+ 71669,
+ 72415,
+ 72444,
+ 74846,
+ 74904,
+ 74943,
+ 75001,
+ 75276,
+ 75590,
+ 84742,
+ 84839,
+ 85575,
+ 85963,
+ 85992,
+ 87204,
+ 88074,
+ 88171,
+ 89130,
+ 89163,
+ 3207892,
+ 3283895,
+ 3284791,
+ 3338752,
+ 3358197,
+ 3369562,
+ 3539124,
+ 3562402,
+ 3574260,
+ 3670335,
+ 3696933,
+ 3721879,
+ 135280021,
+ 135346322,
+ 136317019,
+ 136475749,
+ 136548517,
+ 136652214,
+ 136884919,
+ 136902418,
+ 136942992,
+ 137292068,
+ 139120259,
+ 139785574,
+ 142250603,
+ 142314056,
+ 142331176,
+ 142519584,
+ 144752417,
+ 145106895,
+ 146147200,
+ 146765926,
+ 148805544,
+ 149655723,
+ 149809441,
+ 150018784,
+ 150445028,
+ 150813181,
+ 150923321,
+ 152528754,
+ 152536216,
+ 152647366,
+ 152962785,
+ 155219321,
+ 155654904,
+ 157317483,
+ 157350248,
+ 157437941,
+ 157447478,
+ 157604838,
+ 157685404,
+ 157894402,
+ 158315188,
+ 166078431,
+ 169409980,
+ 169700259,
+ 169856932,
+ 170007032,
+ 170409695,
+ 170466488,
+ 170513710,
+ 170608367,
+ 173028944,
+ 173896963,
+ 176090625,
+ 176129212,
+ 179390001,
+ 179489057,
+ 179627464,
+ 179840468,
+ 179849042,
+ 180004216,
+ 181779081,
+ 183027151,
+ 183645319,
+ 183698797,
+ 185922012,
+ 185997252,
+ 188312483,
+ 188675799,
+ 190977533,
+ 190992569,
+ 191006194,
+ 191033518,
+ 191038774,
+ 191096249,
+ 191166163,
+ 191194426,
+ 191443343,
+ 191522106,
+ 191568039,
+ 200104642,
+ 202506661,
+ 202537381,
+ 202602917,
+ 203070590,
+ 203120766,
+ 203389054,
+ 203690071,
+ 203971238,
+ 203986524,
+ 209040857,
+ 209125756,
+ 212055489,
+ 212322418,
+ 212746849,
+ 213002877,
+ 213055164,
+ 213088023,
+ 213259873,
+ 213273386,
+ 213435118,
+ 213437318,
+ 213438231,
+ 213493071,
+ 213532268,
+ 213542834,
+ 213584431,
+ 213659891,
+ 215285828,
+ 215880731,
+ 216112976,
+ 216684637,
+ 217369699,
+ 217565298,
+ 217576549,
+ 218186795,
+ 219743185,
+ 220082234,
+ 221623802,
+ 221986406,
+ 222283890,
+ 223089542,
+ 223138630,
+ 223311265,
+ 224431494,
+ 224547358,
+ 224587256,
+ 224589550,
+ 224655650,
+ 224785518,
+ 224810917,
+ 224813302,
+ 225126263,
+ 225429618,
+ 225432950,
+ 225440869,
+ 236107233,
+ 236709921,
+ 236838947,
+ 237117095,
+ 237143271,
+ 237172455,
+ 237209953,
+ 237354143,
+ 237372743,
+ 237668065,
+ 237703073,
+ 237714273,
+ 239743521,
+ 240512803,
+ 240522627,
+ 240560417,
+ 240656513,
+ 241015715,
+ 241062755,
+ 241065383,
+ 243523041,
+ 245865199,
+ 246261793,
+ 246556195,
+ 246774817,
+ 246923491,
+ 246928419,
+ 246981667,
+ 247014847,
+ 247058369,
+ 247112833,
+ 247118177,
+ 247119137,
+ 247128739,
+ 247316903,
+ 249533729,
+ 250235623,
+ 250269543,
+ 251402351,
+ 252339047,
+ 253260911,
+ 253293679,
+ 254844367,
+ 255547879,
+ 256077281,
+ 256345377,
+ 258124199,
+ 258354465,
+ 258605063,
+ 258744193,
+ 258845603,
+ 258856961,
+ 258926689,
+ 269869248,
+ 270174334,
+ 270709417,
+ 270778994,
+ 270781796,
+ 271102503,
+ 271478858,
+ 271490090,
+ 272870654,
+ 273335275,
+ 273369140,
+ 273924313,
+ 274108530,
+ 274116736,
+ 276818662,
+ 277476156,
+ 279156579,
+ 279349675,
+ 280108533,
+ 280128712,
+ 280132869,
+ 280162403,
+ 280280292,
+ 280413430,
+ 280506130,
+ 280677397,
+ 280678580,
+ 280686710,
+ 280689066,
+ 282736758,
+ 283110901,
+ 283275116,
+ 283823226,
+ 283890012,
+ 284479340,
+ 284606461,
+ 286700477,
+ 286798916,
+ 290055764,
+ 291557706,
+ 291665349,
+ 291804100,
+ 292138018,
+ 292166446,
+ 292418738,
+ 292451039,
+ 300298041,
+ 300374839,
+ 300597935,
+ 303073389,
+ 303083839,
+ 303266673,
+ 303354997,
+ 303430688,
+ 303576261,
+ 303724281,
+ 303819694,
+ 304242723,
+ 304382625,
+ 306247792,
+ 307227811,
+ 307468786,
+ 307724489,
+ 310252031,
+ 310358241,
+ 310373094,
+ 310833159,
+ 311015256,
+ 313357609,
+ 313683893,
+ 313701861,
+ 313706996,
+ 313707317,
+ 313710350,
+ 313795700,
+ 314027746,
+ 314038181,
+ 314091299,
+ 314205627,
+ 314233813,
+ 316741830,
+ 316797986,
+ 317486755,
+ 317794164,
+ 320076137,
+ 322657125,
+ 322887778,
+ 323506876,
+ 323572412,
+ 323605180,
+ 325060058,
+ 325320188,
+ 325398738,
+ 325541490,
+ 325671619,
+ 333868843,
+ 336806130,
+ 337212108,
+ 337282686,
+ 337285434,
+ 337585223,
+ 338036037,
+ 338298087,
+ 338566051,
+ 340943551,
+ 341190970,
+ 342995704,
+ 343352124,
+ 343912673,
+ 344585053,
+ 346977248,
+ 347218098,
+ 347262163,
+ 347278576,
+ 347438191,
+ 347655959,
+ 347684788,
+ 347726430,
+ 347727772,
+ 347776035,
+ 347776629,
+ 349500753,
+ 350880161,
+ 350887073,
+ 353384123,
+ 355496998,
+ 355906922,
+ 355979793,
+ 356545959,
+ 358637867,
+ 358905016,
+ 359164318,
+ 359247286,
+ 359350571,
+ 359579447,
+ 365560330,
+ 367399355,
+ 367420285,
+ 367510727,
+ 368013212,
+ 370234760,
+ 370353345,
+ 370710317,
+ 371074566,
+ 371122285,
+ 371194213,
+ 371448425,
+ 371448430,
+ 371545055,
+ 371593469,
+ 371596922,
+ 371758751,
+ 371964792,
+ 372151328,
+ 376550136,
+ 376710172,
+ 376795771,
+ 376826271,
+ 376906556,
+ 380514830,
+ 380774774,
+ 380775037,
+ 381030322,
+ 381136500,
+ 381281631,
+ 381282269,
+ 381285504,
+ 381330595,
+ 381331422,
+ 381335911,
+ 381336484,
+ 383907298,
+ 383917408,
+ 384595009,
+ 384595013,
+ 387799894,
+ 387823201,
+ 392581647,
+ 392584937,
+ 392742684,
+ 392906485,
+ 393003349,
+ 400644707,
+ 400973830,
+ 404428547,
+ 404432113,
+ 404432865,
+ 404469244,
+ 404478897,
+ 404694860,
+ 406887479,
+ 408294949,
+ 408789955,
+ 410022510,
+ 410467324,
+ 410586448,
+ 410945965,
+ 411845275,
+ 414327152,
+ 414327932,
+ 414329781,
+ 414346257,
+ 414346439,
+ 414639928,
+ 414835998,
+ 414894517,
+ 414986533,
+ 417465377,
+ 417465381,
+ 417492216,
+ 418259232,
+ 419310946,
+ 420103495,
+ 420242342,
+ 420380455,
+ 420658662,
+ 420717432,
+ 423183880,
+ 424539259,
+ 425929170,
+ 425972964,
+ 426050649,
+ 426126450,
+ 426142833,
+ 426607922,
+ 437289840,
+ 437347469,
+ 437412335,
+ 437423943,
+ 437455540,
+ 437462252,
+ 437597991,
+ 437617485,
+ 437986305,
+ 437986507,
+ 437986828,
+ 437987072,
+ 438015591,
+ 438034813,
+ 438038966,
+ 438179623,
+ 438347971,
+ 438483573,
+ 438547062,
+ 438895551,
+ 441592676,
+ 442032555,
+ 443548979,
+ 447881379,
+ 447881655,
+ 447881895,
+ 447887844,
+ 448416189,
+ 448445746,
+ 448449012,
+ 450942191,
+ 452816744,
+ 453668677,
+ 454434495,
+ 456610076,
+ 456642844,
+ 456738709,
+ 457544600,
+ 459451897,
+ 459680944,
+ 468058810,
+ 468083581,
+ 470964084,
+ 471470955,
+ 471567278,
+ 472267822,
+ 481177859,
+ 481210627,
+ 481435874,
+ 481455115,
+ 481485378,
+ 481490218,
+ 485105638,
+ 486005878,
+ 486383494,
+ 487988916,
+ 488103783,
+ 490661867,
+ 491574090,
+ 491578272,
+ 492891370,
+ 493041952,
+ 493441205,
+ 493582844,
+ 493716979,
+ 504577572,
+ 504740359,
+ 505091638,
+ 505592418,
+ 505656212,
+ 509516275,
+ 514998531,
+ 515571132,
+ 515594682,
+ 518712698,
+ 521362273,
+ 526592419,
+ 526807354,
+ 527348842,
+ 538294791,
+ 544689535,
+ 545535009,
+ 548544752,
+ 548563346,
+ 548595116,
+ 551679010,
+ 558034099,
+ 560329411,
+ 560356209,
+ 560671018,
+ 560671152,
+ 560692590,
+ 560845442,
+ 569212097,
+ 569474241,
+ 572252718,
+ 575326764,
+ 576174758,
+ 576190819,
+ 582099184,
+ 582099438,
+ 582372519,
+ 582558889,
+ 586552164,
+ 591325418,
+ 594231990,
+ 594243961,
+ 605711268,
+ 615672071,
+ 616086845,
+ 621792370,
+ 624879850,
+ 627432831,
+ 640040548,
+ 654392808,
+ 658675477,
+ 659420283,
+ 672891587,
+ 694768102,
+ 705890982,
+ 725543146,
+ 759097578,
+ 761686526,
+ 795383908,
+ 878105336,
+ 908643300,
+ 945213471,
+ };
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java
new file mode 100644
index 000000000..01d76d700
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2008-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.NoLength;
+
+import org.xml.sax.SAXException;
+
+/**
+ * Shared base class for tree builders that coalesce their text nodes. The
+ * <code>char[]</code>-based callbacks overridden here are adapted to
+ * <code>String</code>-based abstract methods that subclasses implement.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public abstract class CoalescingTreeBuilder<T> extends TreeBuilder<T> {
+
+    /**
+     * Appends <code>length</code> chars of <code>buf</code>, beginning at
+     * index <code>start</code>, to <code>charBuffer</code> at position
+     * <code>charBufferLen</code> and advances <code>charBufferLen</code> by
+     * <code>length</code>. No capacity check is performed in this method.
+     *
+     * @param buf the source buffer
+     * @param start index of the first char to copy
+     * @param length number of chars to copy
+     */
+    protected final void accumulateCharacters(@NoLength char[] buf, int start,
+            int length) throws SAXException {
+        System.arraycopy(buf, start, charBuffer, charBufferLen, length);
+        charBufferLen += length;
+    }
+
+    /**
+     * Converts the given buffer range to a <code>String</code> and forwards
+     * it to {@link #appendCharacters(Object, String)}.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#appendCharacters(java.lang.Object, char[], int, int)
+     */
+    @Override protected final void appendCharacters(T parent, char[] buf,
+            int start, int length) throws SAXException {
+        final String text = new String(buf, start, length);
+        appendCharacters(parent, text);
+    }
+
+    /**
+     * Appends the fixed isindex prompt text to <code>parent</code>.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#appendIsindexPrompt(java.lang.Object)
+     */
+    @Override protected void appendIsindexPrompt(T parent) throws SAXException {
+        final String prompt = "This is a searchable index. Enter search keywords: ";
+        appendCharacters(parent, prompt);
+    }
+
+    /**
+     * Appends <code>text</code> as character data to <code>parent</code>.
+     */
+    protected abstract void appendCharacters(T parent, String text) throws SAXException;
+
+    /**
+     * Converts the given buffer range to a <code>String</code> and forwards
+     * it to {@link #appendComment(Object, String)}.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#appendComment(java.lang.Object, char[], int, int)
+     */
+    @Override protected final void appendComment(T parent, char[] buf,
+            int start, int length) throws SAXException {
+        final String comment = new String(buf, start, length);
+        appendComment(parent, comment);
+    }
+
+    /**
+     * Appends a comment with the given text to <code>parent</code>.
+     */
+    protected abstract void appendComment(T parent, String comment) throws SAXException;
+
+    /**
+     * Converts the given buffer range to a <code>String</code> and forwards
+     * it to {@link #appendCommentToDocument(String)}.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#appendCommentToDocument(char[], int, int)
+     */
+    @Override protected final void appendCommentToDocument(char[] buf,
+            int start, int length) throws SAXException {
+        final String comment = new String(buf, start, length);
+        appendCommentToDocument(comment);
+    }
+
+    /**
+     * Appends a comment with the given text to the document.
+     */
+    protected abstract void appendCommentToDocument(String comment) throws SAXException;
+
+    /**
+     * Converts the given buffer range to a <code>String</code> and forwards
+     * it, together with <code>table</code> and <code>stackParent</code>, to
+     * {@link #insertFosterParentedCharacters(String, Object, Object)}.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#insertFosterParentedCharacters(char[], int, int, java.lang.Object, java.lang.Object)
+     */
+    @Override protected final void insertFosterParentedCharacters(char[] buf,
+            int start, int length, T table, T stackParent) throws SAXException {
+        final String text = new String(buf, start, length);
+        insertFosterParentedCharacters(text, table, stackParent);
+    }
+
+    /**
+     * Inserts foster-parented character data given as a <code>String</code>.
+     */
+    protected abstract void insertFosterParentedCharacters(String text, T table, T stackParent) throws SAXException;
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java
new file mode 100644
index 000000000..ee551a737
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java
@@ -0,0 +1,1609 @@
+/*
+ * Copyright (c) 2008-2014 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import java.util.Arrays;
+
+import nu.validator.htmlparser.annotation.Inline;
+import nu.validator.htmlparser.annotation.Local;
+import nu.validator.htmlparser.annotation.NoLength;
+import nu.validator.htmlparser.annotation.Virtual;
+import nu.validator.htmlparser.common.Interner;
+
+public final class ElementName
+// uncomment when regenerating self
+// implements Comparable<ElementName>
+{
+
+    /**
+     * Bit mask selecting the dispatch group, i.e. the low seven bits of the
+     * flag word.
+     */
+    public static final int GROUP_MASK = 0x7F;
+
+    /**
+     * Flag bit 30: the name is not one of the pre-interned element names.
+     * Must never be set on a pre-interned element.
+     */
+    public static final int CUSTOM = 1 << 30;
+
+    /**
+     * Flag bit 29: the element belongs to the "special" category. Only HTML
+     * specials get this bit pre-set; MathML and SVG specials must not.
+     */
+    public static final int SPECIAL = 1 << 29;
+
+    /**
+     * Flag bit 28: the element is foster-parenting. Pre-set on elements that
+     * are foster-parenting as HTML.
+     */
+    public static final int FOSTER_PARENTING = 1 << 28;
+
+    /**
+     * Flag bit 27: the element is scoping. Pre-set on elements that are
+     * scoping as HTML.
+     */
+    public static final int SCOPING = 1 << 27;
+
+    /**
+     * Flag bit 26: the element is scoping when it is an SVG element.
+     */
+    public static final int SCOPING_AS_SVG = 1 << 26;
+
+    /**
+     * Flag bit 25: the element is scoping when it is a MathML element.
+     */
+    public static final int SCOPING_AS_MATHML = 1 << 25;
+
+    /**
+     * Flag bit 24: the element is an HTML integration point.
+     */
+    public static final int HTML_INTEGRATION_POINT = 1 << 24;
+
+    /**
+     * Flag bit 23: the element's end tag is optional.
+     */
+    public static final int OPTIONAL_END_TAG = 1 << 23;
+
+ /**
+ * Placeholder instance created through the one-argument constructor with a
+ * <code>null</code> name; like every name built that way its flags are
+ * <code>TreeBuilder.OTHER | CUSTOM</code>.
+ */
+ public static final ElementName NULL_ELEMENT_NAME = new ElementName(null);
+
+ /**
+ * The local name of the element; this is the value compared against the
+ * input buffer when looking up a pre-interned name.
+ */
+ public final @Local String name;
+
+ /**
+ * The camel-case spelling of the name. It is identical to {@link #name}
+ * for names created through the one-argument constructor.
+ */
+ public final @Local String camelCaseName;
+
+ /**
+ * The lowest 7 bits are the dispatch group. The high bits are flags.
+ */
+ public final int flags;
+
+    /**
+     * Returns the complete flag word: the dispatch group in the low seven
+     * bits together with the flag bits above it.
+     *
+     * @return the value of {@link #flags}
+     */
+    @Inline public int getFlags() {
+        return this.flags;
+    }
+
+    /**
+     * Returns the dispatch group of this element name.
+     *
+     * @return {@link #flags} with everything except the bits of
+     *         {@link #GROUP_MASK} cleared
+     */
+    public int getGroup() {
+        return GROUP_MASK & this.flags;
+    }
+
+    /**
+     * Tells whether this name carries the {@link #CUSTOM} flag, i.e. is not
+     * one of the pre-interned element names.
+     *
+     * @return <code>true</code> if the {@link #CUSTOM} bit is set
+     */
+    public boolean isCustom() {
+        final int customBit = this.flags & CUSTOM;
+        return customBit != 0;
+    }
+
+    /**
+     * Returns the element name spelled by <code>length</code> chars of
+     * <code>buf</code> starting at <code>offset</code>.
+     *
+     * The chars are hashed and the hash is looked up in
+     * <code>ELEMENT_HASHES</code>. On a hit the candidate from
+     * <code>ELEMENT_NAMES</code> is still compared char by char, because
+     * the hash is only guaranteed to be unique among the well-known names.
+     * When there is no hit, or the comparison fails, a new custom
+     * <code>ElementName</code> is created from the buffer.
+     *
+     * @param buf the buffer holding the name
+     * @param offset index of the first char of the name in <code>buf</code>
+     * @param length number of chars in the name
+     * @param interner passed through to
+     *        <code>Portability.newLocalNameFromBuffer</code> when a new
+     *        local name has to be created
+     * @return the pre-interned name if the buffer spells one, otherwise a
+     *         newly created custom name
+     */
+    static ElementName elementNameByBuffer(@NoLength char[] buf, int offset, int length, Interner interner) {
+        // Hash exactly the window that is compared and interned below.
+        // Hashing from index 0 regardless of offset would make well-known
+        // names miss the table whenever offset is non-zero.
+        int hash = ElementName.bufToHash(buf, offset, length);
+        int index = Arrays.binarySearch(ElementName.ELEMENT_HASHES, hash);
+        if (index < 0) {
+            return new ElementName(Portability.newLocalNameFromBuffer(buf, offset, length, interner));
+        } else {
+            ElementName elementName = ElementName.ELEMENT_NAMES[index];
+            @Local String name = elementName.name;
+            if (!Portability.localEqualsBuffer(name, buf, offset, length)) {
+                // Same hash as a well-known name but different chars.
+                return new ElementName(Portability.newLocalNameFromBuffer(buf,
+                        offset, length, interner));
+            }
+            return elementName;
+        }
+    }
+
+    /**
+     * This method has to return a unique integer for each well-known
+     * lower-cased element name.
+     *
+     * The hash starts from <code>len</code>, then mixes in the first char
+     * and up to four chars taken from the end of the name going backwards.
+     * Each step shifts the hash left by five bits and adds the char minus
+     * <code>0x60</code>. The first char is read unconditionally, so
+     * <code>len</code> is expected to be at least 1.
+     *
+     * @param buf the buffer holding the name
+     * @param offset index of the first char of the name in <code>buf</code>
+     * @param len number of chars in the name
+     * @return the hash of the name
+     */
+    private static int bufToHash(@NoLength char[] buf, int offset, int len) {
+        int hash = len;
+        hash <<= 5;
+        hash += buf[offset] - 0x60;
+        int j = len;
+        for (int i = 0; i < 4 && j > 0; i++) {
+            j--;
+            hash <<= 5;
+            hash += buf[offset + j] - 0x60;
+        }
+        return hash;
+    }
+
+    /**
+     * Hashes a name that starts at index 0 of <code>buf</code>. Retained for
+     * the commented-out regeneration code, which calls this two-argument
+     * form.
+     *
+     * @param buf the buffer holding the name
+     * @param len number of chars in the name
+     * @return the hash of the name
+     */
+    private static int bufToHash(@NoLength char[] buf, int len) {
+        return bufToHash(buf, 0, len);
+    }
+
+    /**
+     * Creates an element name from explicitly supplied parts; the generated
+     * constants of this class are built with this constructor.
+     *
+     * @param name the local name
+     * @param camelCaseName the camel-case spelling of the name
+     * @param flags the dispatch group combined with any flag bits
+     */
+    private ElementName(@Local String name, @Local String camelCaseName,
+            int flags) {
+        this.flags = flags;
+        this.camelCaseName = camelCaseName;
+        this.name = name;
+    }
+
+    /**
+     * Creates a custom element name: the camel-case spelling equals
+     * <code>name</code>, the dispatch group is
+     * <code>TreeBuilder.OTHER</code> and the {@link #CUSTOM} flag is set.
+     *
+     * @param name the local name
+     */
+    protected ElementName(@Local String name) {
+        this.flags = CUSTOM | TreeBuilder.OTHER;
+        this.camelCaseName = name;
+        this.name = name;
+    }
+
+ /**
+ * Release hook for this element name. The Java implementation does
+ * nothing; the notes in the body describe what an overriding subclass is
+ * expected to do.
+ */
+ @Virtual void release() {
+ // No-op in Java.
+ // Implement as delete this in subclass.
+ // Be sure to release the local name
+ }
+
+ /**
+ * Intentionally empty and not called from the visible Java code (hence
+ * the "unused" suppression).
+ * NOTE(review): presumably a placeholder consumed when this class is
+ * translated to another language -- confirm before removing.
+ */
+ @SuppressWarnings("unused") @Virtual private void destructor() {
+ }
+
+ /**
+ * Returns this same instance; no copy is made and <code>interner</code>
+ * is not used by this implementation.
+ *
+ * @param interner unused here
+ * @return <code>this</code>
+ */
+ @Virtual public ElementName cloneElementName(Interner interner) {
+ return this;
+ }
+
+ // START CODE ONLY USED FOR GENERATING CODE uncomment and run to regenerate
+
+// /**
+// * @see java.lang.Object#toString()
+// */
+// @Override public String toString() {
+// return "(\"" + name + "\", \"" + camelCaseName + "\", " + decomposedFlags() + ")";
+// }
+//
+// private String decomposedFlags() {
+// StringBuilder buf = new StringBuilder("TreeBuilder.");
+// buf.append(treeBuilderGroupToName());
+// if ((flags & SPECIAL) != 0) {
+// buf.append(" | SPECIAL");
+// }
+// if ((flags & FOSTER_PARENTING) != 0) {
+// buf.append(" | FOSTER_PARENTING");
+// }
+// if ((flags & SCOPING) != 0) {
+// buf.append(" | SCOPING");
+// }
+// if ((flags & SCOPING_AS_MATHML) != 0) {
+// buf.append(" | SCOPING_AS_MATHML");
+// }
+// if ((flags & SCOPING_AS_SVG) != 0) {
+// buf.append(" | SCOPING_AS_SVG");
+// }
+// if ((flags & OPTIONAL_END_TAG) != 0) {
+// buf.append(" | OPTIONAL_END_TAG");
+// }
+// return buf.toString();
+// }
+//
+// private String constName() {
+// char[] buf = new char[name.length()];
+// for (int i = 0; i < name.length(); i++) {
+// char c = name.charAt(i);
+// if (c == '-') {
+// buf[i] = '_';
+// } else if (c >= '0' && c <= '9') {
+// buf[i] = c;
+// } else {
+// buf[i] = (char) (c - 0x20);
+// }
+// }
+// return new String(buf);
+// }
+//
+// private int hash() {
+// return bufToHash(name.toCharArray(), name.length());
+// }
+//
+// public int compareTo(ElementName other) {
+// int thisHash = this.hash();
+// int otherHash = other.hash();
+// if (thisHash < otherHash) {
+// return -1;
+// } else if (thisHash == otherHash) {
+// return 0;
+// } else {
+// return 1;
+// }
+// }
+//
+// private String treeBuilderGroupToName() {
+// switch (getGroup()) {
+// case TreeBuilder.OTHER:
+// return "OTHER";
+// case TreeBuilder.A:
+// return "A";
+// case TreeBuilder.BASE:
+// return "BASE";
+// case TreeBuilder.BODY:
+// return "BODY";
+// case TreeBuilder.BR:
+// return "BR";
+// case TreeBuilder.BUTTON:
+// return "BUTTON";
+// case TreeBuilder.CAPTION:
+// return "CAPTION";
+// case TreeBuilder.COL:
+// return "COL";
+// case TreeBuilder.COLGROUP:
+// return "COLGROUP";
+// case TreeBuilder.FONT:
+// return "FONT";
+// case TreeBuilder.FORM:
+// return "FORM";
+// case TreeBuilder.FRAME:
+// return "FRAME";
+// case TreeBuilder.FRAMESET:
+// return "FRAMESET";
+// case TreeBuilder.IMAGE:
+// return "IMAGE";
+// case TreeBuilder.INPUT:
+// return "INPUT";
+// case TreeBuilder.ISINDEX:
+// return "ISINDEX";
+// case TreeBuilder.LI:
+// return "LI";
+// case TreeBuilder.LINK_OR_BASEFONT_OR_BGSOUND:
+// return "LINK_OR_BASEFONT_OR_BGSOUND";
+// case TreeBuilder.MATH:
+// return "MATH";
+// case TreeBuilder.META:
+// return "META";
+// case TreeBuilder.SVG:
+// return "SVG";
+// case TreeBuilder.HEAD:
+// return "HEAD";
+// case TreeBuilder.HR:
+// return "HR";
+// case TreeBuilder.HTML:
+// return "HTML";
+// case TreeBuilder.KEYGEN:
+// return "KEYGEN";
+// case TreeBuilder.NOBR:
+// return "NOBR";
+// case TreeBuilder.NOFRAMES:
+// return "NOFRAMES";
+// case TreeBuilder.NOSCRIPT:
+// return "NOSCRIPT";
+// case TreeBuilder.OPTGROUP:
+// return "OPTGROUP";
+// case TreeBuilder.OPTION:
+// return "OPTION";
+// case TreeBuilder.P:
+// return "P";
+// case TreeBuilder.PLAINTEXT:
+// return "PLAINTEXT";
+// case TreeBuilder.SCRIPT:
+// return "SCRIPT";
+// case TreeBuilder.SELECT:
+// return "SELECT";
+// case TreeBuilder.STYLE:
+// return "STYLE";
+// case TreeBuilder.TABLE:
+// return "TABLE";
+// case TreeBuilder.TEXTAREA:
+// return "TEXTAREA";
+// case TreeBuilder.TITLE:
+// return "TITLE";
+// case TreeBuilder.TEMPLATE:
+// return "TEMPLATE";
+// case TreeBuilder.TR:
+// return "TR";
+// case TreeBuilder.XMP:
+// return "XMP";
+// case TreeBuilder.TBODY_OR_THEAD_OR_TFOOT:
+// return "TBODY_OR_THEAD_OR_TFOOT";
+// case TreeBuilder.TD_OR_TH:
+// return "TD_OR_TH";
+// case TreeBuilder.DD_OR_DT:
+// return "DD_OR_DT";
+// case TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6:
+// return "H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6";
+// case TreeBuilder.OBJECT:
+// return "OBJECT";
+// case TreeBuilder.OUTPUT:
+// return "OUTPUT";
+// case TreeBuilder.MARQUEE_OR_APPLET:
+// return "MARQUEE_OR_APPLET";
+// case TreeBuilder.PRE_OR_LISTING:
+// return "PRE_OR_LISTING";
+// case TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U:
+// return "B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U";
+// case TreeBuilder.UL_OR_OL_OR_DL:
+// return "UL_OR_OL_OR_DL";
+// case TreeBuilder.IFRAME:
+// return "IFRAME";
+// case TreeBuilder.NOEMBED:
+// return "NOEMBED";
+// case TreeBuilder.EMBED:
+// return "EMBED";
+// case TreeBuilder.IMG:
+// return "IMG";
+// case TreeBuilder.AREA_OR_WBR:
+// return "AREA_OR_WBR";
+// case TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU:
+// return "DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU";
+// case TreeBuilder.FIELDSET:
+// return "FIELDSET";
+// case TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY:
+// return "ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY";
+// case TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR:
+// return "RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR";
+// case TreeBuilder.RB_OR_RTC:
+// return "RB_OR_RTC";
+// case TreeBuilder.RT_OR_RP:
+// return "RT_OR_RP";
+// case TreeBuilder.PARAM_OR_SOURCE_OR_TRACK:
+// return "PARAM_OR_SOURCE_OR_TRACK";
+// case TreeBuilder.MGLYPH_OR_MALIGNMARK:
+// return "MGLYPH_OR_MALIGNMARK";
+// case TreeBuilder.MI_MO_MN_MS_MTEXT:
+// return "MI_MO_MN_MS_MTEXT";
+// case TreeBuilder.ANNOTATION_XML:
+// return "ANNOTATION_XML";
+// case TreeBuilder.FOREIGNOBJECT_OR_DESC:
+// return "FOREIGNOBJECT_OR_DESC";
+// case TreeBuilder.MENUITEM:
+// return "MENUITEM";
+// }
+// return null;
+// }
+//
+// /**
+// * Regenerate self
+// *
+// * @param args
+// */
+// public static void main(String[] args) {
+// Arrays.sort(ELEMENT_NAMES);
+// for (int i = 1; i < ELEMENT_NAMES.length; i++) {
+// if (ELEMENT_NAMES[i].hash() == ELEMENT_NAMES[i - 1].hash()) {
+// System.err.println("Hash collision: " + ELEMENT_NAMES[i].name
+// + ", " + ELEMENT_NAMES[i - 1].name);
+// return;
+// }
+// }
+// for (int i = 0; i < ELEMENT_NAMES.length; i++) {
+// ElementName el = ELEMENT_NAMES[i];
+// System.out.println("public static final ElementName "
+// + el.constName() + " = new ElementName" + el.toString()
+// + ";");
+// }
+// System.out.println("private final static @NoLength ElementName[] ELEMENT_NAMES = {");
+// for (int i = 0; i < ELEMENT_NAMES.length; i++) {
+// ElementName el = ELEMENT_NAMES[i];
+// System.out.println(el.constName() + ",");
+// }
+// System.out.println("};");
+// System.out.println("private final static int[] ELEMENT_HASHES = {");
+// for (int i = 0; i < ELEMENT_NAMES.length; i++) {
+// ElementName el = ELEMENT_NAMES[i];
+// System.out.println(Integer.toString(el.hash()) + ",");
+// }
+// System.out.println("};");
+// }
+
+ // START GENERATED CODE
+ // The declarations below have the shape printed by the commented-out
+ // main() above, which sorts ELEMENT_NAMES by name hash and prints one
+ // constant per entry in that order. Regenerate them with that code
+ // instead of editing them by hand.
+ public static final ElementName A = new ElementName("a", "a", TreeBuilder.A);
+ public static final ElementName B = new ElementName("b", "b", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName G = new ElementName("g", "g", TreeBuilder.OTHER);
+ public static final ElementName I = new ElementName("i", "i", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName P = new ElementName("p", "p", TreeBuilder.P | SPECIAL | OPTIONAL_END_TAG);
+ public static final ElementName Q = new ElementName("q", "q", TreeBuilder.OTHER);
+ public static final ElementName S = new ElementName("s", "s", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName U = new ElementName("u", "u", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName BR = new ElementName("br", "br", TreeBuilder.BR | SPECIAL);
+ public static final ElementName CI = new ElementName("ci", "ci", TreeBuilder.OTHER);
+ public static final ElementName CN = new ElementName("cn", "cn", TreeBuilder.OTHER);
+ public static final ElementName DD = new ElementName("dd", "dd", TreeBuilder.DD_OR_DT | SPECIAL | OPTIONAL_END_TAG);
+ public static final ElementName DL = new ElementName("dl", "dl", TreeBuilder.UL_OR_OL_OR_DL | SPECIAL);
+ public static final ElementName DT = new ElementName("dt", "dt", TreeBuilder.DD_OR_DT | SPECIAL | OPTIONAL_END_TAG);
+ public static final ElementName EM = new ElementName("em", "em", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName EQ = new ElementName("eq", "eq", TreeBuilder.OTHER);
+ public static final ElementName FN = new ElementName("fn", "fn", TreeBuilder.OTHER);
+ public static final ElementName H1 = new ElementName("h1", "h1", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL);
+ public static final ElementName H2 = new ElementName("h2", "h2", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL);
+ public static final ElementName H3 = new ElementName("h3", "h3", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL);
+ public static final ElementName H4 = new ElementName("h4", "h4", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL);
+ public static final ElementName H5 = new ElementName("h5", "h5", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL);
+ public static final ElementName H6 = new ElementName("h6", "h6", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL);
+ public static final ElementName GT = new ElementName("gt", "gt", TreeBuilder.OTHER);
+ public static final ElementName HR = new ElementName("hr", "hr", TreeBuilder.HR | SPECIAL);
+ public static final ElementName IN = new ElementName("in", "in", TreeBuilder.OTHER);
+ public static final ElementName LI = new ElementName("li", "li", TreeBuilder.LI | SPECIAL | OPTIONAL_END_TAG);
+ public static final ElementName LN = new ElementName("ln", "ln", TreeBuilder.OTHER);
+ public static final ElementName LT = new ElementName("lt", "lt", TreeBuilder.OTHER);
+ public static final ElementName MI = new ElementName("mi", "mi", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML);
+ public static final ElementName MN = new ElementName("mn", "mn", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML);
+ public static final ElementName MO = new ElementName("mo", "mo", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML);
+ public static final ElementName MS = new ElementName("ms", "ms", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML);
+ public static final ElementName OL = new ElementName("ol", "ol", TreeBuilder.UL_OR_OL_OR_DL | SPECIAL);
+ public static final ElementName OR = new ElementName("or", "or", TreeBuilder.OTHER);
+ public static final ElementName PI = new ElementName("pi", "pi", TreeBuilder.OTHER);
+ public static final ElementName RB = new ElementName("rb", "rb", TreeBuilder.RB_OR_RTC | OPTIONAL_END_TAG);
+ public static final ElementName RP = new ElementName("rp", "rp", TreeBuilder.RT_OR_RP | OPTIONAL_END_TAG);
+ public static final ElementName RT = new ElementName("rt", "rt", TreeBuilder.RT_OR_RP | OPTIONAL_END_TAG);
+ public static final ElementName TD = new ElementName("td", "td", TreeBuilder.TD_OR_TH | SPECIAL | SCOPING | OPTIONAL_END_TAG);
+ public static final ElementName TH = new ElementName("th", "th", TreeBuilder.TD_OR_TH | SPECIAL | SCOPING | OPTIONAL_END_TAG);
+ public static final ElementName TR = new ElementName("tr", "tr", TreeBuilder.TR | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG);
+ public static final ElementName TT = new ElementName("tt", "tt", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName UL = new ElementName("ul", "ul", TreeBuilder.UL_OR_OL_OR_DL | SPECIAL);
+ public static final ElementName AND = new ElementName("and", "and", TreeBuilder.OTHER);
+ public static final ElementName ARG = new ElementName("arg", "arg", TreeBuilder.OTHER);
+ public static final ElementName ABS = new ElementName("abs", "abs", TreeBuilder.OTHER);
+ public static final ElementName BIG = new ElementName("big", "big", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName BDO = new ElementName("bdo", "bdo", TreeBuilder.OTHER);
+ public static final ElementName CSC = new ElementName("csc", "csc", TreeBuilder.OTHER);
+ public static final ElementName COL = new ElementName("col", "col", TreeBuilder.COL | SPECIAL);
+ public static final ElementName COS = new ElementName("cos", "cos", TreeBuilder.OTHER);
+ public static final ElementName COT = new ElementName("cot", "cot", TreeBuilder.OTHER);
+ public static final ElementName DEL = new ElementName("del", "del", TreeBuilder.OTHER);
+ public static final ElementName DFN = new ElementName("dfn", "dfn", TreeBuilder.OTHER);
+ public static final ElementName DIR = new ElementName("dir", "dir", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName DIV = new ElementName("div", "div", TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL);
+ public static final ElementName EXP = new ElementName("exp", "exp", TreeBuilder.OTHER);
+ public static final ElementName GCD = new ElementName("gcd", "gcd", TreeBuilder.OTHER);
+ public static final ElementName GEQ = new ElementName("geq", "geq", TreeBuilder.OTHER);
+ public static final ElementName IMG = new ElementName("img", "img", TreeBuilder.IMG | SPECIAL);
+ public static final ElementName INS = new ElementName("ins", "ins", TreeBuilder.OTHER);
+ public static final ElementName INT = new ElementName("int", "int", TreeBuilder.OTHER);
+ public static final ElementName KBD = new ElementName("kbd", "kbd", TreeBuilder.OTHER);
+ public static final ElementName LOG = new ElementName("log", "log", TreeBuilder.OTHER);
+ public static final ElementName LCM = new ElementName("lcm", "lcm", TreeBuilder.OTHER);
+ public static final ElementName LEQ = new ElementName("leq", "leq", TreeBuilder.OTHER);
+ public static final ElementName MTD = new ElementName("mtd", "mtd", TreeBuilder.OTHER);
+ public static final ElementName MIN = new ElementName("min", "min", TreeBuilder.OTHER);
+ public static final ElementName MAP = new ElementName("map", "map", TreeBuilder.OTHER);
+ public static final ElementName MTR = new ElementName("mtr", "mtr", TreeBuilder.OTHER);
+ public static final ElementName MAX = new ElementName("max", "max", TreeBuilder.OTHER);
+ public static final ElementName NEQ = new ElementName("neq", "neq", TreeBuilder.OTHER);
+ public static final ElementName NOT = new ElementName("not", "not", TreeBuilder.OTHER);
+ public static final ElementName NAV = new ElementName("nav", "nav", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName PRE = new ElementName("pre", "pre", TreeBuilder.PRE_OR_LISTING | SPECIAL);
+ public static final ElementName RTC = new ElementName("rtc", "rtc", TreeBuilder.RB_OR_RTC | OPTIONAL_END_TAG);
+ public static final ElementName REM = new ElementName("rem", "rem", TreeBuilder.OTHER);
+ public static final ElementName SUB = new ElementName("sub", "sub", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR);
+ public static final ElementName SEC = new ElementName("sec", "sec", TreeBuilder.OTHER);
+ public static final ElementName SVG = new ElementName("svg", "svg", TreeBuilder.SVG);
+ public static final ElementName SUM = new ElementName("sum", "sum", TreeBuilder.OTHER);
+ public static final ElementName SIN = new ElementName("sin", "sin", TreeBuilder.OTHER);
+ public static final ElementName SEP = new ElementName("sep", "sep", TreeBuilder.OTHER);
+ public static final ElementName SUP = new ElementName("sup", "sup", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR);
+ public static final ElementName SET = new ElementName("set", "set", TreeBuilder.OTHER);
+ public static final ElementName TAN = new ElementName("tan", "tan", TreeBuilder.OTHER);
+ public static final ElementName USE = new ElementName("use", "use", TreeBuilder.OTHER);
+ public static final ElementName VAR = new ElementName("var", "var", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR);
+ public static final ElementName WBR = new ElementName("wbr", "wbr", TreeBuilder.AREA_OR_WBR | SPECIAL);
+ public static final ElementName XMP = new ElementName("xmp", "xmp", TreeBuilder.XMP | SPECIAL);
+ public static final ElementName XOR = new ElementName("xor", "xor", TreeBuilder.OTHER);
+ public static final ElementName AREA = new ElementName("area", "area", TreeBuilder.AREA_OR_WBR | SPECIAL);
+ public static final ElementName ABBR = new ElementName("abbr", "abbr", TreeBuilder.OTHER);
+ public static final ElementName BASE = new ElementName("base", "base", TreeBuilder.BASE | SPECIAL);
+ public static final ElementName BVAR = new ElementName("bvar", "bvar", TreeBuilder.OTHER);
+ public static final ElementName BODY = new ElementName("body", "body", TreeBuilder.BODY | SPECIAL | OPTIONAL_END_TAG);
+ public static final ElementName CARD = new ElementName("card", "card", TreeBuilder.OTHER);
+ public static final ElementName CODE = new ElementName("code", "code", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName CITE = new ElementName("cite", "cite", TreeBuilder.OTHER);
+ public static final ElementName CSCH = new ElementName("csch", "csch", TreeBuilder.OTHER);
+ public static final ElementName COSH = new ElementName("cosh", "cosh", TreeBuilder.OTHER);
+ public static final ElementName COTH = new ElementName("coth", "coth", TreeBuilder.OTHER);
+ public static final ElementName CURL = new ElementName("curl", "curl", TreeBuilder.OTHER);
+ public static final ElementName DESC = new ElementName("desc", "desc", TreeBuilder.FOREIGNOBJECT_OR_DESC | SCOPING_AS_SVG);
+ public static final ElementName DIFF = new ElementName("diff", "diff", TreeBuilder.OTHER);
+ public static final ElementName DEFS = new ElementName("defs", "defs", TreeBuilder.OTHER);
+ public static final ElementName FORM = new ElementName("form", "form", TreeBuilder.FORM | SPECIAL);
+ public static final ElementName FONT = new ElementName("font", "font", TreeBuilder.FONT);
+ public static final ElementName GRAD = new ElementName("grad", "grad", TreeBuilder.OTHER);
+ public static final ElementName HEAD = new ElementName("head", "head", TreeBuilder.HEAD | SPECIAL | OPTIONAL_END_TAG);
+ public static final ElementName HTML = new ElementName("html", "html", TreeBuilder.HTML | SPECIAL | SCOPING | OPTIONAL_END_TAG);
+ public static final ElementName LINE = new ElementName("line", "line", TreeBuilder.OTHER);
+ public static final ElementName LINK = new ElementName("link", "link", TreeBuilder.LINK_OR_BASEFONT_OR_BGSOUND | SPECIAL);
+ public static final ElementName LIST = new ElementName("list", "list", TreeBuilder.OTHER);
+ public static final ElementName META = new ElementName("meta", "meta", TreeBuilder.META | SPECIAL);
+ public static final ElementName MSUB = new ElementName("msub", "msub", TreeBuilder.OTHER);
+ public static final ElementName MODE = new ElementName("mode", "mode", TreeBuilder.OTHER);
+ public static final ElementName MATH = new ElementName("math", "math", TreeBuilder.MATH);
+ public static final ElementName MARK = new ElementName("mark", "mark", TreeBuilder.OTHER);
+ public static final ElementName MASK = new ElementName("mask", "mask", TreeBuilder.OTHER);
+ public static final ElementName MEAN = new ElementName("mean", "mean", TreeBuilder.OTHER);
+ public static final ElementName MAIN = new ElementName("main", "main", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName MSUP = new ElementName("msup", "msup", TreeBuilder.OTHER);
+ public static final ElementName MENU = new ElementName("menu", "menu", TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL);
+ public static final ElementName MROW = new ElementName("mrow", "mrow", TreeBuilder.OTHER);
+ public static final ElementName NONE = new ElementName("none", "none", TreeBuilder.OTHER);
+ public static final ElementName NOBR = new ElementName("nobr", "nobr", TreeBuilder.NOBR);
+ public static final ElementName NEST = new ElementName("nest", "nest", TreeBuilder.OTHER);
+ public static final ElementName PATH = new ElementName("path", "path", TreeBuilder.OTHER);
+ public static final ElementName PLUS = new ElementName("plus", "plus", TreeBuilder.OTHER);
+ public static final ElementName RULE = new ElementName("rule", "rule", TreeBuilder.OTHER);
+ public static final ElementName REAL = new ElementName("real", "real", TreeBuilder.OTHER);
+ public static final ElementName RELN = new ElementName("reln", "reln", TreeBuilder.OTHER);
+ public static final ElementName RECT = new ElementName("rect", "rect", TreeBuilder.OTHER);
+ public static final ElementName ROOT = new ElementName("root", "root", TreeBuilder.OTHER);
+ public static final ElementName RUBY = new ElementName("ruby", "ruby", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR);
+ public static final ElementName SECH = new ElementName("sech", "sech", TreeBuilder.OTHER);
+ public static final ElementName SINH = new ElementName("sinh", "sinh", TreeBuilder.OTHER);
+ public static final ElementName SPAN = new ElementName("span", "span", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR);
+ public static final ElementName SAMP = new ElementName("samp", "samp", TreeBuilder.OTHER);
+ public static final ElementName STOP = new ElementName("stop", "stop", TreeBuilder.OTHER);
+ public static final ElementName SDEV = new ElementName("sdev", "sdev", TreeBuilder.OTHER);
+ public static final ElementName TIME = new ElementName("time", "time", TreeBuilder.OTHER);
+ public static final ElementName TRUE = new ElementName("true", "true", TreeBuilder.OTHER);
+ public static final ElementName TREF = new ElementName("tref", "tref", TreeBuilder.OTHER);
+ public static final ElementName TANH = new ElementName("tanh", "tanh", TreeBuilder.OTHER);
+ public static final ElementName TEXT = new ElementName("text", "text", TreeBuilder.OTHER);
+ public static final ElementName VIEW = new ElementName("view", "view", TreeBuilder.OTHER);
+ public static final ElementName ASIDE = new ElementName("aside", "aside", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName AUDIO = new ElementName("audio", "audio", TreeBuilder.OTHER);
+ public static final ElementName APPLY = new ElementName("apply", "apply", TreeBuilder.OTHER);
+ public static final ElementName EMBED = new ElementName("embed", "embed", TreeBuilder.EMBED | SPECIAL);
+ public static final ElementName FRAME = new ElementName("frame", "frame", TreeBuilder.FRAME | SPECIAL);
+ public static final ElementName FALSE = new ElementName("false", "false", TreeBuilder.OTHER);
+ public static final ElementName FLOOR = new ElementName("floor", "floor", TreeBuilder.OTHER);
+ public static final ElementName GLYPH = new ElementName("glyph", "glyph", TreeBuilder.OTHER);
+ public static final ElementName HKERN = new ElementName("hkern", "hkern", TreeBuilder.OTHER);
+ public static final ElementName IMAGE = new ElementName("image", "image", TreeBuilder.IMAGE);
+ public static final ElementName IDENT = new ElementName("ident", "ident", TreeBuilder.OTHER);
+ public static final ElementName INPUT = new ElementName("input", "input", TreeBuilder.INPUT | SPECIAL);
+ public static final ElementName LABEL = new ElementName("label", "label", TreeBuilder.OTHER);
+ public static final ElementName LIMIT = new ElementName("limit", "limit", TreeBuilder.OTHER);
+ public static final ElementName MFRAC = new ElementName("mfrac", "mfrac", TreeBuilder.OTHER);
+ public static final ElementName MPATH = new ElementName("mpath", "mpath", TreeBuilder.OTHER);
+ public static final ElementName METER = new ElementName("meter", "meter", TreeBuilder.OTHER);
+ public static final ElementName MOVER = new ElementName("mover", "mover", TreeBuilder.OTHER);
+ public static final ElementName MINUS = new ElementName("minus", "minus", TreeBuilder.OTHER);
+ public static final ElementName MROOT = new ElementName("mroot", "mroot", TreeBuilder.OTHER);
+ public static final ElementName MSQRT = new ElementName("msqrt", "msqrt", TreeBuilder.OTHER);
+ public static final ElementName MTEXT = new ElementName("mtext", "mtext", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML);
+ public static final ElementName NOTIN = new ElementName("notin", "notin", TreeBuilder.OTHER);
+ public static final ElementName PIECE = new ElementName("piece", "piece", TreeBuilder.OTHER);
+ public static final ElementName PARAM = new ElementName("param", "param", TreeBuilder.PARAM_OR_SOURCE_OR_TRACK | SPECIAL);
+ public static final ElementName POWER = new ElementName("power", "power", TreeBuilder.OTHER);
+ public static final ElementName REALS = new ElementName("reals", "reals", TreeBuilder.OTHER);
+ public static final ElementName STYLE = new ElementName("style", "style", TreeBuilder.STYLE | SPECIAL);
+ public static final ElementName SMALL = new ElementName("small", "small", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName THEAD = new ElementName("thead", "thead", TreeBuilder.TBODY_OR_THEAD_OR_TFOOT | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG);
+ public static final ElementName TABLE = new ElementName("table", "table", TreeBuilder.TABLE | SPECIAL | FOSTER_PARENTING | SCOPING);
+ public static final ElementName TITLE = new ElementName("title", "title", TreeBuilder.TITLE | SPECIAL | SCOPING_AS_SVG);
+ public static final ElementName TRACK = new ElementName("track", "track", TreeBuilder.PARAM_OR_SOURCE_OR_TRACK | SPECIAL);
+ public static final ElementName TSPAN = new ElementName("tspan", "tspan", TreeBuilder.OTHER);
+ public static final ElementName TIMES = new ElementName("times", "times", TreeBuilder.OTHER);
+ public static final ElementName TFOOT = new ElementName("tfoot", "tfoot", TreeBuilder.TBODY_OR_THEAD_OR_TFOOT | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG);
+ public static final ElementName TBODY = new ElementName("tbody", "tbody", TreeBuilder.TBODY_OR_THEAD_OR_TFOOT | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG);
+ public static final ElementName UNION = new ElementName("union", "union", TreeBuilder.OTHER);
+ public static final ElementName VKERN = new ElementName("vkern", "vkern", TreeBuilder.OTHER);
+ public static final ElementName VIDEO = new ElementName("video", "video", TreeBuilder.OTHER);
+ public static final ElementName ARCSEC = new ElementName("arcsec", "arcsec", TreeBuilder.OTHER);
+ public static final ElementName ARCCSC = new ElementName("arccsc", "arccsc", TreeBuilder.OTHER);
+ public static final ElementName ARCTAN = new ElementName("arctan", "arctan", TreeBuilder.OTHER);
+ public static final ElementName ARCSIN = new ElementName("arcsin", "arcsin", TreeBuilder.OTHER);
+ public static final ElementName ARCCOS = new ElementName("arccos", "arccos", TreeBuilder.OTHER);
+ public static final ElementName APPLET = new ElementName("applet", "applet", TreeBuilder.MARQUEE_OR_APPLET | SPECIAL | SCOPING);
+ public static final ElementName ARCCOT = new ElementName("arccot", "arccot", TreeBuilder.OTHER);
+ public static final ElementName APPROX = new ElementName("approx", "approx", TreeBuilder.OTHER);
+ public static final ElementName BUTTON = new ElementName("button", "button", TreeBuilder.BUTTON | SPECIAL);
+ public static final ElementName CIRCLE = new ElementName("circle", "circle", TreeBuilder.OTHER);
+ public static final ElementName CENTER = new ElementName("center", "center", TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL);
+ public static final ElementName CURSOR = new ElementName("cursor", "cursor", TreeBuilder.OTHER);
+ public static final ElementName CANVAS = new ElementName("canvas", "canvas", TreeBuilder.OTHER);
+ public static final ElementName DIVIDE = new ElementName("divide", "divide", TreeBuilder.OTHER);
+ public static final ElementName DEGREE = new ElementName("degree", "degree", TreeBuilder.OTHER);
+ public static final ElementName DOMAIN = new ElementName("domain", "domain", TreeBuilder.OTHER);
+ public static final ElementName EXISTS = new ElementName("exists", "exists", TreeBuilder.OTHER);
+ public static final ElementName FETILE = new ElementName("fetile", "feTile", TreeBuilder.OTHER);
+ public static final ElementName FIGURE = new ElementName("figure", "figure", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName FORALL = new ElementName("forall", "forall", TreeBuilder.OTHER);
+ public static final ElementName FILTER = new ElementName("filter", "filter", TreeBuilder.OTHER);
+ public static final ElementName FOOTER = new ElementName("footer", "footer", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName HGROUP = new ElementName("hgroup", "hgroup", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName HEADER = new ElementName("header", "header", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName IFRAME = new ElementName("iframe", "iframe", TreeBuilder.IFRAME | SPECIAL);
+ public static final ElementName KEYGEN = new ElementName("keygen", "keygen", TreeBuilder.KEYGEN);
+ public static final ElementName LAMBDA = new ElementName("lambda", "lambda", TreeBuilder.OTHER);
+ public static final ElementName LEGEND = new ElementName("legend", "legend", TreeBuilder.OTHER);
+ public static final ElementName MSPACE = new ElementName("mspace", "mspace", TreeBuilder.OTHER);
+ public static final ElementName MTABLE = new ElementName("mtable", "mtable", TreeBuilder.OTHER);
+ public static final ElementName MSTYLE = new ElementName("mstyle", "mstyle", TreeBuilder.OTHER);
+ public static final ElementName MGLYPH = new ElementName("mglyph", "mglyph", TreeBuilder.MGLYPH_OR_MALIGNMARK);
+ public static final ElementName MEDIAN = new ElementName("median", "median", TreeBuilder.OTHER);
+ public static final ElementName MUNDER = new ElementName("munder", "munder", TreeBuilder.OTHER);
+ public static final ElementName MARKER = new ElementName("marker", "marker", TreeBuilder.OTHER);
+ public static final ElementName MERROR = new ElementName("merror", "merror", TreeBuilder.OTHER);
+ public static final ElementName MOMENT = new ElementName("moment", "moment", TreeBuilder.OTHER);
+ public static final ElementName MATRIX = new ElementName("matrix", "matrix", TreeBuilder.OTHER);
+ public static final ElementName OPTION = new ElementName("option", "option", TreeBuilder.OPTION | OPTIONAL_END_TAG);
+ public static final ElementName OBJECT = new ElementName("object", "object", TreeBuilder.OBJECT | SPECIAL | SCOPING);
+ public static final ElementName OUTPUT = new ElementName("output", "output", TreeBuilder.OUTPUT);
+ public static final ElementName PRIMES = new ElementName("primes", "primes", TreeBuilder.OTHER);
+ public static final ElementName SOURCE = new ElementName("source", "source", TreeBuilder.PARAM_OR_SOURCE_OR_TRACK);
+ public static final ElementName STRIKE = new ElementName("strike", "strike", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName STRONG = new ElementName("strong", "strong", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U);
+ public static final ElementName SWITCH = new ElementName("switch", "switch", TreeBuilder.OTHER);
+ public static final ElementName SYMBOL = new ElementName("symbol", "symbol", TreeBuilder.OTHER);
+ public static final ElementName SELECT = new ElementName("select", "select", TreeBuilder.SELECT | SPECIAL);
+ public static final ElementName SUBSET = new ElementName("subset", "subset", TreeBuilder.OTHER);
+ public static final ElementName SCRIPT = new ElementName("script", "script", TreeBuilder.SCRIPT | SPECIAL);
+ public static final ElementName TBREAK = new ElementName("tbreak", "tbreak", TreeBuilder.OTHER);
+ public static final ElementName VECTOR = new ElementName("vector", "vector", TreeBuilder.OTHER);
+ public static final ElementName ARTICLE = new ElementName("article", "article", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName ANIMATE = new ElementName("animate", "animate", TreeBuilder.OTHER);
+ public static final ElementName ARCSECH = new ElementName("arcsech", "arcsech", TreeBuilder.OTHER);
+ public static final ElementName ARCCSCH = new ElementName("arccsch", "arccsch", TreeBuilder.OTHER);
+ public static final ElementName ARCTANH = new ElementName("arctanh", "arctanh", TreeBuilder.OTHER);
+ public static final ElementName ARCSINH = new ElementName("arcsinh", "arcsinh", TreeBuilder.OTHER);
+ public static final ElementName ARCCOSH = new ElementName("arccosh", "arccosh", TreeBuilder.OTHER);
+ public static final ElementName ARCCOTH = new ElementName("arccoth", "arccoth", TreeBuilder.OTHER);
+ public static final ElementName ACRONYM = new ElementName("acronym", "acronym", TreeBuilder.OTHER);
+ public static final ElementName ADDRESS = new ElementName("address", "address", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName BGSOUND = new ElementName("bgsound", "bgsound", TreeBuilder.LINK_OR_BASEFONT_OR_BGSOUND | SPECIAL);
+ public static final ElementName COMPOSE = new ElementName("compose", "compose", TreeBuilder.OTHER);
+ public static final ElementName CEILING = new ElementName("ceiling", "ceiling", TreeBuilder.OTHER);
+ public static final ElementName CSYMBOL = new ElementName("csymbol", "csymbol", TreeBuilder.OTHER);
+ public static final ElementName CAPTION = new ElementName("caption", "caption", TreeBuilder.CAPTION | SPECIAL | SCOPING);
+ public static final ElementName DISCARD = new ElementName("discard", "discard", TreeBuilder.OTHER);
+ public static final ElementName DECLARE = new ElementName("declare", "declare", TreeBuilder.OTHER);
+ public static final ElementName DETAILS = new ElementName("details", "details", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName ELLIPSE = new ElementName("ellipse", "ellipse", TreeBuilder.OTHER);
+ public static final ElementName FEFUNCA = new ElementName("fefunca", "feFuncA", TreeBuilder.OTHER);
+ public static final ElementName FEFUNCB = new ElementName("fefuncb", "feFuncB", TreeBuilder.OTHER);
+ public static final ElementName FEBLEND = new ElementName("feblend", "feBlend", TreeBuilder.OTHER);
+ public static final ElementName FEFLOOD = new ElementName("feflood", "feFlood", TreeBuilder.OTHER);
+ public static final ElementName FEIMAGE = new ElementName("feimage", "feImage", TreeBuilder.OTHER);
+ public static final ElementName FEMERGE = new ElementName("femerge", "feMerge", TreeBuilder.OTHER);
+ public static final ElementName FEFUNCG = new ElementName("fefuncg", "feFuncG", TreeBuilder.OTHER);
+ public static final ElementName FEFUNCR = new ElementName("fefuncr", "feFuncR", TreeBuilder.OTHER);
+ public static final ElementName HANDLER = new ElementName("handler", "handler", TreeBuilder.OTHER);
+ public static final ElementName INVERSE = new ElementName("inverse", "inverse", TreeBuilder.OTHER);
+ public static final ElementName IMPLIES = new ElementName("implies", "implies", TreeBuilder.OTHER);
+ public static final ElementName ISINDEX = new ElementName("isindex", "isindex", TreeBuilder.ISINDEX | SPECIAL);
+ public static final ElementName LOGBASE = new ElementName("logbase", "logbase", TreeBuilder.OTHER);
+ public static final ElementName LISTING = new ElementName("listing", "listing", TreeBuilder.PRE_OR_LISTING | SPECIAL);
+ public static final ElementName MFENCED = new ElementName("mfenced", "mfenced", TreeBuilder.OTHER);
+ public static final ElementName MPADDED = new ElementName("mpadded", "mpadded", TreeBuilder.OTHER);
+ public static final ElementName MARQUEE = new ElementName("marquee", "marquee", TreeBuilder.MARQUEE_OR_APPLET | SPECIAL | SCOPING);
+ public static final ElementName MACTION = new ElementName("maction", "maction", TreeBuilder.OTHER);
+ public static final ElementName MSUBSUP = new ElementName("msubsup", "msubsup", TreeBuilder.OTHER);
+ public static final ElementName NOEMBED = new ElementName("noembed", "noembed", TreeBuilder.NOEMBED | SPECIAL);
+ public static final ElementName POLYGON = new ElementName("polygon", "polygon", TreeBuilder.OTHER);
+ public static final ElementName PATTERN = new ElementName("pattern", "pattern", TreeBuilder.OTHER);
+ public static final ElementName PICTURE = new ElementName("picture", "picture", TreeBuilder.OTHER);
+ public static final ElementName PRODUCT = new ElementName("product", "product", TreeBuilder.OTHER);
+ public static final ElementName SETDIFF = new ElementName("setdiff", "setdiff", TreeBuilder.OTHER);
+ public static final ElementName SECTION = new ElementName("section", "section", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName SUMMARY = new ElementName("summary", "summary", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName TENDSTO = new ElementName("tendsto", "tendsto", TreeBuilder.OTHER);
+ public static final ElementName UPLIMIT = new ElementName("uplimit", "uplimit", TreeBuilder.OTHER);
+ public static final ElementName ALTGLYPH = new ElementName("altglyph", "altGlyph", TreeBuilder.OTHER);
+ public static final ElementName BASEFONT = new ElementName("basefont", "basefont", TreeBuilder.LINK_OR_BASEFONT_OR_BGSOUND | SPECIAL);
+ public static final ElementName CLIPPATH = new ElementName("clippath", "clipPath", TreeBuilder.OTHER);
+ public static final ElementName CODOMAIN = new ElementName("codomain", "codomain", TreeBuilder.OTHER);
+ public static final ElementName COLGROUP = new ElementName("colgroup", "colgroup", TreeBuilder.COLGROUP | SPECIAL | OPTIONAL_END_TAG);
+ public static final ElementName EMPTYSET = new ElementName("emptyset", "emptyset", TreeBuilder.OTHER);
+ public static final ElementName FACTOROF = new ElementName("factorof", "factorof", TreeBuilder.OTHER);
+ public static final ElementName FIELDSET = new ElementName("fieldset", "fieldset", TreeBuilder.FIELDSET | SPECIAL);
+ public static final ElementName FRAMESET = new ElementName("frameset", "frameset", TreeBuilder.FRAMESET | SPECIAL);
+ public static final ElementName FEOFFSET = new ElementName("feoffset", "feOffset", TreeBuilder.OTHER);
+ public static final ElementName GLYPHREF = new ElementName("glyphref", "glyphRef", TreeBuilder.OTHER);
+ public static final ElementName INTERVAL = new ElementName("interval", "interval", TreeBuilder.OTHER);
+ public static final ElementName INTEGERS = new ElementName("integers", "integers", TreeBuilder.OTHER);
+ public static final ElementName INFINITY = new ElementName("infinity", "infinity", TreeBuilder.OTHER);
+ public static final ElementName LISTENER = new ElementName("listener", "listener", TreeBuilder.OTHER);
+ public static final ElementName LOWLIMIT = new ElementName("lowlimit", "lowlimit", TreeBuilder.OTHER);
+ public static final ElementName METADATA = new ElementName("metadata", "metadata", TreeBuilder.OTHER);
+ public static final ElementName MENCLOSE = new ElementName("menclose", "menclose", TreeBuilder.OTHER);
+ public static final ElementName MENUITEM = new ElementName("menuitem", "menuitem", TreeBuilder.MENUITEM);
+ public static final ElementName MPHANTOM = new ElementName("mphantom", "mphantom", TreeBuilder.OTHER);
+ public static final ElementName NOFRAMES = new ElementName("noframes", "noframes", TreeBuilder.NOFRAMES | SPECIAL);
+ public static final ElementName NOSCRIPT = new ElementName("noscript", "noscript", TreeBuilder.NOSCRIPT | SPECIAL);
+ public static final ElementName OPTGROUP = new ElementName("optgroup", "optgroup", TreeBuilder.OPTGROUP | OPTIONAL_END_TAG);
+ public static final ElementName POLYLINE = new ElementName("polyline", "polyline", TreeBuilder.OTHER);
+ public static final ElementName PREFETCH = new ElementName("prefetch", "prefetch", TreeBuilder.OTHER);
+ public static final ElementName PROGRESS = new ElementName("progress", "progress", TreeBuilder.OTHER);
+ public static final ElementName PRSUBSET = new ElementName("prsubset", "prsubset", TreeBuilder.OTHER);
+ public static final ElementName QUOTIENT = new ElementName("quotient", "quotient", TreeBuilder.OTHER);
+ public static final ElementName SELECTOR = new ElementName("selector", "selector", TreeBuilder.OTHER);
+ public static final ElementName TEXTAREA = new ElementName("textarea", "textarea", TreeBuilder.TEXTAREA | SPECIAL);
+ public static final ElementName TEMPLATE = new ElementName("template", "template", TreeBuilder.TEMPLATE | SPECIAL | SCOPING);
+ public static final ElementName TEXTPATH = new ElementName("textpath", "textPath", TreeBuilder.OTHER);
+ public static final ElementName VARIANCE = new ElementName("variance", "variance", TreeBuilder.OTHER);
+ public static final ElementName ANIMATION = new ElementName("animation", "animation", TreeBuilder.OTHER);
+ public static final ElementName CONJUGATE = new ElementName("conjugate", "conjugate", TreeBuilder.OTHER);
+ public static final ElementName CONDITION = new ElementName("condition", "condition", TreeBuilder.OTHER);
+ public static final ElementName COMPLEXES = new ElementName("complexes", "complexes", TreeBuilder.OTHER);
+ public static final ElementName FONT_FACE = new ElementName("font-face", "font-face", TreeBuilder.OTHER);
+ public static final ElementName FACTORIAL = new ElementName("factorial", "factorial", TreeBuilder.OTHER);
+ public static final ElementName INTERSECT = new ElementName("intersect", "intersect", TreeBuilder.OTHER);
+ public static final ElementName IMAGINARY = new ElementName("imaginary", "imaginary", TreeBuilder.OTHER);
+ public static final ElementName LAPLACIAN = new ElementName("laplacian", "laplacian", TreeBuilder.OTHER);
+ public static final ElementName MATRIXROW = new ElementName("matrixrow", "matrixrow", TreeBuilder.OTHER);
+ public static final ElementName NOTSUBSET = new ElementName("notsubset", "notsubset", TreeBuilder.OTHER);
+ public static final ElementName OTHERWISE = new ElementName("otherwise", "otherwise", TreeBuilder.OTHER);
+ public static final ElementName PIECEWISE = new ElementName("piecewise", "piecewise", TreeBuilder.OTHER);
+ public static final ElementName PLAINTEXT = new ElementName("plaintext", "plaintext", TreeBuilder.PLAINTEXT | SPECIAL);
+ public static final ElementName RATIONALS = new ElementName("rationals", "rationals", TreeBuilder.OTHER);
+ public static final ElementName SEMANTICS = new ElementName("semantics", "semantics", TreeBuilder.OTHER);
+ public static final ElementName TRANSPOSE = new ElementName("transpose", "transpose", TreeBuilder.OTHER);
+ public static final ElementName ANNOTATION = new ElementName("annotation", "annotation", TreeBuilder.OTHER);
+ public static final ElementName BLOCKQUOTE = new ElementName("blockquote", "blockquote", TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL);
+ public static final ElementName DIVERGENCE = new ElementName("divergence", "divergence", TreeBuilder.OTHER);
+ public static final ElementName EULERGAMMA = new ElementName("eulergamma", "eulergamma", TreeBuilder.OTHER);
+ public static final ElementName EQUIVALENT = new ElementName("equivalent", "equivalent", TreeBuilder.OTHER);
+ public static final ElementName FIGCAPTION = new ElementName("figcaption", "figcaption", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL);
+ public static final ElementName IMAGINARYI = new ElementName("imaginaryi", "imaginaryi", TreeBuilder.OTHER);
+ public static final ElementName MALIGNMARK = new ElementName("malignmark", "malignmark", TreeBuilder.MGLYPH_OR_MALIGNMARK);
+ public static final ElementName MUNDEROVER = new ElementName("munderover", "munderover", TreeBuilder.OTHER);
+ public static final ElementName MLABELEDTR = new ElementName("mlabeledtr", "mlabeledtr", TreeBuilder.OTHER);
+ public static final ElementName NOTANUMBER = new ElementName("notanumber", "notanumber", TreeBuilder.OTHER);
+ public static final ElementName SOLIDCOLOR = new ElementName("solidcolor", "solidcolor", TreeBuilder.OTHER);
+ public static final ElementName ALTGLYPHDEF = new ElementName("altglyphdef", "altGlyphDef", TreeBuilder.OTHER);
+ public static final ElementName DETERMINANT = new ElementName("determinant", "determinant", TreeBuilder.OTHER);
+ public static final ElementName FEMERGENODE = new ElementName("femergenode", "feMergeNode", TreeBuilder.OTHER);
+ public static final ElementName FECOMPOSITE = new ElementName("fecomposite", "feComposite", TreeBuilder.OTHER);
+ public static final ElementName FESPOTLIGHT = new ElementName("fespotlight", "feSpotLight", TreeBuilder.OTHER);
+ public static final ElementName MALIGNGROUP = new ElementName("maligngroup", "maligngroup", TreeBuilder.OTHER);
+ public static final ElementName MPRESCRIPTS = new ElementName("mprescripts", "mprescripts", TreeBuilder.OTHER);
+ public static final ElementName MOMENTABOUT = new ElementName("momentabout", "momentabout", TreeBuilder.OTHER);
+ public static final ElementName NOTPRSUBSET = new ElementName("notprsubset", "notprsubset", TreeBuilder.OTHER);
+ public static final ElementName PARTIALDIFF = new ElementName("partialdiff", "partialdiff", TreeBuilder.OTHER);
+ public static final ElementName ALTGLYPHITEM = new ElementName("altglyphitem", "altGlyphItem", TreeBuilder.OTHER);
+ public static final ElementName ANIMATECOLOR = new ElementName("animatecolor", "animateColor", TreeBuilder.OTHER);
+ public static final ElementName DATATEMPLATE = new ElementName("datatemplate", "datatemplate", TreeBuilder.OTHER);
+ public static final ElementName EXPONENTIALE = new ElementName("exponentiale", "exponentiale", TreeBuilder.OTHER);
+ public static final ElementName FETURBULENCE = new ElementName("feturbulence", "feTurbulence", TreeBuilder.OTHER);
+ public static final ElementName FEPOINTLIGHT = new ElementName("fepointlight", "fePointLight", TreeBuilder.OTHER);
+ public static final ElementName FEDROPSHADOW = new ElementName("fedropshadow", "feDropShadow", TreeBuilder.OTHER);
+ public static final ElementName FEMORPHOLOGY = new ElementName("femorphology", "feMorphology", TreeBuilder.OTHER);
+ public static final ElementName OUTERPRODUCT = new ElementName("outerproduct", "outerproduct", TreeBuilder.OTHER);
+ public static final ElementName ANIMATEMOTION = new ElementName("animatemotion", "animateMotion", TreeBuilder.OTHER);
+ public static final ElementName COLOR_PROFILE = new ElementName("color-profile", "color-profile", TreeBuilder.OTHER);
+ public static final ElementName FONT_FACE_SRC = new ElementName("font-face-src", "font-face-src", TreeBuilder.OTHER);
+ public static final ElementName FONT_FACE_URI = new ElementName("font-face-uri", "font-face-uri", TreeBuilder.OTHER);
+ public static final ElementName FOREIGNOBJECT = new ElementName("foreignobject", "foreignObject", TreeBuilder.FOREIGNOBJECT_OR_DESC | SCOPING_AS_SVG);
+ public static final ElementName FECOLORMATRIX = new ElementName("fecolormatrix", "feColorMatrix", TreeBuilder.OTHER);
+ public static final ElementName MISSING_GLYPH = new ElementName("missing-glyph", "missing-glyph", TreeBuilder.OTHER);
+ public static final ElementName MMULTISCRIPTS = new ElementName("mmultiscripts", "mmultiscripts", TreeBuilder.OTHER);
+ public static final ElementName SCALARPRODUCT = new ElementName("scalarproduct", "scalarproduct", TreeBuilder.OTHER);
+ public static final ElementName VECTORPRODUCT = new ElementName("vectorproduct", "vectorproduct", TreeBuilder.OTHER);
+ public static final ElementName ANNOTATION_XML = new ElementName("annotation-xml", "annotation-xml", TreeBuilder.ANNOTATION_XML | SCOPING_AS_MATHML);
+ public static final ElementName DEFINITION_SRC = new ElementName("definition-src", "definition-src", TreeBuilder.OTHER);
+ public static final ElementName FONT_FACE_NAME = new ElementName("font-face-name", "font-face-name", TreeBuilder.OTHER);
+ public static final ElementName FEGAUSSIANBLUR = new ElementName("fegaussianblur", "feGaussianBlur", TreeBuilder.OTHER);
+ public static final ElementName FEDISTANTLIGHT = new ElementName("fedistantlight", "feDistantLight", TreeBuilder.OTHER);
+ public static final ElementName LINEARGRADIENT = new ElementName("lineargradient", "linearGradient", TreeBuilder.OTHER);
+ public static final ElementName NATURALNUMBERS = new ElementName("naturalnumbers", "naturalnumbers", TreeBuilder.OTHER);
+ public static final ElementName RADIALGRADIENT = new ElementName("radialgradient", "radialGradient", TreeBuilder.OTHER);
+ public static final ElementName ANIMATETRANSFORM = new ElementName("animatetransform", "animateTransform", TreeBuilder.OTHER);
+ public static final ElementName CARTESIANPRODUCT = new ElementName("cartesianproduct", "cartesianproduct", TreeBuilder.OTHER);
+ public static final ElementName FONT_FACE_FORMAT = new ElementName("font-face-format", "font-face-format", TreeBuilder.OTHER);
+ public static final ElementName FECONVOLVEMATRIX = new ElementName("feconvolvematrix", "feConvolveMatrix", TreeBuilder.OTHER);
+ public static final ElementName FEDIFFUSELIGHTING = new ElementName("fediffuselighting", "feDiffuseLighting", TreeBuilder.OTHER);
+ public static final ElementName FEDISPLACEMENTMAP = new ElementName("fedisplacementmap", "feDisplacementMap", TreeBuilder.OTHER);
+ public static final ElementName FESPECULARLIGHTING = new ElementName("fespecularlighting", "feSpecularLighting", TreeBuilder.OTHER);
+ public static final ElementName DOMAINOFAPPLICATION = new ElementName("domainofapplication", "domainofapplication", TreeBuilder.OTHER);
+ public static final ElementName FECOMPONENTTRANSFER = new ElementName("fecomponenttransfer", "feComponentTransfer", TreeBuilder.OTHER);
+ // Known element names, ordered by ascending name hash. Entry i appears to
+ // pair with ELEMENT_HASHES[i], so both tables must be reordered together.
+ // PICTURE (hash 251841204) belongs before POLYGON (hash 252132601).
+ private final static @NoLength ElementName[] ELEMENT_NAMES = {
+ A,
+ B,
+ G,
+ I,
+ P,
+ Q,
+ S,
+ U,
+ BR,
+ CI,
+ CN,
+ DD,
+ DL,
+ DT,
+ EM,
+ EQ,
+ FN,
+ H1,
+ H2,
+ H3,
+ H4,
+ H5,
+ H6,
+ GT,
+ HR,
+ IN,
+ LI,
+ LN,
+ LT,
+ MI,
+ MN,
+ MO,
+ MS,
+ OL,
+ OR,
+ PI,
+ RB,
+ RP,
+ RT,
+ TD,
+ TH,
+ TR,
+ TT,
+ UL,
+ AND,
+ ARG,
+ ABS,
+ BIG,
+ BDO,
+ CSC,
+ COL,
+ COS,
+ COT,
+ DEL,
+ DFN,
+ DIR,
+ DIV,
+ EXP,
+ GCD,
+ GEQ,
+ IMG,
+ INS,
+ INT,
+ KBD,
+ LOG,
+ LCM,
+ LEQ,
+ MTD,
+ MIN,
+ MAP,
+ MTR,
+ MAX,
+ NEQ,
+ NOT,
+ NAV,
+ PRE,
+ RTC,
+ REM,
+ SUB,
+ SEC,
+ SVG,
+ SUM,
+ SIN,
+ SEP,
+ SUP,
+ SET,
+ TAN,
+ USE,
+ VAR,
+ WBR,
+ XMP,
+ XOR,
+ AREA,
+ ABBR,
+ BASE,
+ BVAR,
+ BODY,
+ CARD,
+ CODE,
+ CITE,
+ CSCH,
+ COSH,
+ COTH,
+ CURL,
+ DESC,
+ DIFF,
+ DEFS,
+ FORM,
+ FONT,
+ GRAD,
+ HEAD,
+ HTML,
+ LINE,
+ LINK,
+ LIST,
+ META,
+ MSUB,
+ MODE,
+ MATH,
+ MARK,
+ MASK,
+ MEAN,
+ MAIN,
+ MSUP,
+ MENU,
+ MROW,
+ NONE,
+ NOBR,
+ NEST,
+ PATH,
+ PLUS,
+ RULE,
+ REAL,
+ RELN,
+ RECT,
+ ROOT,
+ RUBY,
+ SECH,
+ SINH,
+ SPAN,
+ SAMP,
+ STOP,
+ SDEV,
+ TIME,
+ TRUE,
+ TREF,
+ TANH,
+ TEXT,
+ VIEW,
+ ASIDE,
+ AUDIO,
+ APPLY,
+ EMBED,
+ FRAME,
+ FALSE,
+ FLOOR,
+ GLYPH,
+ HKERN,
+ IMAGE,
+ IDENT,
+ INPUT,
+ LABEL,
+ LIMIT,
+ MFRAC,
+ MPATH,
+ METER,
+ MOVER,
+ MINUS,
+ MROOT,
+ MSQRT,
+ MTEXT,
+ NOTIN,
+ PIECE,
+ PARAM,
+ POWER,
+ REALS,
+ STYLE,
+ SMALL,
+ THEAD,
+ TABLE,
+ TITLE,
+ TRACK,
+ TSPAN,
+ TIMES,
+ TFOOT,
+ TBODY,
+ UNION,
+ VKERN,
+ VIDEO,
+ ARCSEC,
+ ARCCSC,
+ ARCTAN,
+ ARCSIN,
+ ARCCOS,
+ APPLET,
+ ARCCOT,
+ APPROX,
+ BUTTON,
+ CIRCLE,
+ CENTER,
+ CURSOR,
+ CANVAS,
+ DIVIDE,
+ DEGREE,
+ DOMAIN,
+ EXISTS,
+ FETILE,
+ FIGURE,
+ FORALL,
+ FILTER,
+ FOOTER,
+ HGROUP,
+ HEADER,
+ IFRAME,
+ KEYGEN,
+ LAMBDA,
+ LEGEND,
+ MSPACE,
+ MTABLE,
+ MSTYLE,
+ MGLYPH,
+ MEDIAN,
+ MUNDER,
+ MARKER,
+ MERROR,
+ MOMENT,
+ MATRIX,
+ OPTION,
+ OBJECT,
+ OUTPUT,
+ PRIMES,
+ SOURCE,
+ STRIKE,
+ STRONG,
+ SWITCH,
+ SYMBOL,
+ SELECT,
+ SUBSET,
+ SCRIPT,
+ TBREAK,
+ VECTOR,
+ ARTICLE,
+ ANIMATE,
+ ARCSECH,
+ ARCCSCH,
+ ARCTANH,
+ ARCSINH,
+ ARCCOSH,
+ ARCCOTH,
+ ACRONYM,
+ ADDRESS,
+ BGSOUND,
+ COMPOSE,
+ CEILING,
+ CSYMBOL,
+ CAPTION,
+ DISCARD,
+ DECLARE,
+ DETAILS,
+ ELLIPSE,
+ FEFUNCA,
+ FEFUNCB,
+ FEBLEND,
+ FEFLOOD,
+ FEIMAGE,
+ FEMERGE,
+ FEFUNCG,
+ FEFUNCR,
+ HANDLER,
+ INVERSE,
+ IMPLIES,
+ ISINDEX,
+ LOGBASE,
+ LISTING,
+ MFENCED,
+ MPADDED,
+ MARQUEE,
+ MACTION,
+ MSUBSUP,
+ NOEMBED,
+ PICTURE,
+ POLYGON,
+ PATTERN,
+ PRODUCT,
+ SETDIFF,
+ SECTION,
+ SUMMARY,
+ TENDSTO,
+ UPLIMIT,
+ ALTGLYPH,
+ BASEFONT,
+ CLIPPATH,
+ CODOMAIN,
+ COLGROUP,
+ EMPTYSET,
+ FACTOROF,
+ FIELDSET,
+ FRAMESET,
+ FEOFFSET,
+ GLYPHREF,
+ INTERVAL,
+ INTEGERS,
+ INFINITY,
+ LISTENER,
+ LOWLIMIT,
+ METADATA,
+ MENCLOSE,
+ MENUITEM,
+ MPHANTOM,
+ NOFRAMES,
+ NOSCRIPT,
+ OPTGROUP,
+ POLYLINE,
+ PREFETCH,
+ PROGRESS,
+ PRSUBSET,
+ QUOTIENT,
+ SELECTOR,
+ TEXTAREA,
+ TEMPLATE,
+ TEXTPATH,
+ VARIANCE,
+ ANIMATION,
+ CONJUGATE,
+ CONDITION,
+ COMPLEXES,
+ FONT_FACE,
+ FACTORIAL,
+ INTERSECT,
+ IMAGINARY,
+ LAPLACIAN,
+ MATRIXROW,
+ NOTSUBSET,
+ OTHERWISE,
+ PIECEWISE,
+ PLAINTEXT,
+ RATIONALS,
+ SEMANTICS,
+ TRANSPOSE,
+ ANNOTATION,
+ BLOCKQUOTE,
+ DIVERGENCE,
+ EULERGAMMA,
+ EQUIVALENT,
+ FIGCAPTION,
+ IMAGINARYI,
+ MALIGNMARK,
+ MUNDEROVER,
+ MLABELEDTR,
+ NOTANUMBER,
+ SOLIDCOLOR,
+ ALTGLYPHDEF,
+ DETERMINANT,
+ FEMERGENODE,
+ FECOMPOSITE,
+ FESPOTLIGHT,
+ MALIGNGROUP,
+ MPRESCRIPTS,
+ MOMENTABOUT,
+ NOTPRSUBSET,
+ PARTIALDIFF,
+ ALTGLYPHITEM,
+ ANIMATECOLOR,
+ DATATEMPLATE,
+ EXPONENTIALE,
+ FETURBULENCE,
+ FEPOINTLIGHT,
+ FEDROPSHADOW,
+ FEMORPHOLOGY,
+ OUTERPRODUCT,
+ ANIMATEMOTION,
+ COLOR_PROFILE,
+ FONT_FACE_SRC,
+ FONT_FACE_URI,
+ FOREIGNOBJECT,
+ FECOLORMATRIX,
+ MISSING_GLYPH,
+ MMULTISCRIPTS,
+ SCALARPRODUCT,
+ VECTORPRODUCT,
+ ANNOTATION_XML,
+ DEFINITION_SRC,
+ FONT_FACE_NAME,
+ FEGAUSSIANBLUR,
+ FEDISTANTLIGHT,
+ LINEARGRADIENT,
+ NATURALNUMBERS,
+ RADIALGRADIENT,
+ ANIMATETRANSFORM,
+ CARTESIANPRODUCT,
+ FONT_FACE_FORMAT,
+ FECONVOLVEMATRIX,
+ FEDIFFUSELIGHTING,
+ FEDISPLACEMENTMAP,
+ FESPECULARLIGHTING,
+ DOMAINOFAPPLICATION,
+ FECOMPONENTTRANSFER,
+ };
+ // Name hashes in strictly ascending order, parallel to ELEMENT_NAMES.
+ // NOTE(review): presumably probed with a binary search elsewhere in this
+ // class, which only works on a sorted array - confirm against the lookup.
+ private final static int[] ELEMENT_HASHES = {
+ 1057,
+ 1090,
+ 1255,
+ 1321,
+ 1552,
+ 1585,
+ 1651,
+ 1717,
+ 68162,
+ 68899,
+ 69059,
+ 69764,
+ 70020,
+ 70276,
+ 71077,
+ 71205,
+ 72134,
+ 72232,
+ 72264,
+ 72296,
+ 72328,
+ 72360,
+ 72392,
+ 73351,
+ 74312,
+ 75209,
+ 78124,
+ 78284,
+ 78476,
+ 79149,
+ 79309,
+ 79341,
+ 79469,
+ 81295,
+ 81487,
+ 82224,
+ 84050,
+ 84498,
+ 84626,
+ 86164,
+ 86292,
+ 86612,
+ 86676,
+ 87445,
+ 3183041,
+ 3186241,
+ 3198017,
+ 3218722,
+ 3226754,
+ 3247715,
+ 3256803,
+ 3263971,
+ 3264995,
+ 3289252,
+ 3291332,
+ 3295524,
+ 3299620,
+ 3326725,
+ 3379303,
+ 3392679,
+ 3448233,
+ 3460553,
+ 3461577,
+ 3510347,
+ 3546604,
+ 3552364,
+ 3556524,
+ 3576461,
+ 3586349,
+ 3588141,
+ 3590797,
+ 3596333,
+ 3622062,
+ 3625454,
+ 3627054,
+ 3675728,
+ 3739282,
+ 3749042,
+ 3771059,
+ 3771571,
+ 3776211,
+ 3782323,
+ 3782963,
+ 3784883,
+ 3785395,
+ 3788979,
+ 3815476,
+ 3839605,
+ 3885110,
+ 3917911,
+ 3948984,
+ 3951096,
+ 135304769,
+ 135858241,
+ 136498210,
+ 136906434,
+ 137138658,
+ 137512995,
+ 137531875,
+ 137548067,
+ 137629283,
+ 137645539,
+ 137646563,
+ 137775779,
+ 138529956,
+ 138615076,
+ 139040932,
+ 140954086,
+ 141179366,
+ 141690439,
+ 142738600,
+ 143013512,
+ 146979116,
+ 147175724,
+ 147475756,
+ 147902637,
+ 147936877,
+ 148017645,
+ 148131885,
+ 148228141,
+ 148229165,
+ 148309165,
+ 148317229,
+ 148395629,
+ 148551853,
+ 148618829,
+ 149076462,
+ 149490158,
+ 149572782,
+ 151277616,
+ 151639440,
+ 153268914,
+ 153486514,
+ 153563314,
+ 153750706,
+ 153763314,
+ 153914034,
+ 154406067,
+ 154417459,
+ 154600979,
+ 154678323,
+ 154680979,
+ 154866835,
+ 155366708,
+ 155375188,
+ 155391572,
+ 155465780,
+ 155869364,
+ 158045494,
+ 168988979,
+ 169321621,
+ 169652752,
+ 173151309,
+ 174240818,
+ 174247297,
+ 174669292,
+ 175391532,
+ 176638123,
+ 177380397,
+ 177879204,
+ 177886734,
+ 180753473,
+ 181020073,
+ 181503558,
+ 181686320,
+ 181999237,
+ 181999311,
+ 182048201,
+ 182074866,
+ 182078003,
+ 182083764,
+ 182920847,
+ 184716457,
+ 184976961,
+ 185145071,
+ 187281445,
+ 187872052,
+ 188100653,
+ 188875944,
+ 188919873,
+ 188920457,
+ 189107250,
+ 189203987,
+ 189371817,
+ 189414886,
+ 189567458,
+ 190266670,
+ 191318187,
+ 191337609,
+ 202479203,
+ 202493027,
+ 202835587,
+ 202843747,
+ 203013219,
+ 203036048,
+ 203045987,
+ 203177552,
+ 203898516,
+ 204648562,
+ 205067918,
+ 205078130,
+ 205096654,
+ 205689142,
+ 205690439,
+ 205988909,
+ 207213161,
+ 207794484,
+ 207800999,
+ 208023602,
+ 208213644,
+ 208213647,
+ 210261490,
+ 210310273,
+ 210940978,
+ 213325049,
+ 213946445,
+ 214055079,
+ 215125040,
+ 215134273,
+ 215135028,
+ 215237420,
+ 215418148,
+ 215553166,
+ 215553394,
+ 215563858,
+ 215627949,
+ 215754324,
+ 217529652,
+ 217713834,
+ 217732628,
+ 218731945,
+ 221417045,
+ 221424946,
+ 221493746,
+ 221515401,
+ 221658189,
+ 221908140,
+ 221910626,
+ 221921586,
+ 222659762,
+ 225001091,
+ 236105833,
+ 236113965,
+ 236194995,
+ 236195427,
+ 236206132,
+ 236206387,
+ 236211683,
+ 236212707,
+ 236381647,
+ 236571826,
+ 237124271,
+ 238210544,
+ 238270764,
+ 238435405,
+ 238501172,
+ 239224867,
+ 239257644,
+ 239710497,
+ 240307721,
+ 241208789,
+ 241241557,
+ 241318060,
+ 241319404,
+ 241343533,
+ 241344069,
+ 241405397,
+ 241765845,
+ 243864964,
+ 244502085,
+ 244946220,
+ 245109902,
+ 247647266,
+ 247707956,
+ 248648814,
+ 248648836,
+ 248682161,
+ 248986932,
+ 249058914,
+ 249697357,
+ 251841204,
+ 252132601,
+ 252135604,
+ 252317348,
+ 255007012,
+ 255278388,
+ 255641645,
+ 256365156,
+ 257566121,
+ 269763372,
+ 271202790,
+ 271863856,
+ 272049197,
+ 272127474,
+ 274339449,
+ 274939471,
+ 275388004,
+ 275388005,
+ 275388006,
+ 275977800,
+ 278267602,
+ 278513831,
+ 278712622,
+ 281613765,
+ 281683369,
+ 282120228,
+ 282250732,
+ 282498697,
+ 282508942,
+ 283743649,
+ 283787570,
+ 284710386,
+ 285391148,
+ 285478533,
+ 285854898,
+ 285873762,
+ 286931113,
+ 288964227,
+ 289445441,
+ 289591340,
+ 289689648,
+ 291671489,
+ 303512884,
+ 305319975,
+ 305610036,
+ 305764101,
+ 308448294,
+ 308675890,
+ 312085683,
+ 312264750,
+ 315032867,
+ 316391000,
+ 317331042,
+ 317902135,
+ 318950711,
+ 319447220,
+ 321499182,
+ 322538804,
+ 323145200,
+ 337067316,
+ 337826293,
+ 339905989,
+ 340833697,
+ 341457068,
+ 342310196,
+ 345302593,
+ 349554733,
+ 349771471,
+ 349786245,
+ 350819405,
+ 356072847,
+ 370349192,
+ 373962798,
+ 375558638,
+ 375574835,
+ 376053993,
+ 383276530,
+ 383373833,
+ 383407586,
+ 384439906,
+ 386079012,
+ 404133513,
+ 404307343,
+ 407031852,
+ 408072233,
+ 409112005,
+ 409608425,
+ 409713793,
+ 409771500,
+ 419040932,
+ 437730612,
+ 439529766,
+ 442616365,
+ 442813037,
+ 443157674,
+ 443295316,
+ 450118444,
+ 450482697,
+ 456789668,
+ 459935396,
+ 471217869,
+ 474073645,
+ 476230702,
+ 476665218,
+ 476717289,
+ 483014825,
+ 485083298,
+ 489306281,
+ 538364390,
+ 540675748,
+ 543819186,
+ 543958612,
+ 576960820,
+ 577242548,
+ 610515252,
+ 642202932,
+ 644420819,
+ };
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java
new file mode 100644
index 000000000..f1749e0b3
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java
@@ -0,0 +1,772 @@
+/*
+ * Copyright (c) 2009-2013 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.Inline;
+import nu.validator.htmlparser.annotation.NoLength;
+import nu.validator.htmlparser.common.TokenHandler;
+import nu.validator.htmlparser.common.TransitionHandler;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+
+import java.util.HashMap;
+
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+public class ErrorReportingTokenizer extends Tokenizer {
+
+ /**
+ * Offset that turns <code>(high &lt;&lt; 10) + low</code> into the code
+ * point of a UTF-16 surrogate pair; <code>checkChar</code> uses it that way.
+ */
+ private static final int SURROGATE_OFFSET = (0x10000 - (0xD800 << 10) - 0xDC00);
+
+ /**
+ * The policy for non-space non-XML characters.
+ */
+ private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.ALTER_INFOSET;
+
+ /**
+ * Keeps track of PUA warnings.
+ */
+ private boolean alreadyWarnedAboutPrivateUseCharacters;
+
+ /**
+ * The current line number in the current resource being parsed. (First line
+ * is 1.) Passed on as locator data.
+ */
+ private int line;
+
+ /**
+ * Value <code>line</code> had before the most recent
+ * <code>checkChar</code> call; <code>flushChars</code> reports it while
+ * handing characters to the token handler.
+ */
+ private int linePrev;
+
+ /**
+ * The current column number in the current resource being tokenized. (First
+ * column is 1, counted by UTF-16 code units.) Passed on as locator data.
+ */
+ private int col;
+
+ /**
+ * Value <code>col</code> had before the most recent
+ * <code>checkChar</code> call; used together with <code>linePrev</code>.
+ */
+ private int colPrev;
+
+ /**
+ * When <code>true</code>, the next <code>checkChar</code> call increments
+ * <code>line</code> and resets <code>col</code> to 1 instead of
+ * incrementing <code>col</code>.
+ */
+ private boolean nextCharOnNewLine;
+
+ /**
+ * The code unit returned by the previous <code>checkChar</code> call; used
+ * to detect a high surrogate preceding a low surrogate.
+ */
+ private char prev;
+
+ /**
+ * Maps a profile name to a level string (<code>"warn"</code> or
+ * <code>"err"</code>) consulted by <code>note</code>; <code>null</code>
+ * disables <code>note</code>.
+ */
+ private HashMap<String, String> errorProfileMap = null;
+
+ /**
+ * Optional observer notified from <code>transition</code>; ignored when
+ * <code>null</code>.
+ */
+ private TransitionHandler transitionHandler = null;
+
+ /**
+ * Added to the buffer position reported to the transition handler; reset to
+ * 0 by <code>startErrorReporting</code>.
+ */
+ private int transitionBaseOffset = 0;
+
+ /**
+ * Creates an error-reporting tokenizer. Both arguments are passed unchanged
+ * to the <code>Tokenizer</code> constructor.
+ *
+ * @param tokenHandler
+ *            the handler forwarded to the superclass
+ * @param newAttributesEachTime
+ *            flag forwarded to the superclass
+ */
+ public ErrorReportingTokenizer(TokenHandler tokenHandler,
+ boolean newAttributesEachTime) {
+ super(tokenHandler, newAttributesEachTime);
+ }
+
+ /**
+ * Creates an error-reporting tokenizer, passing the handler unchanged to the
+ * single-argument <code>Tokenizer</code> constructor.
+ *
+ * @param tokenHandler
+ *            the handler forwarded to the superclass
+ */
+ public ErrorReportingTokenizer(TokenHandler tokenHandler) {
+ super(tokenHandler);
+ }
+
+ /**
+  * Returns the current line for locator purposes, or -1 while no positive
+  * line number is available.
+  *
+  * @return the line number if it is greater than zero, otherwise -1
+  * @see org.xml.sax.Locator#getLineNumber()
+  */
+ public int getLineNumber() {
+     return line > 0 ? line : -1;
+ }
+
+ /**
+  * Returns the current column for locator purposes, or -1 while no positive
+  * column number is available.
+  *
+  * @return the column number if it is greater than zero, otherwise -1
+  * @see org.xml.sax.Locator#getColumnNumber()
+  */
+ public int getColumnNumber() {
+     return col > 0 ? col : -1;
+ }
+
+ /**
+ * Sets the contentNonXmlCharPolicy, which <code>checkChar</code> consults
+ * when it meets U+000C or a forbidden code point.
+ *
+ * @param contentNonXmlCharPolicy
+ *            the contentNonXmlCharPolicy to set
+ */
+ public void setContentNonXmlCharPolicy(
+ XmlViolationPolicy contentNonXmlCharPolicy) {
+ this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
+ }
+
+ /**
+ * Sets the error profile map used by <code>note</code>.
+ *
+ * @param errorProfileMap
+ *            map from profile name to level (<code>"warn"</code> or
+ *            <code>"err"</code>); <code>null</code> makes
+ *            <code>note</code> do nothing
+ */
+ public void setErrorProfile(HashMap<String, String> errorProfileMap) {
+ this.errorProfileMap = errorProfileMap;
+ }
+
+ /**
+  * Reports a message at the level the active error profile assigns to the
+  * given profile name. Nothing is reported when no profile map is set or
+  * when the mapped level is neither <code>"warn"</code> nor
+  * <code>"err"</code> (an "info" level is not supported).
+  *
+  * @param profile
+  *            the profile this message belongs to
+  * @param message
+  *            the message itself
+  * @throws SAXException
+  *             if the underlying reporter throws
+  */
+ public void note(String profile, String message) throws SAXException {
+     if (errorProfileMap != null) {
+         final String severity = errorProfileMap.get(profile);
+         if ("err".equals(severity)) {
+             err(message);
+         } else if ("warn".equals(severity)) {
+             warn(message);
+         }
+     }
+ }
+
+ /**
+  * Resets all position tracking and per-document warning state so that the
+  * first checked character lands on line 1, column 1.
+  *
+  * @throws SAXException
+  *             declared for overriders; not thrown here
+  */
+ protected void startErrorReporting() throws SAXException {
+     // Lines start at 0 because the first checkChar call increments the line.
+     linePrev = 0;
+     line = 0;
+     colPrev = 1;
+     col = 1;
+     nextCharOnNewLine = true;
+     prev = '\u0000';
+     transitionBaseOffset = 0;
+     alreadyWarnedAboutPrivateUseCharacters = false;
+ }
+
+ /**
+  * Records a carriage return without reporting anything: flags that a CR
+  * was just seen and that the next character starts a new line.
+  */
+ @Inline protected void silentCarriageReturn() {
+     lastCR = true;
+     nextCharOnNewLine = true;
+ }
+
+ /**
+ * Records a line feed without reporting anything: the next character
+ * starts a new line.
+ */
+ @Inline protected void silentLineFeed() {
+ nextCharOnNewLine = true;
+ }
+
+ /**
+ * Returns the raw line counter. Unlike <code>getLineNumber</code>, values
+ * that are not positive are returned as-is rather than mapped to -1.
+ *
+ * @return the line
+ */
+ public int getLine() {
+ return line;
+ }
+
+ /**
+ * Returns the raw column counter. Unlike <code>getColumnNumber</code>,
+ * values that are not positive are returned as-is rather than mapped to -1.
+ *
+ * @return the col
+ */
+ public int getCol() {
+ return col;
+ }
+
+ /**
+ * Tells whether the next checked character will be counted as the first
+ * character of a new line.
+ *
+ * @return the nextCharOnNewLine
+ */
+ public boolean isNextCharOnNewLine() {
+ return nextCharOnNewLine;
+ }
+
+ /**
+  * Flushes coalesced character tokens. While the token handler runs, the
+  * reported position is temporarily moved back to the previous character's
+  * line and column; the current position is restored afterwards.
+  *
+  * @param buf
+  *            the buffer holding the pending characters
+  * @param pos
+  *            index one past the last pending character
+  *
+  * @throws SAXException
+  *             if the token handler throws
+  */
+ @Override protected void flushChars(char[] buf, int pos)
+         throws SAXException {
+     if (pos > cstart) {
+         final int savedLine = line;
+         final int savedCol = col;
+         final int pendingLength = pos - cstart;
+         line = linePrev;
+         col = colPrev;
+         tokenHandler.characters(buf, cstart, pendingLength);
+         col = savedCol;
+         line = savedLine;
+     }
+     // Sentinel: nothing is pending until cstart is set again.
+     cstart = Integer.MAX_VALUE;
+ }
+
+ /**
+  * Advances the line/column bookkeeping by one UTF-16 code unit, validates
+  * that code unit and reports problematic characters.
+  *
+  * Depending on <code>contentNonXmlCharPolicy</code>, U+000C and forbidden
+  * code points may be replaced in <code>buf</code>. Diagnostics always name
+  * the code point that was actually found in the input, not its
+  * replacement.
+  *
+  * @param buf
+  *            the buffer being tokenized; may be modified at
+  *            <code>pos</code>
+  * @param pos
+  *            index of the code unit to check
+  * @return the (possibly replaced) code unit at <code>pos</code>
+  * @throws SAXException
+  *             if an error reporter throws
+  */
+ @Override protected char checkChar(@NoLength char[] buf, int pos)
+         throws SAXException {
+     // Remember the previous position; flushChars reports it.
+     linePrev = line;
+     colPrev = col;
+     if (nextCharOnNewLine) {
+         line++;
+         col = 1;
+         nextCharOnNewLine = false;
+     } else {
+         col++;
+     }
+
+     char c = buf[pos];
+     switch (c) {
+         case '\u0000':
+             err("Saw U+0000 in stream.");
+             // fall through: U+0000 needs no further handling here
+         case '\t':
+         case '\r':
+         case '\n':
+             break;
+         case '\u000C': {
+             // Build the message before any replacement so it names U+000C.
+             String notMappable = "This document is not mappable to XML 1.0 without data loss due to "
+                     + toUPlusString(c)
+                     + " which is not a legal XML 1.0 character.";
+             if (contentNonXmlCharPolicy == XmlViolationPolicy.FATAL) {
+                 fatal(notMappable);
+             } else {
+                 if (contentNonXmlCharPolicy == XmlViolationPolicy.ALTER_INFOSET) {
+                     c = buf[pos] = ' ';
+                 }
+                 warn(notMappable);
+             }
+             break;
+         }
+         default:
+             if ((c & 0xFC00) == 0xDC00) {
+                 // Got a low surrogate. See if prev was high
+                 // surrogate
+                 if ((prev & 0xFC00) == 0xD800) {
+                     int intVal = (prev << 10) + c + SURROGATE_OFFSET;
+                     if ((intVal & 0xFFFE) == 0xFFFE) {
+                         err("Astral non-character.");
+                     }
+                     if (isAstralPrivateUse(intVal)) {
+                         warnAboutPrivateUseChar();
+                     }
+                 }
+             } else if ((c < ' ' || ((c & 0xFFFE) == 0xFFFE))) {
+                 // Capture the offending code point before it may be replaced.
+                 String forbidden = "Forbidden code point "
+                         + toUPlusString(c) + ".";
+                 switch (contentNonXmlCharPolicy) {
+                     case FATAL:
+                         fatal(forbidden);
+                         break;
+                     case ALTER_INFOSET:
+                         c = buf[pos] = '\uFFFD';
+                         // fall through
+                     case ALLOW:
+                         err(forbidden);
+                 }
+             } else if ((c >= '\u007F') && (c <= '\u009F')
+                     || (c >= '\uFDD0') && (c <= '\uFDEF')) {
+                 err("Forbidden code point " + toUPlusString(c) + ".");
+             } else if (isPrivateUse(c)) {
+                 warnAboutPrivateUseChar();
+             }
+     }
+     prev = c;
+     return c;
+ }
+
+ /**
+  * Notifies the transition handler, if one is set, about a state change and
+  * returns the target state. The reported position is <code>pos</code>
+  * shifted by <code>transitionBaseOffset</code>.
+  *
+  * @return <code>to</code>, unchanged
+  * @throws SAXException
+  *             if the transition handler throws
+  * @see nu.validator.htmlparser.impl.Tokenizer#transition(int, int, boolean,
+  *      int)
+  */
+ @Override protected int transition(int from, int to, boolean reconsume,
+         int pos) throws SAXException {
+     if (transitionHandler == null) {
+         return to;
+     }
+     final int absolutePos = transitionBaseOffset + pos;
+     transitionHandler.transition(from, to, reconsume, absolutePos);
+     return to;
+ }
+
+ private String toUPlusString(int c) {
+ String hexString = Integer.toHexString(c);
+ switch (hexString.length()) {
+ case 1:
+ return "U+000" + hexString;
+ case 2:
+ return "U+00" + hexString;
+ case 3:
+ return "U+0" + hexString;
+ default:
+ return "U+" + hexString;
+ }
+ }
+
+ /**
+  * Warns about private use characters, at most once until the flag is reset
+  * by <code>startErrorReporting</code>.
+  *
+  * @throws SAXException
+  *             if the warning reporter throws
+  */
+ private void warnAboutPrivateUseChar() throws SAXException {
+     if (alreadyWarnedAboutPrivateUseCharacters) {
+         return;
+     }
+     warn("Document uses the Unicode Private Use Area(s), which should not be used in publicly exchanged documents. (Charmod C073)");
+     // Set only after warn() returned, so a throwing reporter does not mute later warnings.
+     alreadyWarnedAboutPrivateUseCharacters = true;
+ }
+
+ /**
+  * Tells whether a UTF-16 code unit lies in the BMP private use range
+  * U+E000..U+F8FF (inclusive).
+  *
+  * @param c
+  *            the UTF-16 code unit to check
+  * @return <code>true</code> if PUA character
+  */
+ private boolean isPrivateUse(char c) {
+     return '\uE000' <= c && c <= '\uF8FF';
+ }
+
+ /**
+ * Tells if the argument is an astral PUA character.
+ *
+ * @param c
+ * the code point to check
+ * @return <code>true</code> if astral private use
+ */
+ private boolean isAstralPrivateUse(int c) {
+ return (c >= 0xF0000 && c <= 0xFFFFD)
+ || (c >= 0x100000 && c <= 0x10FFFD);
+ }
+
+ /**
+ * Reports, via <code>err</code>, that garbage followed
+ * <code>&lt;/</code>.
+ */
+ @Override protected void errGarbageAfterLtSlash() throws SAXException {
+ err("Garbage after \u201C</\u201D.");
+ }
+
+ /**
+ * Reports, via <code>err</code>, that the input contained
+ * <code>&lt;/&gt;</code>, with a hint about probable causes.
+ */
+ @Override protected void errLtSlashGt() throws SAXException {
+ err("Saw \u201C</>\u201D. Probable causes: Unescaped \u201C<\u201D (escape as \u201C&lt;\u201D) or mistyped end tag.");
+ }
+
+ @Override protected void errWarnLtSlashInRcdata() throws SAXException {
+ if (html4) {
+ err((stateSave == Tokenizer.DATA ? "CDATA" : "RCDATA")
+ + " element \u201C"
+ + endTagExpectation
+ + "\u201D contained the string \u201C</\u201D, but it was not the start of the end tag. (HTML4-only error)");
+ } else {
+ warn((stateSave == Tokenizer.DATA ? "CDATA" : "RCDATA")
+ + " element \u201C"
+ + endTagExpectation
+ + "\u201D contained the string \u201C</\u201D, but this did not close the element.");
+ }
+ }
+
+ /**
+  * In HTML4 mode, reports an error when <code>&lt;/</code> inside a
+  * CDATA/RCDATA element was not the start of its end tag. Nothing is
+  * reported when the expected end tag is <code>iframe</code>, or when
+  * <code>index</code> is not positive and <code>folded</code> is not a
+  * lowercase ASCII letter.
+  *
+  * @param folded
+  *            the character seen after <code>&lt;/</code>
+  * @throws SAXException
+  *             if the reporter throws
+  */
+ @Override protected void errHtml4LtSlashInRcdata(char folded)
+         throws SAXException {
+     if (!html4) {
+         return;
+     }
+     if (index <= 0 && (folded < 'a' || folded > 'z')) {
+         return;
+     }
+     if (ElementName.IFRAME == endTagExpectation) {
+         return;
+     }
+     final String contentKind = stateSave == Tokenizer.DATA ? "CDATA" : "RCDATA";
+     err(contentKind
+             + " element \u201C"
+             + endTagExpectation.name
+             + "\u201D contained the string \u201C</\u201D, but it was not the start of the end tag. (HTML4-only error)");
+ }
+
+ /**
+ * Reports, via <code>err</code>, a character reference that was not
+ * terminated by a semicolon.
+ */
+ @Override protected void errCharRefLacksSemicolon() throws SAXException {
+ err("Character reference was not terminated by a semicolon.");
+ }
+
+ /**
+ * Reports, via <code>err</code>, that no digits followed the text returned
+ * by <code>strBufToString()</code>.
+ */
+ @Override protected void errNoDigitsInNCR() throws SAXException {
+ err("No digits after \u201C" + strBufToString() + "\u201D.");
+ }
+
+ /**
+ * Reports, via <code>err</code>, a <code>&gt;</code> inside a system
+ * identifier.
+ */
+ @Override protected void errGtInSystemId() throws SAXException {
+ err("\u201C>\u201D in system identifier.");
+ }
+
+ /**
+ * Reports, via <code>err</code>, a <code>&gt;</code> inside a public
+ * identifier.
+ */
+ @Override protected void errGtInPublicId() throws SAXException {
+ err("\u201C>\u201D in public identifier.");
+ }
+
+ /**
+  * Reports, via {@code err}, a doctype that has no name.
+  */
+ @Override
+ protected void errNamelessDoctype() throws SAXException {
+     final String message = "Nameless doctype.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, consecutive hyphens inside a comment that did
+  * not terminate it.
+  */
+ @Override
+ protected void errConsecutiveHyphens() throws SAXException {
+     final String message = "Consecutive hyphens did not terminate a comment. \u201C--\u201D is not permitted inside a comment, but e.g. \u201C- -\u201D is.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, a comment that ended prematurely.
+  */
+ @Override
+ protected void errPrematureEndOfComment() throws SAXException {
+     final String message = "Premature end of comment. Use \u201C-->\u201D to end a comment properly.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, a bogus comment.
+  */
+ @Override
+ protected void errBogusComment() throws SAXException {
+     final String message = "Bogus comment.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, a problematic character inside an unquoted
+  * attribute value. U+FFFD is silently ignored; {@code <} and the backtick
+  * get dedicated messages; any other character gets a generic message that
+  * quotes it.
+  *
+  * @param c
+  *            the character that was seen
+  */
+ @Override
+ protected void errUnquotedAttributeValOrNull(char c) throws SAXException {
+     if (c == '\uFFFD') {
+         return;
+     }
+     if (c == '<') {
+         err("\u201C<\u201D in an unquoted attribute value. Probable cause: Missing \u201C>\u201D immediately before.");
+     } else if (c == '`') {
+         err("\u201C`\u201D in an unquoted attribute value. Probable cause: Using the wrong character as a quote.");
+     } else {
+         err("\u201C"
+                 + c
+                 + "\u201D in an unquoted attribute value. Probable causes: Attributes running together or a URL query string in an unquoted attribute value.");
+     }
+ }
+
+ /**
+  * Reports, via {@code err}, a slash that was not directly followed by {@code >}.
+  */
+ @Override
+ protected void errSlashNotFollowedByGt() throws SAXException {
+     final String message = "A slash was not immediately followed by \u201C>\u201D.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, use of the {@code />} syntax on a void element.
+  * Does nothing unless {@code html4} is set.
+  */
+ @Override
+ protected void errHtml4XmlVoidSyntax() throws SAXException {
+     if (!html4) {
+         return;
+     }
+     err("The \u201C/>\u201D syntax on void elements is not allowed. (This is an HTML4-only error.)");
+ }
+
+ /**
+  * Reports, via {@code err}, that two attributes were not separated by space.
+  */
+ @Override
+ protected void errNoSpaceBetweenAttributes() throws SAXException {
+     final String message = "No space between attributes.";
+     err(message);
+ }
+
+ /**
+  * Reports, only when {@code html4} is set, a character in an unquoted
+  * attribute value that is not an ASCII letter, an ASCII digit, or one of
+  * {@code . - _ :}.
+  *
+  * @param c
+  *            the character to check
+  */
+ @Override
+ protected void errHtml4NonNameInUnquotedAttribute(char c)
+         throws SAXException {
+     if (!html4) {
+         return;
+     }
+     final boolean asciiLetter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+     final boolean asciiDigit = c >= '0' && c <= '9';
+     final boolean namePunctuation = c == '.' || c == '-' || c == '_' || c == ':';
+     if (asciiLetter || asciiDigit || namePunctuation) {
+         return;
+     }
+     err("Non-name character in an unquoted attribute value. (This is an HTML4-only error.)");
+ }
+
+ /**
+  * Reports, via {@code err}, an equals sign, {@code <} or backtick at the
+  * start of an unquoted attribute value. Any other character is ignored.
+  *
+  * @param c
+  *            the character that was seen
+  */
+ @Override
+ protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c)
+         throws SAXException {
+     if (c == '=') {
+         err("\u201C=\u201D at the start of an unquoted attribute value. Probable cause: Stray duplicate equals sign.");
+     } else if (c == '<') {
+         err("\u201C<\u201D at the start of an unquoted attribute value. Probable cause: Missing \u201C>\u201D immediately before.");
+     } else if (c == '`') {
+         err("\u201C`\u201D at the start of an unquoted attribute value. Probable cause: Using the wrong character as a quote.");
+     }
+ }
+
+ /**
+  * Reports, via {@code err}, a missing attribute value.
+  */
+ @Override
+ protected void errAttributeValueMissing() throws SAXException {
+     final String message = "Attribute value missing.";
+     err(message);
+ }
+
+ /**
+  * Dispatches on a bad character seen where an attribute name was expected:
+  * {@code <} is reported directly, an equals sign is delegated to
+  * {@code errEqualsSignBeforeAttributeName()}, U+FFFD is ignored, and every
+  * other character is delegated to {@code errQuoteBeforeAttributeName(char)}.
+  *
+  * @param c
+  *            the character that was seen
+  */
+ @Override
+ protected void errBadCharBeforeAttributeNameOrNull(char c)
+         throws SAXException {
+     switch (c) {
+         case '<':
+             err("Saw \u201C<\u201D when expecting an attribute name. Probable cause: Missing \u201C>\u201D immediately before.");
+             break;
+         case '=':
+             errEqualsSignBeforeAttributeName();
+             break;
+         case '\uFFFD':
+             break;
+         default:
+             errQuoteBeforeAttributeName(c);
+             break;
+     }
+ }
+
+ /**
+  * Reports, via {@code err}, an equals sign where an attribute name was expected.
+  */
+ @Override
+ protected void errEqualsSignBeforeAttributeName() throws SAXException {
+     final String message = "Saw \u201C=\u201D when expecting an attribute name. Probable cause: Attribute name missing.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, a bad character directly after {@code <}.
+  *
+  * @param c
+  *            the offending character, quoted in the message
+  */
+ @Override
+ protected void errBadCharAfterLt(char c) throws SAXException {
+     final String message = "Bad character \u201C"
+             + c
+             + "\u201D after \u201C<\u201D. Probable cause: Unescaped \u201C<\u201D. Try escaping it as \u201C&lt;\u201D.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, that an empty start tag ({@code <>}) was seen.
+  */
+ @Override
+ protected void errLtGt() throws SAXException {
+     final String message = "Saw \u201C<>\u201D. Probable causes: Unescaped \u201C<\u201D (escape as \u201C&lt;\u201D) or mistyped start tag.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, that {@code <?} was seen.
+  */
+ @Override
+ protected void errProcessingInstruction() throws SAXException {
+     final String message = "Saw \u201C<?\u201D. Probable cause: Attempt to use an XML processing instruction in HTML. (XML processing instructions are not supported in HTML.)";
+     err(message);
+ }
+
+ /**
+  * Sends an error to {@code errorHandler}, located at
+  * {@code ampersandLocation}, saying that the text after an ampersand was
+  * interpreted as a character reference. Does nothing when no error handler
+  * is set.
+  */
+ @Override
+ protected void errUnescapedAmpersandInterpretedAsCharacterReference()
+         throws SAXException {
+     if (errorHandler != null) {
+         errorHandler.error(new SAXParseException(
+                 "The string following \u201C&\u201D was interpreted as a character reference. (\u201C&\u201D probably should have been escaped as \u201C&amp;\u201D.)",
+                 ampersandLocation));
+     }
+ }
+
+ /**
+  * Reports, via {@code err}, a named character reference without a closing
+  * semicolon.
+  */
+ @Override
+ protected void errNotSemicolonTerminated() throws SAXException {
+     final String message = "Named character reference was not terminated by a semicolon. (Or \u201C&\u201D should have been escaped as \u201C&amp;\u201D.)";
+     err(message);
+ }
+
+ /**
+  * Sends an error to {@code errorHandler}, located at
+  * {@code ampersandLocation}, saying that an ampersand did not start a
+  * character reference. Does nothing when no error handler is set.
+  */
+ @Override
+ protected void errNoNamedCharacterMatch() throws SAXException {
+     if (errorHandler != null) {
+         errorHandler.error(new SAXParseException(
+                 "\u201C&\u201D did not start a character reference. (\u201C&\u201D probably should have been escaped as \u201C&amp;\u201D.)",
+                 ampersandLocation));
+     }
+ }
+
+ /**
+  * Reports, via {@code err}, a character seen where an attribute name was
+  * expected.
+  *
+  * @param c
+  *            the character that was seen, quoted in the message
+  */
+ @Override
+ protected void errQuoteBeforeAttributeName(char c) throws SAXException {
+     final String message = "Saw \u201C"
+             + c
+             + "\u201D when expecting an attribute name. Probable cause: \u201C=\u201D missing immediately before.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, a {@code <} or other flagged character inside
+  * an attribute name. U+FFFD is ignored.
+  *
+  * @param c
+  *            the character that was seen
+  */
+ @Override
+ protected void errQuoteOrLtInAttributeNameOrNull(char c)
+         throws SAXException {
+     switch (c) {
+         case '<':
+             err("\u201C<\u201D in attribute name. Probable cause: \u201C>\u201D missing immediately before.");
+             break;
+         case '\uFFFD':
+             break;
+         default:
+             err("Quote \u201C"
+                     + c
+                     + "\u201D in attribute name. Probable cause: Matching quote missing somewhere earlier.");
+             break;
+     }
+ }
+
+ /**
+  * Reports, via {@code err}, a doctype that ended where a public identifier
+  * was expected.
+  */
+ @Override
+ protected void errExpectedPublicId() throws SAXException {
+     final String message = "Expected a public identifier but the doctype ended.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, a bogus doctype.
+  */
+ @Override
+ protected void errBogusDoctype() throws SAXException {
+     final String message = "Bogus doctype.";
+     err(message);
+ }
+
+ /**
+  * Calls {@code warnAboutPrivateUseChar()} when an error handler is set and
+  * {@code value} is an astral private-use code point.
+  */
+ @Override
+ protected void maybeWarnPrivateUseAstral() throws SAXException {
+     if (errorHandler == null) {
+         return;
+     }
+     if (isAstralPrivateUse(value)) {
+         warnAboutPrivateUseChar();
+     }
+ }
+
+ /**
+  * Calls {@code warnAboutPrivateUseChar()} when an error handler is set and
+  * {@code isPrivateUse(ch)} holds.
+  *
+  * @param ch
+  *            the character to check
+  */
+ @Override
+ protected void maybeWarnPrivateUse(char ch) throws SAXException {
+     if (errorHandler == null) {
+         return;
+     }
+     if (isPrivateUse(ch)) {
+         warnAboutPrivateUseChar();
+     }
+ }
+
+ /**
+  * Reports, via {@code err}, an end tag that carries attributes. Emitting an
+  * end tag token with attributes is a parse error.
+  *
+  * @param attrs
+  *            the attributes collected for the end tag
+  */
+ @Override
+ protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs)
+         throws SAXException {
+     if (attrs.getLength() == 0) {
+         return;
+     }
+     err("End tag had attributes.");
+ }
+
+ /**
+  * Reports, via {@code err}, a stray slash at the end of an end tag. The
+  * report is made only when both {@code selfClosing} and the
+  * {@code endTag} field are true.
+  *
+  * @param selfClosing
+  *            whether the tag was written with a trailing slash
+  */
+ @Override
+ protected void maybeErrSlashInEndTag(boolean selfClosing)
+         throws SAXException {
+     if (!selfClosing || !endTag) {
+         return;
+     }
+     err("Stray \u201C/\u201D at the end of an end tag.");
+ }
+
+ /**
+  * Handles a character reference that expands to a non-character, according
+  * to {@code contentNonXmlCharPolicy}: FATAL calls {@code fatal}, ALLOW calls
+  * {@code err} and keeps the character, ALTER_INFOSET calls {@code err} and
+  * substitutes U+FFFD. The message quotes {@code toUPlusString((char) value)}.
+  *
+  * @param ch
+  *            the character the reference expanded to
+  * @return U+FFFD under ALTER_INFOSET, otherwise {@code ch} unchanged
+  */
+ @Override
+ protected char errNcrNonCharacter(char ch) throws SAXException {
+     switch (contentNonXmlCharPolicy) {
+         case FATAL:
+             fatal("Character reference expands to a non-character ("
+                     + toUPlusString((char) value) + ").");
+             return ch;
+         case ALTER_INFOSET:
+             err("Character reference expands to a non-character ("
+                     + toUPlusString((char) value) + ").");
+             return '\uFFFD';
+         case ALLOW:
+             err("Character reference expands to a non-character ("
+                     + toUPlusString((char) value) + ").");
+             return ch;
+     }
+     return ch;
+ }
+
+ /**
+  * Reports, via {@code err}, a character reference that expands to an astral
+  * non-character. The code point shown in the message is taken from the
+  * {@code value} field, not from the {@code ch} argument.
+  *
+  * @param ch
+  *            the code point passed by the caller (not read here)
+  * @see nu.validator.htmlparser.impl.Tokenizer#errAstralNonCharacter(int)
+  */
+ @Override
+ protected void errAstralNonCharacter(int ch) throws SAXException {
+     final String codePoint = toUPlusString(value);
+     err("Character reference expands to an astral non-character ("
+             + codePoint + ").");
+ }
+
+ /**
+  * Reports, via {@code err}, a character reference that expands to a surrogate.
+  */
+ @Override
+ protected void errNcrSurrogate() throws SAXException {
+     final String message = "Character reference expands to a surrogate.";
+     err(message);
+ }
+
+ /**
+  * Handles a character reference that expands to a control character,
+  * according to {@code contentNonXmlCharPolicy}: FATAL calls {@code fatal},
+  * ALLOW calls {@code err} and keeps the character, ALTER_INFOSET calls
+  * {@code err} and substitutes U+FFFD. The message quotes
+  * {@code toUPlusString((char) value)}.
+  *
+  * @param ch
+  *            the character the reference expanded to
+  * @return U+FFFD under ALTER_INFOSET, otherwise {@code ch} unchanged
+  */
+ @Override
+ protected char errNcrControlChar(char ch) throws SAXException {
+     switch (contentNonXmlCharPolicy) {
+         case FATAL:
+             fatal("Character reference expands to a control character ("
+                     + toUPlusString((char) value) + ").");
+             return ch;
+         case ALTER_INFOSET:
+             err("Character reference expands to a control character ("
+                     + toUPlusString((char) value) + ").");
+             return '\uFFFD';
+         case ALLOW:
+             err("Character reference expands to a control character ("
+                     + toUPlusString((char) value) + ").");
+             return ch;
+     }
+     return ch;
+ }
+
+ /**
+  * Reports, via {@code err}, a numeric character reference that expanded to
+  * carriage return.
+  */
+ @Override
+ protected void errNcrCr() throws SAXException {
+     final String message = "A numeric character reference expanded to carriage return.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, a numeric character reference that expanded
+  * into the C1 controls range.
+  */
+ @Override
+ protected void errNcrInC1Range() throws SAXException {
+     final String message = "A numeric character reference expanded to the C1 controls range.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, end of file inside a public identifier.
+  */
+ @Override
+ protected void errEofInPublicId() throws SAXException {
+     final String message = "End of file inside public identifier.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, end of file inside a comment.
+  */
+ @Override
+ protected void errEofInComment() throws SAXException {
+     final String message = "End of file inside comment.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, end of file inside a doctype.
+  */
+ @Override
+ protected void errEofInDoctype() throws SAXException {
+     final String message = "End of file inside doctype.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, end of file inside an attribute value.
+  */
+ @Override
+ protected void errEofInAttributeValue() throws SAXException {
+     final String message = "End of file reached when inside an attribute value. Ignoring tag.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, end of file inside an attribute name.
+  */
+ @Override
+ protected void errEofInAttributeName() throws SAXException {
+     final String message = "End of file occurred in an attribute name. Ignoring tag.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, end of file before the previous tag was closed
+  * with {@code >}.
+  */
+ @Override
+ protected void errEofWithoutGt() throws SAXException {
+     final String message = "Saw end of file without the previous tag ending with \u201C>\u201D. Ignoring tag.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, end of file while looking for a tag name.
+  */
+ @Override
+ protected void errEofInTagName() throws SAXException {
+     final String message = "End of file seen when looking for tag name. Ignoring tag.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, end of file inside an end tag.
+  */
+ @Override
+ protected void errEofInEndTag() throws SAXException {
+     final String message = "End of file inside end tag. Ignoring tag.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, end of file directly after {@code <}.
+  */
+ @Override
+ protected void errEofAfterLt() throws SAXException {
+     final String message = "End of file after \u201C<\u201D.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, a character reference outside the permissible
+  * Unicode range.
+  */
+ @Override
+ protected void errNcrOutOfRange() throws SAXException {
+     final String message = "Character reference outside the permissible Unicode range.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, a character reference that expands to a
+  * permanently unassigned code point.
+  */
+ @Override
+ protected void errNcrUnassigned() throws SAXException {
+     final String message = "Character reference expands to a permanently unassigned code point.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, a duplicate attribute. The message quotes the
+  * local name that {@code attributeName} yields for
+  * {@code AttributeName.HTML}.
+  */
+ @Override
+ protected void errDuplicateAttribute() throws SAXException {
+     final String duplicated = attributeName.getLocal(AttributeName.HTML);
+     err("Duplicate attribute \u201C" + duplicated + "\u201D.");
+ }
+
+ /**
+  * Reports, via {@code err}, end of file inside a system identifier.
+  */
+ @Override
+ protected void errEofInSystemId() throws SAXException {
+     final String message = "End of file inside system identifier.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, a doctype that ended where a system identifier
+  * was expected.
+  */
+ @Override
+ protected void errExpectedSystemId() throws SAXException {
+     final String message = "Expected a system identifier but the doctype ended.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, a missing space before the doctype name.
+  */
+ @Override
+ protected void errMissingSpaceBeforeDoctypeName() throws SAXException {
+     final String message = "Missing space before doctype name.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, the sequence {@code --!} inside a comment.
+  */
+ @Override
+ protected void errHyphenHyphenBang() throws SAXException {
+     final String message = "\u201C--!\u201D found in comment.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, a character reference that expands to a control
+  * character. The message quotes {@code toUPlusString((char) value)}.
+  */
+ @Override
+ protected void errNcrControlChar() throws SAXException {
+     final String codePoint = toUPlusString((char) value);
+     err("Character reference expands to a control character ("
+             + codePoint + ").");
+ }
+
+ /**
+  * Reports, via {@code err}, a character reference that expands to zero.
+  */
+ @Override
+ protected void errNcrZero() throws SAXException {
+     final String message = "Character reference expands to zero.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, a missing space between the doctype SYSTEM
+  * keyword and the following quote.
+  */
+ @Override
+ protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote()
+         throws SAXException {
+     final String message = "No space between the doctype \u201CSYSTEM\u201D keyword and the quote.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, a missing space between the doctype public and
+  * system identifiers.
+  */
+ @Override
+ protected void errNoSpaceBetweenPublicAndSystemIds() throws SAXException {
+     final String message = "No space between the doctype public and system identifiers.";
+     err(message);
+ }
+
+ /**
+  * Reports, via {@code err}, a missing space between the doctype PUBLIC
+  * keyword and the following quote.
+  */
+ @Override
+ protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote()
+         throws SAXException {
+     final String message = "No space between the doctype \u201CPUBLIC\u201D keyword and the quote.";
+     err(message);
+ }
+
+ /**
+  * Passes an "attribute without value" note to {@code note} under the
+  * {@code "xhtml2"} key.
+  */
+ @Override
+ protected void noteAttributeWithoutValue() throws SAXException {
+     final String key = "xhtml2";
+     final String message = "Attribute without value";
+     note(key, message);
+ }
+
+ /**
+  * Passes an "unquoted attribute value" note to {@code note} under the
+  * {@code "xhtml1"} key.
+  */
+ @Override
+ protected void noteUnquotedAttributeValue() throws SAXException {
+     final String key = "xhtml1";
+     final String message = "Unquoted attribute value.";
+     note(key, message);
+ }
+
+ /**
+  * Stores the given handler in the {@code transitionHandler} field,
+  * replacing whatever was stored before.
+  *
+  * @param transitionHandler
+  *            the handler to store
+  */
+ public void setTransitionHandler(final TransitionHandler transitionHandler) {
+     this.transitionHandler = transitionHandler;
+ }
+
+ /**
+  * Stores the base offset that is to be added to the position reported to
+  * <code>TransitionHandler</code>.
+  *
+  * @param offset
+  *            the offset to store in {@code transitionBaseOffset}
+  */
+ public void setTransitionBaseOffset(final int offset) {
+     this.transitionBaseOffset = offset;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt
new file mode 100644
index 000000000..c389a8cac
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ /**
+ * Template text for splitting a state loop across methods. NOTE(review):
+ * the BEGIN/END HOTSPOT WORKAROUND markers and the two <code>default:</code>
+ * labels suggest the pieces below are spliced into other code by a tool
+ * rather than compiled as written -- confirm against the build scripts.
+ *
+ * The method returns three values packed into one <code>long</code>.
+ * compressed returnValue:
+ * int returnState = returnValue >> 33
+ * boolean breakOuterState = (((returnValue >> 32) & 0x1) != 0)
+ * int pos = returnValue & 0xFFFFFFFF // same as (int)returnValue
+ *
+ * The final state is not part of the return value; it is handed back through
+ * the <code>stateSave</code> field. The packing ORs <code>pos</code> in as
+ * an <code>int</code>, so it presumably relies on <code>pos</code> being
+ * non-negative -- TODO confirm.
+ */
+ @SuppressWarnings("unused") private long workAroundHotSpotHugeMethodLimit(
+ int state, char c, int pos, @NoLength char[] buf,
+ boolean reconsume, int returnState, int endPos) throws SAXException {
+ stateloop: for (;;) {
+ switch (state) {
+ // BEGIN HOTSPOT WORKAROUND
+ default:
+ long returnStateAndPos = workAroundHotSpotHugeMethodLimit(
+ state, c, pos, buf, reconsume, returnState, endPos);
+ pos = (int)returnStateAndPos; // 5.1.3 in the Java spec
+ returnState = (int)(returnStateAndPos >> 33); // bits 33 and up carry returnState
+ state = stateSave; // the callee stores its final state in stateSave before returning
+ if ( (pos == endPos) || ( (((int)(returnStateAndPos >> 32)) & 0x1) != 0) ) { // input exhausted, or bit 32 (break flag) set
+ break stateloop;
+ }
+ continue stateloop;
+ // END HOTSPOT WORKAROUND
+ default:
+ assert !reconsume : "Must not reconsume when returning from HotSpot workaround.";
+ stateSave = state;
+ return (((long)returnState) << 33) | pos; // bit 32 left clear: no break requested
+ }
+ }
+ assert !reconsume : "Must not reconsume when returning from HotSpot workaround.";
+ stateSave = state;
+ return (((long)returnState) << 33) | (1L << 32) | pos ; // bit 32 set: the state loop was broken out of
+ }
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HtmlAttributes.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HtmlAttributes.java
new file mode 100644
index 000000000..0ec25f96f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HtmlAttributes.java
@@ -0,0 +1,618 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2011 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.Auto;
+import nu.validator.htmlparser.annotation.IdType;
+import nu.validator.htmlparser.annotation.Local;
+import nu.validator.htmlparser.annotation.NsUri;
+import nu.validator.htmlparser.annotation.Prefix;
+import nu.validator.htmlparser.annotation.QName;
+import nu.validator.htmlparser.common.Interner;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+
+/**
+ * Holder for the attributes of a tag; implements the SAX
+ * <code>Attributes</code> interface. Be careful with this class: QName is
+ * the name as it came from HTML tokenization. Otherwise, please refer to the
+ * interface doc.
+ *
+ * Attributes live in the parallel arrays <code>names</code> and
+ * <code>values</code>, of which the first <code>length</code> slots are in
+ * use. Attributes whose name reports <code>isXmlns()</code> are additionally
+ * recorded in <code>xmlnsNames</code> and <code>xmlnsValues</code>. The
+ * <code>mode</code> field is passed to the <code>AttributeName</code>
+ * accessors to select which variant of a name is returned.
+ *
+ * The <code>[NOCPP[</code> / <code>]NOCPP]</code> and <code>CPPONLY:</code>
+ * comments are markers, not documentation; leave them exactly as they are.
+ * NOTE(review): they presumably drive the Java-to-C++ export of this file --
+ * confirm against the export scripts.
+ *
+ * @version $Id: AttributesImpl.java 206 2008-03-20 14:09:29Z hsivonen $
+ * @author hsivonen
+ */
+public final class HtmlAttributes implements Attributes {
+
+ // [NOCPP[
+
+ private static final AttributeName[] EMPTY_ATTRIBUTENAMES = new AttributeName[0];
+
+ private static final String[] EMPTY_STRINGS = new String[0];
+
+ // ]NOCPP]
+
+ public static final HtmlAttributes EMPTY_ATTRIBUTES = new HtmlAttributes(
+ AttributeName.HTML);
+
+ private int mode; // selector passed to the AttributeName accessors (getLocal, getUri, ...)
+
+ private int length; // number of slots in use in names and values
+
+ private @Auto AttributeName[] names;
+
+ private @Auto String[] values; // XXX perhaps make this @NoLength?
+
+ // CPPONLY: private @Auto int[] lines; // XXX perhaps make this @NoLength?
+
+ // [NOCPP[
+
+ private String idValue; // value of the last added AttributeName.ID attribute, or null
+
+ private int xmlnsLength; // number of slots in use in xmlnsNames and xmlnsValues
+
+ private AttributeName[] xmlnsNames;
+
+ private String[] xmlnsValues;
+
+ // ]NOCPP]
+
+ public HtmlAttributes(int mode) {
+ this.mode = mode;
+ this.length = 0;
+ /*
+ * The length of 5 covers 98.3% of elements
+ * according to Hixie, but let's round to the next power of two for
+ * jemalloc.
+ */
+ this.names = new AttributeName[8];
+ this.values = new String[8];
+ // CPPONLY: this.lines = new int[8];
+
+ // [NOCPP[
+
+ this.idValue = null;
+
+ this.xmlnsLength = 0;
+
+ this.xmlnsNames = HtmlAttributes.EMPTY_ATTRIBUTENAMES;
+
+ this.xmlnsValues = HtmlAttributes.EMPTY_STRINGS;
+
+ // ]NOCPP]
+ }
+ /*
+ public HtmlAttributes(HtmlAttributes other) {
+ this.mode = other.mode;
+ this.length = other.length;
+ this.names = new AttributeName[other.length];
+ this.values = new String[other.length];
+ // [NOCPP[
+ this.idValue = other.idValue;
+ this.xmlnsLength = other.xmlnsLength;
+ this.xmlnsNames = new AttributeName[other.xmlnsLength];
+ this.xmlnsValues = new String[other.xmlnsLength];
+ // ]NOCPP]
+ }
+ */
+
+ void destructor() { // releases every held name and value via clear(0)
+ clear(0);
+ }
+
+ /**
+ * Finds an attribute by comparing name objects with <code>==</code>.
+ * Only use with a static argument
+ *
+ * @param name the attribute name object to look for
+ * @return the index of the first slot holding that object, or -1 if none
+ */
+ public int getIndex(AttributeName name) {
+ for (int i = 0; i < length; i++) {
+ if (names[i] == name) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ /**
+ * Looks up a value through <code>getIndex(AttributeName)</code>.
+ * Only use with static argument.
+ *
+ * @param name the attribute name object to look for
+ * @return the value stored for that name, or <code>null</code> if absent
+ * @see org.xml.sax.Attributes#getValue(java.lang.String)
+ */
+ public String getValue(AttributeName name) {
+ int index = getIndex(name);
+ if (index == -1) {
+ return null;
+ } else {
+ return getValueNoBoundsCheck(index);
+ }
+ }
+
+ public int getLength() { // number of attributes currently held in names/values
+ return length;
+ }
+
+ /**
+ * Variant of <code>getLocalName(int index)</code> without bounds check.
+ * @param index a valid attribute index
+ * @return the local name at index
+ */
+ public @Local String getLocalNameNoBoundsCheck(int index) {
+ // CPPONLY: assert index < length && index >= 0: "Index out of bounds";
+ return names[index].getLocal(mode);
+ }
+
+ /**
+ * Variant of <code>getURI(int index)</code> without bounds check.
+ * @param index a valid attribute index
+ * @return the namespace URI at index
+ */
+ public @NsUri String getURINoBoundsCheck(int index) {
+ // CPPONLY: assert index < length && index >= 0: "Index out of bounds";
+ return names[index].getUri(mode);
+ }
+
+ /**
+ * Variant of <code>getPrefix(int index)</code> without bounds check.
+ * @param index a valid attribute index
+ * @return the namespace prefix at index
+ */
+ public @Prefix String getPrefixNoBoundsCheck(int index) {
+ // CPPONLY: assert index < length && index >= 0: "Index out of bounds";
+ return names[index].getPrefix(mode);
+ }
+
+ /**
+ * Variant of <code>getValue(int index)</code> without bounds check.
+ * @param index a valid attribute index
+ * @return the attribute value at index
+ */
+ public String getValueNoBoundsCheck(int index) {
+ // CPPONLY: assert index < length && index >= 0: "Index out of bounds";
+ return values[index];
+ }
+
+ /**
+ * Variant of <code>getAttributeName(int index)</code> without bounds check.
+ * @param index a valid attribute index
+ * @return the attribute name at index
+ */
+ public AttributeName getAttributeNameNoBoundsCheck(int index) {
+ // CPPONLY: assert index < length && index >= 0: "Index out of bounds";
+ return names[index];
+ }
+
+ // CPPONLY: /**
+ // CPPONLY: * Obtains a line number without bounds check.
+ // CPPONLY: * @param index a valid attribute index
+ // CPPONLY: * @return the line number at index or -1 if unknown
+ // CPPONLY: */
+ // CPPONLY: public int getLineNoBoundsCheck(int index) {
+ // CPPONLY: assert index < length && index >= 0: "Index out of bounds";
+ // CPPONLY: return lines[index];
+ // CPPONLY: }
+
+ // [NOCPP[
+
+ /**
+ * Variant of <code>getQName(int index)</code> without bounds check.
+ * @param index a valid attribute index
+ * @return the QName at index
+ */
+ public @QName String getQNameNoBoundsCheck(int index) {
+ return names[index].getQName(mode);
+ }
+
+ /**
+ * Variant of <code>getType(int index)</code> without bounds check.
+ * @param index a valid attribute index
+ * @return the attribute type at index: "ID" for
+ * <code>AttributeName.ID</code>, "CDATA" for everything else
+ */
+ public @IdType String getTypeNoBoundsCheck(int index) {
+ return (names[index] == AttributeName.ID) ? "ID" : "CDATA";
+ }
+
+ public int getIndex(String qName) { // linear search by QName (equals); -1 if absent
+ for (int i = 0; i < length; i++) {
+ if (names[i].getQName(mode).equals(qName)) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ public int getIndex(String uri, String localName) { // linear search by local name and URI (equals); -1 if absent
+ for (int i = 0; i < length; i++) {
+ if (names[i].getLocal(mode).equals(localName)
+ && names[i].getUri(mode).equals(uri)) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ public @IdType String getType(String qName) { // null when no attribute has this QName
+ int index = getIndex(qName);
+ if (index == -1) {
+ return null;
+ } else {
+ return getType(index);
+ }
+ }
+
+ public @IdType String getType(String uri, String localName) { // null when no such attribute
+ int index = getIndex(uri, localName);
+ if (index == -1) {
+ return null;
+ } else {
+ return getType(index);
+ }
+ }
+
+ public String getValue(String qName) { // null when no attribute has this QName
+ int index = getIndex(qName);
+ if (index == -1) {
+ return null;
+ } else {
+ return getValue(index);
+ }
+ }
+
+ public String getValue(String uri, String localName) { // null when no such attribute
+ int index = getIndex(uri, localName);
+ if (index == -1) {
+ return null;
+ } else {
+ return getValue(index);
+ }
+ }
+
+ public @Local String getLocalName(int index) { // bounds-checked: null when index is out of range
+ if (index < length && index >= 0) {
+ return names[index].getLocal(mode);
+ } else {
+ return null;
+ }
+ }
+
+ public @QName String getQName(int index) { // bounds-checked: null when index is out of range
+ if (index < length && index >= 0) {
+ return names[index].getQName(mode);
+ } else {
+ return null;
+ }
+ }
+
+ public @IdType String getType(int index) { // "ID" or "CDATA"; null when index is out of range
+ if (index < length && index >= 0) {
+ return (names[index] == AttributeName.ID) ? "ID" : "CDATA";
+ } else {
+ return null;
+ }
+ }
+
+ public AttributeName getAttributeName(int index) { // bounds-checked: null when index is out of range
+ if (index < length && index >= 0) {
+ return names[index];
+ } else {
+ return null;
+ }
+ }
+
+ public @NsUri String getURI(int index) { // bounds-checked: null when index is out of range
+ if (index < length && index >= 0) {
+ return names[index].getUri(mode);
+ } else {
+ return null;
+ }
+ }
+
+ public @Prefix String getPrefix(int index) { // bounds-checked: null when index is out of range
+ if (index < length && index >= 0) {
+ return names[index].getPrefix(mode);
+ } else {
+ return null;
+ }
+ }
+
+ public String getValue(int index) { // bounds-checked: null when index is out of range
+ if (index < length && index >= 0) {
+ return values[index];
+ } else {
+ return null;
+ }
+ }
+
+ public String getId() { // value recorded by addAttribute for AttributeName.ID, or null
+ return idValue;
+ }
+
+ public int getXmlnsLength() { // number of recorded xmlns attributes
+ return xmlnsLength;
+ }
+
+ public @Local String getXmlnsLocalName(int index) { // bounds-checked against xmlnsLength
+ if (index < xmlnsLength && index >= 0) {
+ return xmlnsNames[index].getLocal(mode);
+ } else {
+ return null;
+ }
+ }
+
+ public @NsUri String getXmlnsURI(int index) { // bounds-checked against xmlnsLength
+ if (index < xmlnsLength && index >= 0) {
+ return xmlnsNames[index].getUri(mode);
+ } else {
+ return null;
+ }
+ }
+
+ public String getXmlnsValue(int index) { // bounds-checked against xmlnsLength
+ if (index < xmlnsLength && index >= 0) {
+ return xmlnsValues[index];
+ } else {
+ return null;
+ }
+ }
+
+ public int getXmlnsIndex(AttributeName name) { // identity (==) search in xmlnsNames; -1 if absent
+ for (int i = 0; i < xmlnsLength; i++) {
+ if (xmlnsNames[i] == name) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ public String getXmlnsValue(AttributeName name) { // null when the name is not a recorded xmlns attribute
+ int index = getXmlnsIndex(name);
+ if (index == -1) {
+ return null;
+ } else {
+ return getXmlnsValue(index);
+ }
+ }
+
+ public AttributeName getXmlnsAttributeName(int index) { // bounds-checked against xmlnsLength
+ if (index < xmlnsLength && index >= 0) {
+ return xmlnsNames[index];
+ } else {
+ return null;
+ }
+ }
+
+ // ]NOCPP]
+
+ void addAttribute(AttributeName name, String value // appends one attribute; does not check for duplicates
+ // [NOCPP[
+ , XmlViolationPolicy xmlnsPolicy
+ // ]NOCPP]
+ // CPPONLY: , int line
+ ) throws SAXException {
+ // [NOCPP[
+ if (name == AttributeName.ID) {
+ idValue = value;
+ }
+
+ if (name.isXmlns()) {
+ if (xmlnsNames.length == xmlnsLength) { // xmlns arrays are full: start at 2 slots, then double
+ int newLen = xmlnsLength == 0 ? 2 : xmlnsLength << 1;
+ AttributeName[] newNames = new AttributeName[newLen];
+ System.arraycopy(xmlnsNames, 0, newNames, 0, xmlnsNames.length);
+ xmlnsNames = newNames;
+ String[] newValues = new String[newLen];
+ System.arraycopy(xmlnsValues, 0, newValues, 0, xmlnsValues.length);
+ xmlnsValues = newValues;
+ }
+ xmlnsNames[xmlnsLength] = name;
+ xmlnsValues[xmlnsLength] = value;
+ xmlnsLength++;
+ switch (xmlnsPolicy) { // note: the xmlns entry above has already been recorded at this point
+ case FATAL:
+ // this is ugly
+ throw new SAXException("Saw an xmlns attribute.");
+ case ALTER_INFOSET: // keep the attribute out of names/values
+ return;
+ case ALLOW: // also store it in names/values below
+ // fall through
+ }
+ }
+
+ // ]NOCPP]
+
+ if (names.length == length) { // arrays are full: double the capacity
+ int newLen = length << 1; // The first growth covers virtually
+ // 100% of elements according to
+ // Hixie
+ AttributeName[] newNames = new AttributeName[newLen];
+ System.arraycopy(names, 0, newNames, 0, names.length);
+ names = newNames;
+ String[] newValues = new String[newLen];
+ System.arraycopy(values, 0, newValues, 0, values.length);
+ values = newValues;
+ // CPPONLY: int[] newLines = new int[newLen];
+ // CPPONLY: System.arraycopy(lines, 0, newLines, 0, lines.length);
+ // CPPONLY: lines = newLines;
+ }
+ names[length] = name;
+ values[length] = value;
+ // CPPONLY: lines[length] = line;
+ length++;
+ }
+
+ void clear(int m) { // releases and drops all attributes, then switches to mode m
+ for (int i = 0; i < length; i++) {
+ names[i].release();
+ names[i] = null;
+ Portability.releaseString(values[i]);
+ values[i] = null;
+ }
+ length = 0;
+ mode = m;
+ // [NOCPP[
+ idValue = null;
+ for (int i = 0; i < xmlnsLength; i++) { // xmlns entries are dropped without release calls
+ xmlnsNames[i] = null;
+ xmlnsValues[i] = null;
+ }
+ xmlnsLength = 0;
+ // ]NOCPP]
+ }
+
+ /**
+ * This is used in C++ to release special <code>isindex</code>
+ * attribute values whose ownership is not transferred.
+ *
+ * @param i index of the value to release; not bounds-checked
+ */
+ void releaseValue(int i) {
+ Portability.releaseString(values[i]);
+ }
+
+ /**
+ * This is only used for <code>AttributeName</code> ownership transfer
+ * in the isindex case to avoid freeing custom names twice in C++.
+ * Only <code>names</code>, <code>values</code> and <code>length</code>
+ * are reset; <code>idValue</code> and the xmlns arrays are left untouched.
+ */
+ void clearWithoutReleasingContents() {
+ for (int i = 0; i < length; i++) {
+ names[i] = null;
+ values[i] = null;
+ }
+ length = 0;
+ }
+
+ boolean contains(AttributeName name) { // uses equalsAnother, unlike the identity-based getIndex(AttributeName)
+ for (int i = 0; i < length; i++) {
+ if (name.equalsAnother(names[i])) {
+ return true;
+ }
+ }
+ // [NOCPP[
+ for (int i = 0; i < xmlnsLength; i++) {
+ if (name.equalsAnother(xmlnsNames[i])) {
+ return true;
+ }
+ }
+ // ]NOCPP]
+ return false;
+ }
+
+ public void adjustForMath() { // switches name lookups to AttributeName.MATHML
+ mode = AttributeName.MATHML;
+ }
+
+ public void adjustForSvg() { // switches name lookups to AttributeName.SVG
+ mode = AttributeName.SVG;
+ }
+
+ public HtmlAttributes cloneAttributes(Interner interner) // the clone is always created with mode 0
+ throws SAXException {
+ assert (length == 0
+ // [NOCPP[
+ && xmlnsLength == 0
+ // ]NOCPP]
+ )
+ || mode == 0 || mode == 3;
+ HtmlAttributes clone = new HtmlAttributes(0);
+ for (int i = 0; i < length; i++) {
+ clone.addAttribute(names[i].cloneAttributeName(interner),
+ Portability.newStringFromString(values[i])
+ // [NOCPP[
+ , XmlViolationPolicy.ALLOW
+ // ]NOCPP]
+ // CPPONLY: , lines[i]
+ );
+ }
+ // [NOCPP[
+ for (int i = 0; i < xmlnsLength; i++) { // NOTE(review): under ALLOW, addAttribute also appends xmlns names to names/values, so entries already copied by the loop above may be added twice -- confirm
+ clone.addAttribute(xmlnsNames[i], xmlnsValues[i],
+ XmlViolationPolicy.ALLOW);
+ }
+ // ]NOCPP]
+ return clone; // XXX!!!
+ }
+
+ public boolean equalsAnother(HtmlAttributes other) { // same length, and every own attribute has a same-named, equal-valued match in other
+ assert mode == 0 || mode == 3 : "Trying to compare attributes in foreign content.";
+ int otherLength = other.getLength();
+ if (length != otherLength) {
+ return false;
+ }
+ for (int i = 0; i < length; i++) {
+ // Work around the limitations of C++
+ boolean found = false;
+ // Comparing just the local names is OK, since these attribute
+ // holders are both supposed to belong to HTML formatting elements
+ @Local String ownLocal = names[i].getLocal(AttributeName.HTML);
+ for (int j = 0; j < otherLength; j++) {
+ if (ownLocal == other.names[j].getLocal(AttributeName.HTML)) { // reference comparison; presumably relies on interned local names -- TODO confirm
+ found = true;
+ if (!Portability.stringEqualsString(values[i], other.values[j])) {
+ return false;
+ }
+ }
+ }
+ if (!found) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // [NOCPP[
+
+ void processNonNcNames(TreeBuilder<?> treeBuilder, XmlViolationPolicy namePolicy) throws SAXException { // handles names for which isNcName(mode) is false
+ for (int i = 0; i < length; i++) {
+ AttributeName attName = names[i];
+ if (!attName.isNcName(mode)) {
+ String name = attName.getLocal(mode);
+ switch (namePolicy) {
+ case ALTER_INFOSET: // replace the name with an escaped one, then warn as for ALLOW
+ names[i] = AttributeName.create(NCName.escapeName(name));
+ // fall through
+ case ALLOW:
+ if (attName != AttributeName.XML_LANG) { // no warning for XML_LANG
+ treeBuilder.warn("Attribute \u201C" + name + "\u201D is not serializable as XML 1.0.");
+ }
+ break;
+ case FATAL:
+ treeBuilder.fatal("Attribute \u201C" + name + "\u201D is not serializable as XML 1.0.");
+ break;
+ }
+ }
+ }
+ }
+
+ public void merge(HtmlAttributes attributes) throws SAXException { // adds each attribute of the argument that contains() does not already find here
+ int len = attributes.getLength();
+ for (int i = 0; i < len; i++) {
+ AttributeName name = attributes.getAttributeNameNoBoundsCheck(i);
+ if (!contains(name)) {
+ addAttribute(name, attributes.getValueNoBoundsCheck(i), XmlViolationPolicy.ALLOW);
+ }
+ }
+ }
+
+
+ // ]NOCPP]
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/LocatorImpl.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/LocatorImpl.java
new file mode 100644
index 000000000..7a559d903
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/LocatorImpl.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2011 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import org.xml.sax.Locator;
+
+ /**
+  * An immutable snapshot of the position reported by another
+  * {@link Locator}. The four values are read once, in the constructor, so
+  * later changes to the source locator are not reflected by this object.
+  */
+ public class LocatorImpl implements Locator {
+
+     /** Public identifier captured from the source locator. */
+     private final String publicId;
+
+     /** System identifier captured from the source locator. */
+     private final String systemId;
+
+     /** Line number captured from the source locator. */
+     private final int line;
+
+     /** Column number captured from the source locator. */
+     private final int column;
+
+     /**
+      * Captures the current state of <code>locator</code>.
+      *
+      * @param locator the locator to copy from
+      */
+     public LocatorImpl(Locator locator) {
+         systemId = locator.getSystemId();
+         publicId = locator.getPublicId();
+         column = locator.getColumnNumber();
+         line = locator.getLineNumber();
+     }
+
+     /** @return the captured public identifier */
+     public final String getPublicId() {
+         return publicId;
+     }
+
+     /** @return the captured system identifier */
+     public final String getSystemId() {
+         return systemId;
+     }
+
+     /** @return the captured line number */
+     public final int getLineNumber() {
+         return line;
+     }
+
+     /** @return the captured column number */
+     public final int getColumnNumber() {
+         return column;
+     }
+ }
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/MetaScanner.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/MetaScanner.java
new file mode 100644
index 000000000..be9aabfe3
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/MetaScanner.java
@@ -0,0 +1,854 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import java.io.IOException;
+
+import nu.validator.htmlparser.annotation.Auto;
+import nu.validator.htmlparser.annotation.Inline;
+import nu.validator.htmlparser.common.ByteReadable;
+
+import org.xml.sax.SAXException;
+
+public abstract class MetaScanner {
+
+ /**
+ * The attribute name "charset" without its first letter. The leading
+ * 'c' is consumed by the state that starts an attribute name, and
+ * matching against this array begins at index 0 with the next character.
+ */
+ private static final char[] CHARSET = { 'h', 'a', 'r', 's', 'e', 't' };
+
+ /**
+ * The attribute name "content" without its first letter (see
+ * <code>CHARSET</code> for why the first letter is omitted).
+ */
+ private static final char[] CONTENT = { 'o', 'n', 't', 'e', 'n', 't' };
+
+ /**
+ * The attribute name "http-equiv" without its first letter (see
+ * <code>CHARSET</code> for why the first letter is omitted).
+ */
+ private static final char[] HTTP_EQUIV = { 't', 't', 'p', '-', 'e', 'q',
+ 'u', 'i', 'v' };
+
+ /**
+ * The attribute value "content-type", in full. Attribute values are
+ * matched from their first character, so nothing is omitted here.
+ */
+ private static final char[] CONTENT_TYPE = { 'c', 'o', 'n', 't', 'e', 'n',
+ 't', '-', 't', 'y', 'p', 'e' };
+
+ // Values of metaState: how much of the tag name "meta" has been matched.
+ // NO means the tag name is not (or is no longer) a prefix of "meta".
+
+ private static final int NO = 0;
+
+ private static final int M = 1;
+
+ private static final int E = 2;
+
+ private static final int T = 3;
+
+ private static final int A = 4;
+
+ // States of the scanning state machine run by stateLoop().
+ // NOTE(review): no state constant has the value 12 in this file.
+
+ private static final int DATA = 0;
+
+ private static final int TAG_OPEN = 1;
+
+ private static final int SCAN_UNTIL_GT = 2;
+
+ private static final int TAG_NAME = 3;
+
+ private static final int BEFORE_ATTRIBUTE_NAME = 4;
+
+ private static final int ATTRIBUTE_NAME = 5;
+
+ private static final int AFTER_ATTRIBUTE_NAME = 6;
+
+ private static final int BEFORE_ATTRIBUTE_VALUE = 7;
+
+ private static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED = 8;
+
+ private static final int ATTRIBUTE_VALUE_SINGLE_QUOTED = 9;
+
+ private static final int ATTRIBUTE_VALUE_UNQUOTED = 10;
+
+ private static final int AFTER_ATTRIBUTE_VALUE_QUOTED = 11;
+
+ private static final int MARKUP_DECLARATION_OPEN = 13;
+
+ private static final int MARKUP_DECLARATION_HYPHEN = 14;
+
+ private static final int COMMENT_START = 15;
+
+ private static final int COMMENT_START_DASH = 16;
+
+ private static final int COMMENT = 17;
+
+ private static final int COMMENT_END_DASH = 18;
+
+ private static final int COMMENT_END = 19;
+
+ private static final int SELF_CLOSING_START_TAG = 20;
+
+ // Values of httpEquivState: whether an http-equiv attribute has been seen
+ // on the current tag and, if so, whether its value was "content-type".
+
+ private static final int HTTP_EQUIV_NOT_SEEN = 0;
+
+ private static final int HTTP_EQUIV_CONTENT_TYPE = 1;
+
+ private static final int HTTP_EQUIV_OTHER = 2;
+
+ /**
+ * The data source.
+ */
+ protected ByteReadable readable;
+
+ /**
+ * The state of the state machine that recognizes the tag name "meta".
+ */
+ private int metaState = NO;
+
+ /**
+ * The current position in recognizing the attribute name "content"
+ * (an index into <code>CONTENT</code>). <code>Integer.MAX_VALUE</code>
+ * means the current attribute name does not match.
+ */
+ private int contentIndex = Integer.MAX_VALUE;
+
+ /**
+ * The current position in recognizing the attribute name "charset"
+ * (an index into <code>CHARSET</code>). <code>Integer.MAX_VALUE</code>
+ * means the current attribute name does not match.
+ */
+ private int charsetIndex = Integer.MAX_VALUE;
+
+ /**
+ * The current position in recognizing the attribute name "http-equiv"
+ * (an index into <code>HTTP_EQUIV</code>). <code>Integer.MAX_VALUE</code>
+ * means the current attribute name does not match.
+ */
+ private int httpEquivIndex = Integer.MAX_VALUE;
+
+ /**
+ * The current position in recognizing the attribute value "content-type"
+ * (an index into <code>CONTENT_TYPE</code>). <code>Integer.MAX_VALUE</code>
+ * means the current attribute value does not match.
+ */
+ private int contentTypeIndex = Integer.MAX_VALUE;
+
+ /**
+ * The tokenizer state. <code>stateLoop</code> stores the state it was in
+ * when it exited here.
+ */
+ protected int stateSave = DATA;
+
+ /**
+ * The currently filled length of strBuf.
+ */
+ private int strBufLen;
+
+ /**
+ * Accumulation buffer for attribute values.
+ */
+ private @Auto char[] strBuf;
+
+ /**
+ * The value of the first "content" attribute seen on the current tag, or
+ * <code>null</code> if none has been seen yet.
+ */
+ private String content;
+
+ /**
+ * The value of the first "charset" attribute seen on the current tag, or
+ * <code>null</code> if none has been seen yet.
+ */
+ private String charset;
+
+ /**
+ * One of the <code>HTTP_EQUIV_*</code> constants, describing the first
+ * "http-equiv" attribute seen on the current tag.
+ */
+ private int httpEquivState;
+
+ // CPPONLY: private TreeBuilder treeBuilder;
+
+ /**
+ * Creates a scanner in its initial state: no data source, the
+ * <code>DATA</code> state, an empty 36-char accumulation buffer and no
+ * attribute values recorded. The <code>CPPONLY</code> comments are markers
+ * for the C++ translation and must be kept as they are.
+ */
+ public MetaScanner(
+ // CPPONLY: TreeBuilder tb
+ ) {
+ this.readable = null;
+ this.metaState = NO;
+ this.contentIndex = Integer.MAX_VALUE;
+ this.charsetIndex = Integer.MAX_VALUE;
+ this.httpEquivIndex = Integer.MAX_VALUE;
+ this.contentTypeIndex = Integer.MAX_VALUE;
+ this.stateSave = DATA;
+ this.strBufLen = 0;
+ this.strBuf = new char[36];
+ this.content = null;
+ this.charset = null;
+ this.httpEquivState = HTTP_EQUIV_NOT_SEEN;
+ // CPPONLY: this.treeBuilder = tb;
+ }
+
+ // Releases the strings held for the pending "content" and "charset"
+ // attribute values. Nothing in the visible Java code calls this method.
+ // NOTE(review): presumably it exists for the C++ translation, where it
+ // becomes the destructor - confirm.
+ @SuppressWarnings("unused") private void destructor() {
+ Portability.releaseString(content);
+ Portability.releaseString(charset);
+ }
+
+ // [NOCPP[
+
+ /**
+ * Reads a byte from the data source.
+ *
+ * @return the value returned by <code>readable.readByte()</code>; the
+ * state loop treats -1 as the end of the input
+ * @throws IOException if reading from the data source fails
+ */
+ protected int read() throws IOException {
+ return readable.readByte();
+ }
+
+ // ]NOCPP]
+
+ // WARNING: When editing this, make sure the bytecode length shown by javap
+ // stays under 8000 bytes!
+ /**
+ * Runs the meta scanning algorithm.
+ *
+ * Characters are obtained through <code>read()</code>. The loop ends when
+ * <code>read()</code> returns -1 or when <code>handleTag()</code> returns
+ * <code>true</code>; the state that was current at that point is stored in
+ * <code>stateSave</code>.
+ *
+ * Several cases rely on falling through into the case that follows them,
+ * so the order of the cases must not be changed except at the places
+ * marked "reorder point".
+ *
+ * @param state the state to start scanning in
+ * @throws SAXException if handling an attribute value or a tag throws
+ * @throws IOException if reading from the data source fails
+ */
+ protected final void stateLoop(int state)
+ throws SAXException, IOException {
+ int c = -1;
+ boolean reconsume = false;
+ stateloop: for (;;) {
+ switch (state) {
+ case DATA:
+ dataloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ c = read();
+ }
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '<':
+ state = MetaScanner.TAG_OPEN;
+ break dataloop; // FALL THROUGH continue
+ // stateloop;
+ default:
+ continue;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case TAG_OPEN:
+ tagopenloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case 'm':
+ case 'M':
+ // Possible start of the tag name "meta".
+ metaState = M;
+ state = MetaScanner.TAG_NAME;
+ break tagopenloop;
+ // continue stateloop;
+ case '!':
+ state = MetaScanner.MARKUP_DECLARATION_OPEN;
+ continue stateloop;
+ case '?':
+ case '/':
+ state = MetaScanner.SCAN_UNTIL_GT;
+ continue stateloop;
+ case '>':
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
+ metaState = NO;
+ state = MetaScanner.TAG_NAME;
+ break tagopenloop;
+ // continue stateloop;
+ }
+ state = MetaScanner.DATA;
+ reconsume = true;
+ continue stateloop;
+ }
+ }
+ // FALL THROUGH DON'T REORDER
+ case TAG_NAME:
+ tagnameloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\u000C':
+ state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
+ break tagnameloop;
+ // continue stateloop;
+ case '/':
+ state = MetaScanner.SELF_CLOSING_START_TAG;
+ continue stateloop;
+ case '>':
+ state = MetaScanner.DATA;
+ continue stateloop;
+ // metaState advances M -> E -> T -> A only when the letters
+ // arrive in that order; anything else resets it to NO.
+ case 'e':
+ case 'E':
+ if (metaState == M) {
+ metaState = E;
+ } else {
+ metaState = NO;
+ }
+ continue;
+ case 't':
+ case 'T':
+ if (metaState == E) {
+ metaState = T;
+ } else {
+ metaState = NO;
+ }
+ continue;
+ case 'a':
+ case 'A':
+ if (metaState == T) {
+ metaState = A;
+ } else {
+ metaState = NO;
+ }
+ continue;
+ default:
+ metaState = NO;
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BEFORE_ATTRIBUTE_NAME:
+ beforeattributenameloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ c = read();
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\u000C':
+ continue;
+ case '/':
+ state = MetaScanner.SELF_CLOSING_START_TAG;
+ continue stateloop;
+ case '>':
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = DATA;
+ continue stateloop;
+ case 'c':
+ case 'C':
+ // Possible start of "content" or "charset". The first
+ // letter is consumed here, which is why the CONTENT and
+ // CHARSET arrays do not contain it.
+ contentIndex = 0;
+ charsetIndex = 0;
+ httpEquivIndex = Integer.MAX_VALUE;
+ contentTypeIndex = Integer.MAX_VALUE;
+ state = MetaScanner.ATTRIBUTE_NAME;
+ break beforeattributenameloop;
+ case 'h':
+ case 'H':
+ // Possible start of "http-equiv"; the first letter is
+ // consumed here as well.
+ contentIndex = Integer.MAX_VALUE;
+ charsetIndex = Integer.MAX_VALUE;
+ httpEquivIndex = 0;
+ contentTypeIndex = Integer.MAX_VALUE;
+ state = MetaScanner.ATTRIBUTE_NAME;
+ break beforeattributenameloop;
+ default:
+ contentIndex = Integer.MAX_VALUE;
+ charsetIndex = Integer.MAX_VALUE;
+ httpEquivIndex = Integer.MAX_VALUE;
+ contentTypeIndex = Integer.MAX_VALUE;
+ state = MetaScanner.ATTRIBUTE_NAME;
+ break beforeattributenameloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case ATTRIBUTE_NAME:
+ attributenameloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\u000C':
+ state = MetaScanner.AFTER_ATTRIBUTE_NAME;
+ continue stateloop;
+ case '/':
+ state = MetaScanner.SELF_CLOSING_START_TAG;
+ continue stateloop;
+ case '=':
+ // Start collecting a fresh value and restart the
+ // matching of the value "content-type".
+ strBufLen = 0;
+ contentTypeIndex = 0;
+ state = MetaScanner.BEFORE_ATTRIBUTE_VALUE;
+ break attributenameloop;
+ // continue stateloop;
+ case '>':
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ // Attribute names are tracked only when the tag name
+ // matched "meta" (metaState == A). The character is
+ // lower-cased if it is an ASCII upper-case letter and
+ // then compared against all three candidate names.
+ if (metaState == A) {
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ }
+ if (contentIndex < CONTENT.length && c == CONTENT[contentIndex]) {
+ ++contentIndex;
+ } else {
+ contentIndex = Integer.MAX_VALUE;
+ }
+ if (charsetIndex < CHARSET.length && c == CHARSET[charsetIndex]) {
+ ++charsetIndex;
+ } else {
+ charsetIndex = Integer.MAX_VALUE;
+ }
+ if (httpEquivIndex < HTTP_EQUIV.length && c == HTTP_EQUIV[httpEquivIndex]) {
+ ++httpEquivIndex;
+ } else {
+ httpEquivIndex = Integer.MAX_VALUE;
+ }
+ }
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BEFORE_ATTRIBUTE_VALUE:
+ beforeattributevalueloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\u000C':
+ continue;
+ case '"':
+ state = MetaScanner.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
+ break beforeattributevalueloop;
+ // continue stateloop;
+ case '\'':
+ state = MetaScanner.ATTRIBUTE_VALUE_SINGLE_QUOTED;
+ continue stateloop;
+ case '>':
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ // First character of an unquoted value.
+ handleCharInAttributeValue(c);
+ state = MetaScanner.ATTRIBUTE_VALUE_UNQUOTED;
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
+ attributevaluedoublequotedloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ c = read();
+ }
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '"':
+ handleAttributeValue();
+ state = MetaScanner.AFTER_ATTRIBUTE_VALUE_QUOTED;
+ break attributevaluedoublequotedloop;
+ // continue stateloop;
+ default:
+ handleCharInAttributeValue(c);
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case AFTER_ATTRIBUTE_VALUE_QUOTED:
+ afterattributevaluequotedloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\u000C':
+ state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
+ continue stateloop;
+ case '/':
+ state = MetaScanner.SELF_CLOSING_START_TAG;
+ break afterattributevaluequotedloop;
+ // continue stateloop;
+ case '>':
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
+ reconsume = true;
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case SELF_CLOSING_START_TAG:
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '>':
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
+ reconsume = true;
+ continue stateloop;
+ }
+ // XXX reorder point
+ case ATTRIBUTE_VALUE_UNQUOTED:
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ c = read();
+ }
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+
+ case '\u000C':
+ handleAttributeValue();
+ state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
+ continue stateloop;
+ case '>':
+ handleAttributeValue();
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ handleCharInAttributeValue(c);
+ continue;
+ }
+ }
+ // XXX reorder point
+ case AFTER_ATTRIBUTE_NAME:
+ for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\u000C':
+ continue;
+ case '/':
+ handleAttributeValue();
+ state = MetaScanner.SELF_CLOSING_START_TAG;
+ continue stateloop;
+ case '=':
+ strBufLen = 0;
+ contentTypeIndex = 0;
+ state = MetaScanner.BEFORE_ATTRIBUTE_VALUE;
+ continue stateloop;
+ case '>':
+ handleAttributeValue();
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = MetaScanner.DATA;
+ continue stateloop;
+ // NOTE(review): unlike BEFORE_ATTRIBUTE_NAME, this state has no
+ // 'h'/'H' case and never assigns httpEquivIndex, so an
+ // "http-equiv" name that directly follows a valueless
+ // attribute does not appear to be tracked from here, and a
+ // partial match left in httpEquivIndex is carried over -
+ // confirm whether that is intended.
+ case 'c':
+ case 'C':
+ contentIndex = 0;
+ charsetIndex = 0;
+ state = MetaScanner.ATTRIBUTE_NAME;
+ continue stateloop;
+ default:
+ contentIndex = Integer.MAX_VALUE;
+ charsetIndex = Integer.MAX_VALUE;
+ state = MetaScanner.ATTRIBUTE_NAME;
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case MARKUP_DECLARATION_OPEN:
+ markupdeclarationopenloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '-':
+ state = MetaScanner.MARKUP_DECLARATION_HYPHEN;
+ break markupdeclarationopenloop;
+ // continue stateloop;
+ default:
+ state = MetaScanner.SCAN_UNTIL_GT;
+ reconsume = true;
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case MARKUP_DECLARATION_HYPHEN:
+ markupdeclarationhyphenloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '-':
+ state = MetaScanner.COMMENT_START;
+ break markupdeclarationhyphenloop;
+ // continue stateloop;
+ default:
+ state = MetaScanner.SCAN_UNTIL_GT;
+ reconsume = true;
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT_START:
+ commentstartloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '-':
+ state = MetaScanner.COMMENT_START_DASH;
+ continue stateloop;
+ case '>':
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ state = MetaScanner.COMMENT;
+ break commentstartloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT:
+ commentloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '-':
+ state = MetaScanner.COMMENT_END_DASH;
+ break commentloop;
+ // continue stateloop;
+ default:
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT_END_DASH:
+ commentenddashloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '-':
+ state = MetaScanner.COMMENT_END;
+ break commentenddashloop;
+ // continue stateloop;
+ default:
+ state = MetaScanner.COMMENT;
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT_END:
+ for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '>':
+ state = MetaScanner.DATA;
+ continue stateloop;
+ case '-':
+ continue;
+ default:
+ state = MetaScanner.COMMENT;
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case COMMENT_START_DASH:
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '-':
+ state = MetaScanner.COMMENT_END;
+ continue stateloop;
+ case '>':
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ state = MetaScanner.COMMENT;
+ continue stateloop;
+ }
+ // XXX reorder point
+ case ATTRIBUTE_VALUE_SINGLE_QUOTED:
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ c = read();
+ }
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '\'':
+ handleAttributeValue();
+ state = MetaScanner.AFTER_ATTRIBUTE_VALUE_QUOTED;
+ continue stateloop;
+ default:
+ handleCharInAttributeValue(c);
+ continue;
+ }
+ }
+ // XXX reorder point
+ case SCAN_UNTIL_GT:
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ c = read();
+ }
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '>':
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ continue;
+ }
+ }
+ }
+ }
+ // Remember where scanning stopped.
+ stateSave = state;
+ }
+
+ /**
+  * Processes one character of the attribute value that is being read.
+  * Nothing happens unless the tag name matched "meta". For a "content" or
+  * "charset" attribute the character is appended to the accumulation
+  * buffer; for an "http-equiv" attribute it is compared, ignoring ASCII
+  * case, with the next expected character of "content-type".
+  *
+  * @param c the character from the attribute value
+  */
+ private void handleCharInAttributeValue(int c) {
+     if (metaState != A) {
+         return;
+     }
+     if (contentIndex == CONTENT.length || charsetIndex == CHARSET.length) {
+         addToBuffer(c);
+         return;
+     }
+     if (httpEquivIndex != HTTP_EQUIV.length) {
+         return;
+     }
+     boolean stillMatching = contentTypeIndex < CONTENT_TYPE.length
+             && toAsciiLowerCase(c) == CONTENT_TYPE[contentTypeIndex];
+     if (stillMatching) {
+         ++contentTypeIndex;
+     } else {
+         contentTypeIndex = Integer.MAX_VALUE;
+     }
+ }
+
+ @Inline private int toAsciiLowerCase(int c) {
+ if (c >= 'A' && c <= 'Z') {
+ return c + 0x20;
+ }
+ return c;
+ }
+
+ /**
+  * Appends a character to the accumulation buffer. When the buffer is
+  * full it is first replaced by a copy three times its size.
+  *
+  * @param c the character to append; it is narrowed to <code>char</code>
+  */
+ private void addToBuffer(int c) {
+     int capacity = strBuf.length;
+     if (strBufLen == capacity) {
+         char[] grown = new char[capacity + (capacity << 1)];
+         System.arraycopy(strBuf, 0, grown, 0, capacity);
+         strBuf = grown;
+     }
+     strBuf[strBufLen++] = (char) c;
+ }
+
+ /**
+  * Records the attribute value that has just ended. Nothing happens unless
+  * the tag name matched "meta". Otherwise at most one of the following
+  * applies, checked in this order:
+  * <ul>
+  * <li>the first "content" value on the tag is stored from the buffer;</li>
+  * <li>the first "charset" value on the tag is stored from the buffer;</li>
+  * <li>for the first "http-equiv" on the tag, it is noted whether its
+  * value was "content-type".</li>
+  * </ul>
+  *
+  * @throws SAXException declared by the signature
+  */
+ private void handleAttributeValue() throws SAXException {
+     if (metaState != A) {
+         return;
+     }
+     if (contentIndex == CONTENT.length && content == null) {
+         content = Portability.newStringFromBuffer(strBuf, 0, strBufLen
+                 // CPPONLY: , treeBuilder
+         );
+     } else if (charsetIndex == CHARSET.length && charset == null) {
+         charset = Portability.newStringFromBuffer(strBuf, 0, strBufLen
+                 // CPPONLY: , treeBuilder
+         );
+     } else if (httpEquivIndex == HTTP_EQUIV.length
+             && httpEquivState == HTTP_EQUIV_NOT_SEEN) {
+         if (contentTypeIndex == CONTENT_TYPE.length) {
+             httpEquivState = HTTP_EQUIV_CONTENT_TYPE;
+         } else {
+             httpEquivState = HTTP_EQUIV_OTHER;
+         }
+     }
+ }
+
+ /**
+  * Evaluates the attributes collected for the tag that has just ended and
+  * then clears them, so that the next tag starts from a clean slate.
+  *
+  * @return <code>true</code> if scanning should stop
+  * @throws SAXException if evaluating the tag throws
+  */
+ private boolean handleTag() throws SAXException {
+     boolean shouldStop = handleTagInner();
+     // Forget everything recorded for this tag.
+     Portability.releaseString(content);
+     content = null;
+     Portability.releaseString(charset);
+     charset = null;
+     httpEquivState = HTTP_EQUIV_NOT_SEEN;
+     return shouldStop;
+ }
+
+ /**
+  * Tries the encodings declared on the tag that has just ended. A
+  * "charset" value is tried first. If that is absent or rejected, and the
+  * tag had both a "content" value and an http-equiv of "content-type", the
+  * charset extracted from the "content" value is tried.
+  *
+  * @return <code>true</code> if <code>tryCharset</code> accepted an encoding
+  * @throws SAXException if <code>tryCharset</code> throws
+  */
+ private boolean handleTagInner() throws SAXException {
+     if (charset != null && tryCharset(charset)) {
+         return true;
+     }
+     if (content == null || httpEquivState != HTTP_EQUIV_CONTENT_TYPE) {
+         return false;
+     }
+     String extracted = TreeBuilder.extractCharsetFromContent(content
+             // CPPONLY: , treeBuilder
+     );
+     if (extracted == null) {
+         return false;
+     }
+     boolean accepted = tryCharset(extracted);
+     Portability.releaseString(extracted);
+     return accepted;
+ }
+
+ /**
+ * Tries to switch to an encoding. When this returns <code>true</code>,
+ * the state loop stops scanning.
+ *
+ * @param encoding the encoding name taken from a "charset" attribute or
+ * extracted from a "content" attribute of a meta tag
+ * @return <code>true</code> if successful
+ * @throws SAXException
+ */
+ protected abstract boolean tryCharset(String encoding) throws SAXException;
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NCName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NCName.java
new file mode 100644
index 000000000..940cf2e9c
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NCName.java
@@ -0,0 +1,495 @@
+/*
+ * Copyright (c) 2008-2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+public final class NCName {
+ // [NOCPP[
+
+ // Adding this to (high << 10) + low gives
+ // 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00).
+ // NOTE(review): presumably used to combine a surrogate pair into a code
+ // point; the use is not visible in this part of the file - confirm.
+ private static final int SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00;
+
+ // Upper-case hexadecimal digits, indexed by their value (0-15).
+ private static final char[] HEX_TABLE = "0123456789ABCDEF".toCharArray();
+
+ /**
+ * Tells whether a UTF-16 code unit may start an NCName, according to the
+ * hard-coded table below: ASCII letters, '_', and a fixed list of
+ * non-ASCII ranges and single characters. ':' and digits are not
+ * accepted.
+ *
+ * NOTE(review): the table looks like the Letter production of XML 1.0
+ * (editions before the fifth) plus '_' - confirm against the
+ * specification before changing any range.
+ *
+ * @param c the code unit to test
+ * @return <code>true</code> if <code>c</code> is in the table
+ */
+ public static boolean isNCNameStart(char c) {
+ return ((c >= '\u0041' && c <= '\u005A')
+ || (c >= '\u0061' && c <= '\u007A')
+ || (c >= '\u00C0' && c <= '\u00D6')
+ || (c >= '\u00D8' && c <= '\u00F6')
+ || (c >= '\u00F8' && c <= '\u00FF')
+ || (c >= '\u0100' && c <= '\u0131')
+ || (c >= '\u0134' && c <= '\u013E')
+ || (c >= '\u0141' && c <= '\u0148')
+ || (c >= '\u014A' && c <= '\u017E')
+ || (c >= '\u0180' && c <= '\u01C3')
+ || (c >= '\u01CD' && c <= '\u01F0')
+ || (c >= '\u01F4' && c <= '\u01F5')
+ || (c >= '\u01FA' && c <= '\u0217')
+ || (c >= '\u0250' && c <= '\u02A8')
+ || (c >= '\u02BB' && c <= '\u02C1') || (c == '\u0386')
+ || (c >= '\u0388' && c <= '\u038A') || (c == '\u038C')
+ || (c >= '\u038E' && c <= '\u03A1')
+ || (c >= '\u03A3' && c <= '\u03CE')
+ || (c >= '\u03D0' && c <= '\u03D6') || (c == '\u03DA')
+ || (c == '\u03DC') || (c == '\u03DE') || (c == '\u03E0')
+ || (c >= '\u03E2' && c <= '\u03F3')
+ || (c >= '\u0401' && c <= '\u040C')
+ || (c >= '\u040E' && c <= '\u044F')
+ || (c >= '\u0451' && c <= '\u045C')
+ || (c >= '\u045E' && c <= '\u0481')
+ || (c >= '\u0490' && c <= '\u04C4')
+ || (c >= '\u04C7' && c <= '\u04C8')
+ || (c >= '\u04CB' && c <= '\u04CC')
+ || (c >= '\u04D0' && c <= '\u04EB')
+ || (c >= '\u04EE' && c <= '\u04F5')
+ || (c >= '\u04F8' && c <= '\u04F9')
+ || (c >= '\u0531' && c <= '\u0556') || (c == '\u0559')
+ || (c >= '\u0561' && c <= '\u0586')
+ || (c >= '\u05D0' && c <= '\u05EA')
+ || (c >= '\u05F0' && c <= '\u05F2')
+ || (c >= '\u0621' && c <= '\u063A')
+ || (c >= '\u0641' && c <= '\u064A')
+ || (c >= '\u0671' && c <= '\u06B7')
+ || (c >= '\u06BA' && c <= '\u06BE')
+ || (c >= '\u06C0' && c <= '\u06CE')
+ || (c >= '\u06D0' && c <= '\u06D3') || (c == '\u06D5')
+ || (c >= '\u06E5' && c <= '\u06E6')
+ || (c >= '\u0905' && c <= '\u0939') || (c == '\u093D')
+ || (c >= '\u0958' && c <= '\u0961')
+ || (c >= '\u0985' && c <= '\u098C')
+ || (c >= '\u098F' && c <= '\u0990')
+ || (c >= '\u0993' && c <= '\u09A8')
+ || (c >= '\u09AA' && c <= '\u09B0') || (c == '\u09B2')
+ || (c >= '\u09B6' && c <= '\u09B9')
+ || (c >= '\u09DC' && c <= '\u09DD')
+ || (c >= '\u09DF' && c <= '\u09E1')
+ || (c >= '\u09F0' && c <= '\u09F1')
+ || (c >= '\u0A05' && c <= '\u0A0A')
+ || (c >= '\u0A0F' && c <= '\u0A10')
+ || (c >= '\u0A13' && c <= '\u0A28')
+ || (c >= '\u0A2A' && c <= '\u0A30')
+ || (c >= '\u0A32' && c <= '\u0A33')
+ || (c >= '\u0A35' && c <= '\u0A36')
+ || (c >= '\u0A38' && c <= '\u0A39')
+ || (c >= '\u0A59' && c <= '\u0A5C') || (c == '\u0A5E')
+ || (c >= '\u0A72' && c <= '\u0A74')
+ || (c >= '\u0A85' && c <= '\u0A8B') || (c == '\u0A8D')
+ || (c >= '\u0A8F' && c <= '\u0A91')
+ || (c >= '\u0A93' && c <= '\u0AA8')
+ || (c >= '\u0AAA' && c <= '\u0AB0')
+ || (c >= '\u0AB2' && c <= '\u0AB3')
+ || (c >= '\u0AB5' && c <= '\u0AB9') || (c == '\u0ABD')
+ || (c == '\u0AE0') || (c >= '\u0B05' && c <= '\u0B0C')
+ || (c >= '\u0B0F' && c <= '\u0B10')
+ || (c >= '\u0B13' && c <= '\u0B28')
+ || (c >= '\u0B2A' && c <= '\u0B30')
+ || (c >= '\u0B32' && c <= '\u0B33')
+ || (c >= '\u0B36' && c <= '\u0B39') || (c == '\u0B3D')
+ || (c >= '\u0B5C' && c <= '\u0B5D')
+ || (c >= '\u0B5F' && c <= '\u0B61')
+ || (c >= '\u0B85' && c <= '\u0B8A')
+ || (c >= '\u0B8E' && c <= '\u0B90')
+ || (c >= '\u0B92' && c <= '\u0B95')
+ || (c >= '\u0B99' && c <= '\u0B9A') || (c == '\u0B9C')
+ || (c >= '\u0B9E' && c <= '\u0B9F')
+ || (c >= '\u0BA3' && c <= '\u0BA4')
+ || (c >= '\u0BA8' && c <= '\u0BAA')
+ || (c >= '\u0BAE' && c <= '\u0BB5')
+ || (c >= '\u0BB7' && c <= '\u0BB9')
+ || (c >= '\u0C05' && c <= '\u0C0C')
+ || (c >= '\u0C0E' && c <= '\u0C10')
+ || (c >= '\u0C12' && c <= '\u0C28')
+ || (c >= '\u0C2A' && c <= '\u0C33')
+ || (c >= '\u0C35' && c <= '\u0C39')
+ || (c >= '\u0C60' && c <= '\u0C61')
+ || (c >= '\u0C85' && c <= '\u0C8C')
+ || (c >= '\u0C8E' && c <= '\u0C90')
+ || (c >= '\u0C92' && c <= '\u0CA8')
+ || (c >= '\u0CAA' && c <= '\u0CB3')
+ || (c >= '\u0CB5' && c <= '\u0CB9') || (c == '\u0CDE')
+ || (c >= '\u0CE0' && c <= '\u0CE1')
+ || (c >= '\u0D05' && c <= '\u0D0C')
+ || (c >= '\u0D0E' && c <= '\u0D10')
+ || (c >= '\u0D12' && c <= '\u0D28')
+ || (c >= '\u0D2A' && c <= '\u0D39')
+ || (c >= '\u0D60' && c <= '\u0D61')
+ || (c >= '\u0E01' && c <= '\u0E2E') || (c == '\u0E30')
+ || (c >= '\u0E32' && c <= '\u0E33')
+ || (c >= '\u0E40' && c <= '\u0E45')
+ || (c >= '\u0E81' && c <= '\u0E82') || (c == '\u0E84')
+ || (c >= '\u0E87' && c <= '\u0E88') || (c == '\u0E8A')
+ || (c == '\u0E8D') || (c >= '\u0E94' && c <= '\u0E97')
+ || (c >= '\u0E99' && c <= '\u0E9F')
+ || (c >= '\u0EA1' && c <= '\u0EA3') || (c == '\u0EA5')
+ || (c == '\u0EA7') || (c >= '\u0EAA' && c <= '\u0EAB')
+ || (c >= '\u0EAD' && c <= '\u0EAE') || (c == '\u0EB0')
+ || (c >= '\u0EB2' && c <= '\u0EB3') || (c == '\u0EBD')
+ || (c >= '\u0EC0' && c <= '\u0EC4')
+ || (c >= '\u0F40' && c <= '\u0F47')
+ || (c >= '\u0F49' && c <= '\u0F69')
+ || (c >= '\u10A0' && c <= '\u10C5')
+ || (c >= '\u10D0' && c <= '\u10F6') || (c == '\u1100')
+ || (c >= '\u1102' && c <= '\u1103')
+ || (c >= '\u1105' && c <= '\u1107') || (c == '\u1109')
+ || (c >= '\u110B' && c <= '\u110C')
+ || (c >= '\u110E' && c <= '\u1112') || (c == '\u113C')
+ || (c == '\u113E') || (c == '\u1140') || (c == '\u114C')
+ || (c == '\u114E') || (c == '\u1150')
+ || (c >= '\u1154' && c <= '\u1155') || (c == '\u1159')
+ || (c >= '\u115F' && c <= '\u1161') || (c == '\u1163')
+ || (c == '\u1165') || (c == '\u1167') || (c == '\u1169')
+ || (c >= '\u116D' && c <= '\u116E')
+ || (c >= '\u1172' && c <= '\u1173') || (c == '\u1175')
+ || (c == '\u119E') || (c == '\u11A8') || (c == '\u11AB')
+ || (c >= '\u11AE' && c <= '\u11AF')
+ || (c >= '\u11B7' && c <= '\u11B8') || (c == '\u11BA')
+ || (c >= '\u11BC' && c <= '\u11C2') || (c == '\u11EB')
+ || (c == '\u11F0') || (c == '\u11F9')
+ || (c >= '\u1E00' && c <= '\u1E9B')
+ || (c >= '\u1EA0' && c <= '\u1EF9')
+ || (c >= '\u1F00' && c <= '\u1F15')
+ || (c >= '\u1F18' && c <= '\u1F1D')
+ || (c >= '\u1F20' && c <= '\u1F45')
+ || (c >= '\u1F48' && c <= '\u1F4D')
+ || (c >= '\u1F50' && c <= '\u1F57') || (c == '\u1F59')
+ || (c == '\u1F5B') || (c == '\u1F5D')
+ || (c >= '\u1F5F' && c <= '\u1F7D')
+ || (c >= '\u1F80' && c <= '\u1FB4')
+ || (c >= '\u1FB6' && c <= '\u1FBC') || (c == '\u1FBE')
+ || (c >= '\u1FC2' && c <= '\u1FC4')
+ || (c >= '\u1FC6' && c <= '\u1FCC')
+ || (c >= '\u1FD0' && c <= '\u1FD3')
+ || (c >= '\u1FD6' && c <= '\u1FDB')
+ || (c >= '\u1FE0' && c <= '\u1FEC')
+ || (c >= '\u1FF2' && c <= '\u1FF4')
+ || (c >= '\u1FF6' && c <= '\u1FFC') || (c == '\u2126')
+ || (c >= '\u212A' && c <= '\u212B') || (c == '\u212E')
+ || (c >= '\u2180' && c <= '\u2182')
+ || (c >= '\u3041' && c <= '\u3094')
+ || (c >= '\u30A1' && c <= '\u30FA')
+ || (c >= '\u3105' && c <= '\u312C')
+ || (c >= '\uAC00' && c <= '\uD7A3')
+ || (c >= '\u4E00' && c <= '\u9FA5') || (c == '\u3007')
+ || (c >= '\u3021' && c <= '\u3029') || (c == '_'));
+ }
+
+ public static boolean isNCNameTrail(char c) {
+ return ((c >= '\u0030' && c <= '\u0039')
+ || (c >= '\u0660' && c <= '\u0669')
+ || (c >= '\u06F0' && c <= '\u06F9')
+ || (c >= '\u0966' && c <= '\u096F')
+ || (c >= '\u09E6' && c <= '\u09EF')
+ || (c >= '\u0A66' && c <= '\u0A6F')
+ || (c >= '\u0AE6' && c <= '\u0AEF')
+ || (c >= '\u0B66' && c <= '\u0B6F')
+ || (c >= '\u0BE7' && c <= '\u0BEF')
+ || (c >= '\u0C66' && c <= '\u0C6F')
+ || (c >= '\u0CE6' && c <= '\u0CEF')
+ || (c >= '\u0D66' && c <= '\u0D6F')
+ || (c >= '\u0E50' && c <= '\u0E59')
+ || (c >= '\u0ED0' && c <= '\u0ED9')
+ || (c >= '\u0F20' && c <= '\u0F29')
+ || (c >= '\u0041' && c <= '\u005A')
+ || (c >= '\u0061' && c <= '\u007A')
+ || (c >= '\u00C0' && c <= '\u00D6')
+ || (c >= '\u00D8' && c <= '\u00F6')
+ || (c >= '\u00F8' && c <= '\u00FF')
+ || (c >= '\u0100' && c <= '\u0131')
+ || (c >= '\u0134' && c <= '\u013E')
+ || (c >= '\u0141' && c <= '\u0148')
+ || (c >= '\u014A' && c <= '\u017E')
+ || (c >= '\u0180' && c <= '\u01C3')
+ || (c >= '\u01CD' && c <= '\u01F0')
+ || (c >= '\u01F4' && c <= '\u01F5')
+ || (c >= '\u01FA' && c <= '\u0217')
+ || (c >= '\u0250' && c <= '\u02A8')
+ || (c >= '\u02BB' && c <= '\u02C1') || (c == '\u0386')
+ || (c >= '\u0388' && c <= '\u038A') || (c == '\u038C')
+ || (c >= '\u038E' && c <= '\u03A1')
+ || (c >= '\u03A3' && c <= '\u03CE')
+ || (c >= '\u03D0' && c <= '\u03D6') || (c == '\u03DA')
+ || (c == '\u03DC') || (c == '\u03DE') || (c == '\u03E0')
+ || (c >= '\u03E2' && c <= '\u03F3')
+ || (c >= '\u0401' && c <= '\u040C')
+ || (c >= '\u040E' && c <= '\u044F')
+ || (c >= '\u0451' && c <= '\u045C')
+ || (c >= '\u045E' && c <= '\u0481')
+ || (c >= '\u0490' && c <= '\u04C4')
+ || (c >= '\u04C7' && c <= '\u04C8')
+ || (c >= '\u04CB' && c <= '\u04CC')
+ || (c >= '\u04D0' && c <= '\u04EB')
+ || (c >= '\u04EE' && c <= '\u04F5')
+ || (c >= '\u04F8' && c <= '\u04F9')
+ || (c >= '\u0531' && c <= '\u0556') || (c == '\u0559')
+ || (c >= '\u0561' && c <= '\u0586')
+ || (c >= '\u05D0' && c <= '\u05EA')
+ || (c >= '\u05F0' && c <= '\u05F2')
+ || (c >= '\u0621' && c <= '\u063A')
+ || (c >= '\u0641' && c <= '\u064A')
+ || (c >= '\u0671' && c <= '\u06B7')
+ || (c >= '\u06BA' && c <= '\u06BE')
+ || (c >= '\u06C0' && c <= '\u06CE')
+ || (c >= '\u06D0' && c <= '\u06D3') || (c == '\u06D5')
+ || (c >= '\u06E5' && c <= '\u06E6')
+ || (c >= '\u0905' && c <= '\u0939') || (c == '\u093D')
+ || (c >= '\u0958' && c <= '\u0961')
+ || (c >= '\u0985' && c <= '\u098C')
+ || (c >= '\u098F' && c <= '\u0990')
+ || (c >= '\u0993' && c <= '\u09A8')
+ || (c >= '\u09AA' && c <= '\u09B0') || (c == '\u09B2')
+ || (c >= '\u09B6' && c <= '\u09B9')
+ || (c >= '\u09DC' && c <= '\u09DD')
+ || (c >= '\u09DF' && c <= '\u09E1')
+ || (c >= '\u09F0' && c <= '\u09F1')
+ || (c >= '\u0A05' && c <= '\u0A0A')
+ || (c >= '\u0A0F' && c <= '\u0A10')
+ || (c >= '\u0A13' && c <= '\u0A28')
+ || (c >= '\u0A2A' && c <= '\u0A30')
+ || (c >= '\u0A32' && c <= '\u0A33')
+ || (c >= '\u0A35' && c <= '\u0A36')
+ || (c >= '\u0A38' && c <= '\u0A39')
+ || (c >= '\u0A59' && c <= '\u0A5C') || (c == '\u0A5E')
+ || (c >= '\u0A72' && c <= '\u0A74')
+ || (c >= '\u0A85' && c <= '\u0A8B') || (c == '\u0A8D')
+ || (c >= '\u0A8F' && c <= '\u0A91')
+ || (c >= '\u0A93' && c <= '\u0AA8')
+ || (c >= '\u0AAA' && c <= '\u0AB0')
+ || (c >= '\u0AB2' && c <= '\u0AB3')
+ || (c >= '\u0AB5' && c <= '\u0AB9') || (c == '\u0ABD')
+ || (c == '\u0AE0') || (c >= '\u0B05' && c <= '\u0B0C')
+ || (c >= '\u0B0F' && c <= '\u0B10')
+ || (c >= '\u0B13' && c <= '\u0B28')
+ || (c >= '\u0B2A' && c <= '\u0B30')
+ || (c >= '\u0B32' && c <= '\u0B33')
+ || (c >= '\u0B36' && c <= '\u0B39') || (c == '\u0B3D')
+ || (c >= '\u0B5C' && c <= '\u0B5D')
+ || (c >= '\u0B5F' && c <= '\u0B61')
+ || (c >= '\u0B85' && c <= '\u0B8A')
+ || (c >= '\u0B8E' && c <= '\u0B90')
+ || (c >= '\u0B92' && c <= '\u0B95')
+ || (c >= '\u0B99' && c <= '\u0B9A') || (c == '\u0B9C')
+ || (c >= '\u0B9E' && c <= '\u0B9F')
+ || (c >= '\u0BA3' && c <= '\u0BA4')
+ || (c >= '\u0BA8' && c <= '\u0BAA')
+ || (c >= '\u0BAE' && c <= '\u0BB5')
+ || (c >= '\u0BB7' && c <= '\u0BB9')
+ || (c >= '\u0C05' && c <= '\u0C0C')
+ || (c >= '\u0C0E' && c <= '\u0C10')
+ || (c >= '\u0C12' && c <= '\u0C28')
+ || (c >= '\u0C2A' && c <= '\u0C33')
+ || (c >= '\u0C35' && c <= '\u0C39')
+ || (c >= '\u0C60' && c <= '\u0C61')
+ || (c >= '\u0C85' && c <= '\u0C8C')
+ || (c >= '\u0C8E' && c <= '\u0C90')
+ || (c >= '\u0C92' && c <= '\u0CA8')
+ || (c >= '\u0CAA' && c <= '\u0CB3')
+ || (c >= '\u0CB5' && c <= '\u0CB9') || (c == '\u0CDE')
+ || (c >= '\u0CE0' && c <= '\u0CE1')
+ || (c >= '\u0D05' && c <= '\u0D0C')
+ || (c >= '\u0D0E' && c <= '\u0D10')
+ || (c >= '\u0D12' && c <= '\u0D28')
+ || (c >= '\u0D2A' && c <= '\u0D39')
+ || (c >= '\u0D60' && c <= '\u0D61')
+ || (c >= '\u0E01' && c <= '\u0E2E') || (c == '\u0E30')
+ || (c >= '\u0E32' && c <= '\u0E33')
+ || (c >= '\u0E40' && c <= '\u0E45')
+ || (c >= '\u0E81' && c <= '\u0E82') || (c == '\u0E84')
+ || (c >= '\u0E87' && c <= '\u0E88') || (c == '\u0E8A')
+ || (c == '\u0E8D') || (c >= '\u0E94' && c <= '\u0E97')
+ || (c >= '\u0E99' && c <= '\u0E9F')
+ || (c >= '\u0EA1' && c <= '\u0EA3') || (c == '\u0EA5')
+ || (c == '\u0EA7') || (c >= '\u0EAA' && c <= '\u0EAB')
+ || (c >= '\u0EAD' && c <= '\u0EAE') || (c == '\u0EB0')
+ || (c >= '\u0EB2' && c <= '\u0EB3') || (c == '\u0EBD')
+ || (c >= '\u0EC0' && c <= '\u0EC4')
+ || (c >= '\u0F40' && c <= '\u0F47')
+ || (c >= '\u0F49' && c <= '\u0F69')
+ || (c >= '\u10A0' && c <= '\u10C5')
+ || (c >= '\u10D0' && c <= '\u10F6') || (c == '\u1100')
+ || (c >= '\u1102' && c <= '\u1103')
+ || (c >= '\u1105' && c <= '\u1107') || (c == '\u1109')
+ || (c >= '\u110B' && c <= '\u110C')
+ || (c >= '\u110E' && c <= '\u1112') || (c == '\u113C')
+ || (c == '\u113E') || (c == '\u1140') || (c == '\u114C')
+ || (c == '\u114E') || (c == '\u1150')
+ || (c >= '\u1154' && c <= '\u1155') || (c == '\u1159')
+ || (c >= '\u115F' && c <= '\u1161') || (c == '\u1163')
+ || (c == '\u1165') || (c == '\u1167') || (c == '\u1169')
+ || (c >= '\u116D' && c <= '\u116E')
+ || (c >= '\u1172' && c <= '\u1173') || (c == '\u1175')
+ || (c == '\u119E') || (c == '\u11A8') || (c == '\u11AB')
+ || (c >= '\u11AE' && c <= '\u11AF')
+ || (c >= '\u11B7' && c <= '\u11B8') || (c == '\u11BA')
+ || (c >= '\u11BC' && c <= '\u11C2') || (c == '\u11EB')
+ || (c == '\u11F0') || (c == '\u11F9')
+ || (c >= '\u1E00' && c <= '\u1E9B')
+ || (c >= '\u1EA0' && c <= '\u1EF9')
+ || (c >= '\u1F00' && c <= '\u1F15')
+ || (c >= '\u1F18' && c <= '\u1F1D')
+ || (c >= '\u1F20' && c <= '\u1F45')
+ || (c >= '\u1F48' && c <= '\u1F4D')
+ || (c >= '\u1F50' && c <= '\u1F57') || (c == '\u1F59')
+ || (c == '\u1F5B') || (c == '\u1F5D')
+ || (c >= '\u1F5F' && c <= '\u1F7D')
+ || (c >= '\u1F80' && c <= '\u1FB4')
+ || (c >= '\u1FB6' && c <= '\u1FBC') || (c == '\u1FBE')
+ || (c >= '\u1FC2' && c <= '\u1FC4')
+ || (c >= '\u1FC6' && c <= '\u1FCC')
+ || (c >= '\u1FD0' && c <= '\u1FD3')
+ || (c >= '\u1FD6' && c <= '\u1FDB')
+ || (c >= '\u1FE0' && c <= '\u1FEC')
+ || (c >= '\u1FF2' && c <= '\u1FF4')
+ || (c >= '\u1FF6' && c <= '\u1FFC') || (c == '\u2126')
+ || (c >= '\u212A' && c <= '\u212B') || (c == '\u212E')
+ || (c >= '\u2180' && c <= '\u2182')
+ || (c >= '\u3041' && c <= '\u3094')
+ || (c >= '\u30A1' && c <= '\u30FA')
+ || (c >= '\u3105' && c <= '\u312C')
+ || (c >= '\uAC00' && c <= '\uD7A3')
+ || (c >= '\u4E00' && c <= '\u9FA5') || (c == '\u3007')
+ || (c >= '\u3021' && c <= '\u3029') || (c == '_') || (c == '.')
+ || (c == '-') || (c >= '\u0300' && c <= '\u0345')
+ || (c >= '\u0360' && c <= '\u0361')
+ || (c >= '\u0483' && c <= '\u0486')
+ || (c >= '\u0591' && c <= '\u05A1')
+ || (c >= '\u05A3' && c <= '\u05B9')
+ || (c >= '\u05BB' && c <= '\u05BD') || (c == '\u05BF')
+ || (c >= '\u05C1' && c <= '\u05C2') || (c == '\u05C4')
+ || (c >= '\u064B' && c <= '\u0652') || (c == '\u0670')
+ || (c >= '\u06D6' && c <= '\u06DC')
+ || (c >= '\u06DD' && c <= '\u06DF')
+ || (c >= '\u06E0' && c <= '\u06E4')
+ || (c >= '\u06E7' && c <= '\u06E8')
+ || (c >= '\u06EA' && c <= '\u06ED')
+ || (c >= '\u0901' && c <= '\u0903') || (c == '\u093C')
+ || (c >= '\u093E' && c <= '\u094C') || (c == '\u094D')
+ || (c >= '\u0951' && c <= '\u0954')
+ || (c >= '\u0962' && c <= '\u0963')
+ || (c >= '\u0981' && c <= '\u0983') || (c == '\u09BC')
+ || (c == '\u09BE') || (c == '\u09BF')
+ || (c >= '\u09C0' && c <= '\u09C4')
+ || (c >= '\u09C7' && c <= '\u09C8')
+ || (c >= '\u09CB' && c <= '\u09CD') || (c == '\u09D7')
+ || (c >= '\u09E2' && c <= '\u09E3') || (c == '\u0A02')
+ || (c == '\u0A3C') || (c == '\u0A3E') || (c == '\u0A3F')
+ || (c >= '\u0A40' && c <= '\u0A42')
+ || (c >= '\u0A47' && c <= '\u0A48')
+ || (c >= '\u0A4B' && c <= '\u0A4D')
+ || (c >= '\u0A70' && c <= '\u0A71')
+ || (c >= '\u0A81' && c <= '\u0A83') || (c == '\u0ABC')
+ || (c >= '\u0ABE' && c <= '\u0AC5')
+ || (c >= '\u0AC7' && c <= '\u0AC9')
+ || (c >= '\u0ACB' && c <= '\u0ACD')
+ || (c >= '\u0B01' && c <= '\u0B03') || (c == '\u0B3C')
+ || (c >= '\u0B3E' && c <= '\u0B43')
+ || (c >= '\u0B47' && c <= '\u0B48')
+ || (c >= '\u0B4B' && c <= '\u0B4D')
+ || (c >= '\u0B56' && c <= '\u0B57')
+ || (c >= '\u0B82' && c <= '\u0B83')
+ || (c >= '\u0BBE' && c <= '\u0BC2')
+ || (c >= '\u0BC6' && c <= '\u0BC8')
+ || (c >= '\u0BCA' && c <= '\u0BCD') || (c == '\u0BD7')
+ || (c >= '\u0C01' && c <= '\u0C03')
+ || (c >= '\u0C3E' && c <= '\u0C44')
+ || (c >= '\u0C46' && c <= '\u0C48')
+ || (c >= '\u0C4A' && c <= '\u0C4D')
+ || (c >= '\u0C55' && c <= '\u0C56')
+ || (c >= '\u0C82' && c <= '\u0C83')
+ || (c >= '\u0CBE' && c <= '\u0CC4')
+ || (c >= '\u0CC6' && c <= '\u0CC8')
+ || (c >= '\u0CCA' && c <= '\u0CCD')
+ || (c >= '\u0CD5' && c <= '\u0CD6')
+ || (c >= '\u0D02' && c <= '\u0D03')
+ || (c >= '\u0D3E' && c <= '\u0D43')
+ || (c >= '\u0D46' && c <= '\u0D48')
+ || (c >= '\u0D4A' && c <= '\u0D4D') || (c == '\u0D57')
+ || (c == '\u0E31') || (c >= '\u0E34' && c <= '\u0E3A')
+ || (c >= '\u0E47' && c <= '\u0E4E') || (c == '\u0EB1')
+ || (c >= '\u0EB4' && c <= '\u0EB9')
+ || (c >= '\u0EBB' && c <= '\u0EBC')
+ || (c >= '\u0EC8' && c <= '\u0ECD')
+ || (c >= '\u0F18' && c <= '\u0F19') || (c == '\u0F35')
+ || (c == '\u0F37') || (c == '\u0F39') || (c == '\u0F3E')
+ || (c == '\u0F3F') || (c >= '\u0F71' && c <= '\u0F84')
+ || (c >= '\u0F86' && c <= '\u0F8B')
+ || (c >= '\u0F90' && c <= '\u0F95') || (c == '\u0F97')
+ || (c >= '\u0F99' && c <= '\u0FAD')
+ || (c >= '\u0FB1' && c <= '\u0FB7') || (c == '\u0FB9')
+ || (c >= '\u20D0' && c <= '\u20DC') || (c == '\u20E1')
+ || (c >= '\u302A' && c <= '\u302F') || (c == '\u3099')
+ || (c == '\u309A') || (c == '\u00B7') || (c == '\u02D0')
+ || (c == '\u02D1') || (c == '\u0387') || (c == '\u0640')
+ || (c == '\u0E46') || (c == '\u0EC6') || (c == '\u3005')
+ || (c >= '\u3031' && c <= '\u3035')
+ || (c >= '\u309D' && c <= '\u309E') || (c >= '\u30FC' && c <= '\u30FE'));
+ }
+
+ /**
+  * Tests whether a string is a valid NCName.
+  *
+  * @param str the candidate string; may be <code>null</code>
+  * @return <code>true</code> if <code>str</code> is non-<code>null</code>,
+  *         non-empty, starts with a character accepted by
+  *         <code>isNCNameStart</code> and continues only with characters
+  *         accepted by <code>isNCNameTrail</code>; <code>false</code>
+  *         otherwise
+  */
+ public static boolean isNCName(String str) {
+     // Neither null nor the empty string is a name.
+     if (str == null || str.length() == 0) {
+         return false;
+     }
+     // The first character has its own, stricter character class.
+     if (!NCName.isNCNameStart(str.charAt(0))) {
+         return false;
+     }
+     // Every remaining character must be a valid trailing character.
+     for (int pos = 1, end = str.length(); pos < end; pos++) {
+         if (!NCName.isNCNameTrail(str.charAt(pos))) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ /**
+  * Appends <code>U</code> followed by six hexadecimal digits (looked up in
+  * <code>HEX_TABLE</code>) for the low 24 bits of <code>c</code>, most
+  * significant nibble first.
+  *
+  * @param sb the builder to append to
+  * @param c the value to write; only bits 23..0 are used
+  */
+ private static void appendUHexTo(StringBuilder sb, int c) {
+     sb.append('U');
+     // Walk the six nibbles from bit 20 down to bit 0.
+     for (int shift = 20; shift >= 0; shift -= 4) {
+         sb.append(HEX_TABLE[(c >> shift) & 0xF]);
+     }
+ }
+
+ /**
+  * Escapes a string so that each part that is not permitted at its position
+  * in an NCName is replaced by the <code>U</code>-plus-six-hex-digits form
+  * written by <code>appendUHexTo</code>.
+  *
+  * <p>A high surrogate immediately followed by a low surrogate is escaped
+  * as one unit using the combined value. A high surrogate that is not
+  * followed by a low surrogate (including one at the very end of the
+  * string) is escaped on its own as a single code unit instead of reading
+  * past the end of the string or swallowing the following character.
+  *
+  * @param str the string to escape; must not be <code>null</code>
+  * @return the escaped string, interned
+  */
+ public static String escapeName(String str) {
+     StringBuilder sb = new StringBuilder();
+     int len = str.length();
+     for (int i = 0; i < len; i++) {
+         char c = str.charAt(i);
+         if ((c & 0xFC00) == 0xD800) {
+             // High surrogate: combine it with the next code unit only if
+             // that unit exists and really is a low surrogate.
+             if (i + 1 < len && (str.charAt(i + 1) & 0xFC00) == 0xDC00) {
+                 char next = str.charAt(++i);
+                 // NOTE(review): SURROGATE_OFFSET is defined elsewhere in this
+                 // file; presumably it turns (high << 10) + low into the
+                 // code point - confirm.
+                 appendUHexTo(sb, (c << 10) + next + SURROGATE_OFFSET);
+             } else {
+                 // Unpaired high surrogate: escape the lone code unit and
+                 // leave the following character (if any) to be processed
+                 // normally on the next iteration.
+                 appendUHexTo(sb, c);
+             }
+         } else if (i == 0 ? !isNCNameStart(c) : !isNCNameTrail(c)) {
+             // Not allowed at this position (start vs. trailing character).
+             appendUHexTo(sb, c);
+         } else {
+             sb.append(c);
+         }
+     }
+     return sb.toString().intern();
+ }
+ // ]NOCPP]
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharacters.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharacters.java
new file mode 100644
index 000000000..266a5a28e
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharacters.java
@@ -0,0 +1,944 @@
+/*
+ * Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera
+ * Software ASA.
+ *
+ * You are granted a license to use, reproduce and create derivative works of
+ * this document.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.CharacterName;
+import nu.validator.htmlparser.annotation.NoLength;
+
+/**
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class NamedCharacters {
+
+ static final @NoLength @CharacterName String[] NAMES = { "lig", "lig;",
+ "P", "P;", "cute", "cute;", "reve;", "irc", "irc;", "y;", "r;",
+ "rave", "rave;", "pha;", "acr;", "d;", "gon;", "pf;",
+ "plyFunction;", "ing", "ing;", "cr;", "sign;", "ilde", "ilde;",
+ "ml", "ml;", "ckslash;", "rv;", "rwed;", "y;", "cause;",
+ "rnoullis;", "ta;", "r;", "pf;", "eve;", "cr;", "mpeq;", "cy;",
+ "PY", "PY;", "cute;", "p;", "pitalDifferentialD;", "yleys;",
+ "aron;", "edil", "edil;", "irc;", "onint;", "ot;", "dilla;",
+ "nterDot;", "r;", "i;", "rcleDot;", "rcleMinus;", "rclePlus;",
+ "rcleTimes;", "ockwiseContourIntegral;", "oseCurlyDoubleQuote;",
+ "oseCurlyQuote;", "lon;", "lone;", "ngruent;", "nint;",
+ "ntourIntegral;", "pf;", "product;",
+ "unterClockwiseContourIntegral;", "oss;", "cr;", "p;", "pCap;",
+ ";", "otrahd;", "cy;", "cy;", "cy;", "gger;", "rr;", "shv;",
+ "aron;", "y;", "l;", "lta;", "r;", "acriticalAcute;",
+ "acriticalDot;", "acriticalDoubleAcute;", "acriticalGrave;",
+ "acriticalTilde;", "amond;", "fferentialD;", "pf;", "t;", "tDot;",
+ "tEqual;", "ubleContourIntegral;", "ubleDot;", "ubleDownArrow;",
+ "ubleLeftArrow;", "ubleLeftRightArrow;", "ubleLeftTee;",
+ "ubleLongLeftArrow;", "ubleLongLeftRightArrow;",
+ "ubleLongRightArrow;", "ubleRightArrow;", "ubleRightTee;",
+ "ubleUpArrow;", "ubleUpDownArrow;", "ubleVerticalBar;", "wnArrow;",
+ "wnArrowBar;", "wnArrowUpArrow;", "wnBreve;", "wnLeftRightVector;",
+ "wnLeftTeeVector;", "wnLeftVector;", "wnLeftVectorBar;",
+ "wnRightTeeVector;", "wnRightVector;", "wnRightVectorBar;",
+ "wnTee;", "wnTeeArrow;", "wnarrow;", "cr;", "trok;", "G;", "H",
+ "H;", "cute", "cute;", "aron;", "irc", "irc;", "y;", "ot;", "r;",
+ "rave", "rave;", "ement;", "acr;", "ptySmallSquare;",
+ "ptyVerySmallSquare;", "gon;", "pf;", "silon;", "ual;",
+ "ualTilde;", "uilibrium;", "cr;", "im;", "a;", "ml", "ml;",
+ "ists;", "ponentialE;", "y;", "r;", "lledSmallSquare;",
+ "lledVerySmallSquare;", "pf;", "rAll;", "uriertrf;", "cr;", "cy;",
+ "", ";", "mma;", "mmad;", "reve;", "edil;", "irc;", "y;", "ot;",
+ "r;", ";", "pf;", "eaterEqual;", "eaterEqualLess;",
+ "eaterFullEqual;", "eaterGreater;", "eaterLess;",
+ "eaterSlantEqual;", "eaterTilde;", "cr;", ";", "RDcy;", "cek;",
+ "t;", "irc;", "r;", "lbertSpace;", "pf;", "rizontalLine;", "cr;",
+ "trok;", "mpDownHump;", "mpEqual;", "cy;", "lig;", "cy;", "cute",
+ "cute;", "irc", "irc;", "y;", "ot;", "r;", "rave", "rave;", ";",
+ "acr;", "aginaryI;", "plies;", "t;", "tegral;", "tersection;",
+ "visibleComma;", "visibleTimes;", "gon;", "pf;", "ta;", "cr;",
+ "ilde;", "kcy;", "ml", "ml;", "irc;", "y;", "r;", "pf;", "cr;",
+ "ercy;", "kcy;", "cy;", "cy;", "ppa;", "edil;", "y;", "r;", "pf;",
+ "cr;", "cy;", "", ";", "cute;", "mbda;", "ng;", "placetrf;", "rr;",
+ "aron;", "edil;", "y;", "ftAngleBracket;", "ftArrow;",
+ "ftArrowBar;", "ftArrowRightArrow;", "ftCeiling;",
+ "ftDoubleBracket;", "ftDownTeeVector;", "ftDownVector;",
+ "ftDownVectorBar;", "ftFloor;", "ftRightArrow;", "ftRightVector;",
+ "ftTee;", "ftTeeArrow;", "ftTeeVector;", "ftTriangle;",
+ "ftTriangleBar;", "ftTriangleEqual;", "ftUpDownVector;",
+ "ftUpTeeVector;", "ftUpVector;", "ftUpVectorBar;", "ftVector;",
+ "ftVectorBar;", "ftarrow;", "ftrightarrow;", "ssEqualGreater;",
+ "ssFullEqual;", "ssGreater;", "ssLess;", "ssSlantEqual;",
+ "ssTilde;", "r;", ";", "eftarrow;", "idot;", "ngLeftArrow;",
+ "ngLeftRightArrow;", "ngRightArrow;", "ngleftarrow;",
+ "ngleftrightarrow;", "ngrightarrow;", "pf;", "werLeftArrow;",
+ "werRightArrow;", "cr;", "h;", "trok;", ";", "p;", "y;",
+ "diumSpace;", "llintrf;", "r;", "nusPlus;", "pf;", "cr;", ";",
+ "cy;", "cute;", "aron;", "edil;", "y;", "gativeMediumSpace;",
+ "gativeThickSpace;", "gativeThinSpace;", "gativeVeryThinSpace;",
+ "stedGreaterGreater;", "stedLessLess;", "wLine;", "r;", "Break;",
+ "nBreakingSpace;", "pf;", "t;", "tCongruent;", "tCupCap;",
+ "tDoubleVerticalBar;", "tElement;", "tEqual;", "tEqualTilde;",
+ "tExists;", "tGreater;", "tGreaterEqual;", "tGreaterFullEqual;",
+ "tGreaterGreater;", "tGreaterLess;", "tGreaterSlantEqual;",
+ "tGreaterTilde;", "tHumpDownHump;", "tHumpEqual;",
+ "tLeftTriangle;", "tLeftTriangleBar;", "tLeftTriangleEqual;",
+ "tLess;", "tLessEqual;", "tLessGreater;", "tLessLess;",
+ "tLessSlantEqual;", "tLessTilde;", "tNestedGreaterGreater;",
+ "tNestedLessLess;", "tPrecedes;", "tPrecedesEqual;",
+ "tPrecedesSlantEqual;", "tReverseElement;", "tRightTriangle;",
+ "tRightTriangleBar;", "tRightTriangleEqual;", "tSquareSubset;",
+ "tSquareSubsetEqual;", "tSquareSuperset;", "tSquareSupersetEqual;",
+ "tSubset;", "tSubsetEqual;", "tSucceeds;", "tSucceedsEqual;",
+ "tSucceedsSlantEqual;", "tSucceedsTilde;", "tSuperset;",
+ "tSupersetEqual;", "tTilde;", "tTildeEqual;", "tTildeFullEqual;",
+ "tTildeTilde;", "tVerticalBar;", "cr;", "ilde", "ilde;", ";",
+ "lig;", "cute", "cute;", "irc", "irc;", "y;", "blac;", "r;",
+ "rave", "rave;", "acr;", "ega;", "icron;", "pf;",
+ "enCurlyDoubleQuote;", "enCurlyQuote;", ";", "cr;", "lash",
+ "lash;", "ilde", "ilde;", "imes;", "ml", "ml;", "erBar;",
+ "erBrace;", "erBracket;", "erParenthesis;", "rtialD;", "y;", "r;",
+ "i;", ";", "usMinus;", "incareplane;", "pf;", ";", "ecedes;",
+ "ecedesEqual;", "ecedesSlantEqual;", "ecedesTilde;", "ime;",
+ "oduct;", "oportion;", "oportional;", "cr;", "i;", "OT", "OT;",
+ "r;", "pf;", "cr;", "arr;", "G", "G;", "cute;", "ng;", "rr;",
+ "rrtl;", "aron;", "edil;", "y;", ";", "verseElement;",
+ "verseEquilibrium;", "verseUpEquilibrium;", "r;", "o;",
+ "ghtAngleBracket;", "ghtArrow;", "ghtArrowBar;",
+ "ghtArrowLeftArrow;", "ghtCeiling;", "ghtDoubleBracket;",
+ "ghtDownTeeVector;", "ghtDownVector;", "ghtDownVectorBar;",
+ "ghtFloor;", "ghtTee;", "ghtTeeArrow;", "ghtTeeVector;",
+ "ghtTriangle;", "ghtTriangleBar;", "ghtTriangleEqual;",
+ "ghtUpDownVector;", "ghtUpTeeVector;", "ghtUpVector;",
+ "ghtUpVectorBar;", "ghtVector;", "ghtVectorBar;", "ghtarrow;",
+ "pf;", "undImplies;", "ightarrow;", "cr;", "h;", "leDelayed;",
+ "CHcy;", "cy;", "FTcy;", "cute;", ";", "aron;", "edil;", "irc;",
+ "y;", "r;", "ortDownArrow;", "ortLeftArrow;", "ortRightArrow;",
+ "ortUpArrow;", "gma;", "allCircle;", "pf;", "rt;", "uare;",
+ "uareIntersection;", "uareSubset;", "uareSubsetEqual;",
+ "uareSuperset;", "uareSupersetEqual;", "uareUnion;", "cr;", "ar;",
+ "b;", "bset;", "bsetEqual;", "cceeds;", "cceedsEqual;",
+ "cceedsSlantEqual;", "cceedsTilde;", "chThat;", "m;", "p;",
+ "perset;", "persetEqual;", "pset;", "ORN", "ORN;", "ADE;", "Hcy;",
+ "cy;", "b;", "u;", "aron;", "edil;", "y;", "r;", "erefore;",
+ "eta;", "ickSpace;", "inSpace;", "lde;", "ldeEqual;",
+ "ldeFullEqual;", "ldeTilde;", "pf;", "ipleDot;", "cr;", "trok;",
+ "cute", "cute;", "rr;", "rrocir;", "rcy;", "reve;", "irc", "irc;",
+ "y;", "blac;", "r;", "rave", "rave;", "acr;", "derBar;",
+ "derBrace;", "derBracket;", "derParenthesis;", "ion;", "ionPlus;",
+ "gon;", "pf;", "Arrow;", "ArrowBar;", "ArrowDownArrow;",
+ "DownArrow;", "Equilibrium;", "Tee;", "TeeArrow;", "arrow;",
+ "downarrow;", "perLeftArrow;", "perRightArrow;", "si;", "silon;",
+ "ing;", "cr;", "ilde;", "ml", "ml;", "ash;", "ar;", "y;", "ash;",
+ "ashl;", "e;", "rbar;", "rt;", "rticalBar;", "rticalLine;",
+ "rticalSeparator;", "rticalTilde;", "ryThinSpace;", "r;", "pf;",
+ "cr;", "dash;", "irc;", "dge;", "r;", "pf;", "cr;", "r;", ";",
+ "pf;", "cr;", "cy;", "cy;", "cy;", "cute", "cute;", "irc;", "y;",
+ "r;", "pf;", "cr;", "ml;", "cy;", "cute;", "aron;", "y;", "ot;",
+ "roWidthSpace;", "ta;", "r;", "pf;", "cr;", "cute", "cute;",
+ "reve;", ";", "E;", "d;", "irc", "irc;", "ute", "ute;", "y;",
+ "lig", "lig;", ";", "r;", "rave", "rave;", "efsym;", "eph;",
+ "pha;", "acr;", "alg;", "p", "p;", "d;", "dand;", "dd;", "dslope;",
+ "dv;", "g;", "ge;", "gle;", "gmsd;", "gmsdaa;", "gmsdab;",
+ "gmsdac;", "gmsdad;", "gmsdae;", "gmsdaf;", "gmsdag;", "gmsdah;",
+ "grt;", "grtvb;", "grtvbd;", "gsph;", "gst;", "gzarr;", "gon;",
+ "pf;", ";", "E;", "acir;", "e;", "id;", "os;", "prox;", "proxeq;",
+ "ing", "ing;", "cr;", "t;", "ymp;", "ympeq;", "ilde", "ilde;",
+ "ml", "ml;", "conint;", "int;", "ot;", "ckcong;", "ckepsilon;",
+ "ckprime;", "cksim;", "cksimeq;", "rvee;", "rwed;", "rwedge;",
+ "rk;", "rktbrk;", "ong;", "y;", "quo;", "caus;", "cause;",
+ "mptyv;", "psi;", "rnou;", "ta;", "th;", "tween;", "r;", "gcap;",
+ "gcirc;", "gcup;", "godot;", "goplus;", "gotimes;", "gsqcup;",
+ "gstar;", "gtriangledown;", "gtriangleup;", "guplus;", "gvee;",
+ "gwedge;", "arow;", "acklozenge;", "acksquare;", "acktriangle;",
+ "acktriangledown;", "acktriangleleft;", "acktriangleright;",
+ "ank;", "k12;", "k14;", "k34;", "ock;", "e;", "equiv;", "ot;",
+ "pf;", "t;", "ttom;", "wtie;", "xDL;", "xDR;", "xDl;", "xDr;",
+ "xH;", "xHD;", "xHU;", "xHd;", "xHu;", "xUL;", "xUR;", "xUl;",
+ "xUr;", "xV;", "xVH;", "xVL;", "xVR;", "xVh;", "xVl;", "xVr;",
+ "xbox;", "xdL;", "xdR;", "xdl;", "xdr;", "xh;", "xhD;", "xhU;",
+ "xhd;", "xhu;", "xminus;", "xplus;", "xtimes;", "xuL;", "xuR;",
+ "xul;", "xur;", "xv;", "xvH;", "xvL;", "xvR;", "xvh;", "xvl;",
+ "xvr;", "rime;", "eve;", "vbar", "vbar;", "cr;", "emi;", "im;",
+ "ime;", "ol;", "olb;", "olhsub;", "ll;", "llet;", "mp;", "mpE;",
+ "mpe;", "mpeq;", "cute;", "p;", "pand;", "pbrcup;", "pcap;",
+ "pcup;", "pdot;", "ps;", "ret;", "ron;", "aps;", "aron;", "edil",
+ "edil;", "irc;", "ups;", "upssm;", "ot;", "dil", "dil;", "mptyv;",
+ "nt", "nt;", "nterdot;", "r;", "cy;", "eck;", "eckmark;", "i;",
+ "r;", "rE;", "rc;", "rceq;", "rclearrowleft;", "rclearrowright;",
+ "rcledR;", "rcledS;", "rcledast;", "rcledcirc;", "rcleddash;",
+ "re;", "rfnint;", "rmid;", "rscir;", "ubs;", "ubsuit;", "lon;",
+ "lone;", "loneq;", "mma;", "mmat;", "mp;", "mpfn;", "mplement;",
+ "mplexes;", "ng;", "ngdot;", "nint;", "pf;", "prod;", "py", "py;",
+ "pysr;", "arr;", "oss;", "cr;", "ub;", "ube;", "up;", "upe;",
+ "dot;", "darrl;", "darrr;", "epr;", "esc;", "larr;", "larrp;",
+ "p;", "pbrcap;", "pcap;", "pcup;", "pdot;", "por;", "ps;", "rarr;",
+ "rarrm;", "rlyeqprec;", "rlyeqsucc;", "rlyvee;", "rlywedge;",
+ "rren", "rren;", "rvearrowleft;", "rvearrowright;", "vee;", "wed;",
+ "conint;", "int;", "lcty;", "rr;", "ar;", "gger;", "leth;", "rr;",
+ "sh;", "shv;", "karow;", "lac;", "aron;", "y;", ";", "agger;",
+ "arr;", "otseq;", "g", "g;", "lta;", "mptyv;", "isht;", "r;",
+ "arl;", "arr;", "am;", "amond;", "amondsuit;", "ams;", "e;",
+ "gamma;", "sin;", "v;", "vide", "vide;", "videontimes;", "vonx;",
+ "cy;", "corn;", "crop;", "llar;", "pf;", "t;", "teq;", "teqdot;",
+ "tminus;", "tplus;", "tsquare;", "ublebarwedge;", "wnarrow;",
+ "wndownarrows;", "wnharpoonleft;", "wnharpoonright;", "bkarow;",
+ "corn;", "crop;", "cr;", "cy;", "ol;", "trok;", "dot;", "ri;",
+ "rif;", "arr;", "har;", "angle;", "cy;", "igrarr;", "Dot;", "ot;",
+ "cute", "cute;", "ster;", "aron;", "ir;", "irc", "irc;", "olon;",
+ "y;", "ot;", ";", "Dot;", "r;", ";", "rave", "rave;", "s;",
+ "sdot;", ";", "inters;", "l;", "s;", "sdot;", "acr;", "pty;",
+ "ptyset;", "ptyv;", "sp13;", "sp14;", "sp;", "g;", "sp;", "gon;",
+ "pf;", "ar;", "arsl;", "lus;", "si;", "silon;", "siv;", "circ;",
+ "colon;", "sim;", "slantgtr;", "slantless;", "uals;", "uest;",
+ "uiv;", "uivDD;", "vparsl;", "Dot;", "arr;", "cr;", "dot;", "im;",
+ "a;", "h", "h;", "ml", "ml;", "ro;", "cl;", "ist;", "pectation;",
+ "ponentiale;", "llingdotseq;", "y;", "male;", "ilig;", "lig;",
+ "llig;", "r;", "lig;", "lig;", "at;", "lig;", "tns;", "of;", "pf;",
+ "rall;", "rk;", "rkv;", "artint;", "ac12", "ac12;", "ac13;",
+ "ac14", "ac14;", "ac15;", "ac16;", "ac18;", "ac23;", "ac25;",
+ "ac34", "ac34;", "ac35;", "ac38;", "ac45;", "ac56;", "ac58;",
+ "ac78;", "asl;", "own;", "cr;", ";", "l;", "cute;", "mma;",
+ "mmad;", "p;", "reve;", "irc;", "y;", "ot;", ";", "l;", "q;",
+ "qq;", "qslant;", "s;", "scc;", "sdot;", "sdoto;", "sdotol;",
+ "sl;", "sles;", "r;", ";", "g;", "mel;", "cy;", ";", "E;", "a;",
+ "j;", "E;", "ap;", "approx;", "e;", "eq;", "eqq;", "sim;", "pf;",
+ "ave;", "cr;", "im;", "ime;", "iml;", "", ";", "cc;", "cir;",
+ "dot;", "lPar;", "quest;", "rapprox;", "rarr;", "rdot;",
+ "reqless;", "reqqless;", "rless;", "rsim;", "ertneqq;", "nE;",
+ "rr;", "irsp;", "lf;", "milt;", "rdcy;", "rr;", "rrcir;", "rrw;",
+ "ar;", "irc;", "arts;", "artsuit;", "llip;", "rcon;", "r;",
+ "searow;", "swarow;", "arr;", "mtht;", "okleftarrow;",
+ "okrightarrow;", "pf;", "rbar;", "cr;", "lash;", "trok;", "bull;",
+ "phen;", "cute", "cute;", ";", "irc", "irc;", "y;", "cy;", "xcl",
+ "xcl;", "f;", "r;", "rave", "rave;", ";", "iint;", "int;", "nfin;",
+ "ota;", "lig;", "acr;", "age;", "agline;", "agpart;", "ath;",
+ "of;", "ped;", ";", "care;", "fin;", "fintie;", "odot;", "t;",
+ "tcal;", "tegers;", "tercal;", "tlarhk;", "tprod;", "cy;", "gon;",
+ "pf;", "ta;", "rod;", "uest", "uest;", "cr;", "in;", "inE;",
+ "indot;", "ins;", "insv;", "inv;", ";", "ilde;", "kcy;", "ml",
+ "ml;", "irc;", "y;", "r;", "ath;", "pf;", "cr;", "ercy;", "kcy;",
+ "ppa;", "ppav;", "edil;", "y;", "r;", "reen;", "cy;", "cy;", "pf;",
+ "cr;", "arr;", "rr;", "tail;", "arr;", ";", "g;", "ar;", "cute;",
+ "emptyv;", "gran;", "mbda;", "ng;", "ngd;", "ngle;", "p;", "quo",
+ "quo;", "rr;", "rrb;", "rrbfs;", "rrfs;", "rrhk;", "rrlp;",
+ "rrpl;", "rrsim;", "rrtl;", "t;", "tail;", "te;", "tes;", "arr;",
+ "brk;", "race;", "rack;", "rke;", "rksld;", "rkslu;", "aron;",
+ "edil;", "eil;", "ub;", "y;", "ca;", "quo;", "quor;", "rdhar;",
+ "rushar;", "sh;", ";", "ftarrow;", "ftarrowtail;",
+ "ftharpoondown;", "ftharpoonup;", "ftleftarrows;", "ftrightarrow;",
+ "ftrightarrows;", "ftrightharpoons;", "ftrightsquigarrow;",
+ "ftthreetimes;", "g;", "q;", "qq;", "qslant;", "s;", "scc;",
+ "sdot;", "sdoto;", "sdotor;", "sg;", "sges;", "ssapprox;",
+ "ssdot;", "sseqgtr;", "sseqqgtr;", "ssgtr;", "sssim;", "isht;",
+ "loor;", "r;", ";", "E;", "ard;", "aru;", "arul;", "blk;", "cy;",
+ ";", "arr;", "corner;", "hard;", "tri;", "idot;", "oust;",
+ "oustache;", "E;", "ap;", "approx;", "e;", "eq;", "eqq;", "sim;",
+ "ang;", "arr;", "brk;", "ngleftarrow;", "ngleftrightarrow;",
+ "ngmapsto;", "ngrightarrow;", "oparrowleft;", "oparrowright;",
+ "par;", "pf;", "plus;", "times;", "wast;", "wbar;", "z;", "zenge;",
+ "zf;", "ar;", "arlt;", "arr;", "corner;", "har;", "hard;", "m;",
+ "tri;", "aquo;", "cr;", "h;", "im;", "ime;", "img;", "qb;", "quo;",
+ "quor;", "trok;", "", ";", "cc;", "cir;", "dot;", "hree;", "imes;",
+ "larr;", "quest;", "rPar;", "ri;", "rie;", "rif;", "rdshar;",
+ "ruhar;", "ertneqq;", "nE;", "Dot;", "cr", "cr;", "le;", "lt;",
+ "ltese;", "p;", "psto;", "pstodown;", "pstoleft;", "pstoup;",
+ "rker;", "omma;", "y;", "ash;", "asuredangle;", "r;", "o;", "cro",
+ "cro;", "d;", "dast;", "dcir;", "ddot", "ddot;", "nus;", "nusb;",
+ "nusd;", "nusdu;", "cp;", "dr;", "plus;", "dels;", "pf;", ";",
+ "cr;", "tpos;", ";", "ltimap;", "map;", "g;", "t;", "tv;",
+ "eftarrow;", "eftrightarrow;", "l;", "t;", "tv;", "ightarrow;",
+ "Dash;", "dash;", "bla;", "cute;", "ng;", "p;", "pE;", "pid;",
+ "pos;", "pprox;", "tur;", "tural;", "turals;", "sp", "sp;", "ump;",
+ "umpe;", "ap;", "aron;", "edil;", "ong;", "ongdot;", "up;", "y;",
+ "ash;", ";", "Arr;", "arhk;", "arr;", "arrow;", "dot;", "quiv;",
+ "sear;", "sim;", "xist;", "xists;", "r;", "E;", "e;", "eq;",
+ "eqq;", "eqslant;", "es;", "sim;", "t;", "tr;", "Arr;", "arr;",
+ "par;", ";", "s;", "sd;", "v;", "cy;", "Arr;", "E;", "arr;", "dr;",
+ "e;", "eftarrow;", "eftrightarrow;", "eq;", "eqq;", "eqslant;",
+ "es;", "ess;", "sim;", "t;", "tri;", "trie;", "id;", "pf;", "t",
+ "t;", "tin;", "tinE;", "tindot;", "tinva;", "tinvb;", "tinvc;",
+ "tni;", "tniva;", "tnivb;", "tnivc;", "ar;", "arallel;", "arsl;",
+ "art;", "olint;", "r;", "rcue;", "re;", "rec;", "receq;", "Arr;",
+ "arr;", "arrc;", "arrw;", "ightarrow;", "tri;", "trie;", "c;",
+ "ccue;", "ce;", "cr;", "hortmid;", "hortparallel;", "im;", "ime;",
+ "imeq;", "mid;", "par;", "qsube;", "qsupe;", "ub;", "ubE;", "ube;",
+ "ubset;", "ubseteq;", "ubseteqq;", "ucc;", "ucceq;", "up;", "upE;",
+ "upe;", "upset;", "upseteq;", "upseteqq;", "gl;", "ilde", "ilde;",
+ "lg;", "riangleleft;", "rianglelefteq;", "riangleright;",
+ "rianglerighteq;", ";", "m;", "mero;", "msp;", "Dash;", "Harr;",
+ "ap;", "dash;", "ge;", "gt;", "infin;", "lArr;", "le;", "lt;",
+ "ltrie;", "rArr;", "rtrie;", "sim;", "Arr;", "arhk;", "arr;",
+ "arrow;", "near;", ";", "cute", "cute;", "st;", "ir;", "irc",
+ "irc;", "y;", "ash;", "blac;", "iv;", "ot;", "sold;", "lig;",
+ "cir;", "r;", "on;", "rave", "rave;", "t;", "bar;", "m;", "nt;",
+ "arr;", "cir;", "cross;", "ine;", "t;", "acr;", "ega;", "icron;",
+ "id;", "inus;", "pf;", "ar;", "erp;", "lus;", ";", "arr;", "d;",
+ "der;", "derof;", "df", "df;", "dm", "dm;", "igof;", "or;",
+ "slope;", "v;", "cr;", "lash", "lash;", "ol;", "ilde", "ilde;",
+ "imes;", "imesas;", "ml", "ml;", "bar;", "r;", "ra", "ra;",
+ "rallel;", "rsim;", "rsl;", "rt;", "y;", "rcnt;", "riod;", "rmil;",
+ "rp;", "rtenk;", "r;", "i;", "iv;", "mmat;", "one;", ";",
+ "tchfork;", "v;", "anck;", "anckh;", "ankv;", "us;", "usacir;",
+ "usb;", "uscir;", "usdo;", "usdu;", "use;", "usmn", "usmn;",
+ "ussim;", "ustwo;", ";", "intint;", "pf;", "und", "und;", ";",
+ "E;", "ap;", "cue;", "e;", "ec;", "ecapprox;", "eccurlyeq;",
+ "eceq;", "ecnapprox;", "ecneqq;", "ecnsim;", "ecsim;", "ime;",
+ "imes;", "nE;", "nap;", "nsim;", "od;", "ofalar;", "ofline;",
+ "ofsurf;", "op;", "opto;", "sim;", "urel;", "cr;", "i;", "ncsp;",
+ "r;", "nt;", "pf;", "rime;", "cr;", "aternions;", "atint;", "est;",
+ "esteq;", "ot", "ot;", "arr;", "rr;", "tail;", "arr;", "ar;",
+ "ce;", "cute;", "dic;", "emptyv;", "ng;", "ngd;", "nge;", "ngle;",
+ "quo", "quo;", "rr;", "rrap;", "rrb;", "rrbfs;", "rrc;", "rrfs;",
+ "rrhk;", "rrlp;", "rrpl;", "rrsim;", "rrtl;", "rrw;", "tail;",
+ "tio;", "tionals;", "arr;", "brk;", "race;", "rack;", "rke;",
+ "rksld;", "rkslu;", "aron;", "edil;", "eil;", "ub;", "y;", "ca;",
+ "ldhar;", "quo;", "quor;", "sh;", "al;", "aline;", "alpart;",
+ "als;", "ct;", "g", "g;", "isht;", "loor;", "r;", "ard;", "aru;",
+ "arul;", "o;", "ov;", "ghtarrow;", "ghtarrowtail;",
+ "ghtharpoondown;", "ghtharpoonup;", "ghtleftarrows;",
+ "ghtleftharpoons;", "ghtrightarrows;", "ghtsquigarrow;",
+ "ghtthreetimes;", "ng;", "singdotseq;", "arr;", "har;", "m;",
+ "oust;", "oustache;", "mid;", "ang;", "arr;", "brk;", "par;",
+ "pf;", "plus;", "times;", "ar;", "argt;", "polint;", "arr;",
+ "aquo;", "cr;", "h;", "qb;", "quo;", "quor;", "hree;", "imes;",
+ "ri;", "rie;", "rif;", "riltri;", "luhar;", ";", "cute;", "quo;",
+ ";", "E;", "ap;", "aron;", "cue;", "e;", "edil;", "irc;", "nE;",
+ "nap;", "nsim;", "polint;", "sim;", "y;", "ot;", "otb;", "ote;",
+ "Arr;", "arhk;", "arr;", "arrow;", "ct", "ct;", "mi;", "swar;",
+ "tminus;", "tmn;", "xt;", "r;", "rown;", "arp;", "chcy;", "cy;",
+ "ortmid;", "ortparallel;", "y", "y;", "gma;", "gmaf;", "gmav;",
+ "m;", "mdot;", "me;", "meq;", "mg;", "mgE;", "ml;", "mlE;", "mne;",
+ "mplus;", "mrarr;", "arr;", "allsetminus;", "ashp;", "eparsl;",
+ "id;", "ile;", "t;", "te;", "tes;", "ftcy;", "l;", "lb;", "lbar;",
+ "pf;", "ades;", "adesuit;", "ar;", "cap;", "caps;", "cup;",
+ "cups;", "sub;", "sube;", "subset;", "subseteq;", "sup;", "supe;",
+ "supset;", "supseteq;", "u;", "uare;", "uarf;", "uf;", "arr;",
+ "cr;", "etmn;", "mile;", "tarf;", "ar;", "arf;", "raightepsilon;",
+ "raightphi;", "rns;", "b;", "bE;", "bdot;", "be;", "bedot;",
+ "bmult;", "bnE;", "bne;", "bplus;", "brarr;", "bset;", "bseteq;",
+ "bseteqq;", "bsetneq;", "bsetneqq;", "bsim;", "bsub;", "bsup;",
+ "cc;", "ccapprox;", "cccurlyeq;", "cceq;", "ccnapprox;", "ccneqq;",
+ "ccnsim;", "ccsim;", "m;", "ng;", "p1", "p1;", "p2", "p2;", "p3",
+ "p3;", "p;", "pE;", "pdot;", "pdsub;", "pe;", "pedot;", "phsol;",
+ "phsub;", "plarr;", "pmult;", "pnE;", "pne;", "pplus;", "pset;",
+ "pseteq;", "pseteqq;", "psetneq;", "psetneqq;", "psim;", "psub;",
+ "psup;", "Arr;", "arhk;", "arr;", "arrow;", "nwar;", "lig", "lig;",
+ "rget;", "u;", "rk;", "aron;", "edil;", "y;", "ot;", "lrec;", "r;",
+ "ere4;", "erefore;", "eta;", "etasym;", "etav;", "ickapprox;",
+ "icksim;", "insp;", "kap;", "ksim;", "orn", "orn;", "lde;", "mes",
+ "mes;", "mesb;", "mesbar;", "mesd;", "nt;", "ea;", "p;", "pbot;",
+ "pcir;", "pf;", "pfork;", "sa;", "rime;", "ade;", "iangle;",
+ "iangledown;", "iangleleft;", "ianglelefteq;", "iangleq;",
+ "iangleright;", "ianglerighteq;", "idot;", "ie;", "iminus;",
+ "iplus;", "isb;", "itime;", "pezium;", "cr;", "cy;", "hcy;",
+ "trok;", "ixt;", "oheadleftarrow;", "oheadrightarrow;", "rr;",
+ "ar;", "cute", "cute;", "rr;", "rcy;", "reve;", "irc", "irc;",
+ "y;", "arr;", "blac;", "har;", "isht;", "r;", "rave", "rave;",
+ "arl;", "arr;", "blk;", "corn;", "corner;", "crop;", "tri;",
+ "acr;", "l", "l;", "gon;", "pf;", "arrow;", "downarrow;",
+ "harpoonleft;", "harpoonright;", "lus;", "si;", "sih;", "silon;",
+ "uparrows;", "corn;", "corner;", "crop;", "ing;", "tri;", "cr;",
+ "dot;", "ilde;", "ri;", "rif;", "arr;", "ml", "ml;", "angle;",
+ "rr;", "ar;", "arv;", "ash;", "ngrt;", "repsilon;", "rkappa;",
+ "rnothing;", "rphi;", "rpi;", "rpropto;", "rr;", "rrho;",
+ "rsigma;", "rsubsetneq;", "rsubsetneqq;", "rsupsetneq;",
+ "rsupsetneqq;", "rtheta;", "rtriangleleft;", "rtriangleright;",
+ "y;", "ash;", "e;", "ebar;", "eeq;", "llip;", "rbar;", "rt;", "r;",
+ "tri;", "sub;", "sup;", "pf;", "rop;", "tri;", "cr;", "ubnE;",
+ "ubne;", "upnE;", "upne;", "igzag;", "irc;", "dbar;", "dge;",
+ "dgeq;", "ierp;", "r;", "pf;", ";", ";", "eath;", "cr;", "ap;",
+ "irc;", "up;", "tri;", "r;", "Arr;", "arr;", ";", "Arr;", "arr;",
+ "ap;", "is;", "dot;", "pf;", "plus;", "time;", "Arr;", "arr;",
+ "cr;", "qcup;", "plus;", "tri;", "ee;", "edge;", "cute", "cute;",
+ "cy;", "irc;", "y;", "n", "n;", "r;", "cy;", "pf;", "cr;", "cy;",
+ "ml", "ml;", "cute;", "aron;", "y;", "ot;", "etrf;", "ta;", "r;",
+ "cy;", "grarr;", "pf;", "cr;", "j;", "nj;", };
+
+ static final @NoLength char[][] VALUES = { { '\u00c6' }, { '\u00c6' },
+ { '\u0026' }, { '\u0026' }, { '\u00c1' }, { '\u00c1' },
+ { '\u0102' }, { '\u00c2' }, { '\u00c2' }, { '\u0410' },
+ { '\ud835', '\udd04' }, { '\u00c0' }, { '\u00c0' }, { '\u0391' },
+ { '\u0100' }, { '\u2a53' }, { '\u0104' }, { '\ud835', '\udd38' },
+ { '\u2061' }, { '\u00c5' }, { '\u00c5' }, { '\ud835', '\udc9c' },
+ { '\u2254' }, { '\u00c3' }, { '\u00c3' }, { '\u00c4' },
+ { '\u00c4' }, { '\u2216' }, { '\u2ae7' }, { '\u2306' },
+ { '\u0411' }, { '\u2235' }, { '\u212c' }, { '\u0392' },
+ { '\ud835', '\udd05' }, { '\ud835', '\udd39' }, { '\u02d8' },
+ { '\u212c' }, { '\u224e' }, { '\u0427' }, { '\u00a9' },
+ { '\u00a9' }, { '\u0106' }, { '\u22d2' }, { '\u2145' },
+ { '\u212d' }, { '\u010c' }, { '\u00c7' }, { '\u00c7' },
+ { '\u0108' }, { '\u2230' }, { '\u010a' }, { '\u00b8' },
+ { '\u00b7' }, { '\u212d' }, { '\u03a7' }, { '\u2299' },
+ { '\u2296' }, { '\u2295' }, { '\u2297' }, { '\u2232' },
+ { '\u201d' }, { '\u2019' }, { '\u2237' }, { '\u2a74' },
+ { '\u2261' }, { '\u222f' }, { '\u222e' }, { '\u2102' },
+ { '\u2210' }, { '\u2233' }, { '\u2a2f' }, { '\ud835', '\udc9e' },
+ { '\u22d3' }, { '\u224d' }, { '\u2145' }, { '\u2911' },
+ { '\u0402' }, { '\u0405' }, { '\u040f' }, { '\u2021' },
+ { '\u21a1' }, { '\u2ae4' }, { '\u010e' }, { '\u0414' },
+ { '\u2207' }, { '\u0394' }, { '\ud835', '\udd07' }, { '\u00b4' },
+ { '\u02d9' }, { '\u02dd' }, { '\u0060' }, { '\u02dc' },
+ { '\u22c4' }, { '\u2146' }, { '\ud835', '\udd3b' }, { '\u00a8' },
+ { '\u20dc' }, { '\u2250' }, { '\u222f' }, { '\u00a8' },
+ { '\u21d3' }, { '\u21d0' }, { '\u21d4' }, { '\u2ae4' },
+ { '\u27f8' }, { '\u27fa' }, { '\u27f9' }, { '\u21d2' },
+ { '\u22a8' }, { '\u21d1' }, { '\u21d5' }, { '\u2225' },
+ { '\u2193' }, { '\u2913' }, { '\u21f5' }, { '\u0311' },
+ { '\u2950' }, { '\u295e' }, { '\u21bd' }, { '\u2956' },
+ { '\u295f' }, { '\u21c1' }, { '\u2957' }, { '\u22a4' },
+ { '\u21a7' }, { '\u21d3' }, { '\ud835', '\udc9f' }, { '\u0110' },
+ { '\u014a' }, { '\u00d0' }, { '\u00d0' }, { '\u00c9' },
+ { '\u00c9' }, { '\u011a' }, { '\u00ca' }, { '\u00ca' },
+ { '\u042d' }, { '\u0116' }, { '\ud835', '\udd08' }, { '\u00c8' },
+ { '\u00c8' }, { '\u2208' }, { '\u0112' }, { '\u25fb' },
+ { '\u25ab' }, { '\u0118' }, { '\ud835', '\udd3c' }, { '\u0395' },
+ { '\u2a75' }, { '\u2242' }, { '\u21cc' }, { '\u2130' },
+ { '\u2a73' }, { '\u0397' }, { '\u00cb' }, { '\u00cb' },
+ { '\u2203' }, { '\u2147' }, { '\u0424' }, { '\ud835', '\udd09' },
+ { '\u25fc' }, { '\u25aa' }, { '\ud835', '\udd3d' }, { '\u2200' },
+ { '\u2131' }, { '\u2131' }, { '\u0403' }, { '\u003e' },
+ { '\u003e' }, { '\u0393' }, { '\u03dc' }, { '\u011e' },
+ { '\u0122' }, { '\u011c' }, { '\u0413' }, { '\u0120' },
+ { '\ud835', '\udd0a' }, { '\u22d9' }, { '\ud835', '\udd3e' },
+ { '\u2265' }, { '\u22db' }, { '\u2267' }, { '\u2aa2' },
+ { '\u2277' }, { '\u2a7e' }, { '\u2273' }, { '\ud835', '\udca2' },
+ { '\u226b' }, { '\u042a' }, { '\u02c7' }, { '\u005e' },
+ { '\u0124' }, { '\u210c' }, { '\u210b' }, { '\u210d' },
+ { '\u2500' }, { '\u210b' }, { '\u0126' }, { '\u224e' },
+ { '\u224f' }, { '\u0415' }, { '\u0132' }, { '\u0401' },
+ { '\u00cd' }, { '\u00cd' }, { '\u00ce' }, { '\u00ce' },
+ { '\u0418' }, { '\u0130' }, { '\u2111' }, { '\u00cc' },
+ { '\u00cc' }, { '\u2111' }, { '\u012a' }, { '\u2148' },
+ { '\u21d2' }, { '\u222c' }, { '\u222b' }, { '\u22c2' },
+ { '\u2063' }, { '\u2062' }, { '\u012e' }, { '\ud835', '\udd40' },
+ { '\u0399' }, { '\u2110' }, { '\u0128' }, { '\u0406' },
+ { '\u00cf' }, { '\u00cf' }, { '\u0134' }, { '\u0419' },
+ { '\ud835', '\udd0d' }, { '\ud835', '\udd41' },
+ { '\ud835', '\udca5' }, { '\u0408' }, { '\u0404' }, { '\u0425' },
+ { '\u040c' }, { '\u039a' }, { '\u0136' }, { '\u041a' },
+ { '\ud835', '\udd0e' }, { '\ud835', '\udd42' },
+ { '\ud835', '\udca6' }, { '\u0409' }, { '\u003c' }, { '\u003c' },
+ { '\u0139' }, { '\u039b' }, { '\u27ea' }, { '\u2112' },
+ { '\u219e' }, { '\u013d' }, { '\u013b' }, { '\u041b' },
+ { '\u27e8' }, { '\u2190' }, { '\u21e4' }, { '\u21c6' },
+ { '\u2308' }, { '\u27e6' }, { '\u2961' }, { '\u21c3' },
+ { '\u2959' }, { '\u230a' }, { '\u2194' }, { '\u294e' },
+ { '\u22a3' }, { '\u21a4' }, { '\u295a' }, { '\u22b2' },
+ { '\u29cf' }, { '\u22b4' }, { '\u2951' }, { '\u2960' },
+ { '\u21bf' }, { '\u2958' }, { '\u21bc' }, { '\u2952' },
+ { '\u21d0' }, { '\u21d4' }, { '\u22da' }, { '\u2266' },
+ { '\u2276' }, { '\u2aa1' }, { '\u2a7d' }, { '\u2272' },
+ { '\ud835', '\udd0f' }, { '\u22d8' }, { '\u21da' }, { '\u013f' },
+ { '\u27f5' }, { '\u27f7' }, { '\u27f6' }, { '\u27f8' },
+ { '\u27fa' }, { '\u27f9' }, { '\ud835', '\udd43' }, { '\u2199' },
+ { '\u2198' }, { '\u2112' }, { '\u21b0' }, { '\u0141' },
+ { '\u226a' }, { '\u2905' }, { '\u041c' }, { '\u205f' },
+ { '\u2133' }, { '\ud835', '\udd10' }, { '\u2213' },
+ { '\ud835', '\udd44' }, { '\u2133' }, { '\u039c' }, { '\u040a' },
+ { '\u0143' }, { '\u0147' }, { '\u0145' }, { '\u041d' },
+ { '\u200b' }, { '\u200b' }, { '\u200b' }, { '\u200b' },
+ { '\u226b' }, { '\u226a' }, { '\n' }, { '\ud835', '\udd11' },
+ { '\u2060' }, { '\u00a0' }, { '\u2115' }, { '\u2aec' },
+ { '\u2262' }, { '\u226d' }, { '\u2226' }, { '\u2209' },
+ { '\u2260' }, { '\u2242', '\u0338' }, { '\u2204' }, { '\u226f' },
+ { '\u2271' }, { '\u2267', '\u0338' }, { '\u226b', '\u0338' },
+ { '\u2279' }, { '\u2a7e', '\u0338' }, { '\u2275' },
+ { '\u224e', '\u0338' }, { '\u224f', '\u0338' }, { '\u22ea' },
+ { '\u29cf', '\u0338' }, { '\u22ec' }, { '\u226e' }, { '\u2270' },
+ { '\u2278' }, { '\u226a', '\u0338' }, { '\u2a7d', '\u0338' },
+ { '\u2274' }, { '\u2aa2', '\u0338' }, { '\u2aa1', '\u0338' },
+ { '\u2280' }, { '\u2aaf', '\u0338' }, { '\u22e0' }, { '\u220c' },
+ { '\u22eb' }, { '\u29d0', '\u0338' }, { '\u22ed' },
+ { '\u228f', '\u0338' }, { '\u22e2' }, { '\u2290', '\u0338' },
+ { '\u22e3' }, { '\u2282', '\u20d2' }, { '\u2288' }, { '\u2281' },
+ { '\u2ab0', '\u0338' }, { '\u22e1' }, { '\u227f', '\u0338' },
+ { '\u2283', '\u20d2' }, { '\u2289' }, { '\u2241' }, { '\u2244' },
+ { '\u2247' }, { '\u2249' }, { '\u2224' }, { '\ud835', '\udca9' },
+ { '\u00d1' }, { '\u00d1' }, { '\u039d' }, { '\u0152' },
+ { '\u00d3' }, { '\u00d3' }, { '\u00d4' }, { '\u00d4' },
+ { '\u041e' }, { '\u0150' }, { '\ud835', '\udd12' }, { '\u00d2' },
+ { '\u00d2' }, { '\u014c' }, { '\u03a9' }, { '\u039f' },
+ { '\ud835', '\udd46' }, { '\u201c' }, { '\u2018' }, { '\u2a54' },
+ { '\ud835', '\udcaa' }, { '\u00d8' }, { '\u00d8' }, { '\u00d5' },
+ { '\u00d5' }, { '\u2a37' }, { '\u00d6' }, { '\u00d6' },
+ { '\u203e' }, { '\u23de' }, { '\u23b4' }, { '\u23dc' },
+ { '\u2202' }, { '\u041f' }, { '\ud835', '\udd13' }, { '\u03a6' },
+ { '\u03a0' }, { '\u00b1' }, { '\u210c' }, { '\u2119' },
+ { '\u2abb' }, { '\u227a' }, { '\u2aaf' }, { '\u227c' },
+ { '\u227e' }, { '\u2033' }, { '\u220f' }, { '\u2237' },
+ { '\u221d' }, { '\ud835', '\udcab' }, { '\u03a8' }, { '\u0022' },
+ { '\u0022' }, { '\ud835', '\udd14' }, { '\u211a' },
+ { '\ud835', '\udcac' }, { '\u2910' }, { '\u00ae' }, { '\u00ae' },
+ { '\u0154' }, { '\u27eb' }, { '\u21a0' }, { '\u2916' },
+ { '\u0158' }, { '\u0156' }, { '\u0420' }, { '\u211c' },
+ { '\u220b' }, { '\u21cb' }, { '\u296f' }, { '\u211c' },
+ { '\u03a1' }, { '\u27e9' }, { '\u2192' }, { '\u21e5' },
+ { '\u21c4' }, { '\u2309' }, { '\u27e7' }, { '\u295d' },
+ { '\u21c2' }, { '\u2955' }, { '\u230b' }, { '\u22a2' },
+ { '\u21a6' }, { '\u295b' }, { '\u22b3' }, { '\u29d0' },
+ { '\u22b5' }, { '\u294f' }, { '\u295c' }, { '\u21be' },
+ { '\u2954' }, { '\u21c0' }, { '\u2953' }, { '\u21d2' },
+ { '\u211d' }, { '\u2970' }, { '\u21db' }, { '\u211b' },
+ { '\u21b1' }, { '\u29f4' }, { '\u0429' }, { '\u0428' },
+ { '\u042c' }, { '\u015a' }, { '\u2abc' }, { '\u0160' },
+ { '\u015e' }, { '\u015c' }, { '\u0421' }, { '\ud835', '\udd16' },
+ { '\u2193' }, { '\u2190' }, { '\u2192' }, { '\u2191' },
+ { '\u03a3' }, { '\u2218' }, { '\ud835', '\udd4a' }, { '\u221a' },
+ { '\u25a1' }, { '\u2293' }, { '\u228f' }, { '\u2291' },
+ { '\u2290' }, { '\u2292' }, { '\u2294' }, { '\ud835', '\udcae' },
+ { '\u22c6' }, { '\u22d0' }, { '\u22d0' }, { '\u2286' },
+ { '\u227b' }, { '\u2ab0' }, { '\u227d' }, { '\u227f' },
+ { '\u220b' }, { '\u2211' }, { '\u22d1' }, { '\u2283' },
+ { '\u2287' }, { '\u22d1' }, { '\u00de' }, { '\u00de' },
+ { '\u2122' }, { '\u040b' }, { '\u0426' }, { '\u0009' },
+ { '\u03a4' }, { '\u0164' }, { '\u0162' }, { '\u0422' },
+ { '\ud835', '\udd17' }, { '\u2234' }, { '\u0398' },
+ { '\u205f', '\u200a' }, { '\u2009' }, { '\u223c' }, { '\u2243' },
+ { '\u2245' }, { '\u2248' }, { '\ud835', '\udd4b' }, { '\u20db' },
+ { '\ud835', '\udcaf' }, { '\u0166' }, { '\u00da' }, { '\u00da' },
+ { '\u219f' }, { '\u2949' }, { '\u040e' }, { '\u016c' },
+ { '\u00db' }, { '\u00db' }, { '\u0423' }, { '\u0170' },
+ { '\ud835', '\udd18' }, { '\u00d9' }, { '\u00d9' }, { '\u016a' },
+ { '\u005f' }, { '\u23df' }, { '\u23b5' }, { '\u23dd' },
+ { '\u22c3' }, { '\u228e' }, { '\u0172' }, { '\ud835', '\udd4c' },
+ { '\u2191' }, { '\u2912' }, { '\u21c5' }, { '\u2195' },
+ { '\u296e' }, { '\u22a5' }, { '\u21a5' }, { '\u21d1' },
+ { '\u21d5' }, { '\u2196' }, { '\u2197' }, { '\u03d2' },
+ { '\u03a5' }, { '\u016e' }, { '\ud835', '\udcb0' }, { '\u0168' },
+ { '\u00dc' }, { '\u00dc' }, { '\u22ab' }, { '\u2aeb' },
+ { '\u0412' }, { '\u22a9' }, { '\u2ae6' }, { '\u22c1' },
+ { '\u2016' }, { '\u2016' }, { '\u2223' }, { '\u007c' },
+ { '\u2758' }, { '\u2240' }, { '\u200a' }, { '\ud835', '\udd19' },
+ { '\ud835', '\udd4d' }, { '\ud835', '\udcb1' }, { '\u22aa' },
+ { '\u0174' }, { '\u22c0' }, { '\ud835', '\udd1a' },
+ { '\ud835', '\udd4e' }, { '\ud835', '\udcb2' },
+ { '\ud835', '\udd1b' }, { '\u039e' }, { '\ud835', '\udd4f' },
+ { '\ud835', '\udcb3' }, { '\u042f' }, { '\u0407' }, { '\u042e' },
+ { '\u00dd' }, { '\u00dd' }, { '\u0176' }, { '\u042b' },
+ { '\ud835', '\udd1c' }, { '\ud835', '\udd50' },
+ { '\ud835', '\udcb4' }, { '\u0178' }, { '\u0416' }, { '\u0179' },
+ { '\u017d' }, { '\u0417' }, { '\u017b' }, { '\u200b' },
+ { '\u0396' }, { '\u2128' }, { '\u2124' }, { '\ud835', '\udcb5' },
+ { '\u00e1' }, { '\u00e1' }, { '\u0103' }, { '\u223e' },
+ { '\u223e', '\u0333' }, { '\u223f' }, { '\u00e2' }, { '\u00e2' },
+ { '\u00b4' }, { '\u00b4' }, { '\u0430' }, { '\u00e6' },
+ { '\u00e6' }, { '\u2061' }, { '\ud835', '\udd1e' }, { '\u00e0' },
+ { '\u00e0' }, { '\u2135' }, { '\u2135' }, { '\u03b1' },
+ { '\u0101' }, { '\u2a3f' }, { '\u0026' }, { '\u0026' },
+ { '\u2227' }, { '\u2a55' }, { '\u2a5c' }, { '\u2a58' },
+ { '\u2a5a' }, { '\u2220' }, { '\u29a4' }, { '\u2220' },
+ { '\u2221' }, { '\u29a8' }, { '\u29a9' }, { '\u29aa' },
+ { '\u29ab' }, { '\u29ac' }, { '\u29ad' }, { '\u29ae' },
+ { '\u29af' }, { '\u221f' }, { '\u22be' }, { '\u299d' },
+ { '\u2222' }, { '\u00c5' }, { '\u237c' }, { '\u0105' },
+ { '\ud835', '\udd52' }, { '\u2248' }, { '\u2a70' }, { '\u2a6f' },
+ { '\u224a' }, { '\u224b' }, { '\'' }, { '\u2248' }, { '\u224a' },
+ { '\u00e5' }, { '\u00e5' }, { '\ud835', '\udcb6' }, { '\u002a' },
+ { '\u2248' }, { '\u224d' }, { '\u00e3' }, { '\u00e3' },
+ { '\u00e4' }, { '\u00e4' }, { '\u2233' }, { '\u2a11' },
+ { '\u2aed' }, { '\u224c' }, { '\u03f6' }, { '\u2035' },
+ { '\u223d' }, { '\u22cd' }, { '\u22bd' }, { '\u2305' },
+ { '\u2305' }, { '\u23b5' }, { '\u23b6' }, { '\u224c' },
+ { '\u0431' }, { '\u201e' }, { '\u2235' }, { '\u2235' },
+ { '\u29b0' }, { '\u03f6' }, { '\u212c' }, { '\u03b2' },
+ { '\u2136' }, { '\u226c' }, { '\ud835', '\udd1f' }, { '\u22c2' },
+ { '\u25ef' }, { '\u22c3' }, { '\u2a00' }, { '\u2a01' },
+ { '\u2a02' }, { '\u2a06' }, { '\u2605' }, { '\u25bd' },
+ { '\u25b3' }, { '\u2a04' }, { '\u22c1' }, { '\u22c0' },
+ { '\u290d' }, { '\u29eb' }, { '\u25aa' }, { '\u25b4' },
+ { '\u25be' }, { '\u25c2' }, { '\u25b8' }, { '\u2423' },
+ { '\u2592' }, { '\u2591' }, { '\u2593' }, { '\u2588' },
+ { '\u003d', '\u20e5' }, { '\u2261', '\u20e5' }, { '\u2310' },
+ { '\ud835', '\udd53' }, { '\u22a5' }, { '\u22a5' }, { '\u22c8' },
+ { '\u2557' }, { '\u2554' }, { '\u2556' }, { '\u2553' },
+ { '\u2550' }, { '\u2566' }, { '\u2569' }, { '\u2564' },
+ { '\u2567' }, { '\u255d' }, { '\u255a' }, { '\u255c' },
+ { '\u2559' }, { '\u2551' }, { '\u256c' }, { '\u2563' },
+ { '\u2560' }, { '\u256b' }, { '\u2562' }, { '\u255f' },
+ { '\u29c9' }, { '\u2555' }, { '\u2552' }, { '\u2510' },
+ { '\u250c' }, { '\u2500' }, { '\u2565' }, { '\u2568' },
+ { '\u252c' }, { '\u2534' }, { '\u229f' }, { '\u229e' },
+ { '\u22a0' }, { '\u255b' }, { '\u2558' }, { '\u2518' },
+ { '\u2514' }, { '\u2502' }, { '\u256a' }, { '\u2561' },
+ { '\u255e' }, { '\u253c' }, { '\u2524' }, { '\u251c' },
+ { '\u2035' }, { '\u02d8' }, { '\u00a6' }, { '\u00a6' },
+ { '\ud835', '\udcb7' }, { '\u204f' }, { '\u223d' }, { '\u22cd' },
+ { '\\' }, { '\u29c5' }, { '\u27c8' }, { '\u2022' }, { '\u2022' },
+ { '\u224e' }, { '\u2aae' }, { '\u224f' }, { '\u224f' },
+ { '\u0107' }, { '\u2229' }, { '\u2a44' }, { '\u2a49' },
+ { '\u2a4b' }, { '\u2a47' }, { '\u2a40' }, { '\u2229', '\ufe00' },
+ { '\u2041' }, { '\u02c7' }, { '\u2a4d' }, { '\u010d' },
+ { '\u00e7' }, { '\u00e7' }, { '\u0109' }, { '\u2a4c' },
+ { '\u2a50' }, { '\u010b' }, { '\u00b8' }, { '\u00b8' },
+ { '\u29b2' }, { '\u00a2' }, { '\u00a2' }, { '\u00b7' },
+ { '\ud835', '\udd20' }, { '\u0447' }, { '\u2713' }, { '\u2713' },
+ { '\u03c7' }, { '\u25cb' }, { '\u29c3' }, { '\u02c6' },
+ { '\u2257' }, { '\u21ba' }, { '\u21bb' }, { '\u00ae' },
+ { '\u24c8' }, { '\u229b' }, { '\u229a' }, { '\u229d' },
+ { '\u2257' }, { '\u2a10' }, { '\u2aef' }, { '\u29c2' },
+ { '\u2663' }, { '\u2663' }, { '\u003a' }, { '\u2254' },
+ { '\u2254' }, { '\u002c' }, { '\u0040' }, { '\u2201' },
+ { '\u2218' }, { '\u2201' }, { '\u2102' }, { '\u2245' },
+ { '\u2a6d' }, { '\u222e' }, { '\ud835', '\udd54' }, { '\u2210' },
+ { '\u00a9' }, { '\u00a9' }, { '\u2117' }, { '\u21b5' },
+ { '\u2717' }, { '\ud835', '\udcb8' }, { '\u2acf' }, { '\u2ad1' },
+ { '\u2ad0' }, { '\u2ad2' }, { '\u22ef' }, { '\u2938' },
+ { '\u2935' }, { '\u22de' }, { '\u22df' }, { '\u21b6' },
+ { '\u293d' }, { '\u222a' }, { '\u2a48' }, { '\u2a46' },
+ { '\u2a4a' }, { '\u228d' }, { '\u2a45' }, { '\u222a', '\ufe00' },
+ { '\u21b7' }, { '\u293c' }, { '\u22de' }, { '\u22df' },
+ { '\u22ce' }, { '\u22cf' }, { '\u00a4' }, { '\u00a4' },
+ { '\u21b6' }, { '\u21b7' }, { '\u22ce' }, { '\u22cf' },
+ { '\u2232' }, { '\u2231' }, { '\u232d' }, { '\u21d3' },
+ { '\u2965' }, { '\u2020' }, { '\u2138' }, { '\u2193' },
+ { '\u2010' }, { '\u22a3' }, { '\u290f' }, { '\u02dd' },
+ { '\u010f' }, { '\u0434' }, { '\u2146' }, { '\u2021' },
+ { '\u21ca' }, { '\u2a77' }, { '\u00b0' }, { '\u00b0' },
+ { '\u03b4' }, { '\u29b1' }, { '\u297f' }, { '\ud835', '\udd21' },
+ { '\u21c3' }, { '\u21c2' }, { '\u22c4' }, { '\u22c4' },
+ { '\u2666' }, { '\u2666' }, { '\u00a8' }, { '\u03dd' },
+ { '\u22f2' }, { '\u00f7' }, { '\u00f7' }, { '\u00f7' },
+ { '\u22c7' }, { '\u22c7' }, { '\u0452' }, { '\u231e' },
+ { '\u230d' }, { '\u0024' }, { '\ud835', '\udd55' }, { '\u02d9' },
+ { '\u2250' }, { '\u2251' }, { '\u2238' }, { '\u2214' },
+ { '\u22a1' }, { '\u2306' }, { '\u2193' }, { '\u21ca' },
+ { '\u21c3' }, { '\u21c2' }, { '\u2910' }, { '\u231f' },
+ { '\u230c' }, { '\ud835', '\udcb9' }, { '\u0455' }, { '\u29f6' },
+ { '\u0111' }, { '\u22f1' }, { '\u25bf' }, { '\u25be' },
+ { '\u21f5' }, { '\u296f' }, { '\u29a6' }, { '\u045f' },
+ { '\u27ff' }, { '\u2a77' }, { '\u2251' }, { '\u00e9' },
+ { '\u00e9' }, { '\u2a6e' }, { '\u011b' }, { '\u2256' },
+ { '\u00ea' }, { '\u00ea' }, { '\u2255' }, { '\u044d' },
+ { '\u0117' }, { '\u2147' }, { '\u2252' }, { '\ud835', '\udd22' },
+ { '\u2a9a' }, { '\u00e8' }, { '\u00e8' }, { '\u2a96' },
+ { '\u2a98' }, { '\u2a99' }, { '\u23e7' }, { '\u2113' },
+ { '\u2a95' }, { '\u2a97' }, { '\u0113' }, { '\u2205' },
+ { '\u2205' }, { '\u2205' }, { '\u2004' }, { '\u2005' },
+ { '\u2003' }, { '\u014b' }, { '\u2002' }, { '\u0119' },
+ { '\ud835', '\udd56' }, { '\u22d5' }, { '\u29e3' }, { '\u2a71' },
+ { '\u03b5' }, { '\u03b5' }, { '\u03f5' }, { '\u2256' },
+ { '\u2255' }, { '\u2242' }, { '\u2a96' }, { '\u2a95' },
+ { '\u003d' }, { '\u225f' }, { '\u2261' }, { '\u2a78' },
+ { '\u29e5' }, { '\u2253' }, { '\u2971' }, { '\u212f' },
+ { '\u2250' }, { '\u2242' }, { '\u03b7' }, { '\u00f0' },
+ { '\u00f0' }, { '\u00eb' }, { '\u00eb' }, { '\u20ac' },
+ { '\u0021' }, { '\u2203' }, { '\u2130' }, { '\u2147' },
+ { '\u2252' }, { '\u0444' }, { '\u2640' }, { '\ufb03' },
+ { '\ufb00' }, { '\ufb04' }, { '\ud835', '\udd23' }, { '\ufb01' },
+ { '\u0066', '\u006a' }, { '\u266d' }, { '\ufb02' }, { '\u25b1' },
+ { '\u0192' }, { '\ud835', '\udd57' }, { '\u2200' }, { '\u22d4' },
+ { '\u2ad9' }, { '\u2a0d' }, { '\u00bd' }, { '\u00bd' },
+ { '\u2153' }, { '\u00bc' }, { '\u00bc' }, { '\u2155' },
+ { '\u2159' }, { '\u215b' }, { '\u2154' }, { '\u2156' },
+ { '\u00be' }, { '\u00be' }, { '\u2157' }, { '\u215c' },
+ { '\u2158' }, { '\u215a' }, { '\u215d' }, { '\u215e' },
+ { '\u2044' }, { '\u2322' }, { '\ud835', '\udcbb' }, { '\u2267' },
+ { '\u2a8c' }, { '\u01f5' }, { '\u03b3' }, { '\u03dd' },
+ { '\u2a86' }, { '\u011f' }, { '\u011d' }, { '\u0433' },
+ { '\u0121' }, { '\u2265' }, { '\u22db' }, { '\u2265' },
+ { '\u2267' }, { '\u2a7e' }, { '\u2a7e' }, { '\u2aa9' },
+ { '\u2a80' }, { '\u2a82' }, { '\u2a84' }, { '\u22db', '\ufe00' },
+ { '\u2a94' }, { '\ud835', '\udd24' }, { '\u226b' }, { '\u22d9' },
+ { '\u2137' }, { '\u0453' }, { '\u2277' }, { '\u2a92' },
+ { '\u2aa5' }, { '\u2aa4' }, { '\u2269' }, { '\u2a8a' },
+ { '\u2a8a' }, { '\u2a88' }, { '\u2a88' }, { '\u2269' },
+ { '\u22e7' }, { '\ud835', '\udd58' }, { '\u0060' }, { '\u210a' },
+ { '\u2273' }, { '\u2a8e' }, { '\u2a90' }, { '\u003e' },
+ { '\u003e' }, { '\u2aa7' }, { '\u2a7a' }, { '\u22d7' },
+ { '\u2995' }, { '\u2a7c' }, { '\u2a86' }, { '\u2978' },
+ { '\u22d7' }, { '\u22db' }, { '\u2a8c' }, { '\u2277' },
+ { '\u2273' }, { '\u2269', '\ufe00' }, { '\u2269', '\ufe00' },
+ { '\u21d4' }, { '\u200a' }, { '\u00bd' }, { '\u210b' },
+ { '\u044a' }, { '\u2194' }, { '\u2948' }, { '\u21ad' },
+ { '\u210f' }, { '\u0125' }, { '\u2665' }, { '\u2665' },
+ { '\u2026' }, { '\u22b9' }, { '\ud835', '\udd25' }, { '\u2925' },
+ { '\u2926' }, { '\u21ff' }, { '\u223b' }, { '\u21a9' },
+ { '\u21aa' }, { '\ud835', '\udd59' }, { '\u2015' },
+ { '\ud835', '\udcbd' }, { '\u210f' }, { '\u0127' }, { '\u2043' },
+ { '\u2010' }, { '\u00ed' }, { '\u00ed' }, { '\u2063' },
+ { '\u00ee' }, { '\u00ee' }, { '\u0438' }, { '\u0435' },
+ { '\u00a1' }, { '\u00a1' }, { '\u21d4' }, { '\ud835', '\udd26' },
+ { '\u00ec' }, { '\u00ec' }, { '\u2148' }, { '\u2a0c' },
+ { '\u222d' }, { '\u29dc' }, { '\u2129' }, { '\u0133' },
+ { '\u012b' }, { '\u2111' }, { '\u2110' }, { '\u2111' },
+ { '\u0131' }, { '\u22b7' }, { '\u01b5' }, { '\u2208' },
+ { '\u2105' }, { '\u221e' }, { '\u29dd' }, { '\u0131' },
+ { '\u222b' }, { '\u22ba' }, { '\u2124' }, { '\u22ba' },
+ { '\u2a17' }, { '\u2a3c' }, { '\u0451' }, { '\u012f' },
+ { '\ud835', '\udd5a' }, { '\u03b9' }, { '\u2a3c' }, { '\u00bf' },
+ { '\u00bf' }, { '\ud835', '\udcbe' }, { '\u2208' }, { '\u22f9' },
+ { '\u22f5' }, { '\u22f4' }, { '\u22f3' }, { '\u2208' },
+ { '\u2062' }, { '\u0129' }, { '\u0456' }, { '\u00ef' },
+ { '\u00ef' }, { '\u0135' }, { '\u0439' }, { '\ud835', '\udd27' },
+ { '\u0237' }, { '\ud835', '\udd5b' }, { '\ud835', '\udcbf' },
+ { '\u0458' }, { '\u0454' }, { '\u03ba' }, { '\u03f0' },
+ { '\u0137' }, { '\u043a' }, { '\ud835', '\udd28' }, { '\u0138' },
+ { '\u0445' }, { '\u045c' }, { '\ud835', '\udd5c' },
+ { '\ud835', '\udcc0' }, { '\u21da' }, { '\u21d0' }, { '\u291b' },
+ { '\u290e' }, { '\u2266' }, { '\u2a8b' }, { '\u2962' },
+ { '\u013a' }, { '\u29b4' }, { '\u2112' }, { '\u03bb' },
+ { '\u27e8' }, { '\u2991' }, { '\u27e8' }, { '\u2a85' },
+ { '\u00ab' }, { '\u00ab' }, { '\u2190' }, { '\u21e4' },
+ { '\u291f' }, { '\u291d' }, { '\u21a9' }, { '\u21ab' },
+ { '\u2939' }, { '\u2973' }, { '\u21a2' }, { '\u2aab' },
+ { '\u2919' }, { '\u2aad' }, { '\u2aad', '\ufe00' }, { '\u290c' },
+ { '\u2772' }, { '\u007b' }, { '\u005b' }, { '\u298b' },
+ { '\u298f' }, { '\u298d' }, { '\u013e' }, { '\u013c' },
+ { '\u2308' }, { '\u007b' }, { '\u043b' }, { '\u2936' },
+ { '\u201c' }, { '\u201e' }, { '\u2967' }, { '\u294b' },
+ { '\u21b2' }, { '\u2264' }, { '\u2190' }, { '\u21a2' },
+ { '\u21bd' }, { '\u21bc' }, { '\u21c7' }, { '\u2194' },
+ { '\u21c6' }, { '\u21cb' }, { '\u21ad' }, { '\u22cb' },
+ { '\u22da' }, { '\u2264' }, { '\u2266' }, { '\u2a7d' },
+ { '\u2a7d' }, { '\u2aa8' }, { '\u2a7f' }, { '\u2a81' },
+ { '\u2a83' }, { '\u22da', '\ufe00' }, { '\u2a93' }, { '\u2a85' },
+ { '\u22d6' }, { '\u22da' }, { '\u2a8b' }, { '\u2276' },
+ { '\u2272' }, { '\u297c' }, { '\u230a' }, { '\ud835', '\udd29' },
+ { '\u2276' }, { '\u2a91' }, { '\u21bd' }, { '\u21bc' },
+ { '\u296a' }, { '\u2584' }, { '\u0459' }, { '\u226a' },
+ { '\u21c7' }, { '\u231e' }, { '\u296b' }, { '\u25fa' },
+ { '\u0140' }, { '\u23b0' }, { '\u23b0' }, { '\u2268' },
+ { '\u2a89' }, { '\u2a89' }, { '\u2a87' }, { '\u2a87' },
+ { '\u2268' }, { '\u22e6' }, { '\u27ec' }, { '\u21fd' },
+ { '\u27e6' }, { '\u27f5' }, { '\u27f7' }, { '\u27fc' },
+ { '\u27f6' }, { '\u21ab' }, { '\u21ac' }, { '\u2985' },
+ { '\ud835', '\udd5d' }, { '\u2a2d' }, { '\u2a34' }, { '\u2217' },
+ { '\u005f' }, { '\u25ca' }, { '\u25ca' }, { '\u29eb' },
+ { '\u0028' }, { '\u2993' }, { '\u21c6' }, { '\u231f' },
+ { '\u21cb' }, { '\u296d' }, { '\u200e' }, { '\u22bf' },
+ { '\u2039' }, { '\ud835', '\udcc1' }, { '\u21b0' }, { '\u2272' },
+ { '\u2a8d' }, { '\u2a8f' }, { '\u005b' }, { '\u2018' },
+ { '\u201a' }, { '\u0142' }, { '\u003c' }, { '\u003c' },
+ { '\u2aa6' }, { '\u2a79' }, { '\u22d6' }, { '\u22cb' },
+ { '\u22c9' }, { '\u2976' }, { '\u2a7b' }, { '\u2996' },
+ { '\u25c3' }, { '\u22b4' }, { '\u25c2' }, { '\u294a' },
+ { '\u2966' }, { '\u2268', '\ufe00' }, { '\u2268', '\ufe00' },
+ { '\u223a' }, { '\u00af' }, { '\u00af' }, { '\u2642' },
+ { '\u2720' }, { '\u2720' }, { '\u21a6' }, { '\u21a6' },
+ { '\u21a7' }, { '\u21a4' }, { '\u21a5' }, { '\u25ae' },
+ { '\u2a29' }, { '\u043c' }, { '\u2014' }, { '\u2221' },
+ { '\ud835', '\udd2a' }, { '\u2127' }, { '\u00b5' }, { '\u00b5' },
+ { '\u2223' }, { '\u002a' }, { '\u2af0' }, { '\u00b7' },
+ { '\u00b7' }, { '\u2212' }, { '\u229f' }, { '\u2238' },
+ { '\u2a2a' }, { '\u2adb' }, { '\u2026' }, { '\u2213' },
+ { '\u22a7' }, { '\ud835', '\udd5e' }, { '\u2213' },
+ { '\ud835', '\udcc2' }, { '\u223e' }, { '\u03bc' }, { '\u22b8' },
+ { '\u22b8' }, { '\u22d9', '\u0338' }, { '\u226b', '\u20d2' },
+ { '\u226b', '\u0338' }, { '\u21cd' }, { '\u21ce' },
+ { '\u22d8', '\u0338' }, { '\u226a', '\u20d2' },
+ { '\u226a', '\u0338' }, { '\u21cf' }, { '\u22af' }, { '\u22ae' },
+ { '\u2207' }, { '\u0144' }, { '\u2220', '\u20d2' }, { '\u2249' },
+ { '\u2a70', '\u0338' }, { '\u224b', '\u0338' }, { '\u0149' },
+ { '\u2249' }, { '\u266e' }, { '\u266e' }, { '\u2115' },
+ { '\u00a0' }, { '\u00a0' }, { '\u224e', '\u0338' },
+ { '\u224f', '\u0338' }, { '\u2a43' }, { '\u0148' }, { '\u0146' },
+ { '\u2247' }, { '\u2a6d', '\u0338' }, { '\u2a42' }, { '\u043d' },
+ { '\u2013' }, { '\u2260' }, { '\u21d7' }, { '\u2924' },
+ { '\u2197' }, { '\u2197' }, { '\u2250', '\u0338' }, { '\u2262' },
+ { '\u2928' }, { '\u2242', '\u0338' }, { '\u2204' }, { '\u2204' },
+ { '\ud835', '\udd2b' }, { '\u2267', '\u0338' }, { '\u2271' },
+ { '\u2271' }, { '\u2267', '\u0338' }, { '\u2a7e', '\u0338' },
+ { '\u2a7e', '\u0338' }, { '\u2275' }, { '\u226f' }, { '\u226f' },
+ { '\u21ce' }, { '\u21ae' }, { '\u2af2' }, { '\u220b' },
+ { '\u22fc' }, { '\u22fa' }, { '\u220b' }, { '\u045a' },
+ { '\u21cd' }, { '\u2266', '\u0338' }, { '\u219a' }, { '\u2025' },
+ { '\u2270' }, { '\u219a' }, { '\u21ae' }, { '\u2270' },
+ { '\u2266', '\u0338' }, { '\u2a7d', '\u0338' },
+ { '\u2a7d', '\u0338' }, { '\u226e' }, { '\u2274' }, { '\u226e' },
+ { '\u22ea' }, { '\u22ec' }, { '\u2224' }, { '\ud835', '\udd5f' },
+ { '\u00ac' }, { '\u00ac' }, { '\u2209' }, { '\u22f9', '\u0338' },
+ { '\u22f5', '\u0338' }, { '\u2209' }, { '\u22f7' }, { '\u22f6' },
+ { '\u220c' }, { '\u220c' }, { '\u22fe' }, { '\u22fd' },
+ { '\u2226' }, { '\u2226' }, { '\u2afd', '\u20e5' },
+ { '\u2202', '\u0338' }, { '\u2a14' }, { '\u2280' }, { '\u22e0' },
+ { '\u2aaf', '\u0338' }, { '\u2280' }, { '\u2aaf', '\u0338' },
+ { '\u21cf' }, { '\u219b' }, { '\u2933', '\u0338' },
+ { '\u219d', '\u0338' }, { '\u219b' }, { '\u22eb' }, { '\u22ed' },
+ { '\u2281' }, { '\u22e1' }, { '\u2ab0', '\u0338' },
+ { '\ud835', '\udcc3' }, { '\u2224' }, { '\u2226' }, { '\u2241' },
+ { '\u2244' }, { '\u2244' }, { '\u2224' }, { '\u2226' },
+ { '\u22e2' }, { '\u22e3' }, { '\u2284' }, { '\u2ac5', '\u0338' },
+ { '\u2288' }, { '\u2282', '\u20d2' }, { '\u2288' },
+ { '\u2ac5', '\u0338' }, { '\u2281' }, { '\u2ab0', '\u0338' },
+ { '\u2285' }, { '\u2ac6', '\u0338' }, { '\u2289' },
+ { '\u2283', '\u20d2' }, { '\u2289' }, { '\u2ac6', '\u0338' },
+ { '\u2279' }, { '\u00f1' }, { '\u00f1' }, { '\u2278' },
+ { '\u22ea' }, { '\u22ec' }, { '\u22eb' }, { '\u22ed' },
+ { '\u03bd' }, { '\u0023' }, { '\u2116' }, { '\u2007' },
+ { '\u22ad' }, { '\u2904' }, { '\u224d', '\u20d2' }, { '\u22ac' },
+ { '\u2265', '\u20d2' }, { '\u003e', '\u20d2' }, { '\u29de' },
+ { '\u2902' }, { '\u2264', '\u20d2' }, { '\u003c', '\u20d2' },
+ { '\u22b4', '\u20d2' }, { '\u2903' }, { '\u22b5', '\u20d2' },
+ { '\u223c', '\u20d2' }, { '\u21d6' }, { '\u2923' }, { '\u2196' },
+ { '\u2196' }, { '\u2927' }, { '\u24c8' }, { '\u00f3' },
+ { '\u00f3' }, { '\u229b' }, { '\u229a' }, { '\u00f4' },
+ { '\u00f4' }, { '\u043e' }, { '\u229d' }, { '\u0151' },
+ { '\u2a38' }, { '\u2299' }, { '\u29bc' }, { '\u0153' },
+ { '\u29bf' }, { '\ud835', '\udd2c' }, { '\u02db' }, { '\u00f2' },
+ { '\u00f2' }, { '\u29c1' }, { '\u29b5' }, { '\u03a9' },
+ { '\u222e' }, { '\u21ba' }, { '\u29be' }, { '\u29bb' },
+ { '\u203e' }, { '\u29c0' }, { '\u014d' }, { '\u03c9' },
+ { '\u03bf' }, { '\u29b6' }, { '\u2296' }, { '\ud835', '\udd60' },
+ { '\u29b7' }, { '\u29b9' }, { '\u2295' }, { '\u2228' },
+ { '\u21bb' }, { '\u2a5d' }, { '\u2134' }, { '\u2134' },
+ { '\u00aa' }, { '\u00aa' }, { '\u00ba' }, { '\u00ba' },
+ { '\u22b6' }, { '\u2a56' }, { '\u2a57' }, { '\u2a5b' },
+ { '\u2134' }, { '\u00f8' }, { '\u00f8' }, { '\u2298' },
+ { '\u00f5' }, { '\u00f5' }, { '\u2297' }, { '\u2a36' },
+ { '\u00f6' }, { '\u00f6' }, { '\u233d' }, { '\u2225' },
+ { '\u00b6' }, { '\u00b6' }, { '\u2225' }, { '\u2af3' },
+ { '\u2afd' }, { '\u2202' }, { '\u043f' }, { '\u0025' },
+ { '\u002e' }, { '\u2030' }, { '\u22a5' }, { '\u2031' },
+ { '\ud835', '\udd2d' }, { '\u03c6' }, { '\u03d5' }, { '\u2133' },
+ { '\u260e' }, { '\u03c0' }, { '\u22d4' }, { '\u03d6' },
+ { '\u210f' }, { '\u210e' }, { '\u210f' }, { '\u002b' },
+ { '\u2a23' }, { '\u229e' }, { '\u2a22' }, { '\u2214' },
+ { '\u2a25' }, { '\u2a72' }, { '\u00b1' }, { '\u00b1' },
+ { '\u2a26' }, { '\u2a27' }, { '\u00b1' }, { '\u2a15' },
+ { '\ud835', '\udd61' }, { '\u00a3' }, { '\u00a3' }, { '\u227a' },
+ { '\u2ab3' }, { '\u2ab7' }, { '\u227c' }, { '\u2aaf' },
+ { '\u227a' }, { '\u2ab7' }, { '\u227c' }, { '\u2aaf' },
+ { '\u2ab9' }, { '\u2ab5' }, { '\u22e8' }, { '\u227e' },
+ { '\u2032' }, { '\u2119' }, { '\u2ab5' }, { '\u2ab9' },
+ { '\u22e8' }, { '\u220f' }, { '\u232e' }, { '\u2312' },
+ { '\u2313' }, { '\u221d' }, { '\u221d' }, { '\u227e' },
+ { '\u22b0' }, { '\ud835', '\udcc5' }, { '\u03c8' }, { '\u2008' },
+ { '\ud835', '\udd2e' }, { '\u2a0c' }, { '\ud835', '\udd62' },
+ { '\u2057' }, { '\ud835', '\udcc6' }, { '\u210d' }, { '\u2a16' },
+ { '\u003f' }, { '\u225f' }, { '\u0022' }, { '\u0022' },
+ { '\u21db' }, { '\u21d2' }, { '\u291c' }, { '\u290f' },
+ { '\u2964' }, { '\u223d', '\u0331' }, { '\u0155' }, { '\u221a' },
+ { '\u29b3' }, { '\u27e9' }, { '\u2992' }, { '\u29a5' },
+ { '\u27e9' }, { '\u00bb' }, { '\u00bb' }, { '\u2192' },
+ { '\u2975' }, { '\u21e5' }, { '\u2920' }, { '\u2933' },
+ { '\u291e' }, { '\u21aa' }, { '\u21ac' }, { '\u2945' },
+ { '\u2974' }, { '\u21a3' }, { '\u219d' }, { '\u291a' },
+ { '\u2236' }, { '\u211a' }, { '\u290d' }, { '\u2773' },
+ { '\u007d' }, { '\u005d' }, { '\u298c' }, { '\u298e' },
+ { '\u2990' }, { '\u0159' }, { '\u0157' }, { '\u2309' },
+ { '\u007d' }, { '\u0440' }, { '\u2937' }, { '\u2969' },
+ { '\u201d' }, { '\u201d' }, { '\u21b3' }, { '\u211c' },
+ { '\u211b' }, { '\u211c' }, { '\u211d' }, { '\u25ad' },
+ { '\u00ae' }, { '\u00ae' }, { '\u297d' }, { '\u230b' },
+ { '\ud835', '\udd2f' }, { '\u21c1' }, { '\u21c0' }, { '\u296c' },
+ { '\u03c1' }, { '\u03f1' }, { '\u2192' }, { '\u21a3' },
+ { '\u21c1' }, { '\u21c0' }, { '\u21c4' }, { '\u21cc' },
+ { '\u21c9' }, { '\u219d' }, { '\u22cc' }, { '\u02da' },
+ { '\u2253' }, { '\u21c4' }, { '\u21cc' }, { '\u200f' },
+ { '\u23b1' }, { '\u23b1' }, { '\u2aee' }, { '\u27ed' },
+ { '\u21fe' }, { '\u27e7' }, { '\u2986' }, { '\ud835', '\udd63' },
+ { '\u2a2e' }, { '\u2a35' }, { '\u0029' }, { '\u2994' },
+ { '\u2a12' }, { '\u21c9' }, { '\u203a' }, { '\ud835', '\udcc7' },
+ { '\u21b1' }, { '\u005d' }, { '\u2019' }, { '\u2019' },
+ { '\u22cc' }, { '\u22ca' }, { '\u25b9' }, { '\u22b5' },
+ { '\u25b8' }, { '\u29ce' }, { '\u2968' }, { '\u211e' },
+ { '\u015b' }, { '\u201a' }, { '\u227b' }, { '\u2ab4' },
+ { '\u2ab8' }, { '\u0161' }, { '\u227d' }, { '\u2ab0' },
+ { '\u015f' }, { '\u015d' }, { '\u2ab6' }, { '\u2aba' },
+ { '\u22e9' }, { '\u2a13' }, { '\u227f' }, { '\u0441' },
+ { '\u22c5' }, { '\u22a1' }, { '\u2a66' }, { '\u21d8' },
+ { '\u2925' }, { '\u2198' }, { '\u2198' }, { '\u00a7' },
+ { '\u00a7' }, { '\u003b' }, { '\u2929' }, { '\u2216' },
+ { '\u2216' }, { '\u2736' }, { '\ud835', '\udd30' }, { '\u2322' },
+ { '\u266f' }, { '\u0449' }, { '\u0448' }, { '\u2223' },
+ { '\u2225' }, { '\u00ad' }, { '\u00ad' }, { '\u03c3' },
+ { '\u03c2' }, { '\u03c2' }, { '\u223c' }, { '\u2a6a' },
+ { '\u2243' }, { '\u2243' }, { '\u2a9e' }, { '\u2aa0' },
+ { '\u2a9d' }, { '\u2a9f' }, { '\u2246' }, { '\u2a24' },
+ { '\u2972' }, { '\u2190' }, { '\u2216' }, { '\u2a33' },
+ { '\u29e4' }, { '\u2223' }, { '\u2323' }, { '\u2aaa' },
+ { '\u2aac' }, { '\u2aac', '\ufe00' }, { '\u044c' }, { '\u002f' },
+ { '\u29c4' }, { '\u233f' }, { '\ud835', '\udd64' }, { '\u2660' },
+ { '\u2660' }, { '\u2225' }, { '\u2293' }, { '\u2293', '\ufe00' },
+ { '\u2294' }, { '\u2294', '\ufe00' }, { '\u228f' }, { '\u2291' },
+ { '\u228f' }, { '\u2291' }, { '\u2290' }, { '\u2292' },
+ { '\u2290' }, { '\u2292' }, { '\u25a1' }, { '\u25a1' },
+ { '\u25aa' }, { '\u25aa' }, { '\u2192' }, { '\ud835', '\udcc8' },
+ { '\u2216' }, { '\u2323' }, { '\u22c6' }, { '\u2606' },
+ { '\u2605' }, { '\u03f5' }, { '\u03d5' }, { '\u00af' },
+ { '\u2282' }, { '\u2ac5' }, { '\u2abd' }, { '\u2286' },
+ { '\u2ac3' }, { '\u2ac1' }, { '\u2acb' }, { '\u228a' },
+ { '\u2abf' }, { '\u2979' }, { '\u2282' }, { '\u2286' },
+ { '\u2ac5' }, { '\u228a' }, { '\u2acb' }, { '\u2ac7' },
+ { '\u2ad5' }, { '\u2ad3' }, { '\u227b' }, { '\u2ab8' },
+ { '\u227d' }, { '\u2ab0' }, { '\u2aba' }, { '\u2ab6' },
+ { '\u22e9' }, { '\u227f' }, { '\u2211' }, { '\u266a' },
+ { '\u00b9' }, { '\u00b9' }, { '\u00b2' }, { '\u00b2' },
+ { '\u00b3' }, { '\u00b3' }, { '\u2283' }, { '\u2ac6' },
+ { '\u2abe' }, { '\u2ad8' }, { '\u2287' }, { '\u2ac4' },
+ { '\u27c9' }, { '\u2ad7' }, { '\u297b' }, { '\u2ac2' },
+ { '\u2acc' }, { '\u228b' }, { '\u2ac0' }, { '\u2283' },
+ { '\u2287' }, { '\u2ac6' }, { '\u228b' }, { '\u2acc' },
+ { '\u2ac8' }, { '\u2ad4' }, { '\u2ad6' }, { '\u21d9' },
+ { '\u2926' }, { '\u2199' }, { '\u2199' }, { '\u292a' },
+ { '\u00df' }, { '\u00df' }, { '\u2316' }, { '\u03c4' },
+ { '\u23b4' }, { '\u0165' }, { '\u0163' }, { '\u0442' },
+ { '\u20db' }, { '\u2315' }, { '\ud835', '\udd31' }, { '\u2234' },
+ { '\u2234' }, { '\u03b8' }, { '\u03d1' }, { '\u03d1' },
+ { '\u2248' }, { '\u223c' }, { '\u2009' }, { '\u2248' },
+ { '\u223c' }, { '\u00fe' }, { '\u00fe' }, { '\u02dc' },
+ { '\u00d7' }, { '\u00d7' }, { '\u22a0' }, { '\u2a31' },
+ { '\u2a30' }, { '\u222d' }, { '\u2928' }, { '\u22a4' },
+ { '\u2336' }, { '\u2af1' }, { '\ud835', '\udd65' }, { '\u2ada' },
+ { '\u2929' }, { '\u2034' }, { '\u2122' }, { '\u25b5' },
+ { '\u25bf' }, { '\u25c3' }, { '\u22b4' }, { '\u225c' },
+ { '\u25b9' }, { '\u22b5' }, { '\u25ec' }, { '\u225c' },
+ { '\u2a3a' }, { '\u2a39' }, { '\u29cd' }, { '\u2a3b' },
+ { '\u23e2' }, { '\ud835', '\udcc9' }, { '\u0446' }, { '\u045b' },
+ { '\u0167' }, { '\u226c' }, { '\u219e' }, { '\u21a0' },
+ { '\u21d1' }, { '\u2963' }, { '\u00fa' }, { '\u00fa' },
+ { '\u2191' }, { '\u045e' }, { '\u016d' }, { '\u00fb' },
+ { '\u00fb' }, { '\u0443' }, { '\u21c5' }, { '\u0171' },
+ { '\u296e' }, { '\u297e' }, { '\ud835', '\udd32' }, { '\u00f9' },
+ { '\u00f9' }, { '\u21bf' }, { '\u21be' }, { '\u2580' },
+ { '\u231c' }, { '\u231c' }, { '\u230f' }, { '\u25f8' },
+ { '\u016b' }, { '\u00a8' }, { '\u00a8' }, { '\u0173' },
+ { '\ud835', '\udd66' }, { '\u2191' }, { '\u2195' }, { '\u21bf' },
+ { '\u21be' }, { '\u228e' }, { '\u03c5' }, { '\u03d2' },
+ { '\u03c5' }, { '\u21c8' }, { '\u231d' }, { '\u231d' },
+ { '\u230e' }, { '\u016f' }, { '\u25f9' }, { '\ud835', '\udcca' },
+ { '\u22f0' }, { '\u0169' }, { '\u25b5' }, { '\u25b4' },
+ { '\u21c8' }, { '\u00fc' }, { '\u00fc' }, { '\u29a7' },
+ { '\u21d5' }, { '\u2ae8' }, { '\u2ae9' }, { '\u22a8' },
+ { '\u299c' }, { '\u03f5' }, { '\u03f0' }, { '\u2205' },
+ { '\u03d5' }, { '\u03d6' }, { '\u221d' }, { '\u2195' },
+ { '\u03f1' }, { '\u03c2' }, { '\u228a', '\ufe00' },
+ { '\u2acb', '\ufe00' }, { '\u228b', '\ufe00' },
+ { '\u2acc', '\ufe00' }, { '\u03d1' }, { '\u22b2' }, { '\u22b3' },
+ { '\u0432' }, { '\u22a2' }, { '\u2228' }, { '\u22bb' },
+ { '\u225a' }, { '\u22ee' }, { '\u007c' }, { '\u007c' },
+ { '\ud835', '\udd33' }, { '\u22b2' }, { '\u2282', '\u20d2' },
+ { '\u2283', '\u20d2' }, { '\ud835', '\udd67' }, { '\u221d' },
+ { '\u22b3' }, { '\ud835', '\udccb' }, { '\u2acb', '\ufe00' },
+ { '\u228a', '\ufe00' }, { '\u2acc', '\ufe00' },
+ { '\u228b', '\ufe00' }, { '\u299a' }, { '\u0175' }, { '\u2a5f' },
+ { '\u2227' }, { '\u2259' }, { '\u2118' }, { '\ud835', '\udd34' },
+ { '\ud835', '\udd68' }, { '\u2118' }, { '\u2240' }, { '\u2240' },
+ { '\ud835', '\udccc' }, { '\u22c2' }, { '\u25ef' }, { '\u22c3' },
+ { '\u25bd' }, { '\ud835', '\udd35' }, { '\u27fa' }, { '\u27f7' },
+ { '\u03be' }, { '\u27f8' }, { '\u27f5' }, { '\u27fc' },
+ { '\u22fb' }, { '\u2a00' }, { '\ud835', '\udd69' }, { '\u2a01' },
+ { '\u2a02' }, { '\u27f9' }, { '\u27f6' }, { '\ud835', '\udccd' },
+ { '\u2a06' }, { '\u2a04' }, { '\u25b3' }, { '\u22c1' },
+ { '\u22c0' }, { '\u00fd' }, { '\u00fd' }, { '\u044f' },
+ { '\u0177' }, { '\u044b' }, { '\u00a5' }, { '\u00a5' },
+ { '\ud835', '\udd36' }, { '\u0457' }, { '\ud835', '\udd6a' },
+ { '\ud835', '\udcce' }, { '\u044e' }, { '\u00ff' }, { '\u00ff' },
+ { '\u017a' }, { '\u017e' }, { '\u0437' }, { '\u017c' },
+ { '\u2128' }, { '\u03b6' }, { '\ud835', '\udd37' }, { '\u0436' },
+ { '\u21dd' }, { '\ud835', '\udd6b' }, { '\ud835', '\udccf' },
+ { '\u200d' }, { '\u200c' }, };
+
+ final static char[][] WINDOWS_1252 = { { '\u20AC' }, { '\u0081' },
+ { '\u201A' }, { '\u0192' }, { '\u201E' }, { '\u2026' },
+ { '\u2020' }, { '\u2021' }, { '\u02C6' }, { '\u2030' },
+ { '\u0160' }, { '\u2039' }, { '\u0152' }, { '\u008D' },
+ { '\u017D' }, { '\u008F' }, { '\u0090' }, { '\u2018' },
+ { '\u2019' }, { '\u201C' }, { '\u201D' }, { '\u2022' },
+ { '\u2013' }, { '\u2014' }, { '\u02DC' }, { '\u2122' },
+ { '\u0161' }, { '\u203A' }, { '\u0153' }, { '\u009D' },
+ { '\u017E' }, { '\u0178' } };
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharactersAccel.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharactersAccel.java
new file mode 100644
index 000000000..311f8f77f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharactersAccel.java
@@ -0,0 +1,311 @@
+/*
+ * Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera
+ * Software ASA.
+ *
+ * You are granted a license to use, reproduce and create derivative works of
+ * this document.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.NoLength;
+
+/**
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class NamedCharactersAccel {
+
+ static final @NoLength int[][] HILO_ACCEL = {
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ { 0, 0, 0, 0, 0, 0, 0, 12386493, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 40174181, 0, 0, 0, 0, 60162966, 0, 0, 0,
+ 75367550, 0, 0, 0, 82183396, 0, 0, 0, 0, 0, 115148507, 0,
+ 0, 135989275, 139397199, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28770743, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 82248935, 0, 0, 0, 0, 0, 115214046, 0, 0, 0, 139528272, 0,
+ 0, 0, 0, },
+ null,
+ { 0, 0, 0, 4980811, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 38470219, 0, 0, 0, 0, 0, 0, 0, 0, 64553944, 0, 0, 0, 0,
+ 0, 0, 0, 92145022, 0, 0, 0, 0, 0, 0, 0, 0, 139593810, 0, 0,
+ 0, 0, },
+ { 65536, 0, 0, 0, 0, 0, 0, 0, 13172937, 0, 0, 0, 0, 0, 25297282, 0,
+ 0, 28901816, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 71500866, 0, 0, 0, 0, 82380008, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, },
+ null,
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 94897574, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 2555943, 0, 0, 0, 0, 0, 0, 0, 15532269, 0, 0, 0, 0, 0, 0,
+ 0, 31785444, 34406924, 0, 0, 0, 0, 0, 40895088, 0, 0, 0,
+ 60228503, 0, 0, 0, 0, 0, 0, 0, 82445546, 0, 0, 0, 0, 0,
+ 115279583, 0, 0, 136054812, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 40239718, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 5046349, 0, 0, 10944679, 0, 13238474, 0, 15597806,
+ 16056565, 0, 20578618, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, },
+ null,
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 95225257, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 196610, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, 8454273, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 46072511, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 2687016, 0, 0, 0, 0, 0, 13304011, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 31850982, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ null,
+ null,
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 34472462, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 95290798, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 5111886, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 34603535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 105776718, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, 8585346, 0, 11075752, 0, 0, 0, 0, 16187638, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28508594, 0, 0,
+ 0, 0, 0, 0, 0, 40305255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 95421871, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ null,
+ null,
+ null,
+ { 0, 0, 0, 5177423, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ { 327684, 1900571, 2949162, 5374032, 8716420, 0, 11206826,
+ 12517566, 13435084, 0, 15663343, 16515320, 19988785,
+ 20644155, 25428355, 27197855, 0, 29163962, 31916519,
+ 34734609, 36045347, 0, 0, 0, 40436328, 40960625, 41615994,
+ 46596800, 54264627, 60556184, 64750554, 68879387, 71763012,
+ 75826303, 77268122, 0, 81462490, 83952875, 92865919,
+ 96142769, 105973327, 110167691, 0, 116917984, 121833283,
+ 132253665, 136251421, 140707923, 0, 0, 144574620,
+ 145361066, },
+ { 393222, 0, 0, 0, 0, 0, 11272364, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 36176423, 38535756, 0, 0, 0, 0, 41681532, 46727880,
+ 0, 60687261, 0, 0, 71828552, 75891846, 0, 0, 0, 84411650,
+ 0, 96404924, 0, 0, 0, 117376761, 121898820, 132319203,
+ 136382496, 0, 0, 0, 0, 0, },
+ { 589831, 1966110, 3276846, 5505107, 8978566, 10420383, 11468973,
+ 12583104, 13631694, 15139046, 15794416, 16711933, 20054322,
+ 20840764, 25624965, 27263392, 0, 29360574, 32244200,
+ 34931219, 36373033, 38601293, 39584348, 0, 40567402,
+ 41091698, 42205821, 46858954, 54723389, 60818335, 65143773,
+ 68944924, 71959625, 75957383, 77530268, 80938194, 81593564,
+ 84739337, 92997002, 96863680, 106235474, 110233234, 0,
+ 117704448, 122816325, 132515812, 136579106, 140773476,
+ 142149753, 143001732, 144705695, 145492139, },
+ { 0, 0, 3342387, 0, 9044106, 0, 11534512, 0, 13697233, 0, 0, 0, 0,
+ 0, 25690504, 0, 0, 0, 0, 0, 36438572, 38732366, 0, 0, 0,
+ 41157236, 0, 46924492, 54788932, 61080481, 65209315, 0,
+ 72025163, 0, 0, 0, 0, 85132558, 93062540, 96929223,
+ 106563158, 0, 0, 118032133, 123012947, 132581351,
+ 136775717, 140839013, 0, 143067271, 0, 145557677, },
+ { 0, 2162719, 3473460, 5636181, 0, 0, 0, 0, 0, 0, 0, 18809088,
+ 20185395, 21299519, 0, 0, 0, 29622721, 0, 0, 0, 39256656,
+ 39649885, 0, 0, 41288309, 42336901, 47448781, 55182149,
+ 61342629, 65274852, 69010461, 72811596, 76219528, 77726880,
+ 0, 0, 86967572, 93128077, 97650120, 106628699, 110560915,
+ 0, 118490890, 123733846, 132646888, 0, 141232230,
+ 142411898, 0, 144836769, 145688750, },
+ { 655370, 2228258, 3538998, 5701719, 9109643, 10485920, 11600049,
+ 12648641, 13762770, 15204584, 15859954, 18874656, 20250933,
+ 21365062, 25756041, 27328929, 28574132, 29688261, 32309741,
+ 34996758, 36504109, 39322200, 39715422, 39912033, 40632940,
+ 41353847, 42467975, 47514325, 55247691, 61473705, 65405925,
+ 69272606, 72877144, 76285068, 77857955, 81003732, 81659102,
+ 87164208, 93193614, 97715667, 106759772, 110626456,
+ 114296528, 118687505, 123864929, 132712425, 136906792,
+ 141297772, 142477438, 143132808, 144902307, 145754288, },
+ { 786443, 0, 0, 0, 9240716, 0, 11665586, 0, 13893843, 0, 0, 0, 0,
+ 0, 25887114, 0, 0, 0, 0, 0, 36635182, 0, 0, 0, 0, 0,
+ 42599049, 0, 0, 0, 65733607, 0, 73008217, 0, 77989029, 0,
+ 81724639, 87295283, 0, 98305492, 107021918, 0, 0, 0, 0, 0,
+ 137037866, 0, 0, 0, 0, 0, },
+ { 0, 0, 3604535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27394466, 0,
+ 29753798, 32571886, 35258903, 0, 0, 0, 0, 0, 0, 0, 0,
+ 55509836, 61604779, 0, 0, 0, 0, 0, 0, 81790176, 87557429,
+ 93259151, 98502109, 107152994, 110888601, 0, 119015188,
+ 124323683, 133498858, 137234476, 0, 0, 143263881, 0,
+ 145819825, },
+ { 0, 0, 3866680, 6160472, 0, 10616993, 0, 12714178, 0, 0, 0, 0,
+ 20316470, 0, 0, 27460003, 0, 31261127, 32637426, 35521051,
+ 0, 0, 0, 39977570, 0, 0, 0, 48366294, 56492880, 62391213,
+ 0, 69338146, 73073755, 0, 78316711, 0, 0, 0, 93980048,
+ 98764256, 107218532, 111085213, 114362065, 119736089,
+ 125241194, 133957622, 0, 0, 0, 143329419, 144967844,
+ 145885362, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 62456761, 0, 69403683, 73139292, 0,
+ 78382252, 0, 81855713, 87622969, 0, 98829796, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 48431843, 0, 0, 0, 0, 0, 76416141, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 851981, 0, 4063292, 0, 9306254, 0, 0, 0, 0, 0, 0, 19005729, 0, 0,
+ 0, 27525540, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42795659,
+ 49152740, 56623967, 62587834, 66061292, 69600292, 73401437,
+ 0, 0, 0, 0, 87950650, 94111131, 99878373, 107546213,
+ 112002720, 0, 119932708, 125306744, 0, 137496623,
+ 141363309, 0, 143460492, 0, 0, },
+ { 917518, 0, 0, 0, 9502863, 0, 0, 0, 14155989, 0, 0, 19071267, 0,
+ 0, 26083724, 0, 0, 0, 32702963, 0, 36700720, 0, 0, 0, 0, 0,
+ 43057806, 0, 0, 0, 66520049, 0, 0, 0, 78841005, 81069269,
+ 0, 88147263, 0, 99943925, 107873898, 112068270, 0,
+ 120063783, 125831033, 0, 137693235, 0, 0, 143526030, 0, 0, },
+ { 983055, 0, 0, 0, 0, 0, 0, 0, 14483673, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 37093937, 0, 0, 0, 0, 0, 44565138, 49349359, 0, 0,
+ 66651128, 69665831, 73860193, 0, 79561908, 0, 0, 88606018,
+ 94176669, 0, 0, 0, 0, 120129321, 0, 0, 0, 141494382, 0,
+ 143591567, 0, 0, },
+ { 1114128, 2293795, 4587583, 8257631, 9633938, 10813603, 11731123,
+ 12845251, 14680286, 15270121, 15925491, 19661092, 20382007,
+ 24969543, 26149263, 27656613, 28639669, 31392222, 32768500,
+ 35586591, 37225015, 39387737, 39780959, 40043107, 40698477,
+ 41419384, 44696233, 52495090, 57738081, 63439804, 66782202,
+ 69927976, 73925736, 76809359, 79824063, 81134806, 81921250,
+ 89785673, 94307742, 100795894, 107939439, 112330415,
+ 114427602, 120588074, 126158721, 134416381, 137824310,
+ 141559920, 142542975, 143853712, 145033381, 145950899, },
+ { 1179666, 0, 0, 0, 9699476, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26280336,
+ 0, 0, 0, 0, 0, 38076985, 0, 0, 0, 0, 0, 45220523, 52560674,
+ 0, 0, 67175420, 69993516, 0, 0, 79889603, 0, 0, 89916763,
+ 94373280, 101451267, 108136048, 0, 114493139, 120784689,
+ 126355334, 134481924, 138414136, 141625457, 142608512, 0,
+ 0, 0, },
+ { 0, 0, 0, 0, 9896085, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 33292789, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67830786, 0, 0,
+ 0, 80020676, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127403913, 0, 0, 0,
+ 0, 0, 0, 0, },
+ { 1310739, 2359332, 4653127, 0, 0, 0, 12189876, 0, 0, 0, 0, 0, 0,
+ 0, 26345874, 28246439, 0, 31457760, 0, 35652128, 38142534,
+ 0, 0, 0, 0, 0, 45351603, 52757283, 57869170, 63636425,
+ 67961868, 71304237, 73991273, 0, 0, 0, 0, 90309981, 0,
+ 101910029, 108988019, 114034355, 0, 120850228, 127469465,
+ 135464965, 138741825, 141690994, 142739585, 143984788, 0,
+ 0, },
+ { 1441813, 2424869, 4718664, 8388735, 10027160, 10879142, 12255419,
+ 12976325, 14745825, 15401194, 15991028, 19857709, 20447544,
+ 25035134, 26542483, 28377520, 28705206, 31588833, 33358333,
+ 35783201, 38208071, 39453274, 39846496, 40108644, 40764014,
+ 41484921, 45613749, 53216038, 58196852, 63898572, 68158478,
+ 71369793, 74253418, 77005973, 80479430, 81265879, 81986787,
+ 90965347, 94504353, 103679508, 109250176, 114165453,
+ 114558676, 121243445, 127731610, 135727124, 138807366,
+ 142018675, 142805123, 144115862, 145098918, 146016436, },
+ { 1572887, 0, 0, 0, 10092698, 0, 12320956, 0, 14811362, 0, 0,
+ 19923248, 0, 25166207, 26739094, 0, 0, 0, 33423870, 0,
+ 38273608, 0, 0, 0, 0, 0, 45744825, 0, 58262393, 64095184,
+ 68355089, 0, 75170926, 0, 80610509, 0, 0, 91817325, 0,
+ 104203823, 109512324, 0, 0, 121636667, 128059294, 0,
+ 139069511, 0, 0, 0, 0, 0, },
+ { 1703961, 2490406, 4849737, 0, 10223771, 0, 0, 13107399, 15007971,
+ 15466732, 0, 0, 20513081, 25231745, 26870169, 0, 0,
+ 31654371, 34275839, 0, 38404681, 0, 0, 0, 40829551, 0,
+ 45875899, 53609261, 59900794, 64226259, 68551700, 0, 0, 0,
+ 80807119, 81331417, 0, 91948410, 94700963, 104465975,
+ 109643400, 114230991, 114951893, 121702209, 131663779, 0,
+ 139266123, 0, 0, 144246936, 145295527, 0, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27132315, 0, 0, 0, 0,
+ 0, 0, 39518811, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75302012, 0,
+ 0, 0, 0, 92079484, 0, 105383483, 109708938, 0, 0, 0, 0, 0,
+ 0, 0, 0, 144312474, 0, 0, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 46006973, 0, 60031891, 64291797, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 105711177, 0, 0, 0, 0, 131991514, 135923736,
+ 139331662, 0, 0, 144378011, 0, 146147509, },
+ { 0, 0, 0, 0, 10354845, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68813847, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 121767746, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 60097429, 0, 0, 0, 0, 77137048, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 64422870, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 132122591, 0, 0, 142084216, 0, 0, 0, 0, }, };
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Portability.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Portability.java
new file mode 100644
index 000000000..485684ea1
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Portability.java
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2008-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.Literal;
+import nu.validator.htmlparser.annotation.Local;
+import nu.validator.htmlparser.annotation.NoLength;
+import nu.validator.htmlparser.common.Interner;
+
+public final class Portability { // static helpers only; in this Java version several are pass-throughs or no-ops
+
+ // Allocating methods
+
+ /**
+ * Allocates a new local name object from the <code>length</code> chars of <code>buf</code> starting at <code>offset</code>; this Java version returns the interned <code>String</code> and never reads <code>interner</code>. In C++, the refcount must be set up in such a way that
+ * calling <code>releaseLocal</code> on the return value balances the refcount set by this method.
+ */
+ public static @Local String newLocalNameFromBuffer(@NoLength char[] buf, int offset, int length, Interner interner) {
+ return new String(buf, offset, length).intern();
+ }
+
+ public static String newStringFromBuffer(@NoLength char[] buf, int offset, int length
+ // CPPONLY: , TreeBuilder treeBuilder
+ ) {
+ return new String(buf, offset, length); // copies length chars of buf starting at offset; not interned
+ }
+
+ public static String newEmptyString() {
+ return "";
+ }
+
+ public static String newStringFromLiteral(@Literal String literal) {
+ return literal; // returned as-is; no copy is made in Java
+ }
+
+ public static String newStringFromString(String string) {
+ return string; // returned as-is; no copy is made in Java
+ }
+
+ // XXX get rid of this
+ public static char[] newCharArrayFromLocal(@Local String local) {
+ return local.toCharArray();
+ }
+
+ public static char[] newCharArrayFromString(String string) {
+ return string.toCharArray();
+ }
+
+ public static @Local String newLocalFromLocal(@Local String local, Interner interner) {
+ return local; // same reference is returned; interner is not used in Java
+ }
+
+ // Deallocation methods
+
+ public static void releaseString(String str) {
+ // No-op in Java
+ }
+
+ // Comparison methods
+
+ public static boolean localEqualsBuffer(@Local String local, @NoLength char[] buf, int offset, int length) {
+ if (local.length() != length) { // different lengths can never be equal
+ return false;
+ }
+ for (int i = 0; i < length; i++) { // compare char by char against buf[offset .. offset + length - 1]
+ if (local.charAt(i) != buf[offset + i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ public static boolean lowerCaseLiteralIsPrefixOfIgnoreAsciiCaseString(@Literal String lowerCaseLiteral,
+ String string) {
+ if (string == null) { // a null string never matches
+ return false;
+ }
+ if (lowerCaseLiteral.length() > string.length()) { // the string is too short to start with the literal
+ return false;
+ }
+ for (int i = 0; i < lowerCaseLiteral.length(); i++) {
+ char c0 = lowerCaseLiteral.charAt(i);
+ char c1 = string.charAt(i);
+ if (c1 >= 'A' && c1 <= 'Z') { // fold only ASCII upper case, and only on the string side; the literal is compared as given
+ c1 += 0x20;
+ }
+ if (c0 != c1) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ public static boolean lowerCaseLiteralEqualsIgnoreAsciiCaseString(@Literal String lowerCaseLiteral,
+ String string) {
+ if (string == null) { // a null string never matches
+ return false;
+ }
+ if (lowerCaseLiteral.length() != string.length()) { // equality requires identical length
+ return false;
+ }
+ for (int i = 0; i < lowerCaseLiteral.length(); i++) {
+ char c0 = lowerCaseLiteral.charAt(i);
+ char c1 = string.charAt(i);
+ if (c1 >= 'A' && c1 <= 'Z') { // fold only ASCII upper case, and only on the string side; the literal is compared as given
+ c1 += 0x20;
+ }
+ if (c0 != c1) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ public static boolean literalEqualsString(@Literal String literal, String string) {
+ return literal.equals(string); // false when string is null; literal itself must be non-null
+ }
+
+ public static boolean stringEqualsString(String one, String other) {
+ return one.equals(other); // throws NullPointerException when one is null; a null other yields false
+ }
+
+ public static void delete(Object o) {
+ // No-op in Java (empty body)
+ }
+
+ public static void deleteArray(Object o) {
+ // No-op in Java (empty body)
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/PushedLocation.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/PushedLocation.java
new file mode 100644
index 000000000..fad5f43db
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/PushedLocation.java
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+public class PushedLocation { // every field is final and assigned exactly once, in the constructor
+ private final int line;
+
+ private final int linePrev;
+
+ private final int col;
+
+ private final int colPrev;
+
+ private final boolean nextCharOnNewLine;
+
+ private final String publicId;
+
+ private final String systemId;
+
+ private final PushedLocation next;
+
+ /** Stores every argument unchanged in the field of the same name; no validation or copying is performed.
+ * @param line value returned by {@link #getLine()}
+ * @param linePrev value returned by {@link #getLinePrev()}
+ * @param col value returned by {@link #getCol()}
+ * @param colPrev value returned by {@link #getColPrev()}
+ * @param nextCharOnNewLine value returned by {@link #isNextCharOnNewLine()}
+ * @param publicId value returned by {@link #getPublicId()}
+ * @param systemId value returned by {@link #getSystemId()}
+ * @param next value returned by {@link #getNext()}
+ */
+ public PushedLocation(int line, int linePrev, int col, int colPrev,
+ boolean nextCharOnNewLine, String publicId, String systemId,
+ PushedLocation next) {
+ this.line = line;
+ this.linePrev = linePrev;
+ this.col = col;
+ this.colPrev = colPrev;
+ this.nextCharOnNewLine = nextCharOnNewLine;
+ this.publicId = publicId;
+ this.systemId = systemId;
+ this.next = next;
+ }
+
+ /**
+ * Returns the <code>line</code> value given to the constructor.
+ *
+ * @return the line
+ */
+ public int getLine() {
+ return line;
+ }
+
+ /**
+ * Returns the <code>linePrev</code> value given to the constructor.
+ *
+ * @return the linePrev
+ */
+ public int getLinePrev() {
+ return linePrev;
+ }
+
+ /**
+ * Returns the <code>col</code> value given to the constructor.
+ *
+ * @return the col
+ */
+ public int getCol() {
+ return col;
+ }
+
+ /**
+ * Returns the <code>colPrev</code> value given to the constructor.
+ *
+ * @return the colPrev
+ */
+ public int getColPrev() {
+ return colPrev;
+ }
+
+ /**
+ * Returns the <code>nextCharOnNewLine</code> flag given to the constructor.
+ *
+ * @return the nextCharOnNewLine
+ */
+ public boolean isNextCharOnNewLine() {
+ return nextCharOnNewLine;
+ }
+
+ /**
+ * Returns the <code>publicId</code> given to the constructor, exactly as stored (<code>null</code> if <code>null</code> was passed).
+ *
+ * @return the publicId
+ */
+ public String getPublicId() {
+ return publicId;
+ }
+
+ /**
+ * Returns the <code>systemId</code> given to the constructor, exactly as stored (<code>null</code> if <code>null</code> was passed).
+ *
+ * @return the systemId
+ */
+ public String getSystemId() {
+ return systemId;
+ }
+
+ /**
+ * Returns the <code>next</code> location given to the constructor, exactly as stored (<code>null</code> if <code>null</code> was passed).
+ *
+ * @return the next
+ */
+ public PushedLocation getNext() {
+ return next;
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java
new file mode 100644
index 000000000..9aeaba0be
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007-2011 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.Inline;
+import nu.validator.htmlparser.annotation.Local;
+import nu.validator.htmlparser.annotation.NsUri;
+
+final class StackNode<T> { // T is the type of the object held in the node field
+ final int flags;
+
+ final @Local String name;
+
+ final @Local String popName;
+
+ final @NsUri String ns;
+
+ final T node;
+
+ // Only used on the list of formatting elements
+ HtmlAttributes attributes;
+
+ private int refcount = 1; // incremented by retain(), decremented by release()
+
+ // [NOCPP[
+
+ private final TaintableLocatorImpl locator;
+
+ public TaintableLocatorImpl getLocator() {
+ return locator;
+ }
+
+ // ]NOCPP]
+
+ @Inline public int getFlags() {
+ return flags;
+ }
+
+ public int getGroup() {
+ return flags & ElementName.GROUP_MASK; // only the bits selected by GROUP_MASK
+ }
+
+ public boolean isScoping() {
+ return (flags & ElementName.SCOPING) != 0;
+ }
+
+ public boolean isSpecial() {
+ return (flags & ElementName.SPECIAL) != 0;
+ }
+
+ public boolean isFosterParenting() {
+ return (flags & ElementName.FOSTER_PARENTING) != 0;
+ }
+
+ public boolean isHtmlIntegrationPoint() {
+ return (flags & ElementName.HTML_INTEGRATION_POINT) != 0;
+ }
+
+ // [NOCPP[
+
+ public boolean isOptionalEndTag() {
+ return (flags & ElementName.OPTIONAL_END_TAG) != 0;
+ }
+
+ // ]NOCPP]
+
+ /**
+ * Constructor for copying. This doesn't take another <code>StackNode</code>
+ * because in C++ the caller is responsible for reobtaining the local names
+ * from another interner. Every argument is stored unchanged and the refcount starts at 1.
+ *
+ * @param flags the flag bits to store; the <code>is*</code> methods test them against <code>ElementName</code> masks
+ * @param ns the namespace URI to store in <code>ns</code>
+ * @param name the local name to store in <code>name</code>
+ * @param node the object to store in <code>node</code>
+ * @param popName the name to store in <code>popName</code>
+ * @param attributes the attributes to store; they can later be cleared with <code>dropAttributes()</code>
+ */
+ StackNode(int flags, @NsUri String ns, @Local String name, T node,
+ @Local String popName, HtmlAttributes attributes
+ // [NOCPP[
+ , TaintableLocatorImpl locator
+ // ]NOCPP]
+ ) {
+ this.flags = flags;
+ this.name = name;
+ this.popName = popName;
+ this.ns = ns;
+ this.node = node;
+ this.attributes = attributes;
+ this.refcount = 1;
+ // [NOCPP[
+ this.locator = locator;
+ // ]NOCPP]
+ }
+
+ /**
+ * Shorthand for well-known (non-custom) HTML elements: XHTML namespace, <code>popName</code> equal to the element's own name, no attributes.
+ *
+ * @param elementName supplies the flags (<code>getFlags()</code>) and the name; asserted not to be custom
+ * @param node the object to store in <code>node</code>
+ */
+ StackNode(ElementName elementName, T node
+ // [NOCPP[
+ , TaintableLocatorImpl locator
+ // ]NOCPP]
+ ) {
+ this.flags = elementName.getFlags();
+ this.name = elementName.name;
+ this.popName = elementName.name;
+ this.ns = "http://www.w3.org/1999/xhtml";
+ this.node = node;
+ this.attributes = null;
+ this.refcount = 1;
+ assert !elementName.isCustom() : "Don't use this constructor for custom elements.";
+ // [NOCPP[
+ this.locator = locator;
+ // ]NOCPP]
+ }
+
+ /**
+ * Constructor for HTML formatting elements (non-custom): XHTML namespace, <code>popName</code> equal to the element's own name, attributes kept.
+ *
+ * @param elementName supplies the flags (<code>getFlags()</code>) and the name; asserted not to be custom
+ * @param node the object to store in <code>node</code>
+ * @param attributes the attributes to store in <code>attributes</code>
+ */
+ StackNode(ElementName elementName, T node, HtmlAttributes attributes
+ // [NOCPP[
+ , TaintableLocatorImpl locator
+ // ]NOCPP]
+ ) {
+ this.flags = elementName.getFlags();
+ this.name = elementName.name;
+ this.popName = elementName.name;
+ this.ns = "http://www.w3.org/1999/xhtml";
+ this.node = node;
+ this.attributes = attributes;
+ this.refcount = 1;
+ assert !elementName.isCustom() : "Don't use this constructor for custom elements.";
+ // [NOCPP[
+ this.locator = locator;
+ // ]NOCPP]
+ }
+
+ /**
+ * The common-case HTML constructor: XHTML namespace, <code>popName</code> supplied separately from the element name, no attributes.
+ *
+ * @param elementName supplies the flags (<code>getFlags()</code>) and the name
+ * @param node the object to store in <code>node</code>
+ * @param popName the name to store in <code>popName</code>
+ */
+ StackNode(ElementName elementName, T node, @Local String popName
+ // [NOCPP[
+ , TaintableLocatorImpl locator
+ // ]NOCPP]
+ ) {
+ this.flags = elementName.getFlags();
+ this.name = elementName.name;
+ this.popName = popName;
+ this.ns = "http://www.w3.org/1999/xhtml";
+ this.node = node;
+ this.attributes = null;
+ this.refcount = 1;
+ // [NOCPP[
+ this.locator = locator;
+ // ]NOCPP]
+ }
+
+ /**
+ * Constructor for SVG elements. Note that the order of the arguments is
+ * what distinguishes this from the HTML constructor. This is ugly, but
+ * AFAICT the least disruptive way to make this work with Java's generics
+ * and without unnecessary branches. :-(
+ *
+ * @param elementName supplies the name and the flags, which are passed through <code>prepareSvgFlags</code>
+ * @param popName the name to store in <code>popName</code>
+ * @param node the object to store in <code>node</code>
+ */
+ StackNode(ElementName elementName, @Local String popName, T node
+ // [NOCPP[
+ , TaintableLocatorImpl locator
+ // ]NOCPP]
+ ) {
+ this.flags = prepareSvgFlags(elementName.getFlags());
+ this.name = elementName.name;
+ this.popName = popName;
+ this.ns = "http://www.w3.org/2000/svg";
+ this.node = node;
+ this.attributes = null;
+ this.refcount = 1;
+ // [NOCPP[
+ this.locator = locator;
+ // ]NOCPP]
+ }
+
+ /**
+ * Constructor for MathML: MathML namespace, no attributes, flags passed through <code>prepareMathFlags</code>.
+ *
+ * @param elementName supplies the name and the unprepared flags
+ * @param node the object to store in <code>node</code>
+ * @param popName the name to store in <code>popName</code>
+ * @param markAsIntegrationPoint if true, <code>HTML_INTEGRATION_POINT</code> is added to the stored flags
+ */
+ StackNode(ElementName elementName, T node, @Local String popName,
+ boolean markAsIntegrationPoint
+ // [NOCPP[
+ , TaintableLocatorImpl locator
+ // ]NOCPP]
+ ) {
+ this.flags = prepareMathFlags(elementName.getFlags(),
+ markAsIntegrationPoint);
+ this.name = elementName.name;
+ this.popName = popName;
+ this.ns = "http://www.w3.org/1998/Math/MathML";
+ this.node = node;
+ this.attributes = null;
+ this.refcount = 1;
+ // [NOCPP[
+ this.locator = locator;
+ // ]NOCPP]
+ }
+
+ private static int prepareSvgFlags(int flags) {
+ flags &= ~(ElementName.FOSTER_PARENTING | ElementName.SCOPING
+ | ElementName.SPECIAL | ElementName.OPTIONAL_END_TAG); // these four bits are always cleared first
+ if ((flags & ElementName.SCOPING_AS_SVG) != 0) { // then scoping/special are set again, plus the integration-point bit
+ flags |= (ElementName.SCOPING | ElementName.SPECIAL | ElementName.HTML_INTEGRATION_POINT);
+ }
+ return flags;
+ }
+
+ private static int prepareMathFlags(int flags,
+ boolean markAsIntegrationPoint) {
+ flags &= ~(ElementName.FOSTER_PARENTING | ElementName.SCOPING
+ | ElementName.SPECIAL | ElementName.OPTIONAL_END_TAG); // these four bits are always cleared first
+ if ((flags & ElementName.SCOPING_AS_MATHML) != 0) { // then scoping/special are set again for MathML scoping elements
+ flags |= (ElementName.SCOPING | ElementName.SPECIAL);
+ }
+ if (markAsIntegrationPoint) { // independent of the scoping test above
+ flags |= ElementName.HTML_INTEGRATION_POINT;
+ }
+ return flags;
+ }
+
+ @SuppressWarnings("unused") private void destructor() { // never called within this class; NOTE(review): presumably consumed by the C++ translation - confirm
+ Portability.delete(attributes);
+ }
+
+ public void dropAttributes() {
+ attributes = null; // only clears the reference; nothing is released here
+ }
+
+ // [NOCPP[
+ /**
+ * @see java.lang.Object#toString()
+ */
+ @Override public @Local String toString() {
+ return name;
+ }
+
+ // ]NOCPP]
+
+ public void retain() {
+ refcount++;
+ }
+
+ public void release() {
+ refcount--;
+ if (refcount == 0) { // no references remain
+ Portability.delete(this);
+ }
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StateSnapshot.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StateSnapshot.java
new file mode 100644
index 000000000..ff89e0443
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StateSnapshot.java
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2009-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.Auto;
+
+
+public class StateSnapshot<T> implements TreeBuilderState<T> { // Read-only holder: every field is final and set once by the constructor.
+
+ private final @Auto StackNode<T>[] stack;
+
+ private final @Auto StackNode<T>[] listOfActiveFormattingElements; // entries may be null; see destructor()
+
+ private final @Auto int[] templateModeStack;
+
+ private final T formPointer;
+
+ private final T headPointer;
+
+ private final T deepTreeSurrogateParent;
+
+ private final int mode;
+
+ private final int originalMode;
+
+ private final boolean framesetOk;
+
+ private final boolean needToDropLF;
+
+ private final boolean quirks;
+
+ /**
+ * @param stack the node stack array; the reference is stored as-is, not copied
+ * @param listOfActiveFormattingElements the formatting element array; stored as-is, null entries are tolerated
+ * @param templateModeStack the template mode array; stored as-is
+ * @param formPointer value later returned by getFormPointer()
+ * @param headPointer value later returned by getHeadPointer()
+ * @param deepTreeSurrogateParent value later returned by getDeepTreeSurrogateParent()
+ * @param mode value later returned by getMode()
+ * @param originalMode value later returned by getOriginalMode()
+ * @param framesetOk value later returned by isFramesetOk()
+ * @param needToDropLF value later returned by isNeedToDropLF()
+ * @param quirks value later returned by isQuirks()
+ */
+ StateSnapshot(StackNode<T>[] stack,
+ StackNode<T>[] listOfActiveFormattingElements, int[] templateModeStack, T formPointer,
+ T headPointer, T deepTreeSurrogateParent, int mode, int originalMode,
+ boolean framesetOk, boolean needToDropLF, boolean quirks) {
+ this.stack = stack;
+ this.listOfActiveFormattingElements = listOfActiveFormattingElements;
+ this.templateModeStack = templateModeStack;
+ this.formPointer = formPointer;
+ this.headPointer = headPointer;
+ this.deepTreeSurrogateParent = deepTreeSurrogateParent;
+ this.mode = mode;
+ this.originalMode = originalMode;
+ this.framesetOk = framesetOk;
+ this.needToDropLF = needToDropLF;
+ this.quirks = quirks;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getStack()
+ */
+ public StackNode<T>[] getStack() {
+ return stack; // the stored array itself, not a copy
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getTemplateModeStack()
+ */
+ public int[] getTemplateModeStack() {
+ return templateModeStack; // the stored array itself, not a copy
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getListOfActiveFormattingElements()
+ */
+ public StackNode<T>[] getListOfActiveFormattingElements() {
+ return listOfActiveFormattingElements; // the stored array itself, not a copy
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getFormPointer()
+ */
+ public T getFormPointer() {
+ return formPointer;
+ }
+
+ /**
+ * Returns the head pointer captured at construction.
+ *
+ * @return the headPointer
+ */
+ public T getHeadPointer() {
+ return headPointer;
+ }
+
+ /**
+ * Returns the deep tree surrogate parent captured at construction.
+ *
+ * @return the deepTreeSurrogateParent
+ */
+ public T getDeepTreeSurrogateParent() {
+ return deepTreeSurrogateParent;
+ }
+
+ /**
+ * Returns the mode value captured at construction.
+ *
+ * @return the mode
+ */
+ public int getMode() {
+ return mode;
+ }
+
+ /**
+ * Returns the original mode value captured at construction.
+ *
+ * @return the originalMode
+ */
+ public int getOriginalMode() {
+ return originalMode;
+ }
+
+ /**
+ * Returns the <code>framesetOk</code> flag captured at construction.
+ *
+ * @return the framesetOk
+ */
+ public boolean isFramesetOk() {
+ return framesetOk;
+ }
+
+ /**
+ * Returns the <code>needToDropLF</code> flag captured at construction.
+ *
+ * @return the needToDropLF
+ */
+ public boolean isNeedToDropLF() {
+ return needToDropLF;
+ }
+
+ /**
+ * Returns the <code>quirks</code> flag captured at construction.
+ *
+ * @return the quirks
+ */
+ public boolean isQuirks() {
+ return quirks;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getListOfActiveFormattingElementsLength()
+ */
+ public int getListOfActiveFormattingElementsLength() {
+ return listOfActiveFormattingElements.length; // full array length; null entries are counted too
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getStackLength()
+ */
+ public int getStackLength() {
+ return stack.length;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilderState#getTemplateModeStackLength()
+ */
+ public int getTemplateModeStackLength() {
+ return templateModeStack.length;
+ }
+
+ @SuppressWarnings("unused") private void destructor() { // NOTE(review): no Java caller in view; presumably used by the C++ translation - confirm
+ for (int i = 0; i < stack.length; i++) {
+ stack[i].release(); // no null check: every stack entry is expected to be non-null
+ }
+ for (int i = 0; i < listOfActiveFormattingElements.length; i++) {
+ if (listOfActiveFormattingElements[i] != null) { // this array may contain null entries, which are skipped
+ listOfActiveFormattingElements[i].release();
+ }
+ }
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java
new file mode 100644
index 000000000..37cdb75d3
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2011 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import org.xml.sax.Locator;
+
+public class TaintableLocatorImpl extends LocatorImpl { // A LocatorImpl that additionally carries a "tainted" flag.
+
+ private boolean tainted = false; // raised by markTainted(); nothing in this class lowers it again
+
+ public TaintableLocatorImpl(Locator locator) {
+ super(locator);
+ // The flag gets its starting value from the field initializer, which runs right after super().
+ }
+
+ public void markTainted() {
+ this.tainted = true;
+ }
+
+ public boolean isTainted() {
+ return this.tainted;
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java
new file mode 100644
index 000000000..d9eaafeb3
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java
@@ -0,0 +1,7067 @@
+/*
+ * Copyright (c) 2005-2007 Henri Sivonen
+ * Copyright (c) 2007-2015 Mozilla Foundation
+ * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
+ * Foundation, and Opera Software ASA.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * The comments following this one that use the same comment syntax as this
+ * comment are quotes from the WHATWG HTML 5 spec as of 2 June 2007
+ * amended as of June 18 2008 and May 31 2010.
+ * That document came with this statement:
+ * "© Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and
+ * Opera Software ASA. You are granted a license to use, reproduce and
+ * create derivative works of this document."
+ */
+
+package nu.validator.htmlparser.impl;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+import nu.validator.htmlparser.annotation.Auto;
+import nu.validator.htmlparser.annotation.CharacterName;
+import nu.validator.htmlparser.annotation.Const;
+import nu.validator.htmlparser.annotation.Inline;
+import nu.validator.htmlparser.annotation.Local;
+import nu.validator.htmlparser.annotation.NoLength;
+import nu.validator.htmlparser.common.EncodingDeclarationHandler;
+import nu.validator.htmlparser.common.Interner;
+import nu.validator.htmlparser.common.TokenHandler;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+
+/**
+ * An implementation of
+ * https://html.spec.whatwg.org/multipage/syntax.html#tokenization
+ *
+ * This class implements the <code>Locator</code> interface. This is not an
+ * incidental implementation detail: Users of this class are encouraged to make
+ * use of the <code>Locator</code> nature.
+ *
+ * By default, the tokenizer may report data that XML 1.0 bans. The tokenizer
+ * can be configured to treat these conditions as fatal or to coerce the infoset
+ * to something that XML 1.0 allows.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class Tokenizer implements Locator {
+
+ private static final int DATA_AND_RCDATA_MASK = ~1; // every bit except bit 0: (state & mask) == 0 holds only for DATA (0) and RCDATA (1)
+
+ public static final int DATA = 0;
+
+ public static final int RCDATA = 1;
+
+ public static final int SCRIPT_DATA = 2;
+
+ public static final int RAWTEXT = 3;
+
+ public static final int SCRIPT_DATA_ESCAPED = 4;
+
+ public static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED = 5;
+
+ public static final int ATTRIBUTE_VALUE_SINGLE_QUOTED = 6;
+
+ public static final int ATTRIBUTE_VALUE_UNQUOTED = 7;
+
+ public static final int PLAINTEXT = 8;
+
+ public static final int TAG_OPEN = 9;
+
+ public static final int CLOSE_TAG_OPEN = 10;
+
+ public static final int TAG_NAME = 11;
+
+ public static final int BEFORE_ATTRIBUTE_NAME = 12;
+
+ public static final int ATTRIBUTE_NAME = 13;
+
+ public static final int AFTER_ATTRIBUTE_NAME = 14;
+
+ public static final int BEFORE_ATTRIBUTE_VALUE = 15;
+
+ public static final int AFTER_ATTRIBUTE_VALUE_QUOTED = 16;
+
+ public static final int BOGUS_COMMENT = 17;
+
+ public static final int MARKUP_DECLARATION_OPEN = 18;
+
+ public static final int DOCTYPE = 19;
+
+ public static final int BEFORE_DOCTYPE_NAME = 20;
+
+ public static final int DOCTYPE_NAME = 21;
+
+ public static final int AFTER_DOCTYPE_NAME = 22;
+
+ public static final int BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 23;
+
+ public static final int DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 24;
+
+ public static final int DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 25;
+
+ public static final int AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 26;
+
+ public static final int BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 27;
+
+ public static final int DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 28;
+
+ public static final int DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 29;
+
+ public static final int AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 30;
+
+ public static final int BOGUS_DOCTYPE = 31;
+
+ public static final int COMMENT_START = 32;
+
+ public static final int COMMENT_START_DASH = 33;
+
+ public static final int COMMENT = 34;
+
+ public static final int COMMENT_END_DASH = 35;
+
+ public static final int COMMENT_END = 36;
+
+ public static final int COMMENT_END_BANG = 37;
+
+ public static final int NON_DATA_END_TAG_NAME = 38;
+
+ public static final int MARKUP_DECLARATION_HYPHEN = 39;
+
+ public static final int MARKUP_DECLARATION_OCTYPE = 40;
+
+ public static final int DOCTYPE_UBLIC = 41;
+
+ public static final int DOCTYPE_YSTEM = 42;
+
+ public static final int AFTER_DOCTYPE_PUBLIC_KEYWORD = 43;
+
+ public static final int BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 44;
+
+ public static final int AFTER_DOCTYPE_SYSTEM_KEYWORD = 45;
+
+ public static final int CONSUME_CHARACTER_REFERENCE = 46;
+
+ public static final int CONSUME_NCR = 47;
+
+ public static final int CHARACTER_REFERENCE_TAIL = 48;
+
+ public static final int HEX_NCR_LOOP = 49;
+
+ public static final int DECIMAL_NRC_LOOP = 50; // NOTE(review): "NRC" looks like a transposition of "NCR" (compare HEX_NCR_LOOP, CONSUME_NCR); public name, so left unchanged
+
+ public static final int HANDLE_NCR_VALUE = 51;
+
+ public static final int HANDLE_NCR_VALUE_RECONSUME = 52;
+
+ public static final int CHARACTER_REFERENCE_HILO_LOOKUP = 53;
+
+ public static final int SELF_CLOSING_START_TAG = 54;
+
+ public static final int CDATA_START = 55;
+
+ public static final int CDATA_SECTION = 56;
+
+ public static final int CDATA_RSQB = 57;
+
+ public static final int CDATA_RSQB_RSQB = 58;
+
+ public static final int SCRIPT_DATA_LESS_THAN_SIGN = 59;
+
+ public static final int SCRIPT_DATA_ESCAPE_START = 60;
+
+ public static final int SCRIPT_DATA_ESCAPE_START_DASH = 61;
+
+ public static final int SCRIPT_DATA_ESCAPED_DASH = 62;
+
+ public static final int SCRIPT_DATA_ESCAPED_DASH_DASH = 63;
+
+ public static final int BOGUS_COMMENT_HYPHEN = 64;
+
+ public static final int RAWTEXT_RCDATA_LESS_THAN_SIGN = 65;
+
+ public static final int SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 66;
+
+ public static final int SCRIPT_DATA_DOUBLE_ESCAPE_START = 67;
+
+ public static final int SCRIPT_DATA_DOUBLE_ESCAPED = 68;
+
+ public static final int SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 69;
+
+ public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 70;
+
+ public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 71;
+
+ public static final int SCRIPT_DATA_DOUBLE_ESCAPE_END = 72;
+
+ public static final int PROCESSING_INSTRUCTION = 73;
+
+ public static final int PROCESSING_INSTRUCTION_QUESTION_MARK = 74;
+
+ /**
+ * Magic value for UTF-16 operations.
+ */
+ private static final int LEAD_OFFSET = (0xD800 - (0x10000 >> 10));
+
+ /**
+ * UTF-16 code unit array containing less than and greater than for emitting
+ * those characters on certain parse errors.
+ */
+ private static final @NoLength char[] LT_GT = { '<', '>' };
+
+ /**
+ * UTF-16 code unit array containing less than and solidus for emitting
+ * those characters on certain parse errors.
+ */
+ private static final @NoLength char[] LT_SOLIDUS = { '<', '/' };
+
+ /**
+ * UTF-16 code unit array containing ]] for emitting those characters on
+ * state transitions.
+ */
+ private static final @NoLength char[] RSQB_RSQB = { ']', ']' };
+
+ /**
+ * Array version of U+FFFD.
+ */
+ private static final @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
+
+ // [NOCPP[
+
+ /**
+ * Array version of space.
+ */
+ private static final @NoLength char[] SPACE = { ' ' };
+
+ // ]NOCPP]
+
+ /**
+ * Array version of line feed.
+ */
+ private static final @NoLength char[] LF = { '\n' };
+
+ /**
+ * "CDATA[" as <code>char[]</code>
+ */
+ private static final @NoLength char[] CDATA_LSQB = { 'C', 'D', 'A', 'T',
+ 'A', '[' };
+
+ /**
+ * "octype" as <code>char[]</code>
+ */
+ private static final @NoLength char[] OCTYPE = { 'o', 'c', 't', 'y', 'p',
+ 'e' };
+
+ /**
+ * "ublic" as <code>char[]</code>
+ */
+ private static final @NoLength char[] UBLIC = { 'u', 'b', 'l', 'i', 'c' };
+
+ /**
+ * "ystem" as <code>char[]</code>
+ */
+ private static final @NoLength char[] YSTEM = { 'y', 's', 't', 'e', 'm' };
+
+ private static final char[] TITLE_ARR = { 't', 'i', 't', 'l', 'e' };
+
+ private static final char[] SCRIPT_ARR = { 's', 'c', 'r', 'i', 'p', 't' };
+
+ private static final char[] STYLE_ARR = { 's', 't', 'y', 'l', 'e' };
+
+ private static final char[] PLAINTEXT_ARR = { 'p', 'l', 'a', 'i', 'n', 't',
+ 'e', 'x', 't' };
+
+ private static final char[] XMP_ARR = { 'x', 'm', 'p' };
+
+ private static final char[] TEXTAREA_ARR = { 't', 'e', 'x', 't', 'a', 'r',
+ 'e', 'a' };
+
+ private static final char[] IFRAME_ARR = { 'i', 'f', 'r', 'a', 'm', 'e' };
+
+ private static final char[] NOEMBED_ARR = { 'n', 'o', 'e', 'm', 'b', 'e',
+ 'd' };
+
+ private static final char[] NOSCRIPT_ARR = { 'n', 'o', 's', 'c', 'r', 'i',
+ 'p', 't' };
+
+ private static final char[] NOFRAMES_ARR = { 'n', 'o', 'f', 'r', 'a', 'm',
+ 'e', 's' };
+
+ /**
+ * The token handler.
+ */
+ protected final TokenHandler tokenHandler;
+
+ protected EncodingDeclarationHandler encodingDeclarationHandler;
+
+ // [NOCPP[
+
+ /**
+ * The error handler.
+ */
+ protected ErrorHandler errorHandler;
+
+ // ]NOCPP]
+
+ /**
+ * Whether the previous char read was CR.
+ */
+ protected boolean lastCR;
+
+ protected int stateSave;
+
+ private int returnStateSave;
+
+ protected int index;
+
+ private boolean forceQuirks;
+
+ private char additional;
+
+ private int entCol;
+
+ private int firstCharKey;
+
+ private int lo;
+
+ private int hi;
+
+ private int candidate;
+
+ private int charRefBufMark;
+
+ protected int value;
+
+ private boolean seenDigits;
+
+ protected int cstart;
+
+ /**
+ * The SAX public id for the resource being tokenized. (Only passed back
+ * as part of locator data.)
+ */
+ private String publicId;
+
+ /**
+ * The SAX system id for the resource being tokenized. (Only passed back
+ * as part of locator data.)
+ */
+ private String systemId;
+
+ /**
+ * Buffer for bufferable things other than those that fit the description
+ * of <code>charRefBuf</code>.
+ */
+ private @Auto char[] strBuf;
+
+ /**
+ * Number of significant <code>char</code>s in <code>strBuf</code>.
+ */
+ private int strBufLen;
+
+ /**
+ * Buffer for characters that might form a character reference but may
+ * end up not forming one.
+ */
+ private final @Auto char[] charRefBuf;
+
+ /**
+ * Number of significant <code>char</code>s in <code>charRefBuf</code>.
+ */
+ private int charRefBufLen;
+
+ /**
+ * Buffer for expanding NCRs falling into the Basic Multilingual Plane.
+ */
+ private final @Auto char[] bmpChar;
+
+ /**
+ * Buffer for expanding astral NCRs.
+ */
+ private final @Auto char[] astralChar;
+
+ /**
+ * The element whose end tag closes the current CDATA or RCDATA element.
+ */
+ protected ElementName endTagExpectation = null;
+
+ private char[] endTagExpectationAsArray; // not @Auto!
+
+ /**
+ * <code>true</code> if tokenizing an end tag
+ */
+ protected boolean endTag;
+
+ /**
+ * The current tag token name.
+ */
+ private ElementName tagName = null;
+
+ /**
+ * The current attribute name.
+ */
+ protected AttributeName attributeName = null;
+
+ // [NOCPP[
+
+ /**
+ * Whether comment tokens are emitted.
+ */
+ private boolean wantsComments = false;
+
+ /**
+ * <code>true</code> when HTML4-specific additional errors are requested.
+ */
+ protected boolean html4;
+
+ /**
+ * Whether the stream is past the first 1024 bytes.
+ */
+ private boolean metaBoundaryPassed;
+
+ // ]NOCPP]
+
+ /**
+ * The name of the current doctype token.
+ */
+ private @Local String doctypeName;
+
+ /**
+ * The public id of the current doctype token.
+ */
+ private String publicIdentifier;
+
+ /**
+ * The system id of the current doctype token.
+ */
+ private String systemIdentifier;
+
+ /**
+ * The attribute holder.
+ */
+ private HtmlAttributes attributes;
+
+ // [NOCPP[
+
+ /**
+ * The policy for vertical tab and form feed.
+ */
+ private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.ALTER_INFOSET;
+
+ /**
+ * The policy for comments.
+ */
+ private XmlViolationPolicy commentPolicy = XmlViolationPolicy.ALTER_INFOSET;
+
+ private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.ALTER_INFOSET;
+
+ private XmlViolationPolicy namePolicy = XmlViolationPolicy.ALTER_INFOSET;
+
+ private boolean html4ModeCompatibleWithXhtml1Schemata;
+
+ private int mappingLangToXmlLang;
+
+ // ]NOCPP]
+
+ private final boolean newAttributesEachTime;
+
+ private boolean shouldSuspend;
+
+ protected boolean confident;
+
+ private int line;
+
+ /*
+ * The line number of the current attribute. First set to the line of the
+ * attribute name and if there is a value, set to the line the value
+ * started on.
+ */
+ // CPPONLY: private int attributeLine;
+
+ private Interner interner;
+
+ // CPPONLY: private boolean viewingXmlSource;
+
+ // [NOCPP[
+
+ protected LocatorImpl ampersandLocation;
+
+ public Tokenizer(TokenHandler tokenHandler, boolean newAttributesEachTime) { // Java-only constructor (inside a NOCPP region) that lets the caller pick newAttributesEachTime.
+ this.tokenHandler = tokenHandler;
+ this.encodingDeclarationHandler = null;
+ this.newAttributesEachTime = newAttributesEachTime; // read by emptyAttributes()
+ // &CounterClockwiseContourIntegral; is the longest valid char ref and
+ // the semicolon never gets appended to the buffer.
+ this.charRefBuf = new char[32];
+ this.bmpChar = new char[1]; // one UTF-16 code unit
+ this.astralChar = new char[2]; // two UTF-16 code units
+ this.tagName = null;
+ this.attributeName = null;
+ this.doctypeName = null;
+ this.publicIdentifier = null;
+ this.systemIdentifier = null;
+ this.attributes = null;
+ }
+
+ // ]NOCPP]
+
+ /**
+ * Creates a tokenizer that reports tokens to <code>tokenHandler</code>.
+ *
+ * @param tokenHandler
+ * the handler for receiving tokens
+ */
+ public Tokenizer(TokenHandler tokenHandler
+ // CPPONLY: , boolean viewingXmlSource
+ ) {
+ this.tokenHandler = tokenHandler;
+ this.encodingDeclarationHandler = null;
+ // [NOCPP[
+ this.newAttributesEachTime = false; // Java: emptyAttributes() then returns HtmlAttributes.EMPTY_ATTRIBUTES
+ // ]NOCPP]
+ // &CounterClockwiseContourIntegral; is the longest valid char ref and
+ // the semicolon never gets appended to the buffer.
+ this.charRefBuf = new char[32];
+ this.bmpChar = new char[1];
+ this.astralChar = new char[2];
+ this.tagName = null;
+ this.attributeName = null;
+ this.doctypeName = null;
+ this.publicIdentifier = null;
+ this.systemIdentifier = null;
+ // [NOCPP[
+ this.attributes = null;
+ // ]NOCPP]
+ // CPPONLY: this.attributes = tokenHandler.HasBuilder() ? new HtmlAttributes(mappingLangToXmlLang) : null;
+ // CPPONLY: this.newAttributesEachTime = !tokenHandler.HasBuilder();
+ // CPPONLY: this.viewingXmlSource = viewingXmlSource;
+ }
+
+ public void setInterner(Interner interner) { // Stores the interner later handed to ElementName.elementNameByBuffer and Portability.newLocalNameFromBuffer.
+ this.interner = interner;
+ }
+
+ public void initLocation(String newPublicId, String newSystemId) { // Sets the ids reported by getPublicId() and getSystemId().
+ this.systemId = newSystemId;
+ this.publicId = newPublicId;
+
+ }
+
+ // CPPONLY: boolean isViewingXmlSource() {
+ // CPPONLY: return viewingXmlSource;
+ // CPPONLY: }
+
+ // [NOCPP[
+
+ /**
+ * Reports whether the stored mapping mode equals <code>AttributeName.HTML_LANG</code>.
+ *
+ * @return <code>true</code> if the stored mode is <code>AttributeName.HTML_LANG</code>
+ */
+ public boolean isMappingLangToXmlLang() {
+ return mappingLangToXmlLang == AttributeName.HTML_LANG;
+ }
+
+ /**
+ * Chooses the mapping mode that is stored as an <code>AttributeName</code> constant.
+ *
+ * @param mappingLangToXmlLang
+ * <code>true</code> stores <code>AttributeName.HTML_LANG</code>, <code>false</code> stores <code>AttributeName.HTML</code>
+ */
+ public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
+ final int mapping = mappingLangToXmlLang ? AttributeName.HTML_LANG : AttributeName.HTML;
+ this.mappingLangToXmlLang = mapping;
+ }
+
+ /**
+ * Sets the error handler; no check is made here, so <code>null</code> is stored like any other value.
+ *
+ * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
+ */
+ public void setErrorHandler(ErrorHandler eh) {
+ this.errorHandler = eh;
+ }
+
+ public ErrorHandler getErrorHandler() { // Returns whatever setErrorHandler last stored.
+ return this.errorHandler;
+ }
+
+ /**
+ * Sets the policy that the comment helpers in this class switch on for hyphen handling.
+ *
+ * @param commentPolicy
+ * the commentPolicy to set (stored without validation)
+ */
+ public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
+ this.commentPolicy = commentPolicy;
+ }
+
+ /**
+ * Checks the requested contentNonXmlCharPolicy; this class keeps no field for it.
+ *
+ * @param contentNonXmlCharPolicy the requested policy; only <code>XmlViolationPolicy.ALLOW</code> is accepted
+ * @throws IllegalArgumentException for any value other than <code>ALLOW</code>
+ */
+ public void setContentNonXmlCharPolicy(XmlViolationPolicy contentNonXmlCharPolicy) {
+ if (contentNonXmlCharPolicy == XmlViolationPolicy.ALLOW) {
+ return;
+ }
+ throw new IllegalArgumentException(
+ "Must use ErrorReportingTokenizer to set contentNonXmlCharPolicy to non-ALLOW.");
+ }
+
+ /**
+ * Sets the contentSpacePolicy field (described on the field as the policy for vertical tab and form feed).
+ *
+ * @param contentSpacePolicy
+ * the contentSpacePolicy to set (stored without validation)
+ */
+ public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
+ this.contentSpacePolicy = contentSpacePolicy;
+ }
+
+ /**
+ * Stores the xmlns policy unless it is <code>FATAL</code>.
+ * @param xmlnsPolicy the xmlnsPolicy to set; <code>XmlViolationPolicy.FATAL</code> is refused
+ * @throws IllegalArgumentException if <code>xmlnsPolicy</code> is <code>FATAL</code>
+ */
+ public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
+ final boolean fatalRequested = XmlViolationPolicy.FATAL == xmlnsPolicy;
+ if (fatalRequested) {
+ throw new IllegalArgumentException("Can't use FATAL here.");
+ }
+ this.xmlnsPolicy = xmlnsPolicy;
+ }
+
+ public void setNamePolicy(XmlViolationPolicy namePolicy) { // Stores the policy as given; unlike setXmlnsPolicy, FATAL is not rejected here.
+ this.namePolicy = namePolicy;
+ }
+
+ /**
+ * Stores the html4ModeCompatibleWithXhtml1Schemata flag; nothing else happens in this setter.
+ *
+ * @param html4ModeCompatibleWithXhtml1Schemata
+ * the html4ModeCompatibleWithXhtml1Schemata to set
+ */
+ public void setHtml4ModeCompatibleWithXhtml1Schemata(
+ boolean html4ModeCompatibleWithXhtml1Schemata) {
+ this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
+ }
+
+ // ]NOCPP]
+
+ // For the token handler to call
+ /**
+ * Sets the tokenizer state and the associated element name. This should
+ * only ever be used to put the tokenizer into one of the states that have
+ * a special end tag expectation.
+ *
+ * @param specialTokenizerState
+ * the tokenizer state to set; for <code>DATA</code> only the state is stored
+ * @param endTagExpectation
+ * the expected end tag for transitioning back to normal
+ */
+ public void setStateAndEndTagExpectation(int specialTokenizerState,
+ @Local String endTagExpectation) {
+ this.stateSave = specialTokenizerState;
+ if (specialTokenizerState == Tokenizer.DATA) {
+ return; // the previous end tag expectation, if any, is left untouched
+ }
+ @Auto char[] asArray = Portability.newCharArrayFromLocal(endTagExpectation);
+ this.endTagExpectation = ElementName.elementNameByBuffer(asArray, 0,
+ asArray.length, interner); // look the name up as an ElementName
+ endTagExpectationToArray(); // keep endTagExpectationAsArray in step with the new expectation
+ }
+
+ /**
+ * Sets the tokenizer state and the associated element name. This should
+ * only ever be used to put the tokenizer into one of the states that have
+ * a special end tag expectation.
+ *
+ * @param specialTokenizerState
+ * the tokenizer state to set (this overload has no special case for <code>DATA</code>)
+ * @param endTagExpectation
+ * the expected end tag for transitioning back to normal
+ */
+ public void setStateAndEndTagExpectation(int specialTokenizerState,
+ ElementName endTagExpectation) {
+ this.stateSave = specialTokenizerState;
+ this.endTagExpectation = endTagExpectation;
+ endTagExpectationToArray(); // keep endTagExpectationAsArray in step with the new expectation
+ }
+
+ private void endTagExpectationToArray() { // Points endTagExpectationAsArray at the shared *_ARR constant matching endTagExpectation's group.
+ switch (endTagExpectation.getGroup()) {
+ case TreeBuilder.TITLE:
+ endTagExpectationAsArray = TITLE_ARR;
+ return;
+ case TreeBuilder.SCRIPT:
+ endTagExpectationAsArray = SCRIPT_ARR;
+ return;
+ case TreeBuilder.STYLE:
+ endTagExpectationAsArray = STYLE_ARR;
+ return;
+ case TreeBuilder.PLAINTEXT:
+ endTagExpectationAsArray = PLAINTEXT_ARR;
+ return;
+ case TreeBuilder.XMP:
+ endTagExpectationAsArray = XMP_ARR;
+ return;
+ case TreeBuilder.TEXTAREA:
+ endTagExpectationAsArray = TEXTAREA_ARR;
+ return;
+ case TreeBuilder.IFRAME:
+ endTagExpectationAsArray = IFRAME_ARR;
+ return;
+ case TreeBuilder.NOEMBED:
+ endTagExpectationAsArray = NOEMBED_ARR;
+ return;
+ case TreeBuilder.NOSCRIPT:
+ endTagExpectationAsArray = NOSCRIPT_ARR;
+ return;
+ case TreeBuilder.NOFRAMES:
+ endTagExpectationAsArray = NOFRAMES_ARR;
+ return;
+ default:
+ assert false: "Bad end tag expectation."; // with assertions off, endTagExpectationAsArray keeps its previous value
+ return;
+ }
+ }
+
+ /**
+ * For C++ use only. Overwrites the line value that getLineNumber() returns.
+ */
+ public void setLineNumber(int line) {
+ // CPPONLY: this.attributeLine = line; // XXX is this needed?
+ this.line = line;
+ }
+
+ // start Locator impl
+
+ /**
+ * @see org.xml.sax.Locator#getLineNumber()
+ */
+ @Inline public int getLineNumber() {
+ return line; // current value of the line field
+ }
+
+ // [NOCPP[
+
+ /**
+ * @see org.xml.sax.Locator#getColumnNumber()
+ */
+ @Inline public int getColumnNumber() {
+ return -1; // this implementation always reports -1; no column is tracked here
+ }
+
+ /**
+ * @see org.xml.sax.Locator#getPublicId()
+ */
+ public String getPublicId() {
+ return publicId; // as given to initLocation
+ }
+
+ /**
+ * @see org.xml.sax.Locator#getSystemId()
+ */
+ public String getSystemId() {
+ return systemId; // as given to initLocation
+ }
+
+ // end Locator impl
+
+ // end public API
+
+ public void notifyAboutMetaBoundary() { // Sets metaBoundaryPassed (field doc: stream is past the first 1024 bytes); nothing here clears it.
+ metaBoundaryPassed = true;
+ }
+
+ void turnOnAdditionalHtml4Errors() { // Sets the html4 flag (field doc: HTML4-specific additional errors requested); there is no matching "off" method in view.
+ html4 = true;
+ }
+
+ // ]NOCPP]
+
+ HtmlAttributes emptyAttributes() { // Returns an attribute holder with nothing added to it by this method.
+ // [NOCPP[
+ if (newAttributesEachTime) {
+ return new HtmlAttributes(mappingLangToXmlLang); // Java only: a fresh holder per call
+ } else {
+ // ]NOCPP]
+ return HtmlAttributes.EMPTY_ATTRIBUTES; // the only statement left outside the NOCPP regions
+ // [NOCPP[
+ }
+ // ]NOCPP]
+ }
+
+ @Inline private void appendCharRefBuf(char c) {
+ // CPPONLY: assert charRefBufLen < charRefBuf.length:
+ // CPPONLY: "RELEASE: Attempted to overrun charRefBuf!";
+ charRefBuf[charRefBufLen++] = c; // Java path has no explicit capacity check; charRefBuf has 32 slots
+ }
+
+ private void emitOrAppendCharRefBuf(int returnState) throws SAXException { // Flushes charRefBuf to strBuf or to the token handler, depending on returnState.
+ if ((returnState & DATA_AND_RCDATA_MASK) != 0) { // true for every return state other than DATA (0) and RCDATA (1)
+ appendCharRefBufToStrBuf();
+ } else {
+ if (charRefBufLen > 0) { // nothing is reported for an empty buffer
+ tokenHandler.characters(charRefBuf, 0, charRefBufLen);
+ charRefBufLen = 0;
+ }
+ }
+ }
+
+ @Inline private void clearStrBufAfterUse() { // Marks strBuf as empty; the array contents are not touched.
+ strBufLen = 0;
+ }
+
+ @Inline private void clearStrBufBeforeUse() { // Asserts the buffer was already emptied, then empties it anyway.
+ assert strBufLen == 0: "strBufLen not reset after previous use!";
+ strBufLen = 0; // no-op in the absence of bugs
+ }
+
+ @Inline private void clearStrBufAfterOneHyphen() { // Empties strBuf, asserting that it held exactly one '-'.
+ assert strBufLen == 1: "strBufLen length not one!";
+ assert strBuf[0] == '-': "strBuf does not start with a hyphen!";
+ strBufLen = 0;
+ }
+
+ /**
+ * Appends one code unit to <code>strBuf</code> and advances <code>strBufLen</code>.
+ *
+ * @param c
+ * the UTF-16 code unit to append
+ */
+ @Inline private void appendStrBuf(char c) {
+ // CPPONLY: assert strBufLen < strBuf.length: "Previous buffer length insufficient.";
+ // CPPONLY: if (strBufLen == strBuf.length) {
+ // CPPONLY: if (!EnsureBufferSpace(1)) {
+ // CPPONLY: assert false: "RELEASE: Unable to recover from buffer reallocation failure";
+ // CPPONLY: } // TODO: Add telemetry when outer if fires but inner does not
+ // CPPONLY: }
+ strBuf[strBufLen++] = c; // the Java path does not grow the buffer here; only the CPPONLY lines above do
+ }
+
+ /**
+ * The buffer as a String. Currently only used for error reporting. Empties the buffer as a side effect.
+ *
+ * <p>
+ * C++ memory note: The return value must be released.
+ *
+ * @return the buffer as a string
+ */
+ protected String strBufToString() {
+ String str = Portability.newStringFromBuffer(strBuf, 0, strBufLen
+ // CPPONLY: , tokenHandler
+ );
+ clearStrBufAfterUse();
+ return str;
+ }
+
+ /**
+ * Converts the buffer to a local name and stores it in <code>doctypeName</code>;
+ * the stored value is released in emitDoctypeToken().
+ *
+ * Returns nothing; the buffer is emptied afterwards.
+ */
+ private void strBufToDoctypeName() {
+ doctypeName = Portability.newLocalNameFromBuffer(strBuf, 0, strBufLen,
+ interner);
+ clearStrBufAfterUse();
+ }
+
+ /**
+ * Emits the buffer as character tokens and empties it; does nothing when the buffer is empty.
+ *
+ * @throws SAXException
+ * if the token handler threw
+ */
+ private void emitStrBuf() throws SAXException {
+ if (strBufLen > 0) {
+ tokenHandler.characters(strBuf, 0, strBufLen);
+ clearStrBufAfterUse();
+ }
+ }
+
+ @Inline private void appendSecondHyphenToBogusComment() throws SAXException { // Appends '-' to strBuf; in Java the comment policy can add a space, warn, or call fatal() instead.
+ // [NOCPP[
+ switch (commentPolicy) {
+ case ALTER_INFOSET:
+ appendStrBuf(' '); // the space lands ahead of the hyphen appended below
+ // FALLTHROUGH
+ case ALLOW:
+ warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
+ // ]NOCPP]
+ appendStrBuf('-'); // the only statement left outside the NOCPP regions
+ // [NOCPP[
+ break;
+ case FATAL:
+ fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); // no hyphen is appended on this path
+ break;
+ }
+ // ]NOCPP]
+ }
+
+ // [NOCPP[
+ private void maybeAppendSpaceToBogusComment() throws SAXException { // Java only: reacts to a trailing hyphen in a comment according to commentPolicy.
+ switch (commentPolicy) {
+ case ALTER_INFOSET:
+ appendStrBuf(' '); // only this policy changes the buffer
+ // FALLTHROUGH
+ case ALLOW:
+ warn("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment.");
+ break;
+ case FATAL:
+ fatal("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment.");
+ break;
+ }
+ }
+
+ // ]NOCPP]
+
+ /**
+ * Reports consecutive hyphens via <code>errConsecutiveHyphens()</code>
+ * and appends <code>c</code> to the buffer. Inside the NOCPP sections the
+ * outcome depends on <code>commentPolicy</code>: ALTER_INFOSET drops the
+ * last buffered char (presumably the previous hyphen), appends a space
+ * and a hyphen, then continues like ALLOW; ALLOW warns and appends
+ * <code>c</code>; FATAL calls <code>fatal()</code>, which throws, so
+ * <code>c</code> is not appended.
+ *
+ * @param c
+ * the char to append after the adjustment
+ * @throws SAXException
+ * from the error-reporting calls
+ */
+ @Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr(char c)
+ throws SAXException {
+ errConsecutiveHyphens();
+ // [NOCPP[
+ switch (commentPolicy) {
+ case ALTER_INFOSET:
+ strBufLen--;
+ // WARNING!!! This expands the worst case of the buffer length
+ // given the length of input!
+ appendStrBuf(' ');
+ appendStrBuf('-');
+ // FALLTHROUGH
+ case ALLOW:
+ warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
+ // ]NOCPP]
+ appendStrBuf(c);
+ // [NOCPP[
+ break;
+ case FATAL:
+ fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
+ break;
+ }
+ // ]NOCPP]
+ }
+
+ /**
+ * Appends <code>length</code> chars of <code>buffer</code>, starting at
+ * <code>offset</code>, to <code>strBuf</code> and advances
+ * <code>strBufLen</code> accordingly. The Java code path does not grow
+ * <code>strBuf</code> here; presumably enough capacity was reserved up
+ * front (see <code>ensureBufferSpace()</code>). The CPPONLY lines carry
+ * the capacity check for the C++ translation.
+ *
+ * @param buffer
+ * the source array
+ * @param offset
+ * index of the first char to copy from <code>buffer</code>
+ * @param length
+ * number of chars to copy
+ */
+ private void appendStrBuf(@NoLength char[] buffer, int offset, int length) {
+ int newLen = strBufLen + length;
+ // CPPONLY: assert newLen <= strBuf.length: "Previous buffer length insufficient.";
+ // CPPONLY: if (strBuf.length < newLen) {
+ // CPPONLY: if (!EnsureBufferSpace(length)) {
+ // CPPONLY: assert false: "RELEASE: Unable to recover from buffer reallocation failure";
+ // CPPONLY: } // TODO: Add telemetry when outer if fires but inner does not
+ // CPPONLY: }
+ System.arraycopy(buffer, offset, strBuf, strBufLen, length);
+ strBufLen = newLen;
+ }
+
+ /**
+ * Append the contents of the char reference buffer to the main one and
+ * then empty the char reference buffer by resetting
+ * <code>charRefBufLen</code> to zero.
+ */
+ @Inline private void appendCharRefBufToStrBuf() {
+ appendStrBuf(charRefBuf, 0, charRefBufLen);
+ charRefBufLen = 0;
+ }
+
+ /**
+ * Emits the current comment token and clears the buffer. In the Java
+ * code the token is only passed to the token handler when
+ * <code>wantsComments</code> is set; the buffer is cleared either way.
+ *
+ * @param provisionalHyphens
+ * number of chars at the end of the buffer that are left out
+ * of the emitted comment
+ * @param pos
+ * position used to restart character coalescing:
+ * <code>cstart</code> is set to <code>pos + 1</code>
+ *
+ * @throws SAXException
+ * if <code>tokenHandler.comment()</code> throws
+ */
+ private void emitComment(int provisionalHyphens, int pos)
+ throws SAXException {
+ // [NOCPP[
+ if (wantsComments) {
+ // ]NOCPP]
+ tokenHandler.comment(strBuf, 0, strBufLen
+ - provisionalHyphens);
+ // [NOCPP[
+ }
+ // ]NOCPP]
+ clearStrBufAfterUse();
+ cstart = pos + 1;
+ }
+
+ /**
+ * Flushes coalesced character tokens. If <code>pos</code> is greater
+ * than <code>cstart</code>, the chars of <code>buf</code> from index
+ * <code>cstart</code> up to but not including <code>pos</code> are passed
+ * to the token handler. Afterwards <code>cstart</code> is always set to
+ * <code>Integer.MAX_VALUE</code>.
+ *
+ * @param buf
+ * the array holding the chars to emit
+ * @param pos
+ * exclusive end index of the run to emit
+ *
+ * @throws SAXException
+ * if <code>tokenHandler.characters()</code> throws
+ */
+ protected void flushChars(@NoLength char[] buf, int pos)
+ throws SAXException {
+ if (pos > cstart) {
+ tokenHandler.characters(buf, cstart, pos - cstart);
+ }
+ cstart = Integer.MAX_VALUE;
+ }
+
+ /**
+ * Reports a condition that would make the infoset incompatible with XML
+ * 1.0 as fatal. The error handler, if one is set, is notified first; the
+ * exception built here is then thrown regardless.
+ *
+ * @param message
+ * the message
+ * @throws SAXException
+ * @throws SAXParseException
+ * thrown after the handler (if any) has been notified
+ */
+ public void fatal(String message) throws SAXException {
+ final SAXParseException failure = new SAXParseException(message, this);
+ final ErrorHandler handler = errorHandler;
+ if (handler != null) {
+ handler.fatalError(failure);
+ }
+ throw failure;
+ }
+
+ /**
+ * Reports a Parse Error.
+ *
+ * @param message
+ * the message
+ * @throws SAXException
+ */
+ public void err(String message) throws SAXException {
+ if (errorHandler == null) {
+ return;
+ }
+ SAXParseException spe = new SAXParseException(message, this);
+ errorHandler.error(spe);
+ }
+
+ public void errTreeBuilder(String message) throws SAXException {
+ ErrorHandler eh = null;
+ if (tokenHandler instanceof TreeBuilder<?>) {
+ TreeBuilder<?> treeBuilder = (TreeBuilder<?>) tokenHandler;
+ eh = treeBuilder.getErrorHandler();
+ }
+ if (eh == null) {
+ eh = errorHandler;
+ }
+ if (eh == null) {
+ return;
+ }
+ SAXParseException spe = new SAXParseException(message, this);
+ eh.error(spe);
+ }
+
+ /**
+ * Reports a warning to the error handler. Does nothing when no error
+ * handler is set.
+ *
+ * @param message
+ * the message
+ * @throws SAXException
+ * if the error handler throws
+ */
+ public void warn(String message) throws SAXException {
+ if (errorHandler != null) {
+ errorHandler.warning(new SAXParseException(message, this));
+ }
+ }
+
+ /**
+ * Looks up the element name for the buffer contents via
+ * <code>ElementName.elementNameByBuffer()</code>, stores it in the
+ * <code>tagName</code> field and then calls
+ * <code>clearStrBufAfterUse()</code>.
+ */
+ private void strBufToElementNameString() {
+ tagName = ElementName.elementNameByBuffer(strBuf, 0, strBufLen,
+ interner);
+ clearStrBufAfterUse();
+ }
+
+ /**
+ * Emits the current tag token to the token handler: an end tag when
+ * <code>endTag</code> is set, otherwise a start tag with the collected
+ * attributes (or <code>HtmlAttributes.EMPTY_ATTRIBUTES</code> when none
+ * were collected). Before the handler is called, <code>cstart</code> is
+ * set to <code>pos + 1</code> and <code>stateSave</code> to
+ * <code>Tokenizer.DATA</code>. Afterwards <code>tagName</code> is
+ * released and nulled, and <code>attributes</code> is either dropped or
+ * cleared depending on <code>newAttributesEachTime</code>.
+ *
+ * @param selfClosing
+ * whether the tag was written with a trailing solidus; passed
+ * to <code>maybeErrSlashInEndTag()</code> and to
+ * <code>startTag()</code>
+ * @param pos
+ * position of the char that ended the tag
+ * @return the value of <code>stateSave</code> after the token handler
+ * has run
+ * @throws SAXException
+ * if the token handler or error reporting throws
+ */
+ private int emitCurrentTagToken(boolean selfClosing, int pos)
+ throws SAXException {
+ cstart = pos + 1;
+ maybeErrSlashInEndTag(selfClosing);
+ stateSave = Tokenizer.DATA;
+ HtmlAttributes attrs = (attributes == null ? HtmlAttributes.EMPTY_ATTRIBUTES
+ : attributes);
+ if (endTag) {
+ /*
+ * When an end tag token is emitted, the content model flag must be
+ * switched to the PCDATA state.
+ */
+ maybeErrAttributesOnEndTag(attrs);
+ // CPPONLY: if (!viewingXmlSource) {
+ tokenHandler.endTag(tagName);
+ // CPPONLY: }
+ // CPPONLY: if (newAttributesEachTime) {
+ // CPPONLY: Portability.delete(attributes);
+ // CPPONLY: attributes = null;
+ // CPPONLY: }
+ } else {
+ // CPPONLY: if (viewingXmlSource) {
+ // CPPONLY: assert newAttributesEachTime;
+ // CPPONLY: Portability.delete(attributes);
+ // CPPONLY: attributes = null;
+ // CPPONLY: } else {
+ tokenHandler.startTag(tagName, attrs, selfClosing);
+ // CPPONLY: }
+ }
+ tagName.release();
+ tagName = null;
+ if (newAttributesEachTime) {
+ attributes = null;
+ } else {
+ // NOTE(review): assumes attributes is non-null whenever
+ // newAttributesEachTime is false - confirm against initialization.
+ attributes.clear(mappingLangToXmlLang);
+ }
+ /*
+ * The token handler may have called setStateAndEndTagExpectation
+ * and changed stateSave since the start of this method.
+ */
+ return stateSave;
+ }
+
+ /**
+ * Finishes the attribute name held in the buffer: resolves it to an
+ * <code>AttributeName</code>, clears the buffer, creates
+ * <code>attributes</code> if it does not exist yet and checks for a
+ * duplicate. For a duplicate, an error is reported and
+ * <code>attributeName</code> is released and set to null; the
+ * add-attribute methods treat a null <code>attributeName</code> as
+ * "discard this attribute".
+ *
+ * @throws SAXException
+ * if error reporting throws
+ */
+ private void attributeNameComplete() throws SAXException {
+ attributeName = AttributeName.nameByBuffer(strBuf, 0, strBufLen
+ // [NOCPP[
+ , namePolicy != XmlViolationPolicy.ALLOW
+ // ]NOCPP]
+ , interner);
+ clearStrBufAfterUse();
+
+ if (attributes == null) {
+ attributes = new HtmlAttributes(mappingLangToXmlLang);
+ }
+
+ /*
+ * When the user agent leaves the attribute name state (and before
+ * emitting the tag token, if appropriate), the complete attribute's
+ * name must be compared to the other attributes on the same token; if
+ * there is already an attribute on the token with the exact same name,
+ * then this is a parse error and the new attribute must be dropped,
+ * along with the value that gets associated with it (if any).
+ */
+ if (attributes.contains(attributeName)) {
+ errDuplicateAttribute();
+ attributeName.release();
+ attributeName = null;
+ }
+ }
+
+ /**
+ * Adds the current attribute to <code>attributes</code> with an empty
+ * value. When <code>attributeName</code> is null (which
+ * <code>attributeNameComplete()</code> sets for a duplicate), nothing is
+ * added and the buffer is cleared instead. The Java-only (NOCPP)
+ * sections add the late-charset check, the HTML4-specific handling of
+ * boolean and non-boolean attributes, and a warning for valueless
+ * <code>src</code>/<code>href</code>.
+ *
+ * @throws SAXException
+ * if error reporting throws
+ */
+ private void addAttributeWithoutValue() throws SAXException {
+ noteAttributeWithoutValue();
+
+ // [NOCPP[
+ if (metaBoundaryPassed && AttributeName.CHARSET == attributeName
+ && ElementName.META == tagName) {
+ err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes.");
+ }
+ // ]NOCPP]
+ if (attributeName != null) {
+ // [NOCPP[
+ if (html4) {
+ if (attributeName.isBoolean()) {
+ if (html4ModeCompatibleWithXhtml1Schemata) {
+ attributes.addAttribute(attributeName,
+ attributeName.getLocal(AttributeName.HTML),
+ xmlnsPolicy);
+ } else {
+ attributes.addAttribute(attributeName, "", xmlnsPolicy);
+ }
+ } else {
+ // NOTE(review): a valueless "border" in HTML4 mode is neither
+ // reported nor added to attributes - confirm this is intended.
+ if (AttributeName.BORDER != attributeName) {
+ err("Attribute value omitted for a non-boolean attribute. (HTML4-only error.)");
+ attributes.addAttribute(attributeName, "", xmlnsPolicy);
+ }
+ }
+ } else {
+ if (AttributeName.SRC == attributeName
+ || AttributeName.HREF == attributeName) {
+ warn("Attribute \u201C"
+ + attributeName.getLocal(AttributeName.HTML)
+ + "\u201D without an explicit value seen. The attribute may be dropped by IE7.");
+ }
+ // ]NOCPP]
+ attributes.addAttribute(attributeName,
+ Portability.newEmptyString()
+ // [NOCPP[
+ , xmlnsPolicy
+ // ]NOCPP]
+ // CPPONLY: , attributeLine
+ );
+ // [NOCPP[
+ }
+ // ]NOCPP]
+ attributeName = null; // attributeName has been adopted by the
+ // |attributes| object
+ } else {
+ clearStrBufAfterUse();
+ }
+ }
+
+ /**
+ * Adds the current attribute to <code>attributes</code>, using the buffer
+ * contents as its value. When <code>attributeName</code> is null (a
+ * duplicate attribute), the buffered value is discarded instead. The
+ * Java-only (NOCPP) sections add the late-charset check and, for start
+ * tags in HTML4 mode with XHTML1-schema compatibility, ASCII-lowercase
+ * the value of case-folded attributes.
+ *
+ * @throws SAXException
+ * if error reporting throws
+ */
+ private void addAttributeWithValue() throws SAXException {
+ // [NOCPP[
+ if (metaBoundaryPassed && ElementName.META == tagName
+ && AttributeName.CHARSET == attributeName) {
+ err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes.");
+ }
+ // ]NOCPP]
+ if (attributeName != null) {
+ String val = strBufToString(); // Ownership transferred to
+ // HtmlAttributes
+ // CPPONLY: if (mViewSource) {
+ // CPPONLY: mViewSource.MaybeLinkifyAttributeValue(attributeName, val);
+ // CPPONLY: }
+ // [NOCPP[
+ if (!endTag && html4 && html4ModeCompatibleWithXhtml1Schemata
+ && attributeName.isCaseFolded()) {
+ val = newAsciiLowerCaseStringFromString(val);
+ }
+ // ]NOCPP]
+ attributes.addAttribute(attributeName, val
+ // [NOCPP[
+ , xmlnsPolicy
+ // ]NOCPP]
+ // CPPONLY: , attributeLine
+ );
+ attributeName = null; // attributeName has been adopted by the
+ // |attributes| object
+ } else {
+ // We have a duplicate attribute. Explicitly discard its value.
+ clearStrBufAfterUse();
+ }
+ }
+
+ // [NOCPP[
+
+ /**
+ * Returns a new String in which the ASCII letters 'A' through 'Z' of the
+ * argument are replaced by their lowercase counterparts; all other chars
+ * are copied unchanged.
+ *
+ * @param str
+ * the string to convert, may be <code>null</code>
+ * @return the converted copy, or <code>null</code> if <code>str</code>
+ * is <code>null</code>
+ */
+ private static String newAsciiLowerCaseStringFromString(String str) {
+ if (str == null) {
+ return null;
+ }
+ char[] folded = str.toCharArray();
+ for (int index = folded.length - 1; index >= 0; index--) {
+ char current = folded[index];
+ if ('A' <= current && current <= 'Z') {
+ // 0x20 is the distance between upper and lower case in ASCII.
+ folded[index] = (char) (current + 0x20);
+ }
+ }
+ return new String(folded);
+ }
+
+ /**
+ * Hook called by <code>start()</code> after the token handler has been
+ * told that tokenization starts. This implementation does nothing;
+ * presumably subclasses override it to set up error reporting.
+ *
+ * @throws SAXException
+ * not thrown by this empty implementation
+ */
+ protected void startErrorReporting() throws SAXException {
+
+ }
+
+ // ]NOCPP]
+
+ /**
+ * Starts tokenization: calls <code>initializeWithoutStarting()</code>,
+ * notifies the token handler through
+ * <code>startTokenization(this)</code> and, in the Java-only (NOCPP)
+ * section, calls <code>startErrorReporting()</code>.
+ *
+ * @throws SAXException
+ * if one of the called methods throws
+ */
+ public void start() throws SAXException {
+ initializeWithoutStarting();
+ tokenHandler.startTokenization(this);
+ // [NOCPP[
+ startErrorReporting();
+ // ]NOCPP]
+ }
+
+ /**
+ * Tokenizes the chars of <code>buffer</code> between its start and end.
+ * The tokenizer state is resumed from <code>stateSave</code> and
+ * <code>returnStateSave</code>, <code>shouldSuspend</code> and
+ * <code>lastCR</code> are reset, and the work is delegated to
+ * <code>stateLoop()</code>. On return the start of <code>buffer</code>
+ * is moved past the chars that were consumed.
+ *
+ * @param buffer
+ * the input; its start index is updated by this method
+ * @return the value of <code>lastCR</code> after the state loop has
+ * returned
+ * @throws SAXException
+ * if the state loop or buffer sizing throws
+ */
+ public boolean tokenizeBuffer(UTF16Buffer buffer) throws SAXException {
+ int state = stateSave;
+ int returnState = returnStateSave;
+ char c = '\u0000';
+ shouldSuspend = false;
+ lastCR = false;
+
+ int start = buffer.getStart();
+ int end = buffer.getEnd();
+
+ // In C++, the caller of tokenizeBuffer needs to do this explicitly.
+ // [NOCPP[
+ ensureBufferSpace(end - start);
+ // ]NOCPP]
+
+ /**
+ * The index of the last <code>char</code> read from <code>buf</code>.
+ */
+ int pos = start - 1;
+
+ /**
+ * The index of the first <code>char</code> in <code>buf</code> that is
+ * part of a coalesced run of character tokens or
+ * <code>Integer.MAX_VALUE</code> if there is not a current run being
+ * coalesced.
+ */
+ // The comment above describes the cstart field. The states listed
+ // below start a run at the beginning of this buffer; all other states
+ // start with no run in progress.
+ switch (state) {
+ case DATA:
+ case RCDATA:
+ case SCRIPT_DATA:
+ case PLAINTEXT:
+ case RAWTEXT:
+ case CDATA_SECTION:
+ case SCRIPT_DATA_ESCAPED:
+ case SCRIPT_DATA_ESCAPE_START:
+ case SCRIPT_DATA_ESCAPE_START_DASH:
+ case SCRIPT_DATA_ESCAPED_DASH:
+ case SCRIPT_DATA_ESCAPED_DASH_DASH:
+ case SCRIPT_DATA_DOUBLE_ESCAPE_START:
+ case SCRIPT_DATA_DOUBLE_ESCAPED:
+ case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
+ case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
+ case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
+ case SCRIPT_DATA_DOUBLE_ESCAPE_END:
+ cstart = start;
+ break;
+ default:
+ cstart = Integer.MAX_VALUE;
+ break;
+ }
+
+ /**
+ * The number of <code>char</code>s in <code>buf</code> that have
+ * meaning. (The rest of the array is garbage and should not be
+ * examined.)
+ *
+ * NOTE(review): no variable is declared under this comment; the limit
+ * actually passed to the state loop is <code>end</code>.
+ */
+ // CPPONLY: if (mViewSource) {
+ // CPPONLY: mViewSource.SetBuffer(buffer);
+ // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
+ // CPPONLY: mViewSource.DropBuffer((pos == buffer.getEnd()) ? pos : pos + 1);
+ // CPPONLY: } else {
+ // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
+ // CPPONLY: }
+ // [NOCPP[
+ pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState,
+ end);
+ // ]NOCPP]
+ if (pos == end) {
+ // exiting due to end of buffer
+ buffer.setStart(pos);
+ } else {
+ // The loop stopped early; the char at pos has already been consumed.
+ buffer.setStart(pos + 1);
+ }
+ return lastCR;
+ }
+
+ // [NOCPP[
+ /**
+ * Makes sure that the token handler and <code>strBuf</code> can hold the
+ * worst-case output for <code>inputLength</code> more input chars. The
+ * existing contents of <code>strBuf</code> are kept when it is replaced
+ * by a larger array.
+ *
+ * @param inputLength
+ * number of input chars about to be tokenized
+ * @throws SAXException
+ * if <code>tokenHandler.ensureBufferSpace()</code> throws
+ */
+ private void ensureBufferSpace(int inputLength) throws SAXException {
+ // The extra 2 accounts for emissions of LT_GT, LT_SOLIDUS and
+ // RSQB_RSQB. It is added to the general worst case instead of only
+ // the TreeBuilder-exposed worst case to avoid re-introducing a bug
+ // when unifying the tokenizer and tree builder buffers in the future.
+ int needed = strBufLen + inputLength + charRefBufLen + 2;
+ tokenHandler.ensureBufferSpace(needed);
+ if (XmlViolationPolicy.ALTER_INFOSET == commentPolicy) {
+ // When altering infoset, if the comment contents are consecutive
+ // hyphens, each hyphen generates a space, too. These buffer
+ // contents never get emitted as characters() to the tokenHandler,
+ // which is why the doubling happens only after the call to
+ // ensureBufferSpace on tokenHandler.
+ needed += needed;
+ }
+ if (strBuf == null) {
+ // First allocation: add an arbitrary small value to avoid
+ // immediate reallocation once there are a few characters in the
+ // buffer.
+ strBuf = new char[needed + 128];
+ return;
+ }
+ if (strBuf.length >= needed) {
+ return;
+ }
+ // HotSpot reportedly allocates memory with 8-byte accuracy, so
+ // there's no point in trying to do math here to avoid slop. Maybe a
+ // small constant should be added to the size here, but not without
+ // profiling. In C++ with jemalloc, the corresponding method should do
+ // math to round up here to avoid slop.
+ char[] grown = new char[needed];
+ System.arraycopy(strBuf, 0, grown, 0, strBufLen);
+ strBuf = grown;
+ }
+ // ]NOCPP]
+
+ @SuppressWarnings("unused") private int stateLoop(int state, char c,
+ int pos, @NoLength char[] buf, boolean reconsume, int returnState,
+ int endPos) throws SAXException {
+ /*
+ * Idioms used in this code:
+ *
+ *
+ * Consuming the next input character
+ *
+ * To consume the next input character, the code does this: if (++pos ==
+ * endPos) { break stateloop; } c = checkChar(buf, pos);
+ *
+ *
+ * Staying in a state
+ *
+ * When there's a state that the tokenizer may stay in over multiple
+ * input characters, the state has a wrapper |for(;;)| loop and staying
+ * in the state continues the loop.
+ *
+ *
+ * Switching to another state
+ *
+ * To switch to another state, the code sets the state variable to the
+ * magic number of the new state. Then it either continues stateloop or
+ * breaks out of the state's own wrapper loop if the target state is
+ * right after the current state in source order. (This is a partial
+ * workaround for Java's lack of goto.)
+ *
+ *
+ * Reconsume support
+ *
+ * The spec sometimes says that an input character is reconsumed in
+ * another state. If a state can ever be entered so that an input
+ * character can be reconsumed in it, the state's code starts with an
+ * |if (reconsume)| that sets reconsume to false and skips over the
+ * normal code for consuming a new character.
+ *
+ * To reconsume the current character in another state, the code sets
+ * |reconsume| to true and then switches to the other state.
+ *
+ *
+ * Emitting character tokens
+ *
+ * This method emits character tokens lazily. Whenever a new range of
+ * character tokens starts, the field cstart must be set to the start
+ * index of the range. The flushChars() method must be called at the end
+ * of a range to flush it.
+ *
+ *
+ * U+0000 handling
+ *
+ * The various states have to handle the replacement of U+0000 with
+ * U+FFFD. However, if U+0000 would be reconsumed in another state, the
+ * replacement doesn't need to happen, because it's handled by the
+ * reconsuming state.
+ *
+ *
+ * LF handling
+ *
+ * Every state needs to increment the line number upon LF unless the LF
+ * gets reconsumed by another state which increments the line number.
+ *
+ *
+ * CR handling
+ *
+ * Every state needs to handle CR unless the CR gets reconsumed and is
+ * handled by the reconsuming state. The CR needs to be handled as if it
+ * were an LF, the lastCR field must be set to true and then this
+ * method must return. The IO driver will then swallow the next
+ * character if it is an LF to coalesce CRLF.
+ */
+ stateloop: for (;;) {
+ switch (state) {
+ case DATA:
+ dataloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ switch (c) {
+ case '&':
+ /*
+ * U+0026 AMPERSAND (&) Switch to the character
+ * reference in data state.
+ */
+ flushChars(buf, pos);
+ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\u0000');
+ returnState = state;
+ state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
+ continue stateloop;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the tag
+ * open state.
+ */
+ flushChars(buf, pos);
+
+ state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
+ break dataloop; // FALL THROUGH continue
+ // stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ continue;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Anything else Emit the input character as a
+ * character token.
+ *
+ * Stay in the data state.
+ */
+ continue;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case TAG_OPEN:
+ tagopenloop: for (;;) {
+ /*
+ * The behavior of this state depends on the content
+ * model flag.
+ */
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * If the content model flag is set to the PCDATA state
+ * Consume the next input character:
+ */
+ if (c >= 'A' && c <= 'Z') {
+ /*
+ * U+0041 LATIN CAPITAL LETTER A through to U+005A
+ * LATIN CAPITAL LETTER Z Create a new start tag
+ * token,
+ */
+ endTag = false;
+ /*
+ * set its tag name to the lowercase version of the
+ * input character (add 0x0020 to the character's
+ * code point),
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf((char) (c + 0x20));
+ /* then switch to the tag name state. */
+ state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
+ /*
+ * (Don't emit the token yet; further details will
+ * be filled in before it is emitted.)
+ */
+ break tagopenloop;
+ // continue stateloop;
+ } else if (c >= 'a' && c <= 'z') {
+ /*
+ * U+0061 LATIN SMALL LETTER A through to U+007A
+ * LATIN SMALL LETTER Z Create a new start tag
+ * token,
+ */
+ endTag = false;
+ /*
+ * set its tag name to the input character,
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ /* then switch to the tag name state. */
+ state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
+ /*
+ * (Don't emit the token yet; further details will
+ * be filled in before it is emitted.)
+ */
+ break tagopenloop;
+ // continue stateloop;
+ }
+ switch (c) {
+ case '!':
+ /*
+ * U+0021 EXCLAMATION MARK (!) Switch to the
+ * markup declaration open state.
+ */
+ state = transition(state, Tokenizer.MARKUP_DECLARATION_OPEN, reconsume, pos);
+ continue stateloop;
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Switch to the close tag
+ * open state.
+ */
+ state = transition(state, Tokenizer.CLOSE_TAG_OPEN, reconsume, pos);
+ continue stateloop;
+ case '?':
+ // CPPONLY: if (viewingXmlSource) {
+ // CPPONLY: state = transition(state,
+ // CPPONLY: Tokenizer.PROCESSING_INSTRUCTION,
+ // CPPONLY: reconsume,
+ // CPPONLY: pos);
+ // CPPONLY: continue stateloop;
+ // CPPONLY: }
+ /*
+ * U+003F QUESTION MARK (?) Parse error.
+ */
+ errProcessingInstruction();
+ /*
+ * Switch to the bogus comment state.
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Parse error.
+ */
+ errLtGt();
+ /*
+ * Emit a U+003C LESS-THAN SIGN character token
+ * and a U+003E GREATER-THAN SIGN character
+ * token.
+ */
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 2);
+ /* Switch to the data state. */
+ cstart = pos + 1;
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ default:
+ /*
+ * Anything else Parse error.
+ */
+ errBadCharAfterLt(c);
+ /*
+ * Emit a U+003C LESS-THAN SIGN character token
+ */
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
+ /*
+ * and reconsume the current input character in
+ * the data state.
+ */
+ cstart = pos;
+ reconsume = true;
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALL THROUGH DON'T REORDER
+ case TAG_NAME:
+ tagnameloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ strBufToElementNameString();
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * Switch to the before attribute name state.
+ */
+ strBufToElementNameString();
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ break tagnameloop;
+ // continue stateloop;
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Switch to the self-closing
+ * start tag state.
+ */
+ strBufToElementNameString();
+ state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * tag token.
+ */
+ strBufToElementNameString();
+ state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
+ if (shouldSuspend) {
+ break stateloop;
+ }
+ /*
+ * Switch to the data state.
+ */
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ if (c >= 'A' && c <= 'Z') {
+ /*
+ * U+0041 LATIN CAPITAL LETTER A through to
+ * U+005A LATIN CAPITAL LETTER Z Append the
+ * lowercase version of the current input
+ * character (add 0x0020 to the character's
+ * code point) to the current tag token's
+ * tag name.
+ */
+ c += 0x20;
+ }
+ /*
+ * Anything else Append the current input
+ * character to the current tag token's tag
+ * name.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the tag name state.
+ */
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BEFORE_ATTRIBUTE_NAME:
+ beforeattributenameloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
+ * in the before attribute name state.
+ */
+ continue;
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Switch to the self-closing
+ * start tag state.
+ */
+ state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * tag token.
+ */
+ state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
+ if (shouldSuspend) {
+ break stateloop;
+ }
+ /*
+ * Switch to the data state.
+ */
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ case '\"':
+ case '\'':
+ case '<':
+ case '=':
+ /*
+ * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
+ * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
+ * SIGN (=) Parse error.
+ */
+ errBadCharBeforeAttributeNameOrNull(c);
+ /*
+ * Treat it as per the "anything else" entry
+ * below.
+ */
+ default:
+ /*
+ * Anything else Start a new attribute in the
+ * current tag token.
+ */
+ if (c >= 'A' && c <= 'Z') {
+ /*
+ * U+0041 LATIN CAPITAL LETTER A through to
+ * U+005A LATIN CAPITAL LETTER Z Set that
+ * attribute's name to the lowercase version
+ * of the current input character (add
+ * 0x0020 to the character's code point)
+ */
+ c += 0x20;
+ }
+ // CPPONLY: attributeLine = line;
+ /*
+ * Set that attribute's name to the current
+ * input character,
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ /*
+ * and its value to the empty string.
+ */
+ // Will do later.
+ /*
+ * Switch to the attribute name state.
+ */
+ state = transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
+ break beforeattributenameloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case ATTRIBUTE_NAME:
+ attributenameloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ attributeNameComplete();
+ state = transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * Switch to the after attribute name state.
+ */
+ attributeNameComplete();
+ state = transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
+ continue stateloop;
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Switch to the self-closing
+ * start tag state.
+ */
+ attributeNameComplete();
+ addAttributeWithoutValue();
+ state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
+ continue stateloop;
+ case '=':
+ /*
+ * U+003D EQUALS SIGN (=) Switch to the before
+ * attribute value state.
+ */
+ attributeNameComplete();
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
+ break attributenameloop;
+ // continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * tag token.
+ */
+ attributeNameComplete();
+ addAttributeWithoutValue();
+ state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
+ if (shouldSuspend) {
+ break stateloop;
+ }
+ /*
+ * Switch to the data state.
+ */
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ case '\"':
+ case '\'':
+ case '<':
+ /*
+ * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
+ * (') U+003C LESS-THAN SIGN (<) Parse error.
+ */
+ errQuoteOrLtInAttributeNameOrNull(c);
+ /*
+ * Treat it as per the "anything else" entry
+ * below.
+ */
+ default:
+ if (c >= 'A' && c <= 'Z') {
+ /*
+ * U+0041 LATIN CAPITAL LETTER A through to
+ * U+005A LATIN CAPITAL LETTER Z Append the
+ * lowercase version of the current input
+ * character (add 0x0020 to the character's
+ * code point) to the current attribute's
+ * name.
+ */
+ c += 0x20;
+ }
+ /*
+ * Anything else Append the current input
+ * character to the current attribute's name.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the attribute name state.
+ */
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BEFORE_ATTRIBUTE_VALUE:
+ beforeattributevalueloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
+ * in the before attribute value state.
+ */
+ continue;
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Switch to the
+ * attribute value (double-quoted) state.
+ */
+ // CPPONLY: attributeLine = line;
+ clearStrBufBeforeUse();
+ state = transition(state, Tokenizer.ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, pos);
+ break beforeattributevalueloop;
+ // continue stateloop;
+ case '&':
+ /*
+ * U+0026 AMPERSAND (&) Switch to the attribute
+ * value (unquoted) state and reconsume this
+ * input character.
+ */
+ // CPPONLY: attributeLine = line;
+ clearStrBufBeforeUse();
+ reconsume = true;
+ state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
+ noteUnquotedAttributeValue();
+ continue stateloop;
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Switch to the attribute
+ * value (single-quoted) state.
+ */
+ // CPPONLY: attributeLine = line;
+ clearStrBufBeforeUse();
+ state = transition(state, Tokenizer.ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Parse error.
+ */
+ errAttributeValueMissing();
+ /*
+ * Emit the current tag token.
+ */
+ addAttributeWithoutValue();
+ state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
+ if (shouldSuspend) {
+ break stateloop;
+ }
+ /*
+ * Switch to the data state.
+ */
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ case '<':
+ case '=':
+ case '`':
+ /*
+ * U+003C LESS-THAN SIGN (<) U+003D EQUALS SIGN
+ * (=) U+0060 GRAVE ACCENT (`)
+ */
+ errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c);
+ /*
+ * Treat it as per the "anything else" entry
+ * below.
+ */
+ default:
+ // [NOCPP[
+ errHtml4NonNameInUnquotedAttribute(c);
+ // ]NOCPP]
+ /*
+ * Anything else Append the current input
+ * character to the current attribute's value.
+ */
+ // CPPONLY: attributeLine = line;
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ /*
+ * Switch to the attribute value (unquoted)
+ * state.
+ */
+
+ state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
+ noteUnquotedAttributeValue();
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
+ attributevaluedoublequotedloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Switch to the after
+ * attribute value (quoted) state.
+ */
+ addAttributeWithValue();
+
+ state = transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
+ break attributevaluedoublequotedloop;
+ // continue stateloop;
+ case '&':
+ /*
+ * U+0026 AMPERSAND (&) Switch to the character
+ * reference in attribute value state, with the
+ * additional allowed character being U+0022
+ * QUOTATION MARK (").
+ */
+ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\"');
+ returnState = state;
+ state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ continue;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append the current input
+ * character to the current attribute's value.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the attribute value (double-quoted)
+ * state.
+ */
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case AFTER_ATTRIBUTE_VALUE_QUOTED:
+ afterattributevaluequotedloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * Switch to the before attribute name state.
+ */
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ continue stateloop;
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Switch to the self-closing
+ * start tag state.
+ */
+ state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
+ break afterattributevaluequotedloop;
+ // continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * tag token.
+ */
+ state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
+ if (shouldSuspend) {
+ break stateloop;
+ }
+ /*
+ * Switch to the data state.
+ */
+ continue stateloop;
+ default:
+ /*
+ * Anything else Parse error.
+ */
+ errNoSpaceBetweenAttributes();
+ /*
+ * Reconsume the character in the before
+ * attribute name state.
+ */
+ reconsume = true;
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case SELF_CLOSING_START_TAG:
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Set the self-closing
+ * flag of the current tag token. Emit the current
+ * tag token.
+ */
+ // [NOCPP[
+ errHtml4XmlVoidSyntax();
+ // ]NOCPP]
+ state = transition(state, emitCurrentTagToken(true, pos), reconsume, pos);
+ if (shouldSuspend) {
+ break stateloop;
+ }
+ /*
+ * Switch to the data state.
+ */
+ continue stateloop;
+ default:
+ /* Anything else Parse error. */
+ errSlashNotFollowedByGt();
+ /*
+ * Reconsume the character in the before attribute
+ * name state.
+ */
+ reconsume = true;
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ continue stateloop;
+ }
+ // XXX reorder point
+ case ATTRIBUTE_VALUE_UNQUOTED:
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ addAttributeWithValue();
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * Switch to the before attribute name state.
+ */
+ addAttributeWithValue();
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ continue stateloop;
+ case '&':
+ /*
+ * U+0026 AMPERSAND (&) Switch to the character
+ * reference in attribute value state, with the
+ * additional allowed character being U+003E
+ * GREATER-THAN SIGN (>)
+ */
+ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('>');
+ returnState = state;
+ state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * tag token.
+ */
+ addAttributeWithValue();
+ state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
+ if (shouldSuspend) {
+ break stateloop;
+ }
+ /*
+ * Switch to the data state.
+ */
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ case '<':
+ case '\"':
+ case '\'':
+ case '=':
+ case '`':
+ /*
+ * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
+ * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
+ * SIGN (=) U+0060 GRAVE ACCENT (`) Parse error.
+ */
+ errUnquotedAttributeValOrNull(c);
+ /*
+ * Treat it as per the "anything else" entry
+ * below.
+ */
+ // fall through
+ default:
+ // [NOCPP]
+ errHtml4NonNameInUnquotedAttribute(c);
+ // ]NOCPP]
+ /*
+ * Anything else Append the current input
+ * character to the current attribute's value.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the attribute value (unquoted) state.
+ */
+ continue;
+ }
+ }
+ // XXX reorder point
+ case AFTER_ATTRIBUTE_NAME:
+ for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
+ * in the after attribute name state.
+ */
+ continue;
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Switch to the self-closing
+ * start tag state.
+ */
+ addAttributeWithoutValue();
+ state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
+ continue stateloop;
+ case '=':
+ /*
+ * U+003D EQUALS SIGN (=) Switch to the before
+ * attribute value state.
+ */
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * tag token.
+ */
+ addAttributeWithoutValue();
+ state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
+ if (shouldSuspend) {
+ break stateloop;
+ }
+ /*
+ * Switch to the data state.
+ */
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ case '\"':
+ case '\'':
+ case '<':
+ errQuoteOrLtInAttributeNameOrNull(c);
+ /*
+ * Treat it as per the "anything else" entry
+ * below.
+ */
+ default:
+ addAttributeWithoutValue();
+ /*
+ * Anything else Start a new attribute in the
+ * current tag token.
+ */
+ if (c >= 'A' && c <= 'Z') {
+ /*
+ * U+0041 LATIN CAPITAL LETTER A through to
+ * U+005A LATIN CAPITAL LETTER Z Set that
+ * attribute's name to the lowercase version
+ * of the current input character (add
+ * 0x0020 to the character's code point)
+ */
+ c += 0x20;
+ }
+ /*
+ * Set that attribute's name to the current
+ * input character,
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ /*
+ * and its value to the empty string.
+ */
+ // Will do later.
+ /*
+ * Switch to the attribute name state.
+ */
+ state = transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case MARKUP_DECLARATION_OPEN:
+ markupdeclarationopenloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * If the next two characters are both U+002D
+ * HYPHEN-MINUS characters (-), consume those two
+ * characters, create a comment token whose data is the
+ * empty string, and switch to the comment start state.
+ *
+ * Otherwise, if the next seven characters are an ASCII
+ * case-insensitive match for the word "DOCTYPE", then
+ * consume those characters and switch to the DOCTYPE
+ * state.
+ *
+ * Otherwise, if the insertion mode is
+ * "in foreign content" and the current node is not an
+ * element in the HTML namespace and the next seven
+ * characters are a case-sensitive match for the string
+ * "[CDATA[" (the five uppercase letters "CDATA" with a
+ * U+005B LEFT SQUARE BRACKET character before and
+ * after), then consume those characters and switch to
+ * the CDATA section state.
+ *
+ * Otherwise, this is a parse error. Switch to the bogus
+ * comment state. The next character that is consumed,
+ * if any, is the first character that will be in the
+ * comment.
+ */
+ switch (c) {
+ case '-':
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.MARKUP_DECLARATION_HYPHEN, reconsume, pos);
+ break markupdeclarationopenloop;
+ // continue stateloop;
+ case 'd':
+ case 'D':
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ index = 0;
+ state = transition(state, Tokenizer.MARKUP_DECLARATION_OCTYPE, reconsume, pos);
+ continue stateloop;
+ case '[':
+ if (tokenHandler.cdataSectionAllowed()) {
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ index = 0;
+ state = transition(state, Tokenizer.CDATA_START, reconsume, pos);
+ continue stateloop;
+ }
+ // else fall through
+ default:
+ errBogusComment();
+ clearStrBufBeforeUse();
+ reconsume = true;
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case MARKUP_DECLARATION_HYPHEN:
+ markupdeclarationhyphenloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ switch (c) {
+ case '\u0000':
+ break stateloop;
+ case '-':
+ clearStrBufAfterOneHyphen();
+ state = transition(state, Tokenizer.COMMENT_START, reconsume, pos);
+ break markupdeclarationhyphenloop;
+ // continue stateloop;
+ default:
+ errBogusComment();
+ reconsume = true;
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT_START:
+ commentstartloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Comment start state
+ *
+ *
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Switch to the comment
+ * start dash state.
+ */
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.COMMENT_START_DASH, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Parse error.
+ */
+ errPrematureEndOfComment();
+ /* Emit the comment token. */
+ emitComment(0, pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ break commentstartloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append the input character to
+ * the comment token's data.
+ */
+ appendStrBuf(c);
+ /*
+ * Switch to the comment state.
+ */
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ break commentstartloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT:
+ commentloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Comment state Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Switch to the comment
+ * end dash state
+ */
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
+ break commentloop;
+ // continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ continue;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append the input character to
+ * the comment token's data.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the comment state.
+ */
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT_END_DASH:
+ commentenddashloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Comment end dash state Consume the next input
+ * character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Switch to the comment
+ * end state
+ */
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
+ break commentenddashloop;
+ // continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append a U+002D HYPHEN-MINUS
+ * (-) character and the input character to the
+ * comment token's data.
+ */
+ appendStrBuf(c);
+ /*
+ * Switch to the comment state.
+ */
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT_END:
+ commentendloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Comment end state Consume the next input
+ * character:
+ */
+ switch (c) {
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the comment
+ * token.
+ */
+ emitComment(2, pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '-':
+ /* U+002D HYPHEN-MINUS (-) Parse error. */
+ /*
+ * Append a U+002D HYPHEN-MINUS (-) character to
+ * the comment token's data.
+ */
+ adjustDoubleHyphenAndAppendToStrBufAndErr(c);
+ /*
+ * Stay in the comment end state.
+ */
+ continue;
+ case '\r':
+ adjustDoubleHyphenAndAppendToStrBufCarriageReturn();
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ break stateloop;
+ case '\n':
+ adjustDoubleHyphenAndAppendToStrBufLineFeed();
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ continue stateloop;
+ case '!':
+ errHyphenHyphenBang();
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Append two U+002D HYPHEN-MINUS (-) characters
+ * and the input character to the comment
+ * token's data.
+ */
+ adjustDoubleHyphenAndAppendToStrBufAndErr(c);
+ /*
+ * Switch to the comment state.
+ */
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case COMMENT_END_BANG:
+ for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Comment end bang state
+ *
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the comment
+ * token.
+ */
+ emitComment(3, pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '-':
+ /*
+ * Append two U+002D HYPHEN-MINUS (-) characters
+ * and a U+0021 EXCLAMATION MARK (!) character
+ * to the comment token's data.
+ */
+ appendStrBuf(c);
+ /*
+ * Switch to the comment end dash state.
+ */
+ state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ continue;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append two U+002D HYPHEN-MINUS
+ * (-) characters, a U+0021 EXCLAMATION MARK (!)
+ * character, and the input character to the
+ * comment token's data. Switch to the comment
+ * state.
+ */
+ appendStrBuf(c);
+ /*
+ * Switch to the comment state.
+ */
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case COMMENT_START_DASH:
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Comment start dash state
+ *
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Switch to the comment end
+ * state
+ */
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
+ continue stateloop;
+ case '>':
+ errPrematureEndOfComment();
+ /* Emit the comment token. */
+ emitComment(1, pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Append a U+002D HYPHEN-MINUS character (-) and
+ * the current input character to the comment
+ * token's data.
+ */
+ appendStrBuf(c);
+ /*
+ * Switch to the comment state.
+ */
+ state = transition(state, Tokenizer.COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ // XXX reorder point
+ case CDATA_START:
+ for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ if (index < 6) { // CDATA_LSQB.length
+ if (c == Tokenizer.CDATA_LSQB[index]) {
+ appendStrBuf(c);
+ } else {
+ errBogusComment();
+ reconsume = true;
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ index++;
+ continue;
+ } else {
+ clearStrBufAfterUse();
+ cstart = pos; // start coalescing
+ reconsume = true;
+ state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
+ break; // FALL THROUGH continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case CDATA_SECTION:
+ cdatasectionloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ switch (c) {
+ case ']':
+ flushChars(buf, pos);
+ state = transition(state, Tokenizer.CDATA_RSQB, reconsume, pos);
+ break cdatasectionloop; // FALL THROUGH
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ continue;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ default:
+ continue;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case CDATA_RSQB:
+ cdatarsqb: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ switch (c) {
+ case ']':
+ state = transition(state, Tokenizer.CDATA_RSQB_RSQB, reconsume, pos);
+ break cdatarsqb;
+ default:
+ tokenHandler.characters(Tokenizer.RSQB_RSQB, 0,
+ 1);
+ cstart = pos;
+ reconsume = true;
+ state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case CDATA_RSQB_RSQB:
+ cdatarsqbrsqb: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ switch (c) {
+ case ']':
+ // Saw a third ]. Emit one ] (logically the
+ // first one) and stay in this state to
+ // remember that the last two characters seen
+ // have been ]].
+ tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1);
+ continue;
+ case '>':
+ cstart = pos + 1;
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ default:
+ tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2);
+ cstart = pos;
+ reconsume = true;
+ state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case ATTRIBUTE_VALUE_SINGLE_QUOTED:
+ attributevaluesinglequotedloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Switch to the after
+ * attribute value (quoted) state.
+ */
+ addAttributeWithValue();
+
+ state = transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
+ continue stateloop;
+ case '&':
+ /*
+ * U+0026 AMPERSAND (&) Switch to the character
+ * reference in attribute value state, with the
+ * additional allowed character being U+0027
+ * APOSTROPHE (').
+ */
+ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\'');
+ returnState = state;
+ state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
+ break attributevaluesinglequotedloop;
+ // continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ continue;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append the current input
+ * character to the current attribute's value.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the attribute value (single-quoted)
+ * state.
+ */
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case CONSUME_CHARACTER_REFERENCE:
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ if (c == '\u0000') {
+ break stateloop;
+ }
+ /*
+ * Unlike the definition in the spec, this state does not
+ * return a value and never requires the caller to
+ * backtrack. This state takes care of emitting characters
+ * or appending to the current attribute value. It also
+ * takes care of that in the case when consuming the
+ * character reference fails.
+ */
+ /*
+ * This section defines how to consume a character
+ * reference. This definition is used when parsing character
+ * references in text and in attributes.
+ *
+ * The behavior depends on the identity of the next
+ * character (the one immediately after the U+0026 AMPERSAND
+ * character):
+ */
+ switch (c) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r': // we'll reconsume!
+ case '\u000C':
+ case '<':
+ case '&':
+ emitOrAppendCharRefBuf(returnState);
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ case '#':
+ /*
+ * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER
+ * SIGN.
+ */
+ appendCharRefBuf('#');
+ state = transition(state, Tokenizer.CONSUME_NCR, reconsume, pos);
+ continue stateloop;
+ default:
+ if (c == additional) {
+ emitOrAppendCharRefBuf(returnState);
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ }
+ if (c >= 'a' && c <= 'z') {
+ firstCharKey = c - 'a' + 26;
+ } else if (c >= 'A' && c <= 'Z') {
+ firstCharKey = c - 'A';
+ } else {
+ // No match
+ /*
+ * If no match can be made, then this is a parse
+ * error.
+ */
+ errNoNamedCharacterMatch();
+ emitOrAppendCharRefBuf(returnState);
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ }
+ // Didn't fail yet
+ appendCharRefBuf(c);
+ state = transition(state, Tokenizer.CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, pos);
+ // FALL THROUGH continue stateloop;
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case CHARACTER_REFERENCE_HILO_LOOKUP:
+ {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ if (c == '\u0000') {
+ break stateloop;
+ }
+ /*
+ * The data structure is as follows:
+ *
+ * HILO_ACCEL is a two-dimensional int array whose major
+ * index corresponds to the second character of the
+ * character reference (code point as index) and the
+ * minor index corresponds to the first character of the
+ * character reference (packed so that A-Z runs from 0
+ * to 25 and a-z runs from 26 to 51). This layout makes
+ * it easier to use the sparseness of the data structure
+ * to omit parts of it: The second dimension of the
+ * table is null when no character reference starts with
+ * the character corresponding to that row.
+ *
+ * The int value HILO_ACCEL (by these indices) is zero
+ * if there exists no character reference starting with
+ * that two-letter prefix. Otherwise, the value is an
+ * int that packs two shorts so that the higher short is
+ * the index of the highest character reference name
+ * with that prefix in NAMES and the lower short
+ * corresponds to the index of the lowest character
+ * reference name with that prefix. (It happens that the
+ * first two character reference names share their
+ * prefix so the packed int cannot be 0 by packing the
+ * two shorts.)
+ *
+ * NAMES is an array of byte arrays where each byte
+ * array encodes the name of a character reference as
+ * ASCII. The names omit the first two letters of the
+ * name. (Since storing the first two letters would be
+ * redundant with the data contained in HILO_ACCEL.) The
+ * entries are lexically sorted.
+ *
+ * For a given index in NAMES, the same index in VALUES
+ * contains the corresponding expansion as an array of
+ * two UTF-16 code units (either the character and
+ * U+0000 or a surrogate pair).
+ */
+ int hilo = 0;
+ if (c <= 'z') {
+ @Const @NoLength int[] row = NamedCharactersAccel.HILO_ACCEL[c];
+ if (row != null) {
+ hilo = row[firstCharKey];
+ }
+ }
+ if (hilo == 0) {
+ /*
+ * If no match can be made, then this is a parse
+ * error.
+ */
+ errNoNamedCharacterMatch();
+ emitOrAppendCharRefBuf(returnState);
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ }
+ // Didn't fail yet
+ appendCharRefBuf(c);
+ lo = hilo & 0xFFFF;
+ hi = hilo >> 16;
+ entCol = -1;
+ candidate = -1;
+ charRefBufMark = 0;
+ state = transition(state, Tokenizer.CHARACTER_REFERENCE_TAIL, reconsume, pos);
+ // FALL THROUGH continue stateloop;
+ }
+ case CHARACTER_REFERENCE_TAIL:
+ outer: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ if (c == '\u0000') {
+ break stateloop;
+ }
+ entCol++;
+ /*
+ * Consume the maximum number of characters possible,
+ * with the consumed characters matching one of the
+ * identifiers in the first column of the named
+ * character references table (in a case-sensitive
+ * manner).
+ */
+ loloop: for (;;) {
+ if (hi < lo) {
+ break outer;
+ }
+ if (entCol == NamedCharacters.NAMES[lo].length()) {
+ candidate = lo;
+ charRefBufMark = charRefBufLen;
+ lo++;
+ } else if (entCol > NamedCharacters.NAMES[lo].length()) {
+ break outer;
+ } else if (c > NamedCharacters.NAMES[lo].charAt(entCol)) {
+ lo++;
+ } else {
+ break loloop;
+ }
+ }
+
+ hiloop: for (;;) {
+ if (hi < lo) {
+ break outer;
+ }
+ if (entCol == NamedCharacters.NAMES[hi].length()) {
+ break hiloop;
+ }
+ if (entCol > NamedCharacters.NAMES[hi].length()) {
+ break outer;
+ } else if (c < NamedCharacters.NAMES[hi].charAt(entCol)) {
+ hi--;
+ } else {
+ break hiloop;
+ }
+ }
+
+ if (c == ';') {
+ // If we see a semicolon, there cannot be a
+ // longer match. Break the loop. However, before
+ // breaking, take the longest match so far as the
+ // candidate, if we are just about to complete a
+ // match.
+ if (entCol + 1 == NamedCharacters.NAMES[lo].length()) {
+ candidate = lo;
+ charRefBufMark = charRefBufLen;
+ }
+ break outer;
+ }
+
+ if (hi < lo) {
+ break outer;
+ }
+ appendCharRefBuf(c);
+ continue;
+ }
+
+ if (candidate == -1) {
+ // reconsume deals with CR, LF or nul
+ /*
+ * If no match can be made, then this is a parse error.
+ */
+ errNoNamedCharacterMatch();
+ emitOrAppendCharRefBuf(returnState);
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ } else {
+ // c can't be CR, LF or nul if we got here
+ @Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate];
+ if (candidateName.length() == 0
+ || candidateName.charAt(candidateName.length() - 1) != ';') {
+ /*
+ * If the last character matched is not a U+003B
+ * SEMICOLON (;), there is a parse error.
+ */
+ if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
+ /*
+ * If the entity is being consumed as part of an
+ * attribute, and the last character matched is
+ * not a U+003B SEMICOLON (;),
+ */
+ char ch;
+ if (charRefBufMark == charRefBufLen) {
+ ch = c;
+ } else {
+ ch = charRefBuf[charRefBufMark];
+ }
+ if (ch == '=' || (ch >= '0' && ch <= '9')
+ || (ch >= 'A' && ch <= 'Z')
+ || (ch >= 'a' && ch <= 'z')) {
+ /*
+ * and the next character is either a U+003D
+ * EQUALS SIGN character (=) or in the range
+ * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
+ * U+0041 LATIN CAPITAL LETTER A to U+005A
+ * LATIN CAPITAL LETTER Z, or U+0061 LATIN
+ * SMALL LETTER A to U+007A LATIN SMALL
+ * LETTER Z, then, for historical reasons,
+ * all the characters that were matched
+ * after the U+0026 AMPERSAND (&) must be
+ * unconsumed, and nothing is returned.
+ */
+ errNoNamedCharacterMatch();
+ appendCharRefBufToStrBuf();
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
+ errUnescapedAmpersandInterpretedAsCharacterReference();
+ } else {
+ errNotSemicolonTerminated();
+ }
+ }
+
+ /*
+ * Otherwise, return a character token for the character
+ * corresponding to the entity name (as given by the
+ * second column of the named character references
+ * table).
+ */
+ // CPPONLY: completedNamedCharacterReference();
+ @Const @NoLength char[] val = NamedCharacters.VALUES[candidate];
+ if (
+ // [NOCPP[
+ val.length == 1
+ // ]NOCPP]
+ // CPPONLY: val[1] == 0
+ ) {
+ emitOrAppendOne(val, returnState);
+ } else {
+ emitOrAppendTwo(val, returnState);
+ }
+ // this is so complicated!
+ if (charRefBufMark < charRefBufLen) {
+ if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
+ appendStrBuf(charRefBuf, charRefBufMark,
+ charRefBufLen - charRefBufMark);
+ } else {
+ tokenHandler.characters(charRefBuf, charRefBufMark,
+ charRefBufLen - charRefBufMark);
+ }
+ }
+ // charRefBufLen will be zeroed below!
+
+ // Check if we broke out early with c being the last
+ // character that matched as opposed to being the
+ // first one that didn't match. In the case of an
+ // early break, the next run on text should start
+ // *after* the current character and the current
+ // character shouldn't be reconsumed.
+ boolean earlyBreak = (c == ';' && charRefBufMark == charRefBufLen);
+ charRefBufLen = 0;
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = earlyBreak ? pos + 1 : pos;
+ }
+ reconsume = !earlyBreak;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ /*
+ * If the markup contains I'm &notit; I tell you, the
+ * entity is parsed as "not", as in, I'm ¬it; I tell
+ * you. But if the markup was I'm &notin; I tell you,
+ * the entity would be parsed as "notin;", resulting in
+ * I'm ∉ I tell you.
+ */
+ }
+ // XXX reorder point
+ case CONSUME_NCR:
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ value = 0;
+ seenDigits = false;
+ /*
+ * The behavior further depends on the character after the
+ * U+0023 NUMBER SIGN:
+ */
+ switch (c) {
+ case 'x':
+ case 'X':
+
+ /*
+ * U+0078 LATIN SMALL LETTER X U+0058 LATIN CAPITAL
+ * LETTER X Consume the X.
+ *
+ * Follow the steps below, but using the range of
+ * characters U+0030 DIGIT ZERO through to U+0039
+ * DIGIT NINE, U+0061 LATIN SMALL LETTER A through
+ * to U+0066 LATIN SMALL LETTER F, and U+0041 LATIN
+ * CAPITAL LETTER A, through to U+0046 LATIN CAPITAL
+ * LETTER F (in other words, 0-9, A-F, a-f).
+ *
+ * When it comes to interpreting the number,
+ * interpret it as a hexadecimal number.
+ */
+ appendCharRefBuf(c);
+ state = transition(state, Tokenizer.HEX_NCR_LOOP, reconsume, pos);
+ continue stateloop;
+ default:
+ /*
+ * Anything else Follow the steps below, but using
+ * the range of characters U+0030 DIGIT ZERO through
+ * to U+0039 DIGIT NINE (i.e. just 0-9).
+ *
+ * When it comes to interpreting the number,
+ * interpret it as a decimal number.
+ */
+ reconsume = true;
+ state = transition(state, Tokenizer.DECIMAL_NRC_LOOP, reconsume, pos);
+ // FALL THROUGH continue stateloop;
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case DECIMAL_NRC_LOOP:
+ decimalloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume as many characters as match the range of
+ * characters given above.
+ */
+ assert value >= 0: "value must not become negative.";
+ if (c >= '0' && c <= '9') {
+ seenDigits = true;
+ // Avoid overflow
+ if (value <= 0x10FFFF) {
+ value *= 10;
+ value += c - '0';
+ }
+ continue;
+ } else if (c == ';') {
+ if (seenDigits) {
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos + 1;
+ }
+ state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
+ // FALL THROUGH continue stateloop;
+ break decimalloop;
+ } else {
+ errNoDigitsInNCR();
+ appendCharRefBuf(';');
+ emitOrAppendCharRefBuf(returnState);
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos + 1;
+ }
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ }
+ } else {
+ /*
+ * If no characters match the range, then don't
+ * consume any characters (and unconsume the U+0023
+ * NUMBER SIGN character and, if appropriate, the X
+ * character). This is a parse error; nothing is
+ * returned.
+ *
+ * Otherwise, if the next character is a U+003B
+ * SEMICOLON, consume that too. If it isn't, there
+ * is a parse error.
+ */
+ if (!seenDigits) {
+ errNoDigitsInNCR();
+ emitOrAppendCharRefBuf(returnState);
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ } else {
+ errCharRefLacksSemicolon();
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
+ // FALL THROUGH continue stateloop;
+ break decimalloop;
+ }
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case HANDLE_NCR_VALUE:
+ // WARNING previous state sets reconsume
+ // We are not going to emit the contents of charRefBuf.
+ charRefBufLen = 0;
+ // XXX inline this case if the method size can take it
+ handleNcrValue(returnState);
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ // XXX reorder point
+ case HEX_NCR_LOOP:
+ for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume as many characters as match the range of
+ * characters given above.
+ */
+ assert value >= 0: "value must not become negative.";
+ if (c >= '0' && c <= '9') {
+ seenDigits = true;
+ // Avoid overflow
+ if (value <= 0x10FFFF) {
+ value *= 16;
+ value += c - '0';
+ }
+ continue;
+ } else if (c >= 'A' && c <= 'F') {
+ seenDigits = true;
+ // Avoid overflow
+ if (value <= 0x10FFFF) {
+ value *= 16;
+ value += c - 'A' + 10;
+ }
+ continue;
+ } else if (c >= 'a' && c <= 'f') {
+ seenDigits = true;
+ // Avoid overflow
+ if (value <= 0x10FFFF) {
+ value *= 16;
+ value += c - 'a' + 10;
+ }
+ continue;
+ } else if (c == ';') {
+ if (seenDigits) {
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos + 1;
+ }
+ state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
+ continue stateloop;
+ } else {
+ errNoDigitsInNCR();
+ appendCharRefBuf(';');
+ emitOrAppendCharRefBuf(returnState);
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos + 1;
+ }
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ }
+ } else {
+ /*
+ * If no characters match the range, then don't
+ * consume any characters (and unconsume the U+0023
+ * NUMBER SIGN character and, if appropriate, the X
+ * character). This is a parse error; nothing is
+ * returned.
+ *
+ * Otherwise, if the next character is a U+003B
+ * SEMICOLON, consume that too. If it isn't, there
+ * is a parse error.
+ */
+ if (!seenDigits) {
+ errNoDigitsInNCR();
+ emitOrAppendCharRefBuf(returnState);
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ } else {
+ errCharRefLacksSemicolon();
+ if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ }
+ // XXX reorder point
+ case PLAINTEXT:
+ plaintextloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ switch (c) {
+ case '\u0000':
+ emitPlaintextReplacementCharacter(buf, pos);
+ continue;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Anything else Emit the current input
+ * character as a character token. Stay in the
+ * PLAINTEXT state.
+ */
+ continue;
+ }
+ }
+ // XXX reorder point
+ case CLOSE_TAG_OPEN:
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Otherwise, if the content model flag is set to the PCDATA
+ * state, or if the next few characters do match that tag
+ * name, consume the next input character:
+ */
+ switch (c) {
+ case '>':
+ /* U+003E GREATER-THAN SIGN (>) Parse error. */
+ errLtSlashGt();
+ /*
+ * Switch to the data state.
+ */
+ cstart = pos + 1;
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ silentCarriageReturn();
+ /* Anything else Parse error. */
+ errGarbageAfterLtSlash();
+ /*
+ * Switch to the bogus comment state.
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf('\n');
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ /* Anything else Parse error. */
+ errGarbageAfterLtSlash();
+ /*
+ * Switch to the bogus comment state.
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ }
+ if (c >= 'a' && c <= 'z') {
+ /*
+ * U+0061 LATIN SMALL LETTER A through to U+007A
+ * LATIN SMALL LETTER Z Create a new end tag
+ * token,
+ */
+ endTag = true;
+ /*
+ * set its tag name to the input character,
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ /*
+ * then switch to the tag name state. (Don't
+ * emit the token yet; further details will be
+ * filled in before it is emitted.)
+ */
+ state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
+ continue stateloop;
+ } else {
+ /* Anything else Parse error. */
+ errGarbageAfterLtSlash();
+ /*
+ * Switch to the bogus comment state.
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case RCDATA:
+ rcdataloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ switch (c) {
+ case '&':
+ /*
+ * U+0026 AMPERSAND (&) Switch to the character
+ * reference in RCDATA state.
+ */
+ flushChars(buf, pos);
+ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\u0000');
+ returnState = state;
+ state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
+ continue stateloop;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the
+ * RCDATA less-than sign state.
+ */
+ flushChars(buf, pos);
+
+ returnState = state;
+ state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ continue;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Emit the current input character as a
+ * character token. Stay in the RCDATA state.
+ */
+ continue;
+ }
+ }
+ // XXX reorder point
+ case RAWTEXT:
+ rawtextloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ switch (c) {
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the
+ * RAWTEXT less-than sign state.
+ */
+ flushChars(buf, pos);
+
+ returnState = state;
+ state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
+ break rawtextloop;
+ // FALL THRU continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ continue;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Emit the current input character as a
+ * character token. Stay in the RAWTEXT state.
+ */
+ continue;
+ }
+ }
+ // XXX fallthru don't reorder
+ case RAWTEXT_RCDATA_LESS_THAN_SIGN:
+ rawtextrcdatalessthansignloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ switch (c) {
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Set the temporary buffer
+ * to the empty string. Switch to the non-data
+ * end tag name state.
+ */
+ index = 0;
+ clearStrBufBeforeUse();
+ state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
+ break rawtextrcdatalessthansignloop;
+ // FALL THRU continue stateloop;
+ default:
+ /*
+ * Otherwise, emit a U+003C LESS-THAN SIGN
+ * character token
+ */
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
+ /*
+ * and reconsume the current input character in
+ * the state saved in returnState.
+ */
+ cstart = pos;
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // XXX fall thru. don't reorder.
+ case NON_DATA_END_TAG_NAME:
+ for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * ASSERT! when entering this state, set index to 0 and
+ * call clearStrBufBeforeUse() assert (contentModelElement !=
+ * null); Let's implement the above without lookahead.
+ * strBuf is the 'temporary buffer'.
+ */
+ if (index < endTagExpectationAsArray.length) {
+ char e = endTagExpectationAsArray[index];
+ char folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded != e) {
+ // [NOCPP[
+ errHtml4LtSlashInRcdata(folded);
+ // ]NOCPP]
+ tokenHandler.characters(Tokenizer.LT_SOLIDUS,
+ 0, 2);
+ emitStrBuf();
+ cstart = pos;
+ reconsume = true;
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ }
+ appendStrBuf(c);
+ index++;
+ continue;
+ } else {
+ endTag = true;
+ // XXX replace contentModelElement with different
+ // type
+ tagName = endTagExpectation;
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ clearStrBufAfterUse(); // strBuf not used
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE
+ * FEED (LF) U+000C FORM FEED (FF) U+0020
+ * SPACE If the current end tag token is an
+ * appropriate end tag token, then switch to
+ * the before attribute name state.
+ */
+ clearStrBufAfterUse(); // strBuf not used
+ state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
+ continue stateloop;
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) If the current end tag
+ * token is an appropriate end tag token,
+ * then switch to the self-closing start tag
+ * state.
+ */
+ clearStrBufAfterUse(); // strBuf not used
+ state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) If the
+ * current end tag token is an appropriate
+ * end tag token, then emit the current tag
+ * token and switch to the data state.
+ */
+ clearStrBufAfterUse(); // strBuf not used
+ state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
+ if (shouldSuspend) {
+ break stateloop;
+ }
+ continue stateloop;
+ default:
+ /*
+ * Emit a U+003C LESS-THAN SIGN character
+ * token, a U+002F SOLIDUS character token,
+ * a character token for each of the
+ * characters in the temporary buffer (in
+ * the order they were added to the buffer),
+ * and reconsume the current input character
+ * in the RAWTEXT state.
+ */
+ // [NOCPP[
+ errWarnLtSlashInRcdata();
+ // ]NOCPP]
+ tokenHandler.characters(
+ Tokenizer.LT_SOLIDUS, 0, 2);
+ emitStrBuf();
+ if (c == '\u0000') {
+ emitReplacementCharacter(buf, pos);
+ } else {
+ cstart = pos; // don't drop the
+ // character
+ }
+ state = transition(state, returnState, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ }
+ // XXX reorder point
+ // BEGIN HOTSPOT WORKAROUND
+ case BOGUS_COMMENT:
+ boguscommentloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume every character up to and including the first
+ * U+003E GREATER-THAN SIGN character (>) or the end of
+ * the file (EOF), whichever comes first. Emit a comment
+ * token whose data is the concatenation of all the
+ * characters starting from and including the character
+ * that caused the state machine to switch into the
+ * bogus comment state, up to and including the
+ * character immediately before the last consumed
+ * character (i.e. up to the character just before the
+ * U+003E or EOF character). (If the comment was started
+ * by the end of the file (EOF), the token is empty.)
+ *
+ * Switch to the data state.
+ *
+ * If the end of the file was reached, reconsume the EOF
+ * character.
+ */
+ switch (c) {
+ case '>':
+ emitComment(0, pos);
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '-':
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.BOGUS_COMMENT_HYPHEN, reconsume, pos);
+ break boguscommentloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ continue;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BOGUS_COMMENT_HYPHEN:
+ boguscommenthyphenloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ switch (c) {
+ case '>':
+ // [NOCPP[
+ maybeAppendSpaceToBogusComment();
+ // ]NOCPP]
+ emitComment(0, pos);
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '-':
+ appendSecondHyphenToBogusComment();
+ continue boguscommenthyphenloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ appendStrBuf(c);
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case SCRIPT_DATA:
+ scriptdataloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ switch (c) {
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the
+ * script data less-than sign state.
+ */
+ flushChars(buf, pos);
+ returnState = state;
+ state = transition(state, Tokenizer.SCRIPT_DATA_LESS_THAN_SIGN, reconsume, pos);
+ break scriptdataloop; // FALL THRU continue
+ // stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ continue;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Anything else Emit the current input
+ * character as a character token. Stay in the
+ * script data state.
+ */
+ continue;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_LESS_THAN_SIGN:
+ scriptdatalessthansignloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ switch (c) {
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Set the temporary buffer
+ * to the empty string. Switch to the script
+ * data end tag open state.
+ */
+ index = 0;
+ clearStrBufBeforeUse();
+ state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
+ continue stateloop;
+ case '!':
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
+ cstart = pos;
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START, reconsume, pos);
+ break scriptdatalessthansignloop; // FALL THRU
+ // continue
+ // stateloop;
+ default:
+ /*
+ * Otherwise, emit a U+003C LESS-THAN SIGN
+ * character token
+ */
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
+ /*
+ * and reconsume the current input character in
+ * the script data state.
+ */
+ cstart = pos;
+ reconsume = true;
+ state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_ESCAPE_START:
+ scriptdataescapestartloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Emit a U+002D
+ * HYPHEN-MINUS character token. Switch to the
+ * script data escape start dash state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START_DASH, reconsume, pos);
+ break scriptdataescapestartloop; // FALL THRU
+ // continue
+ // stateloop;
+ default:
+ /*
+ * Anything else Reconsume the current input
+ * character in the script data state.
+ */
+ reconsume = true;
+ state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_ESCAPE_START_DASH:
+ scriptdataescapestartdashloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Emit a U+002D
+ * HYPHEN-MINUS character token. Switch to the
+ * script data escaped dash dash state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
+ break scriptdataescapestartdashloop;
+ // continue stateloop;
+ default:
+ /*
+ * Anything else Reconsume the current input
+ * character in the script data state.
+ */
+ reconsume = true;
+ state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_ESCAPED_DASH_DASH:
+ scriptdataescapeddashdashloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Emit a U+002D
+ * HYPHEN-MINUS character token. Stay in the
+ * script data escaped dash dash state.
+ */
+ continue;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the
+ * script data escaped less-than sign state.
+ */
+ flushChars(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit a U+003E
+ * GREATER-THAN SIGN character token. Switch to
+ * the script data state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ break scriptdataescapeddashdashloop;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Anything else Emit the current input
+ * character as a character token. Switch to the
+ * script data escaped state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ break scriptdataescapeddashdashloop;
+ // continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_ESCAPED:
+ scriptdataescapedloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Emit a U+002D
+ * HYPHEN-MINUS character token. Switch to the
+ * script data escaped dash state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH, reconsume, pos);
+ break scriptdataescapedloop; // FALL THRU
+ // continue
+ // stateloop;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the
+ * script data escaped less-than sign state.
+ */
+ flushChars(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ continue;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Anything else Emit the current input
+ * character as a character token. Stay in the
+ * script data escaped state.
+ */
+ continue;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_ESCAPED_DASH:
+ scriptdataescapeddashloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Emit a U+002D
+ * HYPHEN-MINUS character token. Switch to the
+ * script data escaped dash dash state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
+ continue stateloop;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the
+ * script data escaped less-than sign state.
+ */
+ flushChars(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
+ break scriptdataescapeddashloop;
+ // continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Anything else Emit the current input
+ * character as a character token. Switch to the
+ * script data escaped state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
+ scriptdataescapedlessthanloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Set the temporary buffer
+ * to the empty string. Switch to the script
+ * data escaped end tag open state.
+ */
+ index = 0;
+ clearStrBufBeforeUse();
+ returnState = Tokenizer.SCRIPT_DATA_ESCAPED;
+ state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
+ continue stateloop;
+ case 'S':
+ case 's':
+ /*
+ * U+0041 LATIN CAPITAL LETTER A through to
+ * U+005A LATIN CAPITAL LETTER Z Emit a U+003C
+ * LESS-THAN SIGN character token and the
+ * current input character as a character token.
+ */
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
+ cstart = pos;
+ index = 1;
+ /*
+ * Set the temporary buffer to the empty string.
+ * Append the lowercase version of the current
+ * input character (add 0x0020 to the
+ * character's code point) to the temporary
+ * buffer. Switch to the script data double
+ * escape start state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_START, reconsume, pos);
+ break scriptdataescapedlessthanloop;
+ // continue stateloop;
+ default:
+ /*
+ * Anything else Emit a U+003C LESS-THAN SIGN
+ * character token and reconsume the current
+ * input character in the script data escaped
+ * state.
+ */
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
+ cstart = pos;
+ reconsume = true;
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_DOUBLE_ESCAPE_START:
+ scriptdatadoubleescapestartloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ assert index > 0;
+ if (index < 6) { // SCRIPT_ARR.length
+ char folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded != Tokenizer.SCRIPT_ARR[index]) {
+ reconsume = true;
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ continue stateloop;
+ }
+ index++;
+ continue;
+ }
+ switch (c) {
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ case ' ':
+ case '\t':
+ case '\u000C':
+ case '/':
+ case '>':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
+ * (>) Emit the current input character as a
+ * character token. If the temporary buffer is
+ * the string "script", then switch to the
+ * script data double escaped state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ break scriptdatadoubleescapestartloop;
+ // continue stateloop;
+ default:
+ /*
+ * Anything else Reconsume the current input
+ * character in the script data escaped state.
+ */
+ reconsume = true;
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_DOUBLE_ESCAPED:
+ scriptdatadoubleescapedloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Emit a U+002D
+ * HYPHEN-MINUS character token. Switch to the
+ * script data double escaped dash state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH, reconsume, pos);
+ break scriptdatadoubleescapedloop; // FALL THRU
+ // continue
+ // stateloop;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Emit a U+003C
+ * LESS-THAN SIGN character token. Switch to the
+ * script data double escaped less-than sign
+ * state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ continue;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Anything else Emit the current input
+ * character as a character token. Stay in the
+ * script data double escaped state.
+ */
+ continue;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
+ scriptdatadoubleescapeddashloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Emit a U+002D
+ * HYPHEN-MINUS character token. Switch to the
+ * script data double escaped dash dash state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, reconsume, pos);
+ break scriptdatadoubleescapeddashloop;
+ // continue stateloop;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Emit a U+003C
+ * LESS-THAN SIGN character token. Switch to the
+ * script data double escaped less-than sign
+ * state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Anything else Emit the current input
+ * character as a character token. Switch to the
+ * script data double escaped state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
+ scriptdatadoubleescapeddashdashloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '-':
+ /*
+ * U+002D HYPHEN-MINUS (-) Emit a U+002D
+ * HYPHEN-MINUS character token. Stay in the
+ * script data double escaped dash dash state.
+ */
+ continue;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Emit a U+003C
+ * LESS-THAN SIGN character token. Switch to the
+ * script data double escaped less-than sign
+ * state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
+ break scriptdatadoubleescapeddashdashloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit a U+003E
+ * GREATER-THAN SIGN character token. Switch to
+ * the script data state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ default:
+ /*
+ * Anything else Emit the current input
+ * character as a character token. Switch to the
+ * script data double escaped state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
+ scriptdatadoubleescapedlessthanloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '/':
+ /*
+ * U+002F SOLIDUS (/) Emit a U+002F SOLIDUS
+ * character token. Set the temporary buffer to
+ * the empty string. Switch to the script data
+ * double escape end state.
+ */
+ index = 0;
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_END, reconsume, pos);
+ break scriptdatadoubleescapedlessthanloop;
+ default:
+ /*
+ * Anything else Reconsume the current input
+ * character in the script data double escaped
+ * state.
+ */
+ reconsume = true;
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case SCRIPT_DATA_DOUBLE_ESCAPE_END:
+ scriptdatadoubleescapeendloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ if (index < 6) { // SCRIPT_ARR.length
+ char folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded != Tokenizer.SCRIPT_ARR[index]) {
+ reconsume = true;
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ continue stateloop;
+ }
+ index++;
+ continue;
+ }
+ switch (c) {
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ case ' ':
+ case '\t':
+ case '\u000C':
+ case '/':
+ case '>':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
+ * (>) Emit the current input character as a
+ * character token. If the temporary buffer is
+ * the string "script", then switch to the
+ * script data escaped state.
+ */
+ state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
+ continue stateloop;
+ default:
+ /*
+ * Reconsume the current input character in the
+ * script data double escaped state.
+ */
+ reconsume = true;
+ state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case MARKUP_DECLARATION_OCTYPE:
+ markupdeclarationdoctypeloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ if (index < 6) { // OCTYPE.length
+ char folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded == Tokenizer.OCTYPE[index]) {
+ appendStrBuf(c);
+ } else {
+ errBogusComment();
+ reconsume = true;
+ state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
+ continue stateloop;
+ }
+ index++;
+ continue;
+ } else {
+ reconsume = true;
+ state = transition(state, Tokenizer.DOCTYPE, reconsume, pos);
+ break markupdeclarationdoctypeloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case DOCTYPE:
+ doctypeloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ initDoctypeFields();
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * Switch to the before DOCTYPE name state.
+ */
+ state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
+ break doctypeloop;
+ // continue stateloop;
+ default:
+ /*
+ * Anything else Parse error.
+ */
+ errMissingSpaceBeforeDoctypeName();
+ /*
+ * Reconsume the current character in the before
+ * DOCTYPE name state.
+ */
+ reconsume = true;
+ state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
+ break doctypeloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BEFORE_DOCTYPE_NAME:
+ beforedoctypenameloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
+ * in the before DOCTYPE name state.
+ */
+ continue;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Parse error.
+ */
+ errNamelessDoctype();
+ /*
+ * Create a new DOCTYPE token. Set its
+ * force-quirks flag to on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit the token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ if (c >= 'A' && c <= 'Z') {
+ /*
+ * U+0041 LATIN CAPITAL LETTER A through to
+ * U+005A LATIN CAPITAL LETTER Z Create a
+ * new DOCTYPE token. Set the token's name
+ * to the lowercase version of the input
+ * character (add 0x0020 to the character's
+ * code point).
+ */
+ c += 0x20;
+ }
+ /* Anything else Create a new DOCTYPE token. */
+ /*
+ * Set the token's name to the current
+ * input character.
+ */
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ /*
+ * Switch to the DOCTYPE name state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_NAME, reconsume, pos);
+ break beforedoctypenameloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case DOCTYPE_NAME:
+ doctypenameloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ strBufToDoctypeName();
+ state = transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * Switch to the after DOCTYPE name state.
+ */
+ strBufToDoctypeName();
+ state = transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
+ break doctypenameloop;
+ // continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * DOCTYPE token.
+ */
+ strBufToDoctypeName();
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * U+0041 LATIN CAPITAL LETTER A through to
+ * U+005A LATIN CAPITAL LETTER Z Append the
+ * lowercase version of the input character (add
+ * 0x0020 to the character's code point) to the
+ * current DOCTYPE token's name.
+ */
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x0020;
+ }
+ /*
+ * Anything else Append the current input
+ * character to the current DOCTYPE token's
+ * name.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the DOCTYPE name state.
+ */
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case AFTER_DOCTYPE_NAME:
+ afterdoctypenameloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
+ * in the after DOCTYPE name state.
+ */
+ continue;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * DOCTYPE token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case 'p':
+ case 'P':
+ index = 0;
+ state = transition(state, Tokenizer.DOCTYPE_UBLIC, reconsume, pos);
+ break afterdoctypenameloop;
+ // continue stateloop;
+ case 's':
+ case 'S':
+ index = 0;
+ state = transition(state, Tokenizer.DOCTYPE_YSTEM, reconsume, pos);
+ continue stateloop;
+ default:
+ /*
+ * Otherwise, this is the parse error.
+ */
+ bogusDoctype();
+
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ // done by bogusDoctype();
+ /*
+ * Switch to the bogus DOCTYPE state.
+ */
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case DOCTYPE_UBLIC:
+ doctypeublicloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * If the six characters starting from the current input
+ * character are an ASCII case-insensitive match for the
+ * word "PUBLIC", then consume those characters and
+ * switch to the before DOCTYPE public identifier state.
+ */
+ if (index < 5) { // UBLIC.length
+ char folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded != Tokenizer.UBLIC[index]) {
+ bogusDoctype();
+ // forceQuirks = true;
+ reconsume = true;
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ continue stateloop;
+ }
+ index++;
+ continue;
+ } else {
+ reconsume = true;
+ state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos);
+ break doctypeublicloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case AFTER_DOCTYPE_PUBLIC_KEYWORD:
+ afterdoctypepublickeywordloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ state = transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * Switch to the before DOCTYPE public
+ * identifier state.
+ */
+ state = transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
+ break afterdoctypepublickeywordloop;
+ // FALL THROUGH continue stateloop
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Parse Error.
+ */
+ errNoSpaceBetweenDoctypePublicKeywordAndQuote();
+ /*
+ * Set the DOCTYPE token's public identifier to
+ * the empty string (not missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE public identifier
+ * (double-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Parse Error.
+ */
+ errNoSpaceBetweenDoctypePublicKeywordAndQuote();
+ /*
+ * Set the DOCTYPE token's public identifier to
+ * the empty string (not missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE public identifier
+ * (single-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /* U+003E GREATER-THAN SIGN (>) Parse error. */
+ errExpectedPublicId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ default:
+ bogusDoctype();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ // done by bogusDoctype();
+ /*
+ * Switch to the bogus DOCTYPE state.
+ */
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
+ beforedoctypepublicidentifierloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
+ * in the before DOCTYPE public identifier
+ * state.
+ */
+ continue;
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Set the DOCTYPE
+ * token's public identifier to the empty string
+ * (not missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE public identifier
+ * (double-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
+ break beforedoctypepublicidentifierloop;
+ // continue stateloop;
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Set the DOCTYPE token's
+ * public identifier to the empty string (not
+ * missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE public identifier
+ * (single-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /* U+003E GREATER-THAN SIGN (>) Parse error. */
+ errExpectedPublicId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ default:
+ bogusDoctype();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ // done by bogusDoctype();
+ /*
+ * Switch to the bogus DOCTYPE state.
+ */
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
+ doctypepublicidentifierdoublequotedloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Switch to the after
+ * DOCTYPE public identifier state.
+ */
+ publicIdentifier = strBufToString();
+ state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
+ break doctypepublicidentifierdoublequotedloop;
+ // continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Parse error.
+ */
+ errGtInPublicId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ publicIdentifier = strBufToString();
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ continue;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append the current input
+ * character to the current DOCTYPE token's
+ * public identifier.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the DOCTYPE public identifier
+ * (double-quoted) state.
+ */
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
+ afterdoctypepublicidentifierloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ state = transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * Switch to the between DOCTYPE public and
+ * system identifiers state.
+ */
+ state = transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
+ break afterdoctypepublicidentifierloop;
+ // continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * DOCTYPE token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Parse error.
+ */
+ errNoSpaceBetweenPublicAndSystemIds();
+ /*
+ * Set the DOCTYPE token's system identifier to
+ * the empty string (not missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE system identifier
+ * (double-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Parse error.
+ */
+ errNoSpaceBetweenPublicAndSystemIds();
+ /*
+ * Set the DOCTYPE token's system identifier to
+ * the empty string (not missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE system identifier
+ * (single-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ default:
+ bogusDoctype();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ // done by bogusDoctype();
+ /*
+ * Switch to the bogus DOCTYPE state.
+ */
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
+ betweendoctypepublicandsystemidentifiersloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
+ * in the between DOCTYPE public and system
+ * identifiers state.
+ */
+ continue;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * DOCTYPE token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Set the DOCTYPE
+ * token's system identifier to the empty string
+ * (not missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE system identifier
+ * (double-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
+ break betweendoctypepublicandsystemidentifiersloop;
+ // continue stateloop;
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Set the DOCTYPE token's
+ * system identifier to the empty string (not
+ * missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE system identifier
+ * (single-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ default:
+ bogusDoctype();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ // done by bogusDoctype();
+ /*
+ * Switch to the bogus DOCTYPE state.
+ */
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
+ doctypesystemidentifierdoublequotedloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Switch to the after
+ * DOCTYPE system identifier state.
+ */
+ systemIdentifier = strBufToString();
+ state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Parse error.
+ */
+ errGtInSystemId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ systemIdentifier = strBufToString();
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ continue;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append the current input
+ * character to the current DOCTYPE token's
+ * system identifier.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the DOCTYPE system identifier
+ * (double-quoted) state.
+ */
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
+ afterdoctypesystemidentifierloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
+ * in the after DOCTYPE system identifier state.
+ */
+ continue;
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit the current
+ * DOCTYPE token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ default:
+ /*
+ * Switch to the bogus DOCTYPE state. (This does
+ * not set the DOCTYPE token's force-quirks flag
+ * to on.)
+ */
+ bogusDoctypeWithoutQuirks();
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ break afterdoctypesystemidentifierloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BOGUS_DOCTYPE:
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '>':
+ /*
+ * U+003E GREATER-THAN SIGN (>) Emit that
+ * DOCTYPE token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ default:
+ /*
+ * Anything else Stay in the bogus DOCTYPE
+ * state.
+ */
+ continue;
+ }
+ }
+ // XXX reorder point
+ case DOCTYPE_YSTEM:
+ doctypeystemloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Otherwise, if the six characters starting from the
+ * current input character are an ASCII case-insensitive
+ * match for the word "SYSTEM", then consume those
+ * characters and switch to the before DOCTYPE system
+ * identifier state.
+ */
+ if (index < 5) { // YSTEM.length
+ char folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded != Tokenizer.YSTEM[index]) {
+ bogusDoctype();
+ reconsume = true;
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ continue stateloop;
+ }
+ index++;
+ continue stateloop;
+ } else {
+ reconsume = true;
+ state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos);
+ break doctypeystemloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case AFTER_DOCTYPE_SYSTEM_KEYWORD:
+ afterdoctypesystemkeywordloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ state = transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE
+ * Switch to the before DOCTYPE system
+ * identifier state.
+ */
+ state = transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
+ break afterdoctypesystemkeywordloop;
+ // FALL THROUGH continue stateloop
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Parse Error.
+ */
+ errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
+ /*
+ * Set the DOCTYPE token's system identifier to
+ * the empty string (not missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE system identifier
+ * (double-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Parse Error.
+ */
+ errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
+ /*
+ * Set the DOCTYPE token's system identifier to
+ * the empty string (not missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE system identifier
+ * (single-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ case '>':
+ /* U+003E GREATER-THAN SIGN (>) Parse error. */
+ errExpectedPublicId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ default:
+ bogusDoctype();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ // done by bogusDoctype();
+ /*
+ * Switch to the bogus DOCTYPE state.
+ */
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
+ beforedoctypesystemidentifierloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\r':
+ silentCarriageReturn();
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // fall thru
+ case ' ':
+ case '\t':
+ case '\u000C':
+ /*
+ * U+0009 CHARACTER TABULATION U+000A LINE FEED
+ * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
+ * in the before DOCTYPE system identifier
+ * state.
+ */
+ continue;
+ case '"':
+ /*
+ * U+0022 QUOTATION MARK (") Set the DOCTYPE
+ * token's system identifier to the empty string
+ * (not missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE system identifier
+ * (double-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
+ continue stateloop;
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Set the DOCTYPE token's
+ * system identifier to the empty string (not
+ * missing),
+ */
+ clearStrBufBeforeUse();
+ /*
+ * then switch to the DOCTYPE system identifier
+ * (single-quoted) state.
+ */
+ state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
+ break beforedoctypesystemidentifierloop;
+ // continue stateloop;
+ case '>':
+ /* U+003E GREATER-THAN SIGN (>) Parse error. */
+ errExpectedSystemId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ default:
+ bogusDoctype();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ // done by bogusDoctype();
+ /*
+ * Switch to the bogus DOCTYPE state.
+ */
+ state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
+ for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Switch to the after
+ * DOCTYPE system identifier state.
+ */
+ systemIdentifier = strBufToString();
+ state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
+ continue stateloop;
+ case '>':
+ errGtInSystemId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ systemIdentifier = strBufToString();
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ continue;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append the current input
+ * character to the current DOCTYPE token's
+ * system identifier.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the DOCTYPE system identifier
+ * (single-quoted) state.
+ */
+ continue;
+ }
+ }
+ // XXX reorder point
+ case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
+ for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case '\'':
+ /*
+ * U+0027 APOSTROPHE (') Switch to the after
+ * DOCTYPE public identifier state.
+ */
+ publicIdentifier = strBufToString();
+ state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
+ continue stateloop;
+ case '>':
+ errGtInPublicId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to
+ * on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ publicIdentifier = strBufToString();
+ emitDoctypeToken(pos);
+ /*
+ * Switch to the data state.
+ */
+ state = transition(state, Tokenizer.DATA, reconsume, pos);
+ continue stateloop;
+ case '\r':
+ appendStrBufCarriageReturn();
+ break stateloop;
+ case '\n':
+ appendStrBufLineFeed();
+ continue;
+ case '\u0000':
+ c = '\uFFFD';
+ // fall thru
+ default:
+ /*
+ * Anything else Append the current input
+ * character to the current DOCTYPE token's
+ * public identifier.
+ */
+ appendStrBuf(c);
+ /*
+ * Stay in the DOCTYPE public identifier
+ * (single-quoted) state.
+ */
+ continue;
+ }
+ }
+ // XXX reorder point
+ case PROCESSING_INSTRUCTION:
+ processinginstructionloop: for (;;) {
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ switch (c) {
+ case '?':
+ state = transition(
+ state,
+ Tokenizer.PROCESSING_INSTRUCTION_QUESTION_MARK,
+ reconsume, pos);
+ break processinginstructionloop;
+ // continue stateloop;
+ default:
+ continue;
+ }
+ }
+ case PROCESSING_INSTRUCTION_QUESTION_MARK:
+ if (++pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ switch (c) {
+ case '>':
+ state = transition(state, Tokenizer.DATA,
+ reconsume, pos);
+ continue stateloop;
+ default:
+ state = transition(state,
+ Tokenizer.PROCESSING_INSTRUCTION,
+ reconsume, pos);
+ continue stateloop;
+ }
+ // END HOTSPOT WORKAROUND
+ }
+ }
+ flushChars(buf, pos);
+ /*
+ * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
+ */
+ // Save locals
+ stateSave = state;
+ returnStateSave = returnState;
+ return pos;
+ }
+
+ // HOTSPOT WORKAROUND INSERTION POINT
+
+ // [NOCPP[
+
+ protected int transition(int from, int to, boolean reconsume, int pos) throws SAXException { // State-change hook used by the state loop as "state = transition(state, NEXT, reconsume, pos)"; this implementation ignores from, reconsume and pos.
+ return to; // the requested target state is handed back unchanged. NOTE(review): protected, so presumably an override point for subclasses - confirm
+ }
+
+ // ]NOCPP]
+
+ private void initDoctypeFields() { // Resets the per-DOCTYPE fields (name, public/system identifiers, force-quirks flag); called from the DOCTYPE state of the state loop.
+ // Discard the characters "DOCTYPE" accumulated as a potential bogus
+ // comment into strBuf.
+ clearStrBufAfterUse();
+ doctypeName = ""; // the name starts out as the empty string
+ if (systemIdentifier != null) {
+ Portability.releaseString(systemIdentifier); // hand the previous value to Portability before dropping the reference
+ systemIdentifier = null; // presumably null stands for "missing", as opposed to the empty string - confirm
+ }
+ if (publicIdentifier != null) {
+ Portability.releaseString(publicIdentifier); // same release-then-clear sequence as for systemIdentifier
+ publicIdentifier = null;
+ }
+ forceQuirks = false; // off by default; bogusDoctype() and several '>' error paths in the state loop set it to true
+ }
+
+ @Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
+ throws SAXException { // Carriage-return variant of adjustDoubleHyphenAndAppendToStrBufAndErr(): does the CR bookkeeping, then appends a line feed.
+ silentCarriageReturn(); // ++line and lastCR = true
+ adjustDoubleHyphenAndAppendToStrBufAndErr('\n'); // the CR is passed on as '\n', never as '\r'
+ }
+
+ @Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed()
+ throws SAXException { // Line-feed variant of adjustDoubleHyphenAndAppendToStrBufAndErr(): counts the line, then appends the line feed.
+ silentLineFeed(); // ++line only; unlike the carriage-return variant, lastCR is not touched
+ adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
+ }
+
+ @Inline private void appendStrBufLineFeed() { // Counts a line feed and appends it to strBuf; used by the quoted DOCTYPE identifier states.
+ silentLineFeed(); // ++line
+ appendStrBuf('\n');
+ }
+
+ @Inline private void appendStrBufCarriageReturn() { // Counts a carriage return and appends a line feed in its place to strBuf.
+ silentCarriageReturn(); // ++line and lastCR = true
+ appendStrBuf('\n'); // deliberately '\n': the CR itself is never stored in the buffer
+ }
+
+ @Inline protected void silentCarriageReturn() { // Bookkeeping for a carriage return that produces no output of its own.
+ ++line; // the CR counts as a line break
+ lastCR = true; // NOTE(review): presumably lets the caller avoid counting a directly following LF a second time - confirm where lastCR is read
+ }
+
+ @Inline protected void silentLineFeed() { // Bookkeeping for a line feed that produces no output of its own.
+ ++line; // only the line counter changes; lastCR is left as it is
+ }
+
+ private void emitCarriageReturn(@NoLength char[] buf, int pos)
+ throws SAXException { // Handles a carriage return at buf[pos]: flushes, then reports a single line feed to the token handler.
+ silentCarriageReturn(); // ++line and lastCR = true, done before anything is reported to the handler
+ flushChars(buf, pos); // flushChars() is defined elsewhere; presumably delivers the pending characters that precede pos - confirm
+ tokenHandler.characters(Tokenizer.LF, 0, 1); // one character taken from the shared Tokenizer.LF array rather than from buf
+ cstart = Integer.MAX_VALUE; // NOTE(review): looks like a sentinel meaning "no pending character run" - confirm against flushChars()
+ }
+
+ private void emitReplacementCharacter(@NoLength char[] buf, int pos)
+ throws SAXException { // Flushes, then tells the token handler about a replacement character through its dedicated callback.
+ flushChars(buf, pos); // flushChars() is defined elsewhere; presumably delivers the pending characters that precede pos - confirm
+ tokenHandler.zeroOriginatingReplacementCharacter(); // callback rather than characters(): the handler decides what to do with it
+ cstart = pos + 1; // the next character run starts after the character at pos
+ }
+
+ private void emitPlaintextReplacementCharacter(@NoLength char[] buf, int pos)
+ throws SAXException { // Flushes, then emits the replacement character directly as character data.
+ flushChars(buf, pos); // flushChars() is defined elsewhere; presumably delivers the pending characters that precede pos - confirm
+ tokenHandler.characters(REPLACEMENT_CHARACTER, 0, 1); // unlike emitReplacementCharacter(), always emitted as one character of text
+ cstart = pos + 1; // the next character run starts after the character at pos
+ }
+
+ private void setAdditionalAndRememberAmpersandLocation(char add) { // Stores add in the additional field and snapshots the current location into ampersandLocation.
+ additional = add;
+ // [NOCPP[
+ ampersandLocation = new LocatorImpl(this); // sits between the NOCPP markers; presumably Java-only and dropped from a C++ translation - confirm
+ // ]NOCPP]
+ }
+
+ private void bogusDoctype() throws SAXException { // Reports a bogus DOCTYPE and turns force-quirks on; callers then switch to the BOGUS_DOCTYPE state themselves.
+ errBogusDoctype();
+ forceQuirks = true; // this is the "set the DOCTYPE token's force-quirks flag to on" step referred to at the call sites
+ }
+
+ private void bogusDoctypeWithoutQuirks() throws SAXException { // Reports a bogus DOCTYPE but leaves quirks mode off; used after a complete system identifier.
+ errBogusDoctype();
+ forceQuirks = false; // explicitly assigned false (not merely left untouched), so any earlier true value is cleared
+ }
+
+ private void handleNcrValue(int returnState) throws SAXException { // Turns the code point held in the value field into output characters; returnState is only forwarded to emitOrAppendOne()/emitOrAppendTwo().
+ /*
+ * If one or more characters match the range, then take them all and
+ * interpret the string of characters as a number (either hexadecimal or
+ * decimal as appropriate).
+ *
+ * By the time this method runs, that number is in the value field.
+ * Dispatch order below:
+ *   value <= 0xFFFF   - one UTF-16 code unit, with special cases for
+ *                       0x80-0x9F, form feed, zero and surrogates;
+ *   value <= 0x10FFFF - a surrogate pair;
+ *   anything larger   - out of range, replaced.
+ * The regions between the NOCPP marker comments must keep their exact
+ * position relative to the braces around them.
+ */
+ if (value <= 0xFFFF) {
+ if (value >= 0x80 && value <= 0x9f) {
+ /*
+ * If that number is one of the numbers in the first column of
+ * the following table, then this is a parse error.
+ */
+ errNcrInC1Range();
+ /*
+ * Find the row with that number in the first column, and return
+ * a character token for the Unicode character given in the
+ * second column of that row.
+ */
+ @NoLength char[] val = NamedCharacters.WINDOWS_1252[value - 0x80]; // table is indexed from 0 for code point 0x80
+ emitOrAppendOne(val, returnState);
+ // [NOCPP[
+ } else if (value == 0xC
+ && contentSpacePolicy != XmlViolationPolicy.ALLOW) { // form feed, and the policy does not simply allow it
+ if (contentSpacePolicy == XmlViolationPolicy.ALTER_INFOSET) {
+ emitOrAppendOne(Tokenizer.SPACE, returnState); // substitute a space for the form feed
+ } else if (contentSpacePolicy == XmlViolationPolicy.FATAL) {
+ fatal("A character reference expanded to a form feed which is not legal XML 1.0 white space.");
+ }
+ // ]NOCPP]
+ } else if (value == 0x0) {
+ errNcrZero();
+ emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState); // zero is never emitted; it is replaced
+ } else if ((value & 0xF800) == 0xD800) { // top five bits 11011: the surrogate range 0xD800-0xDFFF
+ errNcrSurrogate();
+ emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
+ } else {
+ /*
+ * Otherwise, return a character token for the Unicode character
+ * whose code point is that number.
+ */
+ char ch = (char) value; // lossless: this branch is only reached when value <= 0xFFFF
+ // [NOCPP[
+ if (value == 0x0D) {
+ errNcrCr();
+ } else if ((value <= 0x0008) || (value == 0x000B)
+ || (value >= 0x000E && value <= 0x001F)) { // low control range, leaving out tab, LF, FF and CR
+ ch = errNcrControlChar(ch); // the error method returns the character to use from here on
+ } else if (value >= 0xFDD0 && value <= 0xFDEF) {
+ errNcrUnassigned();
+ } else if ((value & 0xFFFE) == 0xFFFE) { // 0xFFFE or 0xFFFF
+ ch = errNcrNonCharacter(ch); // the error method returns the character to use from here on
+ } else if (value >= 0x007F && value <= 0x009F) { // only 0x7F can get here: 0x80-0x9F was handled by the first branch
+ errNcrControlChar();
+ } else {
+ maybeWarnPrivateUse(ch);
+ }
+ // ]NOCPP]
+ bmpChar[0] = ch; // bmpChar is a field used as the one-element output array
+ emitOrAppendOne(bmpChar, returnState);
+ }
+ } else if (value <= 0x10FFFF) {
+ // [NOCPP[
+ maybeWarnPrivateUseAstral();
+ if ((value & 0xFFFE) == 0xFFFE) { // low 16 bits are FFFE or FFFF
+ errAstralNonCharacter(value);
+ }
+ // ]NOCPP]
+ astralChar[0] = (char) (Tokenizer.LEAD_OFFSET + (value >> 10)); // lead surrogate; presumably LEAD_OFFSET folds in the 0x10000 bias - confirm at its declaration
+ astralChar[1] = (char) (0xDC00 + (value & 0x3FF)); // trail surrogate from the low ten bits
+ emitOrAppendTwo(astralChar, returnState);
+ } else {
+ errNcrOutOfRange(); // above 0x10FFFF
+ emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
+ }
+ }
+
+ /**
+ * Finishes tokenization at end of input. Dispatches on the saved tokenizer
+ * state (<code>stateSave</code>, with <code>returnStateSave</code> as the
+ * return state for character references), calls the error hook that
+ * belongs to that state, emits whatever the state still has pending
+ * (characters, a comment or a doctype) and finally calls
+ * <code>tokenHandler.eof()</code>. The character reference states set
+ * <code>state</code> to the return state and loop again, so the return
+ * state is then handled by this same switch.
+ *
+ * @throws SAXException
+ *             if the token handler or one of the called hooks throws
+ */
+ public void eof() throws SAXException {
+ int state = stateSave;
+ int returnState = returnStateSave;
+
+ eofloop: for (;;) {
+ switch (state) {
+ case SCRIPT_DATA_LESS_THAN_SIGN:
+ case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
+ /*
+ * Otherwise, emit a U+003C LESS-THAN SIGN character token
+ */
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
+ /*
+ * and reconsume the current input character in the data
+ * state.
+ */
+ break eofloop;
+ case TAG_OPEN:
+ /*
+ * The behavior of this state depends on the content model
+ * flag.
+ */
+ /*
+ * Anything else Parse error.
+ */
+ errEofAfterLt();
+ /*
+ * Emit a U+003C LESS-THAN SIGN character token
+ */
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
+ /*
+ * and reconsume the current input character in the data
+ * state.
+ */
+ break eofloop;
+ case RAWTEXT_RCDATA_LESS_THAN_SIGN:
+ /*
+ * Emit a U+003C LESS-THAN SIGN character token
+ */
+ tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
+ /*
+ * and reconsume the current input character in the RCDATA
+ * state.
+ */
+ break eofloop;
+ case NON_DATA_END_TAG_NAME:
+ /*
+ * Emit a U+003C LESS-THAN SIGN character token, a U+002F
+ * SOLIDUS character token,
+ */
+ tokenHandler.characters(Tokenizer.LT_SOLIDUS, 0, 2);
+ /*
+ * a character token for each of the characters in the
+ * temporary buffer (in the order they were added to the
+ * buffer),
+ */
+ emitStrBuf();
+ /*
+ * and reconsume the current input character in the RCDATA
+ * state.
+ */
+ break eofloop;
+ case CLOSE_TAG_OPEN:
+ /* EOF Parse error. */
+ errEofAfterLt();
+ /*
+ * Emit a U+003C LESS-THAN SIGN character token and a U+002F
+ * SOLIDUS character token.
+ */
+ tokenHandler.characters(Tokenizer.LT_SOLIDUS, 0, 2);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case TAG_NAME:
+ /*
+ * EOF Parse error.
+ */
+ errEofInTagName();
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case BEFORE_ATTRIBUTE_NAME:
+ case AFTER_ATTRIBUTE_VALUE_QUOTED:
+ case SELF_CLOSING_START_TAG:
+ /* EOF Parse error. */
+ errEofWithoutGt();
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case ATTRIBUTE_NAME:
+ /*
+ * EOF Parse error.
+ */
+ errEofInAttributeName();
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case AFTER_ATTRIBUTE_NAME:
+ case BEFORE_ATTRIBUTE_VALUE:
+ /* EOF Parse error. */
+ errEofWithoutGt();
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
+ case ATTRIBUTE_VALUE_SINGLE_QUOTED:
+ case ATTRIBUTE_VALUE_UNQUOTED:
+ /* EOF Parse error. */
+ errEofInAttributeValue();
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case BOGUS_COMMENT:
+ emitComment(0, 0);
+ break eofloop;
+ case BOGUS_COMMENT_HYPHEN:
+ // [NOCPP[
+ maybeAppendSpaceToBogusComment();
+ // ]NOCPP]
+ emitComment(0, 0);
+ break eofloop;
+ case MARKUP_DECLARATION_OPEN:
+ errBogusComment();
+ emitComment(0, 0);
+ break eofloop;
+ case MARKUP_DECLARATION_HYPHEN:
+ errBogusComment();
+ emitComment(0, 0);
+ break eofloop;
+ case MARKUP_DECLARATION_OCTYPE:
+ if (index < 6) {
+ errBogusComment();
+ emitComment(0, 0);
+ } else {
+ /* EOF Parse error. */
+ errEofInDoctype();
+ /*
+ * Create a new DOCTYPE token. Set its force-quirks flag
+ * to on.
+ */
+ doctypeName = "";
+ if (systemIdentifier != null) {
+ Portability.releaseString(systemIdentifier);
+ systemIdentifier = null;
+ }
+ if (publicIdentifier != null) {
+ Portability.releaseString(publicIdentifier);
+ publicIdentifier = null;
+ }
+ forceQuirks = true;
+ /*
+ * Emit the token.
+ */
+ emitDoctypeToken(0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ }
+ // Reached only from the index < 6 branch; the else branch has
+ // already left the loop.
+ break eofloop;
+ case COMMENT_START:
+ case COMMENT:
+ /*
+ * EOF Parse error.
+ */
+ errEofInComment();
+ /* Emit the comment token. */
+ emitComment(0, 0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case COMMENT_END:
+ errEofInComment();
+ /* Emit the comment token. */
+ emitComment(2, 0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case COMMENT_END_DASH:
+ case COMMENT_START_DASH:
+ errEofInComment();
+ /* Emit the comment token. */
+ emitComment(1, 0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case COMMENT_END_BANG:
+ errEofInComment();
+ /* Emit the comment token. */
+ emitComment(3, 0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case DOCTYPE:
+ case BEFORE_DOCTYPE_NAME:
+ errEofInDoctype();
+ /*
+ * Create a new DOCTYPE token. Set its force-quirks flag to
+ * on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit the token.
+ */
+ emitDoctypeToken(0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case DOCTYPE_NAME:
+ errEofInDoctype();
+ strBufToDoctypeName();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ emitDoctypeToken(0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case DOCTYPE_UBLIC:
+ case DOCTYPE_YSTEM:
+ case AFTER_DOCTYPE_NAME:
+ case AFTER_DOCTYPE_PUBLIC_KEYWORD:
+ case AFTER_DOCTYPE_SYSTEM_KEYWORD:
+ case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
+ errEofInDoctype();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ emitDoctypeToken(0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
+ case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
+ /* EOF Parse error. */
+ errEofInPublicId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ publicIdentifier = strBufToString();
+ emitDoctypeToken(0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
+ case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
+ case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
+ errEofInDoctype();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ emitDoctypeToken(0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
+ case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
+ /* EOF Parse error. */
+ errEofInSystemId();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ systemIdentifier = strBufToString();
+ emitDoctypeToken(0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
+ errEofInDoctype();
+ /*
+ * Set the DOCTYPE token's force-quirks flag to on.
+ */
+ forceQuirks = true;
+ /*
+ * Emit that DOCTYPE token.
+ */
+ emitDoctypeToken(0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case BOGUS_DOCTYPE:
+ /*
+ * Emit that DOCTYPE token.
+ */
+ emitDoctypeToken(0);
+ /*
+ * Reconsume the EOF character in the data state.
+ */
+ break eofloop;
+ case CONSUME_CHARACTER_REFERENCE:
+ /*
+ * Unlike the definition in the spec, this state does not
+ * return a value and never requires the caller to
+ * backtrack. This state takes care of emitting characters
+ * or appending to the current attribute value. It also
+ * takes care of that in the case when consuming the entity
+ * fails.
+ */
+ /*
+ * This section defines how to consume an entity. This
+ * definition is used when parsing entities in text and in
+ * attributes.
+ *
+ * The behavior depends on the identity of the next
+ * character (the one immediately after the U+0026 AMPERSAND
+ * character):
+ */
+
+ emitOrAppendCharRefBuf(returnState);
+ state = returnState;
+ // Loop again so that the return state is dispatched by this switch.
+ continue;
+ case CHARACTER_REFERENCE_HILO_LOOKUP:
+ errNoNamedCharacterMatch();
+ emitOrAppendCharRefBuf(returnState);
+ state = returnState;
+ continue;
+ case CHARACTER_REFERENCE_TAIL:
+ outer: for (;;) {
+ // No input character is read here; c stays U+0000 for the
+ // comparisons against the candidate names below.
+ char c = '\u0000';
+ entCol++;
+ /*
+ * Consume the maximum number of characters possible,
+ * with the consumed characters matching one of the
+ * identifiers in the first column of the named
+ * character references table (in a case-sensitive
+ * manner).
+ */
+ hiloop: for (;;) {
+ if (hi == -1) {
+ break hiloop;
+ }
+ if (entCol == NamedCharacters.NAMES[hi].length()) {
+ break hiloop;
+ }
+ if (entCol > NamedCharacters.NAMES[hi].length()) {
+ break outer;
+ } else if (c < NamedCharacters.NAMES[hi].charAt(entCol)) {
+ hi--;
+ } else {
+ break hiloop;
+ }
+ }
+
+ loloop: for (;;) {
+ if (hi < lo) {
+ break outer;
+ }
+ if (entCol == NamedCharacters.NAMES[lo].length()) {
+ candidate = lo;
+ charRefBufMark = charRefBufLen;
+ lo++;
+ } else if (entCol > NamedCharacters.NAMES[lo].length()) {
+ break outer;
+ } else if (c > NamedCharacters.NAMES[lo].charAt(entCol)) {
+ lo++;
+ } else {
+ break loloop;
+ }
+ }
+ if (hi < lo) {
+ break outer;
+ }
+ continue;
+ }
+
+ if (candidate == -1) {
+ /*
+ * If no match can be made, then this is a parse error.
+ */
+ errNoNamedCharacterMatch();
+ emitOrAppendCharRefBuf(returnState);
+ state = returnState;
+ continue eofloop;
+ } else {
+ @Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate];
+ if (candidateName.length() == 0
+ || candidateName.charAt(candidateName.length() - 1) != ';') {
+ /*
+ * If the last character matched is not a U+003B
+ * SEMICOLON (;), there is a parse error.
+ */
+ if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
+ /*
+ * If the entity is being consumed as part of an
+ * attribute, and the last character matched is
+ * not a U+003B SEMICOLON (;),
+ */
+ char ch;
+ if (charRefBufMark == charRefBufLen) {
+ ch = '\u0000';
+ } else {
+ ch = charRefBuf[charRefBufMark];
+ }
+ if ((ch >= '0' && ch <= '9')
+ || (ch >= 'A' && ch <= 'Z')
+ || (ch >= 'a' && ch <= 'z')) {
+ /*
+ * and the next character is in the range
+ * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
+ * U+0041 LATIN CAPITAL LETTER A to U+005A
+ * LATIN CAPITAL LETTER Z, or U+0061 LATIN
+ * SMALL LETTER A to U+007A LATIN SMALL
+ * LETTER Z, then, for historical reasons,
+ * all the characters that were matched
+ * after the U+0026 AMPERSAND (&) must be
+ * unconsumed, and nothing is returned.
+ */
+ errNoNamedCharacterMatch();
+ appendCharRefBufToStrBuf();
+ state = returnState;
+ continue eofloop;
+ }
+ }
+ if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
+ errUnescapedAmpersandInterpretedAsCharacterReference();
+ } else {
+ errNotSemicolonTerminated();
+ }
+ }
+
+ /*
+ * Otherwise, return a character token for the character
+ * corresponding to the entity name (as given by the
+ * second column of the named character references
+ * table).
+ */
+ @Const @NoLength char[] val = NamedCharacters.VALUES[candidate];
+ if (
+ // [NOCPP[
+ val.length == 1
+ // ]NOCPP]
+ // CPPONLY: val[1] == 0
+ ) {
+ emitOrAppendOne(val, returnState);
+ } else {
+ emitOrAppendTwo(val, returnState);
+ }
+ // this is so complicated!
+ // Characters buffered after the matched name (from charRefBufMark
+ // up to charRefBufLen) are appended or emitted as plain text.
+ if (charRefBufMark < charRefBufLen) {
+ if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
+ appendStrBuf(charRefBuf, charRefBufMark,
+ charRefBufLen - charRefBufMark);
+ } else {
+ tokenHandler.characters(charRefBuf, charRefBufMark,
+ charRefBufLen - charRefBufMark);
+ }
+ }
+ charRefBufLen = 0;
+ state = returnState;
+ continue eofloop;
+ /*
+ * If the markup contains I'm &notit; I tell you, the
+ * entity is parsed as "not", as in, I'm ¬it; I tell
+ * you. But if the markup was I'm &notin; I tell you,
+ * the entity would be parsed as "notin;", resulting in
+ * I'm ∉ I tell you.
+ */
+ }
+ case CONSUME_NCR:
+ case DECIMAL_NRC_LOOP:
+ case HEX_NCR_LOOP:
+ /*
+ * If no characters match the range, then don't consume any
+ * characters (and unconsume the U+0023 NUMBER SIGN
+ * character and, if appropriate, the X character). This is
+ * a parse error; nothing is returned.
+ *
+ * Otherwise, if the next character is a U+003B SEMICOLON,
+ * consume that too. If it isn't, there is a parse error.
+ */
+ if (!seenDigits) {
+ errNoDigitsInNCR();
+ emitOrAppendCharRefBuf(returnState);
+ state = returnState;
+ continue;
+ } else {
+ errCharRefLacksSemicolon();
+ }
+ // WARNING previous state sets reconsume
+ handleNcrValue(returnState);
+ state = returnState;
+ continue;
+ case CDATA_RSQB:
+ tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1);
+ break eofloop;
+ case CDATA_RSQB_RSQB:
+ tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2);
+ break eofloop;
+ case DATA:
+ default:
+ break eofloop;
+ }
+ }
+ // case DATA:
+ /*
+ * EOF Emit an end-of-file token.
+ */
+ tokenHandler.eof();
+ return;
+ }
+
+ /**
+ * Reports the current doctype to the token handler and then drops the
+ * doctype fields held by the tokenizer.
+ *
+ * @param pos
+ *            buffer position; <code>cstart</code> is set to
+ *            <code>pos + 1</code>
+ * @throws SAXException
+ *             if <code>tokenHandler.doctype</code> throws
+ */
+ private void emitDoctypeToken(int pos) throws SAXException {
+ cstart = pos + 1;
+ tokenHandler.doctype(doctypeName, publicIdentifier, systemIdentifier,
+ forceQuirks);
+ // It is OK and sufficient to release these here, since
+ // there's no way out of the doctype states than through paths
+ // that call this method.
+ doctypeName = null;
+ Portability.releaseString(publicIdentifier);
+ publicIdentifier = null;
+ Portability.releaseString(systemIdentifier);
+ systemIdentifier = null;
+ }
+
+ /**
+ * Returns the character at index <code>pos</code> of <code>buf</code>.
+ * This implementation performs no additional checking on the character.
+ *
+ * @param buf
+ *            the buffer to read from
+ * @param pos
+ *            the index to read
+ * @return <code>buf[pos]</code>
+ * @throws SAXException
+ *             never thrown by this implementation
+ */
+ @Inline protected char checkChar(@NoLength char[] buf, int pos)
+ throws SAXException {
+ return buf[pos];
+ }
+
+ /**
+ * Forwards an internal encoding declaration to the
+ * <code>encodingDeclarationHandler</code>, if one has been set.
+ *
+ * @param internalCharset
+ *            the declared charset, passed through unchanged
+ * @return the handler's return value, or <code>false</code> when no
+ *         handler is set
+ * @throws SAXException
+ *             if the handler throws
+ */
+ public boolean internalEncodingDeclaration(String internalCharset)
+ throws SAXException {
+ if (encodingDeclarationHandler != null) {
+ return encodingDeclarationHandler.internalEncodingDeclaration(internalCharset);
+ }
+ return false;
+ }
+
+ /**
+ * Delivers the first two chars of <code>val</code>. When
+ * <code>returnState</code> has any bit of
+ * <code>DATA_AND_RCDATA_MASK</code> set, they are appended to the string
+ * buffer; otherwise they are passed to
+ * <code>tokenHandler.characters</code>.
+ *
+ * @param val
+ *            array holding at least two chars
+ * @param returnState
+ *            the state the character reference returns to
+ * @throws SAXException
+ *             if the token handler throws
+ */
+ private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState)
+ throws SAXException {
+ if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
+ appendStrBuf(val[0]);
+ appendStrBuf(val[1]);
+ } else {
+ tokenHandler.characters(val, 0, 2);
+ }
+ }
+
+ /**
+ * Delivers the first char of <code>val</code>. When
+ * <code>returnState</code> has any bit of
+ * <code>DATA_AND_RCDATA_MASK</code> set, it is appended to the string
+ * buffer; otherwise it is passed to <code>tokenHandler.characters</code>.
+ *
+ * @param val
+ *            array holding at least one char
+ * @param returnState
+ *            the state the character reference returns to
+ * @throws SAXException
+ *             if the token handler throws
+ */
+ private void emitOrAppendOne(@Const @NoLength char[] val, int returnState)
+ throws SAXException {
+ if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
+ appendStrBuf(val[0]);
+ } else {
+ tokenHandler.characters(val, 0, 1);
+ }
+ }
+
+ /**
+ * Ends tokenization: drops the string buffer and doctype name, releases
+ * the doctype identifiers, tag name and attribute name that are still
+ * held, calls <code>tokenHandler.endTokenization()</code> and finally
+ * lets go of the attribute holder.
+ *
+ * @throws SAXException
+ *             if <code>tokenHandler.endTokenization()</code> throws
+ */
+ public void end() throws SAXException {
+ strBuf = null;
+ doctypeName = null;
+ if (systemIdentifier != null) {
+ Portability.releaseString(systemIdentifier);
+ systemIdentifier = null;
+ }
+ if (publicIdentifier != null) {
+ Portability.releaseString(publicIdentifier);
+ publicIdentifier = null;
+ }
+ if (tagName != null) {
+ tagName.release();
+ tagName = null;
+ }
+ if (attributeName != null) {
+ attributeName.release();
+ attributeName = null;
+ }
+ tokenHandler.endTokenization();
+ if (attributes != null) {
+ // The Java build drops the reference; the CPPONLY line below clears
+ // the object instead.
+ // [NOCPP[
+ attributes = null;
+ // ]NOCPP]
+ // CPPONLY: attributes.clear(mappingLangToXmlLang);
+ }
+ }
+
+ /**
+ * Records a suspension request by setting <code>shouldSuspend</code>.
+ * Nothing else happens in this method.
+ */
+ public void requestSuspension() {
+ shouldSuspend = true;
+ }
+
+ // [NOCPP[
+
+ /**
+ * Sets the <code>confident</code> flag to <code>true</code>.
+ */
+ public void becomeConfident() {
+ confident = true;
+ }
+
+ /**
+ * Returns the nextCharOnNewLine. This implementation does not track that
+ * information and always returns <code>false</code>.
+ *
+ * @return the nextCharOnNewLine; always <code>false</code> here
+ */
+ public boolean isNextCharOnNewLine() {
+ return false;
+ }
+
+ /**
+ * Returns the current value of the <code>lastCR</code> flag.
+ *
+ * @return <code>lastCR</code>
+ */
+ public boolean isPrevCR() {
+ return lastCR;
+ }
+
+ /**
+ * Returns the line. This implementation does not report a line number and
+ * always returns <code>-1</code>.
+ *
+ * @return the line; always <code>-1</code> here
+ */
+ public int getLine() {
+ return -1;
+ }
+
+ /**
+ * Returns the col. This implementation does not report a column number
+ * and always returns <code>-1</code>.
+ *
+ * @return the col; always <code>-1</code> here
+ */
+ public int getCol() {
+ return -1;
+ }
+
+ // ]NOCPP]
+
+ /**
+ * Tells whether the saved tokenizer state is <code>DATA</code>.
+ *
+ * @return <code>true</code> if <code>stateSave == DATA</code>
+ */
+ public boolean isInDataState() {
+ return (stateSave == DATA);
+ }
+
+ /**
+ * Puts the tokenizer back into the <code>DATA</code> state. Clears the
+ * string and character reference buffers, resets the character reference
+ * matching fields and the per-token flags, reinitializes the doctype
+ * fields and releases the current tag and attribute names. The attribute
+ * holder is deleted only when <code>newAttributesEachTime</code> is set.
+ */
+ public void resetToDataState() {
+ clearStrBufAfterUse();
+ charRefBufLen = 0;
+ stateSave = Tokenizer.DATA;
+ // line = 1; XXX line numbers
+ lastCR = false;
+ index = 0;
+ forceQuirks = false;
+ additional = '\u0000';
+ entCol = -1;
+ firstCharKey = -1;
+ lo = 0;
+ hi = 0; // will always be overwritten before use anyway
+ candidate = -1;
+ charRefBufMark = 0;
+ value = 0;
+ seenDigits = false;
+ endTag = false;
+ shouldSuspend = false;
+ initDoctypeFields();
+ if (tagName != null) {
+ tagName.release();
+ tagName = null;
+ }
+ if (attributeName != null) {
+ attributeName.release();
+ attributeName = null;
+ }
+ if (newAttributesEachTime) {
+ if (attributes != null) {
+ Portability.delete(attributes);
+ attributes = null;
+ }
+ }
+ }
+
+ /**
+ * Copies the tokenizer state of <code>other</code> into this tokenizer:
+ * the contents of both buffers, the state machine fields, and copies of
+ * the doctype strings, tag name, attribute name and attribute holder.
+ * Objects previously held by this tokenizer are released or deleted
+ * first. <code>shouldSuspend</code> is not copied; it is reset to
+ * <code>false</code>.
+ *
+ * @param other
+ *            the tokenizer to copy from
+ * @throws SAXException
+ *             declared by the signature
+ */
+ public void loadState(Tokenizer other) throws SAXException {
+ strBufLen = other.strBufLen;
+ // NOTE(review): strBuf is dereferenced here, but end() and
+ // initializeWithoutStarting() set it to null; presumably callers only
+ // load state after the buffer has been allocated - confirm.
+ if (strBufLen > strBuf.length) {
+ strBuf = new char[strBufLen];
+ }
+ System.arraycopy(other.strBuf, 0, strBuf, 0, strBufLen);
+
+ charRefBufLen = other.charRefBufLen;
+ // No growth check here: assumes this charRefBuf is at least as large
+ // as other's - TODO confirm.
+ System.arraycopy(other.charRefBuf, 0, charRefBuf, 0, charRefBufLen);
+
+ stateSave = other.stateSave;
+ returnStateSave = other.returnStateSave;
+ endTagExpectation = other.endTagExpectation;
+ endTagExpectationAsArray = other.endTagExpectationAsArray;
+ // line = 1; XXX line numbers
+ lastCR = other.lastCR;
+ index = other.index;
+ forceQuirks = other.forceQuirks;
+ additional = other.additional;
+ entCol = other.entCol;
+ firstCharKey = other.firstCharKey;
+ lo = other.lo;
+ hi = other.hi;
+ candidate = other.candidate;
+ charRefBufMark = other.charRefBufMark;
+ value = other.value;
+ seenDigits = other.seenDigits;
+ endTag = other.endTag;
+ shouldSuspend = false;
+
+ if (other.doctypeName == null) {
+ doctypeName = null;
+ } else {
+ doctypeName = Portability.newLocalFromLocal(other.doctypeName,
+ interner);
+ }
+
+ Portability.releaseString(systemIdentifier);
+ if (other.systemIdentifier == null) {
+ systemIdentifier = null;
+ } else {
+ systemIdentifier = Portability.newStringFromString(other.systemIdentifier);
+ }
+
+ Portability.releaseString(publicIdentifier);
+ if (other.publicIdentifier == null) {
+ publicIdentifier = null;
+ } else {
+ publicIdentifier = Portability.newStringFromString(other.publicIdentifier);
+ }
+
+ if (tagName != null) {
+ tagName.release();
+ }
+ if (other.tagName == null) {
+ tagName = null;
+ } else {
+ tagName = other.tagName.cloneElementName(interner);
+ }
+
+ if (attributeName != null) {
+ attributeName.release();
+ }
+ if (other.attributeName == null) {
+ attributeName = null;
+ } else {
+ attributeName = other.attributeName.cloneAttributeName(interner);
+ }
+
+ Portability.delete(attributes);
+ if (other.attributes == null) {
+ attributes = null;
+ } else {
+ attributes = other.attributes.cloneAttributes(interner);
+ }
+ }
+
+ /**
+ * Initializes the tokenizer fields for a new run and then calls
+ * <code>resetToDataState()</code>. The Java-only section also clears
+ * <code>html4</code> and <code>metaBoundaryPassed</code>, caches
+ * <code>tokenHandler.wantsComments()</code> and, unless
+ * <code>newAttributesEachTime</code> is set, creates the attribute holder
+ * up front.
+ *
+ * @throws SAXException
+ *             if <code>tokenHandler.wantsComments()</code> throws
+ */
+ public void initializeWithoutStarting() throws SAXException {
+ confident = false;
+ strBuf = null;
+ line = 1;
+ // CPPONLY: attributeLine = 1;
+ // [NOCPP[
+ html4 = false;
+ metaBoundaryPassed = false;
+ wantsComments = tokenHandler.wantsComments();
+ if (!newAttributesEachTime) {
+ attributes = new HtmlAttributes(mappingLangToXmlLang);
+ }
+ // ]NOCPP]
+ resetToDataState();
+ }
+
+ /*
+ * The methods from here through noteUnquotedAttributeValue() are
+ * overridable hooks with intentionally empty bodies: in this class
+ * reporting an error (err*), a conditional warning or error (maybe*) or a
+ * note (note*) does nothing. The two hooks that return a char,
+ * errNcrNonCharacter(char) and errNcrControlChar(char), return their
+ * argument unchanged.
+ * NOTE(review): presumably an error-reporting subclass overrides these -
+ * confirm.
+ */
+ protected void errGarbageAfterLtSlash() throws SAXException {
+ }
+
+ protected void errLtSlashGt() throws SAXException {
+ }
+
+ protected void errWarnLtSlashInRcdata() throws SAXException {
+ }
+
+ protected void errHtml4LtSlashInRcdata(char folded) throws SAXException {
+ }
+
+ protected void errCharRefLacksSemicolon() throws SAXException {
+ }
+
+ protected void errNoDigitsInNCR() throws SAXException {
+ }
+
+ protected void errGtInSystemId() throws SAXException {
+ }
+
+ protected void errGtInPublicId() throws SAXException {
+ }
+
+ protected void errNamelessDoctype() throws SAXException {
+ }
+
+ protected void errConsecutiveHyphens() throws SAXException {
+ }
+
+ protected void errPrematureEndOfComment() throws SAXException {
+ }
+
+ protected void errBogusComment() throws SAXException {
+ }
+
+ protected void errUnquotedAttributeValOrNull(char c) throws SAXException {
+ }
+
+ protected void errSlashNotFollowedByGt() throws SAXException {
+ }
+
+ protected void errHtml4XmlVoidSyntax() throws SAXException {
+ }
+
+ protected void errNoSpaceBetweenAttributes() throws SAXException {
+ }
+
+ protected void errHtml4NonNameInUnquotedAttribute(char c)
+ throws SAXException {
+ }
+
+ protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c)
+ throws SAXException {
+ }
+
+ protected void errAttributeValueMissing() throws SAXException {
+ }
+
+ protected void errBadCharBeforeAttributeNameOrNull(char c)
+ throws SAXException {
+ }
+
+ protected void errEqualsSignBeforeAttributeName() throws SAXException {
+ }
+
+ protected void errBadCharAfterLt(char c) throws SAXException {
+ }
+
+ protected void errLtGt() throws SAXException {
+ }
+
+ protected void errProcessingInstruction() throws SAXException {
+ }
+
+ protected void errUnescapedAmpersandInterpretedAsCharacterReference()
+ throws SAXException {
+ }
+
+ protected void errNotSemicolonTerminated() throws SAXException {
+ }
+
+ protected void errNoNamedCharacterMatch() throws SAXException {
+ }
+
+ protected void errQuoteBeforeAttributeName(char c) throws SAXException {
+ }
+
+ protected void errQuoteOrLtInAttributeNameOrNull(char c)
+ throws SAXException {
+ }
+
+ protected void errExpectedPublicId() throws SAXException {
+ }
+
+ protected void errBogusDoctype() throws SAXException {
+ }
+
+ protected void maybeWarnPrivateUseAstral() throws SAXException {
+ }
+
+ protected void maybeWarnPrivateUse(char ch) throws SAXException {
+ }
+
+ protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs)
+ throws SAXException {
+ }
+
+ protected void maybeErrSlashInEndTag(boolean selfClosing)
+ throws SAXException {
+ }
+
+ // Returns ch unchanged; the caller stores the return value as the
+ // character to emit.
+ protected char errNcrNonCharacter(char ch) throws SAXException {
+ return ch;
+ }
+
+ protected void errAstralNonCharacter(int ch) throws SAXException {
+ }
+
+ protected void errNcrSurrogate() throws SAXException {
+ }
+
+ // Returns ch unchanged; see also the no-argument overload further down.
+ protected char errNcrControlChar(char ch) throws SAXException {
+ return ch;
+ }
+
+ protected void errNcrCr() throws SAXException {
+ }
+
+ protected void errNcrInC1Range() throws SAXException {
+ }
+
+ protected void errEofInPublicId() throws SAXException {
+ }
+
+ protected void errEofInComment() throws SAXException {
+ }
+
+ protected void errEofInDoctype() throws SAXException {
+ }
+
+ protected void errEofInAttributeValue() throws SAXException {
+ }
+
+ protected void errEofInAttributeName() throws SAXException {
+ }
+
+ protected void errEofWithoutGt() throws SAXException {
+ }
+
+ protected void errEofInTagName() throws SAXException {
+ }
+
+ protected void errEofInEndTag() throws SAXException {
+ }
+
+ protected void errEofAfterLt() throws SAXException {
+ }
+
+ protected void errNcrOutOfRange() throws SAXException {
+ }
+
+ protected void errNcrUnassigned() throws SAXException {
+ }
+
+ protected void errDuplicateAttribute() throws SAXException {
+ }
+
+ protected void errEofInSystemId() throws SAXException {
+ }
+
+ protected void errExpectedSystemId() throws SAXException {
+ }
+
+ protected void errMissingSpaceBeforeDoctypeName() throws SAXException {
+ }
+
+ protected void errHyphenHyphenBang() throws SAXException {
+ }
+
+ // No-argument overload of errNcrControlChar(char).
+ protected void errNcrControlChar() throws SAXException {
+ }
+
+ protected void errNcrZero() throws SAXException {
+ }
+
+ protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote()
+ throws SAXException {
+ }
+
+ protected void errNoSpaceBetweenPublicAndSystemIds() throws SAXException {
+ }
+
+ protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote()
+ throws SAXException {
+ }
+
+ protected void noteAttributeWithoutValue() throws SAXException {
+ }
+
+ protected void noteUnquotedAttributeValue() throws SAXException {
+ }
+
+ /**
+ * Sets the encodingDeclarationHandler, the object that
+ * <code>internalEncodingDeclaration(String)</code> delegates to. Passing
+ * <code>null</code> makes that method return <code>false</code>.
+ *
+ * @param encodingDeclarationHandler
+ * the encodingDeclarationHandler to set
+ */
+ public void setEncodingDeclarationHandler(
+ EncodingDeclarationHandler encodingDeclarationHandler) {
+ this.encodingDeclarationHandler = encodingDeclarationHandler;
+ }
+
+ /**
+ * Deletes the attribute holder through <code>Portability.delete</code>
+ * and clears the field.
+ */
+ void destructor() {
+ // The translator will write refcount tracing stuff here
+ Portability.delete(attributes);
+ attributes = null;
+ }
+
+ // [NOCPP[
+
+ /**
+ * Sets an offset to be added to the position reported to
+ * <code>TransitionHandler</code>. The body of this implementation is
+ * empty, so the argument is ignored here.
+ *
+ * @param offset the offset
+ */
+ public void setTransitionBaseOffset(int offset) {
+
+ }
+
+ // ]NOCPP]
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java
new file mode 100644
index 000000000..5e83d1847
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java
@@ -0,0 +1,6558 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007-2015 Mozilla Foundation
+ * Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla
+ * Foundation, and Opera Software ASA.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * The comments following this one that use the same comment syntax as this
+ * comment are quotes from the WHATWG HTML 5 spec as of 27 June 2007
+ * amended as of June 28 2007.
+ * That document came with this statement:
+ * "© Copyright 2004-2007 Apple Computer, Inc., Mozilla Foundation, and
+ * Opera Software ASA. You are granted a license to use, reproduce and
+ * create derivative works of this document."
+ */
+
+package nu.validator.htmlparser.impl;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+import nu.validator.htmlparser.annotation.Auto;
+import nu.validator.htmlparser.annotation.Const;
+import nu.validator.htmlparser.annotation.IdType;
+import nu.validator.htmlparser.annotation.Inline;
+import nu.validator.htmlparser.annotation.Literal;
+import nu.validator.htmlparser.annotation.Local;
+import nu.validator.htmlparser.annotation.NoLength;
+import nu.validator.htmlparser.annotation.NsUri;
+import nu.validator.htmlparser.common.DoctypeExpectation;
+import nu.validator.htmlparser.common.DocumentMode;
+import nu.validator.htmlparser.common.DocumentModeHandler;
+import nu.validator.htmlparser.common.Interner;
+import nu.validator.htmlparser.common.TokenHandler;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+
+public abstract class TreeBuilder<T> implements TokenHandler,
+ TreeBuilderState<T> {
+
+ /**
+ * Array version of U+FFFD.
+ */
+ private static final @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
+
+ // Start dispatch groups
+
+ final static int OTHER = 0;
+
+ final static int A = 1;
+
+ final static int BASE = 2;
+
+ final static int BODY = 3;
+
+ final static int BR = 4;
+
+ final static int BUTTON = 5;
+
+ final static int CAPTION = 6;
+
+ final static int COL = 7;
+
+ final static int COLGROUP = 8;
+
+ final static int FORM = 9;
+
+ final static int FRAME = 10;
+
+ final static int FRAMESET = 11;
+
+ final static int IMAGE = 12;
+
+ final static int INPUT = 13;
+
+ final static int ISINDEX = 14;
+
+ final static int LI = 15;
+
+ final static int LINK_OR_BASEFONT_OR_BGSOUND = 16;
+
+ final static int MATH = 17;
+
+ final static int META = 18;
+
+ final static int SVG = 19;
+
+ final static int HEAD = 20;
+
+ final static int HR = 22;
+
+ final static int HTML = 23;
+
+ final static int NOBR = 24;
+
+ final static int NOFRAMES = 25;
+
+ final static int NOSCRIPT = 26;
+
+ final static int OPTGROUP = 27;
+
+ final static int OPTION = 28;
+
+ final static int P = 29;
+
+ final static int PLAINTEXT = 30;
+
+ final static int SCRIPT = 31;
+
+ final static int SELECT = 32;
+
+ final static int STYLE = 33;
+
+ final static int TABLE = 34;
+
+ final static int TEXTAREA = 35;
+
+ final static int TITLE = 36;
+
+ final static int TR = 37;
+
+ final static int XMP = 38;
+
+ final static int TBODY_OR_THEAD_OR_TFOOT = 39;
+
+ final static int TD_OR_TH = 40;
+
+ final static int DD_OR_DT = 41;
+
+ final static int H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 = 42;
+
+ final static int MARQUEE_OR_APPLET = 43;
+
+ final static int PRE_OR_LISTING = 44;
+
+ final static int B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U = 45;
+
+ final static int UL_OR_OL_OR_DL = 46;
+
+ final static int IFRAME = 47;
+
+ final static int EMBED = 48;
+
+ final static int AREA_OR_WBR = 49;
+
+ final static int DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU = 50;
+
+ final static int ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY = 51;
+
+ final static int RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR = 52;
+
+ final static int RB_OR_RTC = 53;
+
+ final static int PARAM_OR_SOURCE_OR_TRACK = 55;
+
+ final static int MGLYPH_OR_MALIGNMARK = 56;
+
+ final static int MI_MO_MN_MS_MTEXT = 57;
+
+ final static int ANNOTATION_XML = 58;
+
+ final static int FOREIGNOBJECT_OR_DESC = 59;
+
+ final static int NOEMBED = 60;
+
+ final static int FIELDSET = 61;
+
+ final static int OUTPUT = 62;
+
+ final static int OBJECT = 63;
+
+ final static int FONT = 64;
+
+ final static int KEYGEN = 65;
+
+ final static int MENUITEM = 66;
+
+ final static int TEMPLATE = 67;
+
+ final static int IMG = 68;
+
+ final static int RT_OR_RP = 69;
+
+ // start insertion modes
+
+ private static final int IN_ROW = 0;
+
+ private static final int IN_TABLE_BODY = 1;
+
+ private static final int IN_TABLE = 2;
+
+ private static final int IN_CAPTION = 3;
+
+ private static final int IN_CELL = 4;
+
+ private static final int FRAMESET_OK = 5;
+
+ private static final int IN_BODY = 6;
+
+ private static final int IN_HEAD = 7;
+
+ private static final int IN_HEAD_NOSCRIPT = 8;
+
+ // no fall-through
+
+ private static final int IN_COLUMN_GROUP = 9;
+
+ // no fall-through
+
+ private static final int IN_SELECT_IN_TABLE = 10;
+
+ private static final int IN_SELECT = 11;
+
+ // no fall-through
+
+ private static final int AFTER_BODY = 12;
+
+ // no fall-through
+
+ private static final int IN_FRAMESET = 13;
+
+ private static final int AFTER_FRAMESET = 14;
+
+ // no fall-through
+
+ private static final int INITIAL = 15;
+
+ // could add fall-through
+
+ private static final int BEFORE_HTML = 16;
+
+ // could add fall-through
+
+ private static final int BEFORE_HEAD = 17;
+
+ // no fall-through
+
+ private static final int AFTER_HEAD = 18;
+
+ // no fall-through
+
+ private static final int AFTER_AFTER_BODY = 19;
+
+ // no fall-through
+
+ private static final int AFTER_AFTER_FRAMESET = 20;
+
+ // no fall-through
+
+ private static final int TEXT = 21;
+
+ private static final int IN_TEMPLATE = 22;
+
+ // start charset states
+ //
+ // Pseudo-enum of states for a small scanner. The code that consumes these
+ // values is not part of this section.
+ // NOTE(review): judging by the names, the states track matching the
+ // letters of "charset", then "=", then a quoted or unquoted value --
+ // confirm against the method that uses them.
+
+ private static final int CHARSET_INITIAL = 0;
+
+ private static final int CHARSET_C = 1;
+
+ private static final int CHARSET_H = 2;
+
+ private static final int CHARSET_A = 3;
+
+ private static final int CHARSET_R = 4;
+
+ private static final int CHARSET_S = 5;
+
+ private static final int CHARSET_E = 6;
+
+ private static final int CHARSET_T = 7;
+
+ private static final int CHARSET_EQUALS = 8;
+
+ private static final int CHARSET_SINGLE_QUOTED = 9;
+
+ private static final int CHARSET_DOUBLE_QUOTED = 10;
+
+ private static final int CHARSET_UNQUOTED = 11;
+
+ // end pseudo enums
+
+ // [NOCPP[
+
+ /**
+ * Public identifiers that isHtml4Doctype() accepts as HTML 4 doctypes.
+ *
+ * The entries MUST stay sorted in natural String order, because
+ * isHtml4Doctype() looks them up with Arrays.binarySearch(). Note that
+ * a space sorts before a slash, which is why the "4.0 Frameset" and
+ * "4.0 Transitional" entries precede "4.0//EN".
+ */
+ private final static String[] HTML4_PUBLIC_IDS = {
+ "-//W3C//DTD HTML 4.0 Frameset//EN",
+ "-//W3C//DTD HTML 4.0 Transitional//EN",
+ "-//W3C//DTD HTML 4.0//EN", "-//W3C//DTD HTML 4.01 Frameset//EN",
+ "-//W3C//DTD HTML 4.01 Transitional//EN",
+ "-//W3C//DTD HTML 4.01//EN" };
+
+ // ]NOCPP]
+
+ /**
+ * Lower-case doctype public identifier strings associated with quirks
+ * mode. Every entry is written in lower case and ends with "//".
+ *
+ * NOTE(review): the consumer is not part of this section; presumably
+ * isQuirky() matches these case-insensitively as prefixes of the public
+ * identifier -- confirm before editing or reordering the list.
+ */
+ @Literal private final static String[] QUIRKY_PUBLIC_IDS = {
+ "+//silmaril//dtd html pro v0r11 19970101//",
+ "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+ "-//as//dtd html 3.0 aswedit + extensions//",
+ "-//ietf//dtd html 2.0 level 1//",
+ "-//ietf//dtd html 2.0 level 2//",
+ "-//ietf//dtd html 2.0 strict level 1//",
+ "-//ietf//dtd html 2.0 strict level 2//",
+ "-//ietf//dtd html 2.0 strict//",
+ "-//ietf//dtd html 2.0//",
+ "-//ietf//dtd html 2.1e//",
+ "-//ietf//dtd html 3.0//",
+ "-//ietf//dtd html 3.2 final//",
+ "-//ietf//dtd html 3.2//",
+ "-//ietf//dtd html 3//",
+ "-//ietf//dtd html level 0//",
+ "-//ietf//dtd html level 1//",
+ "-//ietf//dtd html level 2//",
+ "-//ietf//dtd html level 3//",
+ "-//ietf//dtd html strict level 0//",
+ "-//ietf//dtd html strict level 1//",
+ "-//ietf//dtd html strict level 2//",
+ "-//ietf//dtd html strict level 3//",
+ "-//ietf//dtd html strict//",
+ "-//ietf//dtd html//",
+ "-//metrius//dtd metrius presentational//",
+ "-//microsoft//dtd internet explorer 2.0 html strict//",
+ "-//microsoft//dtd internet explorer 2.0 html//",
+ "-//microsoft//dtd internet explorer 2.0 tables//",
+ "-//microsoft//dtd internet explorer 3.0 html strict//",
+ "-//microsoft//dtd internet explorer 3.0 html//",
+ "-//microsoft//dtd internet explorer 3.0 tables//",
+ "-//netscape comm. corp.//dtd html//",
+ "-//netscape comm. corp.//dtd strict html//",
+ "-//o'reilly and associates//dtd html 2.0//",
+ "-//o'reilly and associates//dtd html extended 1.0//",
+ "-//o'reilly and associates//dtd html extended relaxed 1.0//",
+ "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
+ "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
+ "-//spyglass//dtd html 2.0 extended//",
+ "-//sq//dtd html 2.0 hotmetal + extensions//",
+ "-//sun microsystems corp.//dtd hotjava html//",
+ "-//sun microsystems corp.//dtd hotjava strict html//",
+ "-//w3c//dtd html 3 1995-03-24//", "-//w3c//dtd html 3.2 draft//",
+ "-//w3c//dtd html 3.2 final//", "-//w3c//dtd html 3.2//",
+ "-//w3c//dtd html 3.2s draft//", "-//w3c//dtd html 4.0 frameset//",
+ "-//w3c//dtd html 4.0 transitional//",
+ "-//w3c//dtd html experimental 19960712//",
+ "-//w3c//dtd html experimental 970421//", "-//w3c//dtd w3 html//",
+ "-//w3o//dtd w3 html 3.0//", "-//webtechs//dtd mozilla html 2.0//",
+ "-//webtechs//dtd mozilla html//" };
+
+ // Sentinel index value.
+ // NOTE(review): presumably returned by stack-search helpers when no
+ // matching element is on the stack -- those helpers are not in this section.
+ private static final int NOT_FOUND_ON_STACK = Integer.MAX_VALUE;
+
+ // [NOCPP[
+
+ private static final @Local String HTML_LOCAL = "html";
+
+ // ]NOCPP]
+
+ // Current insertion mode; one of the insertion mode constants.
+ private int mode = INITIAL;
+
+ // Reset to INITIAL by startTokenization().
+ // NOTE(review): presumably the mode to return to after TEXT -- confirm.
+ private int originalMode = INITIAL;
+
+ /**
+ * Used only when moving back to IN_BODY.
+ */
+ private boolean framesetOk = true;
+
+ // Set by startTokenization(); also passed as the locator argument when
+ // SAXParseExceptions are created by the error reporting helpers.
+ protected Tokenizer tokenizer;
+
+ // [NOCPP[
+
+ // May be null; err() and warn() return silently in that case.
+ protected ErrorHandler errorHandler;
+
+ private DocumentModeHandler documentModeHandler;
+
+ // Selects which doctype diagnostics doctype(), characters() and eof() emit.
+ private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;
+
+ // Location of the first comment token seen (set in comment()); doctype()
+ // uses it to warn about comments that precede the doctype.
+ private LocatorImpl firstCommentLocation;
+
+ // ]NOCPP]
+
+ private boolean scriptingEnabled = false;
+
+ // When true, characters() drops a line feed at the start of the next
+ // buffer; cleared by doctype(), comment() and characters().
+ private boolean needToDropLF;
+
+ // [NOCPP[
+
+ // Cached result of wantsComments(), refreshed in startTokenization().
+ // comment() discards the token when this is false.
+ private boolean wantingComments;
+
+ // ]NOCPP]
+
+ // True when parsing a fragment; startTokenization() branches on it.
+ private boolean fragment;
+
+ // Fragment context (name, namespace, node). contextName and contextNode
+ // are cleared at the end of the fragment branch of startTokenization().
+ private @Local String contextName;
+
+ private @NsUri String contextNamespace;
+
+ private T contextNode;
+
+ /**
+ * Stack of template insertion modes
+ */
+ private @Auto int[] templateModeStack;
+
+ /**
+ * Current template mode stack pointer.
+ */
+ private int templateModePtr = -1;
+
+ // Stack of open elements; currentPtr indexes its top, -1 when empty.
+ private @Auto StackNode<T>[] stack;
+
+ private int currentPtr = -1;
+
+ // listPtr indexes the last entry of this list, -1 when empty.
+ private @Auto StackNode<T>[] listOfActiveFormattingElements;
+
+ private int listPtr = -1;
+
+ private T formPointer;
+
+ private T headPointer;
+
+ /**
+ * Used to work around Gecko limitations. Not used in Java.
+ */
+ private T deepTreeSurrogateParent;
+
+ // Pending character data; startTokenization() resets the buffer to null
+ // and the length to 0.
+ protected @Auto char[] charBuffer;
+
+ protected int charBufferLen = 0;
+
+ private boolean quirks = false;
+
+ private boolean isSrcdocDocument = false;
+
+ // [NOCPP[
+
+ // When true, doctype() appends the doctype to the document.
+ private boolean reportingDoctype = true;
+
+ private XmlViolationPolicy namePolicy = XmlViolationPolicy.ALTER_INFOSET;
+
+ // Cleared at the start of every tokenization run.
+ private final Map<String, LocatorImpl> idLocations = new HashMap<String, LocatorImpl>();
+
+ // Set by doctype(); reset to false by startTokenization().
+ private boolean html4;
+
+ // ]NOCPP]
+
+ /**
+ * Creates a tree builder that is not in fragment mode.
+ */
+ protected TreeBuilder() {
+ this.fragment = false;
+ }
+
+ /**
+ * Hook for reporting a condition that would make the infoset incompatible
+ * with XML 1.0 as fatal.
+ *
+ * This base implementation is empty and reports nothing.
+ * NOTE(review): the method is protected and non-final, so it is presumably
+ * meant to be overridden by subclasses that want such conditions to be
+ * fatal -- confirm against the subclasses.
+ *
+ * @throws SAXException
+ * declared so that overriding implementations may throw (for
+ * example a SAXParseException); never thrown here
+ */
+ protected void fatal() throws SAXException {
+ }
+
+ // [NOCPP[
+
+ /**
+ * Wraps an arbitrary exception in a SAXParseException located at the
+ * tokenizer's current position, reports it to the error handler as a
+ * fatal error if a handler is installed, and then always throws it.
+ *
+ * @param e
+ * the underlying cause; its message becomes the parse exception
+ * message
+ * @throws SAXException
+ * always (the wrapping SAXParseException, or whatever the
+ * handler throws)
+ */
+ protected final void fatal(Exception e) throws SAXException {
+ final SAXParseException wrapped = new SAXParseException(e.getMessage(),
+ tokenizer, e);
+ if (errorHandler == null) {
+ throw wrapped;
+ }
+ errorHandler.fatalError(wrapped);
+ throw wrapped;
+ }
+
+ /**
+ * Builds a SAXParseException with the given message at the tokenizer's
+ * current position, hands it to the error handler as a fatal error when a
+ * handler is installed, and then always throws it.
+ *
+ * @param s
+ * the message
+ * @throws SAXException
+ * always
+ */
+ final void fatal(String s) throws SAXException {
+ final SAXParseException failure = new SAXParseException(s, tokenizer);
+ final boolean haveHandler = errorHandler != null;
+ if (haveHandler) {
+ errorHandler.fatalError(failure);
+ }
+ throw failure;
+ }
+
+ /**
+ * Reports a parse error at the tokenizer's current position. Does nothing
+ * when no error handler is installed.
+ *
+ * @param message
+ * the error message
+ * @throws SAXException
+ * if the error handler throws
+ */
+ final void err(String message) throws SAXException {
+ if (errorHandler != null) {
+ errNoCheck(message);
+ }
+ }
+
+ /**
+ * Reports a Parse Error without checking if an error handler is present.
+ *
+ * @param message
+ * the message
+ * @throws SAXException
+ */
+ final void errNoCheck(String message) throws SAXException {
+ SAXParseException spe = new SAXParseException(message, tokenizer);
+ errorHandler.error(spe);
+ }
+
+ /**
+ * Reports every stack entry above eltPos as unclosed, walking from the
+ * top of the stack (currentPtr) downwards. The entry at eltPos itself is
+ * not reported. Does nothing when the stack is empty.
+ *
+ * @param eltPos
+ * stack index below which reporting stops (exclusive)
+ * @throws SAXException
+ * if the error handler throws
+ */
+ private void errListUnclosedStartTags(int eltPos) throws SAXException {
+ if (currentPtr == -1) {
+ return;
+ }
+ int pos = currentPtr;
+ while (pos > eltPos) {
+ reportUnclosedElementNameAndLocation(pos);
+ pos--;
+ }
+ }
+
+ /**
+ * Reports the name and location of an unclosed element.
+ *
+ * @throws SAXException
+ */
+ private final void reportUnclosedElementNameAndLocation(int pos) throws SAXException {
+ StackNode<T> node = stack[pos];
+ if (node.isOptionalEndTag()) {
+ return;
+ }
+ TaintableLocatorImpl locator = node.getLocator();
+ if (locator.isTainted()) {
+ return;
+ }
+ locator.markTainted();
+ SAXParseException spe = new SAXParseException(
+ "Unclosed element \u201C" + node.popName + "\u201D.", locator);
+ errorHandler.error(spe);
+ }
+
+ /**
+ * Reports a warning at the tokenizer's current position. Does nothing
+ * when no error handler is installed.
+ *
+ * @param message
+ * the warning message
+ * @throws SAXException
+ * if the error handler throws
+ */
+ final void warn(String message) throws SAXException {
+ if (errorHandler != null) {
+ errorHandler.warning(new SAXParseException(message, tokenizer));
+ }
+ }
+
+ /**
+ * Reports a warning at an explicitly supplied location instead of the
+ * tokenizer's current position. Does nothing when no error handler is
+ * installed.
+ *
+ * @param message
+ * the warning message
+ * @param locator
+ * the location to attach to the warning
+ * @throws SAXException
+ * if the error handler throws
+ */
+ final void warn(String message, Locator locator) throws SAXException {
+ if (errorHandler != null) {
+ errorHandler.warning(new SAXParseException(message, locator));
+ }
+ }
+
+ // ]NOCPP]
+
+ /**
+ * Resets the tree builder for a new tokenization run.
+ *
+ * Allocates fresh stacks (open elements, template modes, active
+ * formatting elements; 64 entries each), resets the stack pointers and
+ * element pointers, calls start(fragment), clears the character buffer
+ * and then sets up the initial insertion mode:
+ * in the non-fragment case the mode becomes INITIAL; in the fragment case
+ * a root node is pushed onto the stack and both the insertion mode and
+ * the tokenizer state are derived from the context element.
+ *
+ * Context names and namespaces are compared with == rather than equals().
+ * NOTE(review): this relies on the Local and NsUri strings being
+ * interned -- confirm against the annotation contracts.
+ *
+ * @param self
+ * the tokenizer driving this tree builder
+ * @throws SAXException
+ * if a callee throws
+ */
+ @SuppressWarnings("unchecked") public final void startTokenization(Tokenizer self) throws SAXException {
+ tokenizer = self;
+ stack = new StackNode[64];
+ templateModeStack = new int[64];
+ listOfActiveFormattingElements = new StackNode[64];
+ needToDropLF = false;
+ originalMode = INITIAL;
+ templateModePtr = -1;
+ currentPtr = -1;
+ listPtr = -1;
+ formPointer = null;
+ headPointer = null;
+ deepTreeSurrogateParent = null;
+ // [NOCPP[
+ html4 = false;
+ idLocations.clear();
+ wantingComments = wantsComments();
+ firstCommentLocation = null;
+ // ]NOCPP]
+ start(fragment);
+ charBufferLen = 0;
+ charBuffer = null;
+ framesetOk = true;
+ if (fragment) {
+ // Root of the fragment: the supplied context node if there is one,
+ // otherwise a newly created html element set as the root.
+ T elt;
+ if (contextNode != null) {
+ elt = contextNode;
+ } else {
+ elt = createHtmlElementSetAsRoot(tokenizer.emptyAttributes());
+ }
+ // When the context node is not in the HTML namespace, contrary
+ // to the spec, the first node on the stack is not set to "html"
+ // in the HTML namespace. Instead, it is set to a node that has
+ // the characteristics of the appropriate "adjusted current node".
+ // This way, there is no need to perform "adjusted current node"
+ // checks during tree construction. Instead, it's sufficient to
+ // just look at the current node. However, this also means that it
+ // is not safe to treat "html" in the HTML namespace as a sentinel
+ // that ends stack popping. Instead, stack popping loops that are
+ // meant not to pop the first element on the stack need to check
+ // for currentPos becoming zero.
+ if (contextNamespace == "http://www.w3.org/2000/svg") {
+ ElementName elementName = ElementName.SVG;
+ if ("title" == contextName || "desc" == contextName
+ || "foreignObject" == contextName) {
+ // These elements are all alike and we don't care about
+ // the exact name.
+ elementName = ElementName.FOREIGNOBJECT;
+ }
+ // This is the SVG variant of the StackNode constructor.
+ StackNode<T> node = new StackNode<T>(elementName,
+ elementName.camelCaseName, elt
+ // [NOCPP[
+ , errorHandler == null ? null
+ : new TaintableLocatorImpl(tokenizer)
+ // ]NOCPP]
+ );
+ currentPtr++;
+ stack[currentPtr] = node;
+ tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA,
+ contextName);
+ // The frameset-ok flag is set even though <frameset> never
+ // ends up being allowed as HTML frameset in the fragment case.
+ mode = FRAMESET_OK;
+ } else if (contextNamespace == "http://www.w3.org/1998/Math/MathML") {
+ ElementName elementName = ElementName.MATH;
+ if ("mi" == contextName || "mo" == contextName
+ || "mn" == contextName || "ms" == contextName
+ || "mtext" == contextName) {
+ // These elements are all alike and we don't care about
+ // the exact name.
+ elementName = ElementName.MTEXT;
+ } else if ("annotation-xml" == contextName) {
+ elementName = ElementName.ANNOTATION_XML;
+ // Blink does not check the encoding attribute of the
+ // annotation-xml element innerHTML is being set on.
+ // Let's do the same at least until
+ // https://www.w3.org/Bugs/Public/show_bug.cgi?id=26783
+ // is resolved.
+ }
+ // This is the MathML variant of the StackNode constructor.
+ StackNode<T> node = new StackNode<T>(elementName, elt,
+ elementName.name, false
+ // [NOCPP[
+ , errorHandler == null ? null
+ : new TaintableLocatorImpl(tokenizer)
+ // ]NOCPP]
+ );
+ currentPtr++;
+ stack[currentPtr] = node;
+ tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA,
+ contextName);
+ // The frameset-ok flag is set even though <frameset> never
+ // ends up being allowed as HTML frameset in the fragment case.
+ mode = FRAMESET_OK;
+ } else { // html
+ StackNode<T> node = new StackNode<T>(ElementName.HTML, elt
+ // [NOCPP[
+ , errorHandler == null ? null
+ : new TaintableLocatorImpl(tokenizer)
+ // ]NOCPP]
+ );
+ currentPtr++;
+ stack[currentPtr] = node;
+ if ("template" == contextName) {
+ pushTemplateMode(IN_TEMPLATE);
+ }
+ resetTheInsertionMode();
+ formPointer = getFormPointerForContext(contextNode);
+ // Choose the tokenizer's initial state from the HTML context
+ // element name; anything not listed starts in the DATA state.
+ if ("title" == contextName || "textarea" == contextName) {
+ tokenizer.setStateAndEndTagExpectation(Tokenizer.RCDATA,
+ contextName);
+ } else if ("style" == contextName || "xmp" == contextName
+ || "iframe" == contextName || "noembed" == contextName
+ || "noframes" == contextName
+ || (scriptingEnabled && "noscript" == contextName)) {
+ tokenizer.setStateAndEndTagExpectation(Tokenizer.RAWTEXT,
+ contextName);
+ } else if ("plaintext" == contextName) {
+ tokenizer.setStateAndEndTagExpectation(Tokenizer.PLAINTEXT,
+ contextName);
+ } else if ("script" == contextName) {
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.SCRIPT_DATA, contextName);
+ } else {
+ tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA,
+ contextName);
+ }
+ }
+ // The context name and node are dropped once consumed; note that
+ // contextNamespace is left untouched here.
+ contextName = null;
+ contextNode = null;
+ } else {
+ mode = INITIAL;
+ // If we are viewing XML source, put a foreign element permanently
+ // on the stack so that cdataSectionAllowed() returns true.
+ // CPPONLY: if (tokenizer.isViewingXmlSource()) {
+ // CPPONLY: T elt = createElement("http://www.w3.org/2000/svg",
+ // CPPONLY: "svg",
+ // CPPONLY: tokenizer.emptyAttributes(), null);
+ // CPPONLY: StackNode<T> node = new StackNode<T>(ElementName.SVG,
+ // CPPONLY: "svg",
+ // CPPONLY: elt);
+ // CPPONLY: currentPtr++;
+ // CPPONLY: stack[currentPtr] = node;
+ // CPPONLY: }
+ }
+ }
+
+ /**
+ * Handles a doctype token.
+ *
+ * The token is only honoured when the builder is not in foreign content
+ * and the insertion mode is still INITIAL. In that case the doctype is
+ * appended to the document (Java: only when reportingDoctype is set), the
+ * document mode (quirks, almost standards or standards) is determined via
+ * isQuirky() and isAlmostStandards(), diagnostics appropriate to the
+ * configured doctypeExpectation are emitted, and the insertion mode
+ * advances to BEFORE_HTML. In every other situation the token is reported
+ * as a stray doctype and ignored.
+ *
+ * @param name
+ * the doctype name, may be null
+ * @param publicIdentifier
+ * the public identifier, or null if absent
+ * @param systemIdentifier
+ * the system identifier, or null if absent
+ * @param forceQuirks
+ * the force-quirks flag of the token
+ * @throws SAXException
+ * if an error handler or a callee throws
+ */
+ public final void doctype(@Local String name, String publicIdentifier,
+ String systemIdentifier, boolean forceQuirks) throws SAXException {
+ needToDropLF = false;
+ if (!isInForeign() && mode == INITIAL) {
+ // [NOCPP[
+ if (reportingDoctype) {
+ // ]NOCPP]
+ String emptyString = Portability.newEmptyString();
+ appendDoctypeToDocument(name == null ? "" : name,
+ publicIdentifier == null ? emptyString
+ : publicIdentifier,
+ systemIdentifier == null ? emptyString
+ : systemIdentifier);
+ Portability.releaseString(emptyString);
+ // [NOCPP[
+ }
+ switch (doctypeExpectation) {
+ case HTML:
+ // ]NOCPP]
+ if (isQuirky(name, publicIdentifier, systemIdentifier,
+ forceQuirks)) {
+ errQuirkyDoctype();
+ documentModeInternal(DocumentMode.QUIRKS_MODE,
+ publicIdentifier, systemIdentifier, false);
+ } else if (isAlmostStandards(publicIdentifier,
+ systemIdentifier)) {
+ // [NOCPP[
+ if (firstCommentLocation != null) {
+ warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
+ firstCommentLocation);
+ }
+ // ]NOCPP]
+ errAlmostStandardsDoctype();
+ documentModeInternal(
+ DocumentMode.ALMOST_STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, false);
+ } else {
+ // [NOCPP[
+ if (firstCommentLocation != null) {
+ warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
+ firstCommentLocation);
+ }
+ // Well-known strict HTML 4 / XHTML doctypes only get a warning;
+ // anything else other than a bare or legacy-compat doctype is an
+ // error.
+ if ((Portability.literalEqualsString(
+ "-//W3C//DTD HTML 4.0//EN", publicIdentifier) && (systemIdentifier == null || Portability.literalEqualsString(
+ "http://www.w3.org/TR/REC-html40/strict.dtd",
+ systemIdentifier)))
+ || (Portability.literalEqualsString(
+ "-//W3C//DTD HTML 4.01//EN",
+ publicIdentifier) && (systemIdentifier == null || Portability.literalEqualsString(
+ "http://www.w3.org/TR/html4/strict.dtd",
+ systemIdentifier)))
+ || (Portability.literalEqualsString(
+ "-//W3C//DTD XHTML 1.0 Strict//EN",
+ publicIdentifier) && Portability.literalEqualsString(
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd",
+ systemIdentifier))
+ || (Portability.literalEqualsString(
+ "-//W3C//DTD XHTML 1.1//EN",
+ publicIdentifier) && Portability.literalEqualsString(
+ "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd",
+ systemIdentifier))
+
+ ) {
+ warn("Obsolete doctype. Expected \u201C<!DOCTYPE html>\u201D.");
+ } else if (!((systemIdentifier == null || Portability.literalEqualsString(
+ "about:legacy-compat", systemIdentifier)) && publicIdentifier == null)) {
+ err("Legacy doctype. Expected \u201C<!DOCTYPE html>\u201D.");
+ }
+ // ]NOCPP]
+ documentModeInternal(DocumentMode.STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, false);
+ }
+ // [NOCPP[
+ break;
+ case HTML401_STRICT:
+ html4 = true;
+ tokenizer.turnOnAdditionalHtml4Errors();
+ if (isQuirky(name, publicIdentifier, systemIdentifier,
+ forceQuirks)) {
+ err("Quirky doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
+ documentModeInternal(DocumentMode.QUIRKS_MODE,
+ publicIdentifier, systemIdentifier, true);
+ } else if (isAlmostStandards(publicIdentifier,
+ systemIdentifier)) {
+ if (firstCommentLocation != null) {
+ warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
+ firstCommentLocation);
+ }
+ err("Almost standards mode doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
+ documentModeInternal(
+ DocumentMode.ALMOST_STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, true);
+ } else {
+ if (firstCommentLocation != null) {
+ warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
+ firstCommentLocation);
+ }
+ if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) {
+ if (!"http://www.w3.org/TR/html4/strict.dtd".equals(systemIdentifier)) {
+ warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
+ }
+ } else {
+ err("The doctype was not the HTML 4.01 Strict doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
+ }
+ documentModeInternal(DocumentMode.STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, true);
+ }
+ break;
+ case HTML401_TRANSITIONAL:
+ html4 = true;
+ tokenizer.turnOnAdditionalHtml4Errors();
+ if (isQuirky(name, publicIdentifier, systemIdentifier,
+ forceQuirks)) {
+ err("Quirky doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
+ documentModeInternal(DocumentMode.QUIRKS_MODE,
+ publicIdentifier, systemIdentifier, true);
+ } else if (isAlmostStandards(publicIdentifier,
+ systemIdentifier)) {
+ if (firstCommentLocation != null) {
+ warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
+ firstCommentLocation);
+ }
+ if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)
+ && systemIdentifier != null) {
+ if (!"http://www.w3.org/TR/html4/loose.dtd".equals(systemIdentifier)) {
+ warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
+ }
+ } else {
+ err("The doctype was not a non-quirky HTML 4.01 Transitional doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
+ }
+ documentModeInternal(
+ DocumentMode.ALMOST_STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, true);
+ } else {
+ if (firstCommentLocation != null) {
+ warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
+ firstCommentLocation);
+ }
+ err("The doctype was not the HTML 4.01 Transitional doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
+ documentModeInternal(DocumentMode.STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, true);
+ }
+ break;
+ case AUTO:
+ html4 = isHtml4Doctype(publicIdentifier);
+ if (html4) {
+ tokenizer.turnOnAdditionalHtml4Errors();
+ }
+ if (isQuirky(name, publicIdentifier, systemIdentifier,
+ forceQuirks)) {
+ err("Quirky doctype. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
+ documentModeInternal(DocumentMode.QUIRKS_MODE,
+ publicIdentifier, systemIdentifier, html4);
+ } else if (isAlmostStandards(publicIdentifier,
+ systemIdentifier)) {
+ if (firstCommentLocation != null) {
+ warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
+ firstCommentLocation);
+ }
+ if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)) {
+ if (!"http://www.w3.org/TR/html4/loose.dtd".equals(systemIdentifier)) {
+ warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
+ }
+ } else {
+ err("Almost standards mode doctype. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
+ }
+ documentModeInternal(
+ DocumentMode.ALMOST_STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, html4);
+ } else {
+ if (firstCommentLocation != null) {
+ warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
+ firstCommentLocation);
+ }
+ if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) {
+ if (!"http://www.w3.org/TR/html4/strict.dtd".equals(systemIdentifier)) {
+ warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
+ }
+ } else if ("-//W3C//DTD XHTML 1.0 Strict//EN".equals(publicIdentifier)) {
+ if (!"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd".equals(systemIdentifier)) {
+ warn("The doctype did not contain the system identifier prescribed by the XHTML 1.0 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\u201D.");
+ }
+ // The XHTML 1.1 public identifier starts with "-//", like the
+ // other W3C identifiers here and like the same identifier in the
+ // HTML branch of this switch. Without the leading "-" this
+ // comparison could never match a real XHTML 1.1 doctype, which
+ // then fell through to the "Unexpected doctype" error below.
+ } else if ("-//W3C//DTD XHTML 1.1//EN".equals(publicIdentifier)) {
+ if (!"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd".equals(systemIdentifier)) {
+ warn("The doctype did not contain the system identifier prescribed by the XHTML 1.1 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\u201D.");
+ }
+ } else if (!((systemIdentifier == null || Portability.literalEqualsString(
+ "about:legacy-compat", systemIdentifier)) && publicIdentifier == null)) {
+ err("Unexpected doctype. Expected, e.g., \u201C<!DOCTYPE html>\u201D.");
+ }
+ documentModeInternal(DocumentMode.STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, html4);
+ }
+ break;
+ case NO_DOCTYPE_ERRORS:
+ // Same mode selection as above, but without any diagnostics.
+ if (isQuirky(name, publicIdentifier, systemIdentifier,
+ forceQuirks)) {
+ documentModeInternal(DocumentMode.QUIRKS_MODE,
+ publicIdentifier, systemIdentifier, false);
+ } else if (isAlmostStandards(publicIdentifier,
+ systemIdentifier)) {
+ documentModeInternal(
+ DocumentMode.ALMOST_STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, false);
+ } else {
+ documentModeInternal(DocumentMode.STANDARDS_MODE,
+ publicIdentifier, systemIdentifier, false);
+ }
+ break;
+ }
+ // ]NOCPP]
+
+ /*
+ *
+ * Then, switch to the root element mode of the tree construction
+ * stage.
+ */
+ mode = BEFORE_HTML;
+ return;
+ }
+ /*
+ * A DOCTYPE token Parse error.
+ */
+ errStrayDoctype();
+ /*
+ * Ignore the token.
+ */
+ return;
+ }
+
+ // [NOCPP[
+
+ private boolean isHtml4Doctype(String publicIdentifier) {
+ if (publicIdentifier != null
+ && (Arrays.binarySearch(TreeBuilder.HTML4_PUBLIC_IDS,
+ publicIdentifier) > -1)) {
+ return true;
+ }
+ return false;
+ }
+
+ // ]NOCPP]
+
+ /**
+ * Handles a comment token.
+ *
+ * Java only: remembers the location of the first comment seen and drops
+ * the token entirely when comments are not wanted.
+ *
+ * Outside foreign content, comments in the INITIAL, BEFORE_HTML,
+ * AFTER_AFTER_BODY and AFTER_AFTER_FRAMESET modes are appended to the
+ * document, and comments in AFTER_BODY are appended to the first element
+ * on the stack of open elements. In every other case pending characters
+ * are flushed and the comment is appended to the current node.
+ *
+ * @param buf
+ * buffer holding the comment data
+ * @param start
+ * offset of the first character of the comment data
+ * @param length
+ * number of characters of comment data
+ * @throws SAXException
+ * if a callee throws
+ */
+ public final void comment(@NoLength char[] buf, int start, int length)
+ throws SAXException {
+ needToDropLF = false;
+ // [NOCPP[
+ if (firstCommentLocation == null) {
+ firstCommentLocation = new LocatorImpl(tokenizer);
+ }
+ if (!wantingComments) {
+ return;
+ }
+ // ]NOCPP]
+ if (!isInForeign()) {
+ if (mode == INITIAL || mode == BEFORE_HTML
+ || mode == AFTER_AFTER_BODY || mode == AFTER_AFTER_FRAMESET) {
+ // Comment belongs to the Document object itself.
+ appendCommentToDocument(buf, start, length);
+ return;
+ }
+ if (mode == AFTER_BODY) {
+ // Comment belongs to the first element on the stack (the html
+ // element).
+ flushCharacters();
+ appendComment(stack[0].node, buf, start, length);
+ return;
+ }
+ }
+ // Everything else: the comment becomes a child of the current node.
+ flushCharacters();
+ appendComment(stack[currentPtr].node, buf, start, length);
+ }
+
+ /**
+ * Handles a run of character tokens.
+ *
+ * If needToDropLF is set, the flag is cleared and a line feed at the very
+ * start of the run is dropped. The most common modes are then handled in
+ * bulk: in IN_BODY, IN_CELL and IN_CAPTION the active formatting elements
+ * are reconstructed (unless in foreign content that is not an integration
+ * point) and the whole run is accumulated, in TEXT the run is accumulated
+ * as is, and in IN_TABLE, IN_TABLE_BODY and IN_ROW the run is passed to
+ * accumulateCharactersForced().
+ *
+ * In all other modes the run is examined character by character. Within
+ * that loop, start marks the beginning of the characters not yet
+ * accumulated or discarded, and "i--; continue;" reprocesses the current
+ * character after the insertion mode has been changed.
+ *
+ * @param buf
+ * buffer holding the characters
+ * @param start
+ * offset of the first character to process
+ * @param length
+ * number of characters to process
+ * @throws SAXException
+ * if an error handler or a callee throws
+ * @see nu.validator.htmlparser.common.TokenHandler#characters(char[], int,
+ * int)
+ */
+ public final void characters(@Const @NoLength char[] buf, int start, int length)
+ throws SAXException {
+ // Note: Can't attach error messages to EOF in C++ yet
+
+ // CPPONLY: if (tokenizer.isViewingXmlSource()) {
+ // CPPONLY: return;
+ // CPPONLY: }
+ if (needToDropLF) {
+ needToDropLF = false;
+ if (buf[start] == '\n') {
+ start++;
+ length--;
+ if (length == 0) {
+ return;
+ }
+ }
+ }
+
+ // optimize the most common case
+ switch (mode) {
+ case IN_BODY:
+ case IN_CELL:
+ case IN_CAPTION:
+ if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) {
+ reconstructTheActiveFormattingElements();
+ }
+ // fall through
+ case TEXT:
+ accumulateCharacters(buf, start, length);
+ return;
+ case IN_TABLE:
+ case IN_TABLE_BODY:
+ case IN_ROW:
+ accumulateCharactersForced(buf, start, length);
+ return;
+ default:
+ int end = start + length;
+ charactersloop: for (int i = start; i < end; i++) {
+ switch (buf[i]) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ case '\u000C':
+ /*
+ * A character token that is one of U+0009
+ * CHARACTER TABULATION, U+000A LINE FEED (LF),
+ * U+000C FORM FEED (FF), or U+0020 SPACE
+ * (the code additionally treats U+000D CARRIAGE
+ * RETURN as a space character here)
+ */
+ switch (mode) {
+ case INITIAL:
+ case BEFORE_HTML:
+ case BEFORE_HEAD:
+ /*
+ * Ignore the token.
+ */
+ start = i + 1;
+ continue;
+ case IN_HEAD:
+ case IN_HEAD_NOSCRIPT:
+ case AFTER_HEAD:
+ case IN_COLUMN_GROUP:
+ case IN_FRAMESET:
+ case AFTER_FRAMESET:
+ /*
+ * Append the character to the current node.
+ */
+ // start is left alone, so the character stays part of
+ // the pending run that is accumulated later.
+ continue;
+ case FRAMESET_OK:
+ case IN_TEMPLATE:
+ case IN_BODY:
+ case IN_CELL:
+ case IN_CAPTION:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ start = i;
+ }
+
+ /*
+ * Reconstruct the active formatting
+ * elements, if any.
+ */
+ if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) {
+ flushCharacters();
+ reconstructTheActiveFormattingElements();
+ }
+ /*
+ * Append the token's character to the
+ * current node.
+ */
+ // Leaving the loop: the rest of the run is accumulated
+ // in one go after the loop.
+ break charactersloop;
+ case IN_SELECT:
+ case IN_SELECT_IN_TABLE:
+ break charactersloop;
+ case IN_TABLE:
+ case IN_TABLE_BODY:
+ case IN_ROW:
+ accumulateCharactersForced(buf, i, 1);
+ start = i + 1;
+ continue;
+ case AFTER_BODY:
+ case AFTER_AFTER_BODY:
+ case AFTER_AFTER_FRAMESET:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ start = i;
+ }
+ /*
+ * Reconstruct the active formatting
+ * elements, if any.
+ */
+ flushCharacters();
+ reconstructTheActiveFormattingElements();
+ /*
+ * Append the token's character to the
+ * current node.
+ */
+ continue;
+ }
+ // No break here: a mode not listed in the switch above would
+ // fall through into the non-space handling below.
+ default:
+ /*
+ * A character token that is not one of
+ * U+0009 CHARACTER TABULATION, U+000A LINE FEED
+ * (LF), U+000C FORM FEED (FF), or U+0020 SPACE
+ */
+ switch (mode) {
+ case INITIAL:
+ /*
+ * Parse error.
+ */
+ // [NOCPP[
+ switch (doctypeExpectation) {
+ case AUTO:
+ err("Non-space characters found without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
+ break;
+ case HTML:
+ // XXX figure out a way to report this in the Gecko View Source case
+ err("Non-space characters found without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D.");
+ break;
+ case HTML401_STRICT:
+ err("Non-space characters found without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
+ break;
+ case HTML401_TRANSITIONAL:
+ err("Non-space characters found without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
+ break;
+ case NO_DOCTYPE_ERRORS:
+ }
+ // ]NOCPP]
+ /*
+ *
+ * Set the document to quirks mode.
+ */
+ documentModeInternal(
+ DocumentMode.QUIRKS_MODE, null,
+ null, false);
+ /*
+ * Then, switch to the root element mode of
+ * the tree construction stage
+ */
+ mode = BEFORE_HTML;
+ /*
+ * and reprocess the current token.
+ */
+ i--;
+ continue;
+ case BEFORE_HTML:
+ /*
+ * Create an HTMLElement node with the tag
+ * name html, in the HTML namespace. Append
+ * it to the Document object.
+ */
+ // No need to flush characters here,
+ // because there's nothing to flush.
+ appendHtmlElementToDocumentAndPush();
+ /* Switch to the main mode */
+ mode = BEFORE_HEAD;
+ /*
+ * reprocess the current token.
+ */
+ i--;
+ continue;
+ case BEFORE_HEAD:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ start = i;
+ }
+ /*
+ * Act as if a start tag token with the tag
+ * name "head" and no attributes had been
+ * seen,
+ */
+ flushCharacters();
+ appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES);
+ mode = IN_HEAD;
+ /*
+ * then reprocess the current token.
+ *
+ * This will result in an empty head element
+ * being generated, with the current token
+ * being reprocessed in the "after head"
+ * insertion mode.
+ */
+ i--;
+ continue;
+ case IN_HEAD:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ start = i;
+ }
+ /*
+ * Act as if an end tag token with the tag
+ * name "head" had been seen,
+ */
+ flushCharacters();
+ pop();
+ mode = AFTER_HEAD;
+ /*
+ * and reprocess the current token.
+ */
+ i--;
+ continue;
+ case IN_HEAD_NOSCRIPT:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ start = i;
+ }
+ /*
+ * Parse error. Act as if an end tag with
+ * the tag name "noscript" had been seen
+ */
+ errNonSpaceInNoscriptInHead();
+ flushCharacters();
+ pop();
+ mode = IN_HEAD;
+ /*
+ * and reprocess the current token.
+ */
+ i--;
+ continue;
+ case AFTER_HEAD:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ start = i;
+ }
+ /*
+ * Act as if a start tag token with the tag
+ * name "body" and no attributes had been
+ * seen,
+ */
+ flushCharacters();
+ appendToCurrentNodeAndPushBodyElement();
+ mode = FRAMESET_OK;
+ /*
+ * and then reprocess the current token.
+ */
+ i--;
+ continue;
+ case FRAMESET_OK:
+ // A non-space character clears the frameset-ok flag; the
+ // character is then reprocessed in IN_BODY.
+ framesetOk = false;
+ mode = IN_BODY;
+ i--;
+ continue;
+ case IN_TEMPLATE:
+ case IN_BODY:
+ case IN_CELL:
+ case IN_CAPTION:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ start = i;
+ }
+ /*
+ * Reconstruct the active formatting
+ * elements, if any.
+ */
+ if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) {
+ flushCharacters();
+ reconstructTheActiveFormattingElements();
+ }
+ /*
+ * Append the token's character to the
+ * current node.
+ */
+ break charactersloop;
+ case IN_TABLE:
+ case IN_TABLE_BODY:
+ case IN_ROW:
+ accumulateCharactersForced(buf, i, 1);
+ start = i + 1;
+ continue;
+ case IN_COLUMN_GROUP:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ start = i;
+ }
+ /*
+ * Act as if an end tag with the tag name
+ * "colgroup" had been seen, and then, if
+ * that token wasn't ignored, reprocess the
+ * current token.
+ */
+ // The implied end tag is ignored when the current node is
+ // the bottom of the stack or a template element; the
+ // character is then reported and skipped.
+ if (currentPtr == 0 || stack[currentPtr].getGroup() ==
+ TreeBuilder.TEMPLATE) {
+ errNonSpaceInColgroupInFragment();
+ start = i + 1;
+ continue;
+ }
+ flushCharacters();
+ pop();
+ mode = IN_TABLE;
+ i--;
+ continue;
+ case IN_SELECT:
+ case IN_SELECT_IN_TABLE:
+ break charactersloop;
+ case AFTER_BODY:
+ errNonSpaceAfterBody();
+ fatal();
+ mode = framesetOk ? FRAMESET_OK : IN_BODY;
+ i--;
+ continue;
+ case IN_FRAMESET:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ // start index is adjusted below.
+ }
+ /*
+ * Parse error.
+ */
+ errNonSpaceInFrameset();
+ /*
+ * Ignore the token.
+ */
+ start = i + 1;
+ continue;
+ case AFTER_FRAMESET:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ // start index is adjusted below.
+ }
+ /*
+ * Parse error.
+ */
+ errNonSpaceAfterFrameset();
+ /*
+ * Ignore the token.
+ */
+ start = i + 1;
+ continue;
+ case AFTER_AFTER_BODY:
+ /*
+ * Parse error.
+ */
+ errNonSpaceInTrailer();
+ /*
+ * Switch back to the main mode and
+ * reprocess the token.
+ */
+ mode = framesetOk ? FRAMESET_OK : IN_BODY;
+ i--;
+ continue;
+ case AFTER_AFTER_FRAMESET:
+ if (start < i) {
+ accumulateCharacters(buf, start, i
+ - start);
+ // start index is adjusted below.
+ }
+ /*
+ * Parse error.
+ */
+ errNonSpaceInTrailer();
+ /*
+ * Ignore the token.
+ */
+ start = i + 1;
+ continue;
+ }
+ }
+ }
+ // Accumulate whatever is left of the run, whether the loop ran to
+ // the end or was left early via "break charactersloop".
+ if (start < end) {
+ accumulateCharacters(buf, start, end - start);
+ }
+ }
+ }
+
+ /**
+  * Accumulates a single character taken from {@code REPLACEMENT_CHARACTER}.
+  *
+  * <p>In {@code TEXT} mode the character is always accumulated. In every
+  * other mode it is accumulated only when the stack of open elements is
+  * non-empty and {@code isSpecialParentInForeign} returns {@code false}
+  * for the current node; otherwise the call has no effect.
+  *
+  * @throws SAXException if accumulating the character fails
+  * @see nu.validator.htmlparser.common.TokenHandler#zeroOriginatingReplacementCharacter()
+  */
+ public void zeroOriginatingReplacementCharacter() throws SAXException {
+     if (mode != TEXT) {
+         // Outside TEXT mode, drop the character when there is no current
+         // node or when the current node satisfies
+         // isSpecialParentInForeign().
+         if (currentPtr < 0 || isSpecialParentInForeign(stack[currentPtr])) {
+             return;
+         }
+     }
+     accumulateCharacters(REPLACEMENT_CHARACTER, 0, 1);
+ }
+
+ public final void eof() throws SAXException { // Handles end of input: dispatches on the current insertion mode until a terminal mode is reached, then pops the open elements that remain.
+ flushCharacters(); // flush accumulated characters before acting on EOF
+ // Note: Can't attach error messages to EOF in C++ yet
+ eofloop: for (;;) { // "continue" reprocesses EOF in the newly selected mode; "break eofloop" ends the dispatch
+ switch (mode) {
+ case INITIAL:
+ /*
+ * Parse error.
+ */
+ // [NOCPP[
+ switch (doctypeExpectation) { // the message depends on which doctype the caller said to expect
+ case AUTO:
+ err("End of file seen without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
+ break;
+ case HTML:
+ err("End of file seen without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D.");
+ break;
+ case HTML401_STRICT:
+ err("End of file seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
+ break;
+ case HTML401_TRANSITIONAL:
+ err("End of file seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
+ break;
+ case NO_DOCTYPE_ERRORS: // deliberately empty: nothing is reported for this expectation
+ }
+ // ]NOCPP]
+ /*
+ *
+ * Set the document to quirks mode.
+ */
+ documentModeInternal(DocumentMode.QUIRKS_MODE, null, null,
+ false);
+ /*
+ * Then, switch to the root element mode of the tree
+ * construction stage
+ */
+ mode = BEFORE_HTML;
+ /*
+ * and reprocess the current token.
+ */
+ continue;
+ case BEFORE_HTML:
+ /*
+ * Create an HTMLElement node with the tag name html, in the
+ * HTML namespace. Append it to the Document object.
+ */
+ appendHtmlElementToDocumentAndPush();
+ // XXX application cache manifest
+ /* Switch to the main mode */
+ mode = BEFORE_HEAD;
+ /*
+ * reprocess the current token.
+ */
+ continue;
+ case BEFORE_HEAD:
+ appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES); // synthesize a head element with no attributes
+ mode = IN_HEAD;
+ continue;
+ case IN_HEAD:
+ // [NOCPP[
+ if (errorHandler != null && currentPtr > 1) { // more than two stack entries are open (presumably html + head plus something else)
+ errEofWithUnclosedElements();
+ }
+ // ]NOCPP]
+ while (currentPtr > 0) { // pop everything except the bottom stack entry
+ popOnEof();
+ }
+ mode = AFTER_HEAD;
+ continue;
+ case IN_HEAD_NOSCRIPT:
+ // [NOCPP[
+ errEofWithUnclosedElements();
+ // ]NOCPP]
+ while (currentPtr > 1) { // pop down to stack index 1
+ popOnEof();
+ }
+ mode = IN_HEAD;
+ continue;
+ case AFTER_HEAD:
+ appendToCurrentNodeAndPushBodyElement(); // synthesize a body element
+ mode = IN_BODY;
+ continue;
+ case IN_TABLE_BODY:
+ case IN_ROW:
+ case IN_TABLE:
+ case IN_SELECT_IN_TABLE:
+ case IN_SELECT:
+ case IN_COLUMN_GROUP:
+ case FRAMESET_OK:
+ case IN_CAPTION:
+ case IN_CELL:
+ case IN_BODY:
+ // [NOCPP[
+ // i > 0 to stop in time in the foreign fragment case.
+ openelementloop: for (int i = currentPtr; i > 0; i--) { // scan from the current node down to stack index 1
+ int group = stack[i].getGroup();
+ switch (group) {
+ case DD_OR_DT:
+ case LI:
+ case P:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TD_OR_TH:
+ case BODY:
+ case HTML:
+ break; // these groups may still be open at EOF without an error being reported
+ default:
+ errEofWithUnclosedElements(); // any other open element: report once and stop scanning
+ break openelementloop;
+ }
+ }
+ // ]NOCPP]
+
+ if (isTemplateModeStackEmpty()) { // no template insertion modes pending: dispatch is finished
+ break eofloop;
+ }
+
+ // fall through to IN_TEMPLATE
+ case IN_TEMPLATE:
+ int eltPos = findLast("template");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ assert fragment; // no template on the stack is only expected when parsing a fragment
+ break eofloop;
+ }
+ if (errorHandler != null) {
+ errUnclosedElements(eltPos, "template");
+ }
+ while (currentPtr >= eltPos) { // pop up to and including the template element
+ pop();
+ }
+ clearTheListOfActiveFormattingElementsUpToTheLastMarker();
+ popTemplateMode();
+ resetTheInsertionMode();
+
+ // Reprocess token.
+ continue;
+ case TEXT:
+ // [NOCPP[
+ if (errorHandler != null) {
+ errNoCheck("End of file seen when expecting text or an end tag.");
+ errListUnclosedStartTags(0);
+ }
+ // ]NOCPP]
+ // XXX mark script as already executed
+ if (originalMode == AFTER_HEAD) { // one extra pop when TEXT mode was entered from AFTER_HEAD
+ popOnEof();
+ }
+ popOnEof();
+ mode = originalMode; // return to the mode that was active before TEXT and reprocess EOF there
+ continue;
+ case IN_FRAMESET:
+ // [NOCPP[
+ if (errorHandler != null && currentPtr > 0) { // something above the bottom stack entry is still open
+ errEofWithUnclosedElements();
+ }
+ // ]NOCPP]
+ break eofloop;
+ case AFTER_BODY:
+ case AFTER_FRAMESET:
+ case AFTER_AFTER_BODY:
+ case AFTER_AFTER_FRAMESET:
+ default: // terminal modes: nothing to report, leave the dispatch loop
+ // [NOCPP[
+ if (currentPtr == 0) { // This silliness is here to poison
+ // buggy compiler optimizations in
+ // GWT
+ System.currentTimeMillis();
+ }
+ // ]NOCPP]
+ break eofloop;
+ }
+ }
+ while (currentPtr > 0) { // unwind every element above the bottom of the stack
+ popOnEof();
+ }
+ if (!fragment) { // when not parsing a fragment, pop the bottom stack entry as well
+ popOnEof();
+ }
+ /* Stop parsing. */
+ }
+
+ /**
+  * Tears down per-parse state once tokenization has finished.
+  *
+  * <p>Clears the form, head, deep-tree surrogate parent and template mode
+  * stack references, releases every node still on the stack of open
+  * elements and every non-null entry in the list of active formatting
+  * elements, drops both arrays and the character buffer, and finally
+  * calls {@code end()}.
+  *
+  * @throws SAXException if {@code end()} fails
+  * @see nu.validator.htmlparser.common.TokenHandler#endTokenization()
+  */
+ public final void endTokenization() throws SAXException {
+     formPointer = null;
+     headPointer = null;
+     deepTreeSurrogateParent = null;
+     templateModeStack = null;
+     if (stack != null) {
+         // Release from the top of the stack downwards; currentPtr ends at -1.
+         for (; currentPtr > -1; currentPtr--) {
+             stack[currentPtr].release();
+         }
+         stack = null;
+     }
+     if (listOfActiveFormattingElements != null) {
+         // Null entries in the list are skipped; listPtr ends at -1.
+         for (; listPtr > -1; listPtr--) {
+             if (listOfActiveFormattingElements[listPtr] != null) {
+                 listOfActiveFormattingElements[listPtr].release();
+             }
+         }
+         listOfActiveFormattingElements = null;
+     }
+     // [NOCPP[
+     idLocations.clear();
+     // ]NOCPP]
+     charBuffer = null;
+     end();
+ }
+
+ public final void startTag(ElementName elementName,
+ HtmlAttributes attributes, boolean selfClosing) throws SAXException {
+ flushCharacters();
+
+ // [NOCPP[
+ if (errorHandler != null) {
+ // ID uniqueness
+ @IdType String id = attributes.getId();
+ if (id != null) {
+ LocatorImpl oldLoc = idLocations.get(id);
+ if (oldLoc != null) {
+ err("Duplicate ID \u201C" + id + "\u201D.");
+ errorHandler.warning(new SAXParseException(
+ "The first occurrence of ID \u201C" + id
+ + "\u201D was here.", oldLoc));
+ } else {
+ idLocations.put(id, new LocatorImpl(tokenizer));
+ }
+ }
+ }
+ // ]NOCPP]
+
+ int eltPos;
+ needToDropLF = false;
+ starttagloop: for (;;) {
+ int group = elementName.getGroup();
+ @Local String name = elementName.name;
+ if (isInForeign()) {
+ StackNode<T> currentNode = stack[currentPtr];
+ @NsUri String currNs = currentNode.ns;
+ if (!(currentNode.isHtmlIntegrationPoint() || (currNs == "http://www.w3.org/1998/Math/MathML" && ((currentNode.getGroup() == MI_MO_MN_MS_MTEXT && group != MGLYPH_OR_MALIGNMARK) || (currentNode.getGroup() == ANNOTATION_XML && group == SVG))))) {
+ switch (group) {
+ case B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U:
+ case DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU:
+ case BODY:
+ case BR:
+ case RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR:
+ case DD_OR_DT:
+ case UL_OR_OL_OR_DL:
+ case EMBED:
+ case IMG:
+ case H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6:
+ case HEAD:
+ case HR:
+ case LI:
+ case META:
+ case NOBR:
+ case P:
+ case PRE_OR_LISTING:
+ case TABLE:
+ case FONT:
+ // re-check FONT to deal with the special case
+ if (!(group == FONT && !(attributes.contains(AttributeName.COLOR)
+ || attributes.contains(AttributeName.FACE) || attributes.contains(AttributeName.SIZE)))) {
+ errHtmlStartTagInForeignContext(name);
+ if (!fragment) {
+ while (!isSpecialParentInForeign(stack[currentPtr])) {
+ pop();
+ }
+ continue starttagloop;
+ } // else fall thru
+ }
+ // else fall thru
+ default:
+ if ("http://www.w3.org/2000/svg" == currNs) {
+ attributes.adjustForSvg();
+ if (selfClosing) {
+ appendVoidElementToCurrentMayFosterSVG(
+ elementName, attributes);
+ selfClosing = false;
+ } else {
+ appendToCurrentNodeAndPushElementMayFosterSVG(
+ elementName, attributes);
+ }
+ attributes = null; // CPP
+ break starttagloop;
+ } else {
+ attributes.adjustForMath();
+ if (selfClosing) {
+ appendVoidElementToCurrentMayFosterMathML(
+ elementName, attributes);
+ selfClosing = false;
+ } else {
+ appendToCurrentNodeAndPushElementMayFosterMathML(
+ elementName, attributes);
+ }
+ attributes = null; // CPP
+ break starttagloop;
+ }
+ } // switch
+ } // foreignObject / annotation-xml
+ }
+ switch (mode) {
+ case IN_TEMPLATE:
+ switch (group) {
+ case COL:
+ popTemplateMode();
+ pushTemplateMode(IN_COLUMN_GROUP);
+ mode = IN_COLUMN_GROUP;
+ // Reprocess token.
+ continue;
+ case CAPTION:
+ case COLGROUP:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ popTemplateMode();
+ pushTemplateMode(IN_TABLE);
+ mode = IN_TABLE;
+ // Reprocess token.
+ continue;
+ case TR:
+ popTemplateMode();
+ pushTemplateMode(IN_TABLE_BODY);
+ mode = IN_TABLE_BODY;
+ // Reprocess token.
+ continue;
+ case TD_OR_TH:
+ popTemplateMode();
+ pushTemplateMode(IN_ROW);
+ mode = IN_ROW;
+ // Reprocess token.
+ continue;
+ case META:
+ checkMetaCharset(attributes);
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case TITLE:
+ startTagTitleInHead(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case BASE:
+ case LINK_OR_BASEFONT_OR_BGSOUND:
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case SCRIPT:
+ startTagScriptInHead(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case NOFRAMES:
+ case STYLE:
+ startTagGenericRawText(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case TEMPLATE:
+ startTagTemplateInHead(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ popTemplateMode();
+ pushTemplateMode(IN_BODY);
+ mode = IN_BODY;
+ // Reprocess token.
+ continue;
+ }
+ case IN_ROW:
+ switch (group) {
+ case TD_OR_TH:
+ clearStackBackTo(findLastOrRoot(TreeBuilder.TR));
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ mode = IN_CELL;
+ insertMarker();
+ attributes = null; // CPP
+ break starttagloop;
+ case CAPTION:
+ case COL:
+ case COLGROUP:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TR:
+ eltPos = findLastOrRoot(TreeBuilder.TR);
+ if (eltPos == 0) {
+ assert fragment || isTemplateContents();
+ errNoTableRowToClose();
+ break starttagloop;
+ }
+ clearStackBackTo(eltPos);
+ pop();
+ mode = IN_TABLE_BODY;
+ continue;
+ default:
+ // fall through to IN_TABLE
+ }
+ case IN_TABLE_BODY:
+ switch (group) {
+ case TR:
+ clearStackBackTo(findLastInTableScopeOrRootTemplateTbodyTheadTfoot());
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ mode = IN_ROW;
+ attributes = null; // CPP
+ break starttagloop;
+ case TD_OR_TH:
+ errStartTagInTableBody(name);
+ clearStackBackTo(findLastInTableScopeOrRootTemplateTbodyTheadTfoot());
+ appendToCurrentNodeAndPushElement(
+ ElementName.TR,
+ HtmlAttributes.EMPTY_ATTRIBUTES);
+ mode = IN_ROW;
+ continue;
+ case CAPTION:
+ case COL:
+ case COLGROUP:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ eltPos = findLastInTableScopeOrRootTemplateTbodyTheadTfoot();
+ if (eltPos == 0 || stack[eltPos].getGroup() == TEMPLATE) {
+ assert fragment || isTemplateContents();
+ errStrayStartTag(name);
+ break starttagloop;
+ } else {
+ clearStackBackTo(eltPos);
+ pop();
+ mode = IN_TABLE;
+ continue;
+ }
+ default:
+ // fall through to IN_TABLE
+ }
+ case IN_TABLE:
+ intableloop: for (;;) {
+ switch (group) {
+ case CAPTION:
+ clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE));
+ insertMarker();
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ mode = IN_CAPTION;
+ attributes = null; // CPP
+ break starttagloop;
+ case COLGROUP:
+ clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE));
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ mode = IN_COLUMN_GROUP;
+ attributes = null; // CPP
+ break starttagloop;
+ case COL:
+ clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE));
+ appendToCurrentNodeAndPushElement(
+ ElementName.COLGROUP,
+ HtmlAttributes.EMPTY_ATTRIBUTES);
+ mode = IN_COLUMN_GROUP;
+ continue starttagloop;
+ case TBODY_OR_THEAD_OR_TFOOT:
+ clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE));
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ mode = IN_TABLE_BODY;
+ attributes = null; // CPP
+ break starttagloop;
+ case TR:
+ case TD_OR_TH:
+ clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE));
+ appendToCurrentNodeAndPushElement(
+ ElementName.TBODY,
+ HtmlAttributes.EMPTY_ATTRIBUTES);
+ mode = IN_TABLE_BODY;
+ continue starttagloop;
+ case TEMPLATE:
+ // fall through to IN_HEAD
+ break intableloop;
+ case TABLE:
+ errTableSeenWhileTableOpen();
+ eltPos = findLastInTableScope(name);
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ assert fragment || isTemplateContents();
+ break starttagloop;
+ }
+ generateImpliedEndTags();
+ if (errorHandler != null && !isCurrent("table")) {
+ errNoCheckUnclosedElementsOnStack();
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ resetTheInsertionMode();
+ continue starttagloop;
+ case SCRIPT:
+ // XXX need to manage much more stuff
+ // here if
+ // supporting
+ // document.write()
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.SCRIPT_DATA, elementName);
+ attributes = null; // CPP
+ break starttagloop;
+ case STYLE:
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.RAWTEXT, elementName);
+ attributes = null; // CPP
+ break starttagloop;
+ case INPUT:
+ errStartTagInTable(name);
+ if (!Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+ "hidden",
+ attributes.getValue(AttributeName.TYPE))) {
+ break intableloop;
+ }
+ appendVoidElementToCurrent(
+ name, attributes,
+ formPointer);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case FORM:
+ if (formPointer != null || isTemplateContents()) {
+ errFormWhenFormOpen();
+ break starttagloop;
+ } else {
+ errStartTagInTable(name);
+ appendVoidFormToCurrent(attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ }
+ default:
+ errStartTagInTable(name);
+ // fall through to IN_BODY
+ break intableloop;
+ }
+ }
+ case IN_CAPTION:
+ switch (group) {
+ case CAPTION:
+ case COL:
+ case COLGROUP:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TR:
+ case TD_OR_TH:
+ errStrayStartTag(name);
+ eltPos = findLastInTableScope("caption");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ break starttagloop;
+ }
+ generateImpliedEndTags();
+ if (errorHandler != null && currentPtr != eltPos) {
+ errNoCheckUnclosedElementsOnStack();
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ clearTheListOfActiveFormattingElementsUpToTheLastMarker();
+ mode = IN_TABLE;
+ continue;
+ default:
+ // fall through to IN_BODY
+ }
+ case IN_CELL:
+ switch (group) {
+ case CAPTION:
+ case COL:
+ case COLGROUP:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TR:
+ case TD_OR_TH:
+ eltPos = findLastInTableScopeTdTh();
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errNoCellToClose();
+ break starttagloop;
+ } else {
+ closeTheCell(eltPos);
+ continue;
+ }
+ default:
+ // fall through to IN_BODY
+ }
+ case FRAMESET_OK:
+ switch (group) {
+ case FRAMESET:
+ if (mode == FRAMESET_OK) {
+ if (currentPtr == 0 || stack[1].getGroup() != BODY) {
+ assert fragment || isTemplateContents();
+ errStrayStartTag(name);
+ break starttagloop;
+ } else {
+ errFramesetStart();
+ detachFromParent(stack[1].node);
+ while (currentPtr > 0) {
+ pop();
+ }
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ mode = IN_FRAMESET;
+ attributes = null; // CPP
+ break starttagloop;
+ }
+ } else {
+ errStrayStartTag(name);
+ break starttagloop;
+ }
+ // NOT falling through!
+ case PRE_OR_LISTING:
+ case LI:
+ case DD_OR_DT:
+ case BUTTON:
+ case MARQUEE_OR_APPLET:
+ case OBJECT:
+ case TABLE:
+ case AREA_OR_WBR:
+ case BR:
+ case EMBED:
+ case IMG:
+ case INPUT:
+ case KEYGEN:
+ case HR:
+ case TEXTAREA:
+ case XMP:
+ case IFRAME:
+ case SELECT:
+ if (mode == FRAMESET_OK
+ && !(group == INPUT && Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+ "hidden",
+ attributes.getValue(AttributeName.TYPE)))) {
+ framesetOk = false;
+ mode = IN_BODY;
+ }
+ // fall through to IN_BODY
+ default:
+ // fall through to IN_BODY
+ }
+ case IN_BODY:
+ inbodyloop: for (;;) {
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case BASE:
+ case LINK_OR_BASEFONT_OR_BGSOUND:
+ case META:
+ case STYLE:
+ case SCRIPT:
+ case TITLE:
+ case TEMPLATE:
+ // Fall through to IN_HEAD
+ break inbodyloop;
+ case BODY:
+ if (currentPtr == 0 || stack[1].getGroup() != BODY || isTemplateContents()) {
+ assert fragment || isTemplateContents();
+ errStrayStartTag(name);
+ break starttagloop;
+ }
+ errFooSeenWhenFooOpen(name);
+ framesetOk = false;
+ if (mode == FRAMESET_OK) {
+ mode = IN_BODY;
+ }
+ if (addAttributesToBody(attributes)) {
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case P:
+ case DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU:
+ case UL_OR_OL_OR_DL:
+ case ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY:
+ implicitlyCloseP();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6:
+ implicitlyCloseP();
+ if (stack[currentPtr].getGroup() == H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6) {
+ errHeadingWhenHeadingOpen();
+ pop();
+ }
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case FIELDSET:
+ implicitlyCloseP();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes, formPointer);
+ attributes = null; // CPP
+ break starttagloop;
+ case PRE_OR_LISTING:
+ implicitlyCloseP();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ needToDropLF = true;
+ attributes = null; // CPP
+ break starttagloop;
+ case FORM:
+ if (formPointer != null && !isTemplateContents()) {
+ errFormWhenFormOpen();
+ break starttagloop;
+ } else {
+ implicitlyCloseP();
+ appendToCurrentNodeAndPushFormElementMayFoster(attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ }
+ case LI:
+ case DD_OR_DT:
+ eltPos = currentPtr;
+ for (;;) {
+ StackNode<T> node = stack[eltPos]; // weak
+ // ref
+ if (node.getGroup() == group) { // LI or
+ // DD_OR_DT
+ generateImpliedEndTagsExceptFor(node.name);
+ if (errorHandler != null
+ && eltPos != currentPtr) {
+ errUnclosedElementsImplied(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ break;
+ } else if (eltPos == 0 || (node.isSpecial()
+ && (node.ns != "http://www.w3.org/1999/xhtml"
+ || (node.name != "p"
+ && node.name != "address"
+ && node.name != "div")))) {
+ break;
+ }
+ eltPos--;
+ }
+ implicitlyCloseP();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case PLAINTEXT:
+ implicitlyCloseP();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.PLAINTEXT, elementName);
+ attributes = null; // CPP
+ break starttagloop;
+ case A:
+ int activeAPos = findInListOfActiveFormattingElementsContainsBetweenEndAndLastMarker("a");
+ if (activeAPos != -1) {
+ errFooSeenWhenFooOpen(name);
+ StackNode<T> activeA = listOfActiveFormattingElements[activeAPos];
+ activeA.retain();
+ adoptionAgencyEndTag("a");
+ removeFromStack(activeA);
+ activeAPos = findInListOfActiveFormattingElements(activeA);
+ if (activeAPos != -1) {
+ removeFromListOfActiveFormattingElements(activeAPos);
+ }
+ activeA.release();
+ }
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushFormattingElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U:
+ case FONT:
+ reconstructTheActiveFormattingElements();
+ maybeForgetEarlierDuplicateFormattingElement(elementName.name, attributes);
+ appendToCurrentNodeAndPushFormattingElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case NOBR:
+ reconstructTheActiveFormattingElements();
+ if (TreeBuilder.NOT_FOUND_ON_STACK != findLastInScope("nobr")) {
+ errFooSeenWhenFooOpen(name);
+ adoptionAgencyEndTag("nobr");
+ reconstructTheActiveFormattingElements();
+ }
+ appendToCurrentNodeAndPushFormattingElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case BUTTON:
+ eltPos = findLastInScope(name);
+ if (eltPos != TreeBuilder.NOT_FOUND_ON_STACK) {
+ errFooSeenWhenFooOpen(name);
+ generateImpliedEndTags();
+ if (errorHandler != null
+ && !isCurrent(name)) {
+ errUnclosedElementsImplied(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ continue starttagloop;
+ } else {
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes, formPointer);
+ attributes = null; // CPP
+ break starttagloop;
+ }
+ case OBJECT:
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes, formPointer);
+ insertMarker();
+ attributes = null; // CPP
+ break starttagloop;
+ case MARQUEE_OR_APPLET:
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ insertMarker();
+ attributes = null; // CPP
+ break starttagloop;
+ case TABLE:
+ // The only quirk. Blame Hixie and
+ // Acid2.
+ if (!quirks) {
+ implicitlyCloseP();
+ }
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ mode = IN_TABLE;
+ attributes = null; // CPP
+ break starttagloop;
+ case BR:
+ case EMBED:
+ case AREA_OR_WBR:
+ reconstructTheActiveFormattingElements();
+ // FALL THROUGH to PARAM_OR_SOURCE_OR_TRACK
+ // CPPONLY: case MENUITEM:
+ case PARAM_OR_SOURCE_OR_TRACK:
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case HR:
+ implicitlyCloseP();
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case IMAGE:
+ errImage();
+ elementName = ElementName.IMG;
+ continue starttagloop;
+ case IMG:
+ case KEYGEN:
+ case INPUT:
+ reconstructTheActiveFormattingElements();
+ appendVoidElementToCurrentMayFoster(
+ name, attributes,
+ formPointer);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case ISINDEX:
+ errIsindex();
+ if (formPointer != null && !isTemplateContents()) {
+ break starttagloop;
+ }
+ implicitlyCloseP();
+ HtmlAttributes formAttrs = new HtmlAttributes(0);
+ int actionIndex = attributes.getIndex(AttributeName.ACTION);
+ if (actionIndex > -1) {
+ formAttrs.addAttribute(
+ AttributeName.ACTION,
+ attributes.getValueNoBoundsCheck(actionIndex)
+ // [NOCPP[
+ , XmlViolationPolicy.ALLOW
+ // ]NOCPP]
+ // CPPONLY: , attributes.getLineNoBoundsCheck(actionIndex)
+ );
+ }
+ appendToCurrentNodeAndPushFormElementMayFoster(formAttrs);
+ appendVoidElementToCurrentMayFoster(
+ ElementName.HR,
+ HtmlAttributes.EMPTY_ATTRIBUTES);
+ appendToCurrentNodeAndPushElementMayFoster(
+ ElementName.LABEL,
+ HtmlAttributes.EMPTY_ATTRIBUTES);
+ int promptIndex = attributes.getIndex(AttributeName.PROMPT);
+ if (promptIndex > -1) {
+ @Auto char[] prompt = Portability.newCharArrayFromString(attributes.getValueNoBoundsCheck(promptIndex));
+ appendCharacters(stack[currentPtr].node,
+ prompt, 0, prompt.length);
+ } else {
+ appendIsindexPrompt(stack[currentPtr].node);
+ }
+ HtmlAttributes inputAttributes = new HtmlAttributes(
+ 0);
+ inputAttributes.addAttribute(
+ AttributeName.NAME,
+ Portability.newStringFromLiteral("isindex")
+ // [NOCPP[
+ , XmlViolationPolicy.ALLOW
+ // ]NOCPP]
+ // CPPONLY: , tokenizer.getLineNumber()
+ );
+ for (int i = 0; i < attributes.getLength(); i++) {
+ AttributeName attributeQName = attributes.getAttributeNameNoBoundsCheck(i);
+ if (AttributeName.NAME == attributeQName
+ || AttributeName.PROMPT == attributeQName) {
+ attributes.releaseValue(i);
+ } else if (AttributeName.ACTION != attributeQName) {
+ inputAttributes.addAttribute(
+ attributeQName,
+ attributes.getValueNoBoundsCheck(i)
+ // [NOCPP[
+ , XmlViolationPolicy.ALLOW
+ // ]NOCPP]
+ // CPPONLY: , attributes.getLineNoBoundsCheck(i)
+ );
+ }
+ }
+ attributes.clearWithoutReleasingContents();
+ appendVoidElementToCurrentMayFoster(
+ "input",
+ inputAttributes, formPointer);
+ pop(); // label
+ appendVoidElementToCurrentMayFoster(
+ ElementName.HR,
+ HtmlAttributes.EMPTY_ATTRIBUTES);
+ pop(); // form
+
+ if (!isTemplateContents()) {
+ formPointer = null;
+ }
+
+ selfClosing = false;
+ // Portability.delete(formAttrs);
+ // Portability.delete(inputAttributes);
+ // Don't delete attributes, they are deleted
+ // later
+ break starttagloop;
+ case TEXTAREA:
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes, formPointer);
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.RCDATA, elementName);
+ originalMode = mode;
+ mode = TEXT;
+ needToDropLF = true;
+ attributes = null; // CPP
+ break starttagloop;
+ case XMP:
+ implicitlyCloseP();
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.RAWTEXT, elementName);
+ attributes = null; // CPP
+ break starttagloop;
+ case NOSCRIPT:
+ if (!scriptingEnabled) {
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ } else {
+ // fall through
+ }
+ case NOFRAMES:
+ case IFRAME:
+ case NOEMBED:
+ startTagGenericRawText(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case SELECT:
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes, formPointer);
+ switch (mode) {
+ case IN_TABLE:
+ case IN_CAPTION:
+ case IN_COLUMN_GROUP:
+ case IN_TABLE_BODY:
+ case IN_ROW:
+ case IN_CELL:
+ mode = IN_SELECT_IN_TABLE;
+ break;
+ default:
+ mode = IN_SELECT;
+ break;
+ }
+ attributes = null; // CPP
+ break starttagloop;
+ case OPTGROUP:
+ case OPTION:
+ if (isCurrent("option")) {
+ pop();
+ }
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case RB_OR_RTC:
+ eltPos = findLastInScope("ruby");
+ if (eltPos != NOT_FOUND_ON_STACK) {
+ generateImpliedEndTags();
+ }
+ if (eltPos != currentPtr) {
+ if (eltPos == NOT_FOUND_ON_STACK) {
+ errStartTagSeenWithoutRuby(name);
+ } else {
+ errUnclosedChildrenInRuby();
+ }
+ }
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case RT_OR_RP:
+ eltPos = findLastInScope("ruby");
+ if (eltPos != NOT_FOUND_ON_STACK) {
+ generateImpliedEndTagsExceptFor("rtc");
+ }
+ if (eltPos != currentPtr) {
+ if (!isCurrent("rtc")) {
+ if (eltPos == NOT_FOUND_ON_STACK) {
+ errStartTagSeenWithoutRuby(name);
+ } else {
+ errUnclosedChildrenInRuby();
+ }
+ }
+ }
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case MATH:
+ reconstructTheActiveFormattingElements();
+ attributes.adjustForMath();
+ if (selfClosing) {
+ appendVoidElementToCurrentMayFosterMathML(
+ elementName, attributes);
+ selfClosing = false;
+ } else {
+ appendToCurrentNodeAndPushElementMayFosterMathML(
+ elementName, attributes);
+ }
+ attributes = null; // CPP
+ break starttagloop;
+ case SVG:
+ reconstructTheActiveFormattingElements();
+ attributes.adjustForSvg();
+ if (selfClosing) {
+ appendVoidElementToCurrentMayFosterSVG(
+ elementName,
+ attributes);
+ selfClosing = false;
+ } else {
+ appendToCurrentNodeAndPushElementMayFosterSVG(
+ elementName, attributes);
+ }
+ attributes = null; // CPP
+ break starttagloop;
+ case CAPTION:
+ case COL:
+ case COLGROUP:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TR:
+ case TD_OR_TH:
+ case FRAME:
+ case FRAMESET:
+ case HEAD:
+ errStrayStartTag(name);
+ break starttagloop;
+ case OUTPUT:
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes, formPointer);
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ reconstructTheActiveFormattingElements();
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ }
+ }
+ case IN_HEAD:
+ inheadloop: for (;;) {
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case BASE:
+ case LINK_OR_BASEFONT_OR_BGSOUND:
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case META:
+ // Fall through to IN_HEAD_NOSCRIPT
+ break inheadloop;
+ case TITLE:
+ startTagTitleInHead(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case NOSCRIPT:
+ if (scriptingEnabled) {
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.RAWTEXT, elementName);
+ } else {
+ appendToCurrentNodeAndPushElementMayFoster(
+ elementName,
+ attributes);
+ mode = IN_HEAD_NOSCRIPT;
+ }
+ attributes = null; // CPP
+ break starttagloop;
+ case SCRIPT:
+ startTagScriptInHead(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case STYLE:
+ case NOFRAMES:
+ startTagGenericRawText(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case HEAD:
+ /* Parse error. */
+ errFooSeenWhenFooOpen(name);
+ /* Ignore the token. */
+ break starttagloop;
+ case TEMPLATE:
+ startTagTemplateInHead(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ pop();
+ mode = AFTER_HEAD;
+ continue starttagloop;
+ }
+ }
+ case IN_HEAD_NOSCRIPT:
+ switch (group) {
+ case HTML:
+ // XXX did Hixie really mean to omit "base"
+ // here?
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case LINK_OR_BASEFONT_OR_BGSOUND:
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case META:
+ checkMetaCharset(attributes);
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case STYLE:
+ case NOFRAMES:
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.RAWTEXT, elementName);
+ attributes = null; // CPP
+ break starttagloop;
+ case HEAD:
+ errFooSeenWhenFooOpen(name);
+ break starttagloop;
+ case NOSCRIPT:
+ errFooSeenWhenFooOpen(name);
+ break starttagloop;
+ default:
+ errBadStartTagInHead(name);
+ pop();
+ mode = IN_HEAD;
+ continue;
+ }
+ case IN_COLUMN_GROUP:
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case COL:
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ case TEMPLATE:
+ startTagTemplateInHead(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ if (currentPtr == 0 || stack[currentPtr].getGroup() == TEMPLATE) {
+ assert fragment || isTemplateContents();
+ errGarbageInColgroup();
+ break starttagloop;
+ }
+ pop();
+ mode = IN_TABLE;
+ continue;
+ }
+ case IN_SELECT_IN_TABLE:
+ switch (group) {
+ case CAPTION:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TR:
+ case TD_OR_TH:
+ case TABLE:
+ errStartTagWithSelectOpen(name);
+ eltPos = findLastInTableScope("select");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ assert fragment;
+ break starttagloop; // http://www.w3.org/Bugs/Public/show_bug.cgi?id=8375
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ resetTheInsertionMode();
+ continue;
+ default:
+ // fall through to IN_SELECT
+ }
+ case IN_SELECT:
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case OPTION:
+ if (isCurrent("option")) {
+ pop();
+ }
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case OPTGROUP:
+ if (isCurrent("option")) {
+ pop();
+ }
+ if (isCurrent("optgroup")) {
+ pop();
+ }
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case SELECT:
+ errStartSelectWhereEndSelectExpected();
+ eltPos = findLastInTableScope(name);
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ assert fragment;
+ errNoSelectInTableScope();
+ break starttagloop;
+ } else {
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ resetTheInsertionMode();
+ break starttagloop;
+ }
+ case INPUT:
+ case TEXTAREA:
+ case KEYGEN:
+ errStartTagWithSelectOpen(name);
+ eltPos = findLastInTableScope("select");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ assert fragment;
+ break starttagloop;
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ resetTheInsertionMode();
+ continue;
+ case SCRIPT:
+ startTagScriptInHead(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case TEMPLATE:
+ startTagTemplateInHead(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ errStrayStartTag(name);
+ break starttagloop;
+ }
+ case AFTER_BODY:
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ default:
+ errStrayStartTag(name);
+ mode = framesetOk ? FRAMESET_OK : IN_BODY;
+ continue;
+ }
+ case IN_FRAMESET:
+ switch (group) {
+ case FRAMESET:
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ case FRAME:
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ // fall through to AFTER_FRAMESET
+ }
+ case AFTER_FRAMESET:
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case NOFRAMES:
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.RAWTEXT, elementName);
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ errStrayStartTag(name);
+ break starttagloop;
+ }
+ case INITIAL:
+ /*
+ * Parse error.
+ */
+ // [NOCPP[
+ switch (doctypeExpectation) {
+ case AUTO:
+ err("Start tag seen without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
+ break;
+ case HTML:
+ // ]NOCPP]
+ errStartTagWithoutDoctype();
+ // [NOCPP[
+ break;
+ case HTML401_STRICT:
+ err("Start tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
+ break;
+ case HTML401_TRANSITIONAL:
+ err("Start tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
+ break;
+ case NO_DOCTYPE_ERRORS:
+ }
+ // ]NOCPP]
+ /*
+ *
+ * Set the document to quirks mode.
+ */
+ documentModeInternal(DocumentMode.QUIRKS_MODE, null, null,
+ false);
+ /*
+ * Then, switch to the root element mode of the tree
+ * construction stage
+ */
+ mode = BEFORE_HTML;
+ /*
+ * and reprocess the current token.
+ */
+ continue;
+ case BEFORE_HTML:
+ switch (group) {
+ case HTML:
+ // optimize error check and streaming SAX by
+ // hoisting
+ // "html" handling here.
+ if (attributes == HtmlAttributes.EMPTY_ATTRIBUTES) {
+ // This has the right magic side effect
+ // that
+ // it
+ // makes attributes in SAX Tree mutable.
+ appendHtmlElementToDocumentAndPush();
+ } else {
+ appendHtmlElementToDocumentAndPush(attributes);
+ }
+ // XXX application cache should fire here
+ mode = BEFORE_HEAD;
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ /*
+ * Create an HTMLElement node with the tag name
+ * html, in the HTML namespace. Append it to the
+ * Document object.
+ */
+ appendHtmlElementToDocumentAndPush();
+ /* Switch to the main mode */
+ mode = BEFORE_HEAD;
+ /*
+ * reprocess the current token.
+ */
+ continue;
+ }
+ case BEFORE_HEAD:
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case HEAD:
+ /*
+ * A start tag whose tag name is "head"
+ *
+ * Create an element for the token.
+ *
+ * Set the head element pointer to this new element
+ * node.
+ *
+ * Append the new element to the current node and
+ * push it onto the stack of open elements.
+ */
+ appendToCurrentNodeAndPushHeadElement(attributes);
+ /*
+ * Change the insertion mode to "in head".
+ */
+ mode = IN_HEAD;
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ /*
+ * Any other start tag token
+ *
+ * Act as if a start tag token with the tag name
+ * "head" and no attributes had been seen,
+ */
+ appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES);
+ mode = IN_HEAD;
+ /*
+ * then reprocess the current token.
+ *
+ * This will result in an empty head element being
+ * generated, with the current token being
+ * reprocessed in the "after head" insertion mode.
+ */
+ continue;
+ }
+ case AFTER_HEAD:
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case BODY:
+ if (attributes.getLength() == 0) {
+ // This has the right magic side effect
+ // that
+ // it
+ // makes attributes in SAX Tree mutable.
+ appendToCurrentNodeAndPushBodyElement();
+ } else {
+ appendToCurrentNodeAndPushBodyElement(attributes);
+ }
+ framesetOk = false;
+ mode = IN_BODY;
+ attributes = null; // CPP
+ break starttagloop;
+ case FRAMESET:
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ mode = IN_FRAMESET;
+ attributes = null; // CPP
+ break starttagloop;
+ case TEMPLATE:
+ errFooBetweenHeadAndBody(name);
+ pushHeadPointerOntoStack();
+ StackNode<T> headOnStack = stack[currentPtr];
+ startTagTemplateInHead(elementName, attributes);
+ removeFromStack(headOnStack);
+ attributes = null; // CPP
+ break starttagloop;
+ case BASE:
+ case LINK_OR_BASEFONT_OR_BGSOUND:
+ errFooBetweenHeadAndBody(name);
+ pushHeadPointerOntoStack();
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ pop(); // head
+ attributes = null; // CPP
+ break starttagloop;
+ case META:
+ errFooBetweenHeadAndBody(name);
+ checkMetaCharset(attributes);
+ pushHeadPointerOntoStack();
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ attributes);
+ selfClosing = false;
+ pop(); // head
+ attributes = null; // CPP
+ break starttagloop;
+ case SCRIPT:
+ errFooBetweenHeadAndBody(name);
+ pushHeadPointerOntoStack();
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.SCRIPT_DATA, elementName);
+ attributes = null; // CPP
+ break starttagloop;
+ case STYLE:
+ case NOFRAMES:
+ errFooBetweenHeadAndBody(name);
+ pushHeadPointerOntoStack();
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.RAWTEXT, elementName);
+ attributes = null; // CPP
+ break starttagloop;
+ case TITLE:
+ errFooBetweenHeadAndBody(name);
+ pushHeadPointerOntoStack();
+ appendToCurrentNodeAndPushElement(
+ elementName,
+ attributes);
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(
+ Tokenizer.RCDATA, elementName);
+ attributes = null; // CPP
+ break starttagloop;
+ case HEAD:
+ errStrayStartTag(name);
+ break starttagloop;
+ default:
+ appendToCurrentNodeAndPushBodyElement();
+ mode = FRAMESET_OK;
+ continue;
+ }
+ case AFTER_AFTER_BODY:
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ default:
+ errStrayStartTag(name);
+ fatal();
+ mode = framesetOk ? FRAMESET_OK : IN_BODY;
+ continue;
+ }
+ case AFTER_AFTER_FRAMESET:
+ switch (group) {
+ case HTML:
+ errStrayStartTag(name);
+ if (!fragment && !isTemplateContents()) {
+ addAttributesToHtml(attributes);
+ attributes = null; // CPP
+ }
+ break starttagloop;
+ case NOFRAMES:
+ startTagGenericRawText(elementName, attributes);
+ attributes = null; // CPP
+ break starttagloop;
+ default:
+ errStrayStartTag(name);
+ break starttagloop;
+ }
+ case TEXT:
+ assert false;
+ break starttagloop; // Avoid infinite loop if the assertion
+ // fails
+ }
+ }
+ if (selfClosing) {
+ errSelfClosing();
+ }
+ // CPPONLY: if (mBuilder == null && attributes != HtmlAttributes.EMPTY_ATTRIBUTES) {
+ // CPPONLY: Portability.delete(attributes);
+ // CPPONLY: }
+ }
+
+ /**
+ * Inserts the element for a start tag whose content is tokenized as RCDATA
+ * and switches the tree builder to TEXT mode.
+ *
+ * @param elementName the element to insert; also handed to the tokenizer as the expected end tag
+ * @param attributes the attributes for the new element
+ * @throws SAXException presumably propagated from the insertion helper
+ */
+ private void startTagTitleInHead(ElementName elementName, HtmlAttributes attributes) throws SAXException {
+ appendToCurrentNodeAndPushElementMayFoster(elementName, attributes);
+ // Remember the current mode before overwriting it; the TEXT case of
+ // endTag() assigns originalMode back to mode.
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(Tokenizer.RCDATA, elementName);
+ }
+
+ /**
+ * Inserts the element for a start tag whose content is tokenized as RAWTEXT
+ * and switches the tree builder to TEXT mode.
+ *
+ * @param elementName the element to insert; also handed to the tokenizer as the expected end tag
+ * @param attributes the attributes for the new element
+ * @throws SAXException presumably propagated from the insertion helper
+ */
+ private void startTagGenericRawText(ElementName elementName, HtmlAttributes attributes) throws SAXException {
+ appendToCurrentNodeAndPushElementMayFoster(elementName, attributes);
+ // Remember the current mode before overwriting it; the TEXT case of
+ // endTag() assigns originalMode back to mode.
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(Tokenizer.RAWTEXT, elementName);
+ }
+
+ /**
+ * Inserts the element for a start tag whose content is tokenized as script
+ * data and switches the tree builder to TEXT mode.
+ *
+ * @param elementName the element to insert; also handed to the tokenizer as the expected end tag
+ * @param attributes the attributes for the new element
+ * @throws SAXException presumably propagated from the insertion helper
+ */
+ private void startTagScriptInHead(ElementName elementName, HtmlAttributes attributes) throws SAXException {
+ // XXX need to manage much more stuff here if supporting document.write()
+ appendToCurrentNodeAndPushElementMayFoster(elementName, attributes);
+ // Remember the current mode before overwriting it; the TEXT case of
+ // endTag() assigns originalMode back to mode.
+ originalMode = mode;
+ mode = TEXT;
+ tokenizer.setStateAndEndTagExpectation(Tokenizer.SCRIPT_DATA, elementName);
+ }
+
+ /**
+ * Inserts the element for a <code>template</code> start tag and switches the
+ * tree builder to IN_TEMPLATE mode.
+ *
+ * <p>
+ * Unlike the raw-text helpers this uses the non-fostering insertion helper,
+ * and it also inserts a marker, clears <code>framesetOk</code> and pushes
+ * IN_TEMPLATE as a template mode.
+ *
+ * @param elementName the element to insert
+ * @param attributes the attributes for the new element
+ * @throws SAXException presumably propagated from the insertion helper
+ */
+ private void startTagTemplateInHead(ElementName elementName, HtmlAttributes attributes) throws SAXException {
+ appendToCurrentNodeAndPushElement(elementName, attributes);
+ // NOTE(review): presumably a marker in the list of active formatting
+ // elements; endTagTemplateInHead() clears that list up to the last marker.
+ insertMarker();
+ framesetOk = false;
+ // Save the mode that was current when the template start tag was seen.
+ originalMode = mode;
+ mode = IN_TEMPLATE;
+ // endTagTemplateInHead() undoes this push with popTemplateMode().
+ pushTemplateMode(IN_TEMPLATE);
+ }
+
+ /**
+  * Tells whether <code>findLast("template")</code> locates a
+  * <code>template</code> entry, i.e. returns something other than
+  * <code>NOT_FOUND_ON_STACK</code>.
+  *
+  * @return <code>true</code> if a <code>template</code> entry was found
+  */
+ private boolean isTemplateContents() {
+     int templatePos = findLast("template");
+     return templatePos != TreeBuilder.NOT_FOUND_ON_STACK;
+ }
+
+ /**
+  * Tells whether the template mode pointer is at its <code>-1</code>
+  * position, which this method treats as "no template modes pushed".
+  *
+  * @return <code>true</code> if <code>templateModePtr</code> is <code>-1</code>
+  */
+ private boolean isTemplateModeStackEmpty() {
+     return -1 == templateModePtr;
+ }
+
+ /**
+  * Classifies a stack node as a "special parent" for foreign-content
+  * handling. A node qualifies if it is in the XHTML namespace, if it is an
+  * HTML integration point, or if it is a MathML node whose group is
+  * <code>MI_MO_MN_MS_MTEXT</code>.
+  *
+  * <p>
+  * Namespace URIs are compared by identity, as elsewhere in this file.
+  *
+  * @param stackNode the stack entry to classify
+  * @return <code>true</code> if any of the three conditions holds
+  */
+ private boolean isSpecialParentInForeign(StackNode<T> stackNode) {
+     @NsUri String ns = stackNode.ns;
+     if ("http://www.w3.org/1999/xhtml" == ns) {
+         return true;
+     }
+     if (stackNode.isHtmlIntegrationPoint()) {
+         return true;
+     }
+     // Only the MathML text-integration group is left to check.
+     return ("http://www.w3.org/1998/Math/MathML" == ns)
+             && (stackNode.getGroup() == MI_MO_MN_MS_MTEXT);
+ }
+
+ /**
+  * Scans the value of a <code>meta</code> element's <code>content</code>
+  * attribute for a <code>charset</code> parameter and returns its value.
+  *
+  * <p>
+  * The scan looks for the ASCII case-insensitive word <code>charset</code>,
+  * optional whitespace, an equals sign, optional whitespace and then a
+  * single-quoted, double-quoted or unquoted value. An unquoted value ends at
+  * whitespace, a semicolon or the end of the string. If
+  * <code>charset</code> is not followed by an equals sign, the search
+  * resumes from the offending character instead of giving up.
+  *
+  * <p>
+  * C++ memory note: The return value must be released.
+  *
+  * @param attributeValue the attribute value to scan
+  * @return the extracted charset label, or <code>null</code> if no
+  *         <code>charset=</code> parameter with a value was found
+  */
+ public static String extractCharsetFromContent(String attributeValue
+         // CPPONLY: , TreeBuilder tb
+ ) {
+     // This is a bit ugly. Converting the string to char array in order to
+     // make the portability layer smaller.
+     int charsetState = CHARSET_INITIAL;
+     int start = -1;
+     int end = -1;
+     @Auto char[] buffer = Portability.newCharArrayFromString(attributeValue);
+
+     // In the keyword states below, a mismatching 'c'/'C' restarts the match
+     // at CHARSET_C rather than CHARSET_INITIAL: "charset" contains only one
+     // 'c', so that is the only possible overlap (e.g. "ccharset=utf-8").
+     charsetloop: for (int i = 0; i < buffer.length; i++) {
+         char c = buffer[i];
+         switch (charsetState) {
+             case CHARSET_INITIAL:
+                 switch (c) {
+                     case 'c':
+                     case 'C':
+                         charsetState = CHARSET_C;
+                         continue;
+                     default:
+                         continue;
+                 }
+             case CHARSET_C:
+                 switch (c) {
+                     case 'h':
+                     case 'H':
+                         charsetState = CHARSET_H;
+                         continue;
+                     case 'c':
+                     case 'C':
+                         charsetState = CHARSET_C;
+                         continue;
+                     default:
+                         charsetState = CHARSET_INITIAL;
+                         continue;
+                 }
+             case CHARSET_H:
+                 switch (c) {
+                     case 'a':
+                     case 'A':
+                         charsetState = CHARSET_A;
+                         continue;
+                     case 'c':
+                     case 'C':
+                         charsetState = CHARSET_C;
+                         continue;
+                     default:
+                         charsetState = CHARSET_INITIAL;
+                         continue;
+                 }
+             case CHARSET_A:
+                 switch (c) {
+                     case 'r':
+                     case 'R':
+                         charsetState = CHARSET_R;
+                         continue;
+                     case 'c':
+                     case 'C':
+                         charsetState = CHARSET_C;
+                         continue;
+                     default:
+                         charsetState = CHARSET_INITIAL;
+                         continue;
+                 }
+             case CHARSET_R:
+                 switch (c) {
+                     case 's':
+                     case 'S':
+                         charsetState = CHARSET_S;
+                         continue;
+                     case 'c':
+                     case 'C':
+                         charsetState = CHARSET_C;
+                         continue;
+                     default:
+                         charsetState = CHARSET_INITIAL;
+                         continue;
+                 }
+             case CHARSET_S:
+                 switch (c) {
+                     case 'e':
+                     case 'E':
+                         charsetState = CHARSET_E;
+                         continue;
+                     case 'c':
+                     case 'C':
+                         charsetState = CHARSET_C;
+                         continue;
+                     default:
+                         charsetState = CHARSET_INITIAL;
+                         continue;
+                 }
+             case CHARSET_E:
+                 switch (c) {
+                     case 't':
+                     case 'T':
+                         charsetState = CHARSET_T;
+                         continue;
+                     case 'c':
+                     case 'C':
+                         charsetState = CHARSET_C;
+                         continue;
+                     default:
+                         charsetState = CHARSET_INITIAL;
+                         continue;
+                 }
+             case CHARSET_T:
+                 switch (c) {
+                     case '\t':
+                     case '\n':
+                     case '\u000C':
+                     case '\r':
+                     case ' ':
+                         continue;
+                     case '=':
+                         charsetState = CHARSET_EQUALS;
+                         continue;
+                     case 'c':
+                     case 'C':
+                         // "charset" without '=': the offending character
+                         // may itself begin the next "charset".
+                         charsetState = CHARSET_C;
+                         continue;
+                     default:
+                         // "charset" without '=': keep searching the rest
+                         // of the string instead of giving up.
+                         charsetState = CHARSET_INITIAL;
+                         continue;
+                 }
+             case CHARSET_EQUALS:
+                 switch (c) {
+                     case '\t':
+                     case '\n':
+                     case '\u000C':
+                     case '\r':
+                     case ' ':
+                         continue;
+                     case '\'':
+                         start = i + 1;
+                         charsetState = CHARSET_SINGLE_QUOTED;
+                         continue;
+                     case '\"':
+                         start = i + 1;
+                         charsetState = CHARSET_DOUBLE_QUOTED;
+                         continue;
+                     default:
+                         start = i;
+                         charsetState = CHARSET_UNQUOTED;
+                         continue;
+                 }
+             case CHARSET_SINGLE_QUOTED:
+                 switch (c) {
+                     case '\'':
+                         end = i;
+                         break charsetloop;
+                     default:
+                         continue;
+                 }
+             case CHARSET_DOUBLE_QUOTED:
+                 switch (c) {
+                     case '\"':
+                         end = i;
+                         break charsetloop;
+                     default:
+                         continue;
+                 }
+             case CHARSET_UNQUOTED:
+                 switch (c) {
+                     case '\t':
+                     case '\n':
+                     case '\u000C':
+                     case '\r':
+                     case ' ':
+                     case ';':
+                         end = i;
+                         break charsetloop;
+                     default:
+                         continue;
+                 }
+         }
+     }
+     String charset = null;
+     if (start != -1) {
+         if (end == -1) {
+             // The value ran to the end of the string. This also covers a
+             // quoted value with no closing quote.
+             // NOTE(review): the HTML Standard appears to return nothing for
+             // an unmatched quote; historical behavior is kept here -- confirm
+             // before changing.
+             end = buffer.length;
+         }
+         charset = Portability.newStringFromBuffer(buffer, start, end
+                 - start
+                 // CPPONLY: , tb
+         );
+     }
+     return charset;
+ }
+
+ /**
+  * Inspects the attributes of a <code>meta</code> start tag for an encoding
+  * declaration and reports it to the tokenizer.
+  *
+  * <p>
+  * A <code>charset</code> attribute takes precedence. Otherwise, the
+  * <code>content</code> attribute is consulted only when
+  * <code>http-equiv</code> matches <code>content-type</code>. Whenever the
+  * tokenizer's <code>internalEncodingDeclaration</code> returns
+  * <code>true</code>, suspension is requested.
+  *
+  * @param attributes the attributes of the <code>meta</code> element
+  * @throws SAXException presumably propagated from the tokenizer callback
+  */
+ private void checkMetaCharset(HtmlAttributes attributes)
+         throws SAXException {
+     String declared = attributes.getValue(AttributeName.CHARSET);
+     if (declared != null) {
+         // An explicit charset attribute settles the matter either way.
+         if (tokenizer.internalEncodingDeclaration(declared)) {
+             requestSuspension();
+         }
+         return;
+     }
+     if (!Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+             "content-type",
+             attributes.getValue(AttributeName.HTTP_EQUIV))) {
+         return;
+     }
+     String content = attributes.getValue(AttributeName.CONTENT);
+     if (content == null) {
+         return;
+     }
+     String label = TreeBuilder.extractCharsetFromContent(content
+             // CPPONLY: , this
+     );
+     // No early return from here on: the extracted string must be released.
+     if (label != null && tokenizer.internalEncodingDeclaration(label)) {
+         requestSuspension();
+     }
+     Portability.releaseString(label);
+ }
+
+ /**
+ * Processes an end tag token for the element identified by
+ * <code>elementName</code>.
+ *
+ * <p>
+ * After flushing pending characters and clearing <code>needToDropLF</code>,
+ * the method loops over the current insertion mode. <code>continue</code>
+ * dispatches the same token again (normally after <code>mode</code> or the
+ * stack has been changed), while <code>break endtagloop</code> finishes the
+ * token. Many mode cases deliberately fall through into the next case label,
+ * so the order of the cases is significant.
+ *
+ * @param elementName the name of the end tag; its group and name drive the dispatch
+ * @throws SAXException presumably propagated from the error-reporting and tree-mutation helpers
+ */
+ public final void endTag(ElementName elementName) throws SAXException {
+ flushCharacters();
+ needToDropLF = false;
+ int eltPos;
+ int group = elementName.getGroup();
+ @Local String name = elementName.name;
+ endtagloop: for (;;) {
+ // Foreign content: report a mismatch with the current node, then walk
+ // down the stack looking for an element with this name.
+ if (isInForeign()) {
+ if (stack[currentPtr].name != name) {
+ if (currentPtr == 0) {
+ errStrayEndTag(name);
+ } else {
+ errEndTagDidNotMatchCurrentOpenElement(name, stack[currentPtr].popName);
+ }
+ }
+ eltPos = currentPtr;
+ for (;;) {
+ if (eltPos == 0) {
+ assert fragment: "We can get this close to the root of the stack in foreign content only in the fragment case.";
+ break endtagloop;
+ }
+ if (stack[eltPos].name == name) {
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ break endtagloop;
+ }
+ // Step to the next lower entry; once that entry is in the HTML
+ // namespace, stop walking and dispatch on the insertion mode below.
+ if (stack[--eltPos].ns == "http://www.w3.org/1999/xhtml") {
+ break;
+ }
+ }
+ }
+ switch (mode) {
+ case IN_TEMPLATE:
+ switch (group) {
+ case TEMPLATE:
+ // fall through to IN_HEAD
+ // This break leaves only the inner switch; execution then
+ // falls through the following mode cases, each of which
+ // passes TEMPLATE along, until IN_HEAD handles it.
+ break;
+ default:
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case IN_ROW:
+ switch (group) {
+ case TR:
+ eltPos = findLastOrRoot(TreeBuilder.TR);
+ if (eltPos == 0) {
+ assert fragment || isTemplateContents();
+ errNoTableRowToClose();
+ break endtagloop;
+ }
+ clearStackBackTo(eltPos);
+ pop();
+ mode = IN_TABLE_BODY;
+ break endtagloop;
+ case TABLE:
+ eltPos = findLastOrRoot(TreeBuilder.TR);
+ if (eltPos == 0) {
+ assert fragment || isTemplateContents();
+ errNoTableRowToClose();
+ break endtagloop;
+ }
+ clearStackBackTo(eltPos);
+ pop();
+ mode = IN_TABLE_BODY;
+ continue;
+ case TBODY_OR_THEAD_OR_TFOOT:
+ if (findLastInTableScope(name) == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ eltPos = findLastOrRoot(TreeBuilder.TR);
+ if (eltPos == 0) {
+ assert fragment || isTemplateContents();
+ errNoTableRowToClose();
+ break endtagloop;
+ }
+ clearStackBackTo(eltPos);
+ pop();
+ mode = IN_TABLE_BODY;
+ continue;
+ case BODY:
+ case CAPTION:
+ case COL:
+ case COLGROUP:
+ case HTML:
+ case TD_OR_TH:
+ errStrayEndTag(name);
+ break endtagloop;
+ default:
+ // fall through to IN_TABLE
+ }
+ case IN_TABLE_BODY:
+ switch (group) {
+ case TBODY_OR_THEAD_OR_TFOOT:
+ eltPos = findLastOrRoot(name);
+ if (eltPos == 0) {
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ clearStackBackTo(eltPos);
+ pop();
+ mode = IN_TABLE;
+ break endtagloop;
+ case TABLE:
+ eltPos = findLastInTableScopeOrRootTemplateTbodyTheadTfoot();
+ if (eltPos == 0 || stack[eltPos].getGroup() == TEMPLATE) {
+ assert fragment || isTemplateContents();
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ clearStackBackTo(eltPos);
+ pop();
+ mode = IN_TABLE;
+ continue;
+ case BODY:
+ case CAPTION:
+ case COL:
+ case COLGROUP:
+ case HTML:
+ case TD_OR_TH:
+ case TR:
+ errStrayEndTag(name);
+ break endtagloop;
+ default:
+ // fall through to IN_TABLE
+ }
+ case IN_TABLE:
+ switch (group) {
+ case TABLE:
+ eltPos = findLast("table");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ assert fragment || isTemplateContents();
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ resetTheInsertionMode();
+ break endtagloop;
+ case BODY:
+ case CAPTION:
+ case COL:
+ case COLGROUP:
+ case HTML:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TD_OR_TH:
+ case TR:
+ errStrayEndTag(name);
+ break endtagloop;
+ case TEMPLATE:
+ // fall through to IN_HEAD
+ break;
+ default:
+ errStrayEndTag(name);
+ // fall through to IN_BODY
+ }
+ case IN_CAPTION:
+ switch (group) {
+ case CAPTION:
+ eltPos = findLastInTableScope("caption");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ break endtagloop;
+ }
+ generateImpliedEndTags();
+ if (errorHandler != null && currentPtr != eltPos) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ clearTheListOfActiveFormattingElementsUpToTheLastMarker();
+ mode = IN_TABLE;
+ break endtagloop;
+ case TABLE:
+ errTableClosedWhileCaptionOpen();
+ eltPos = findLastInTableScope("caption");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ break endtagloop;
+ }
+ generateImpliedEndTags();
+ if (errorHandler != null && currentPtr != eltPos) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ clearTheListOfActiveFormattingElementsUpToTheLastMarker();
+ mode = IN_TABLE;
+ continue;
+ case BODY:
+ case COL:
+ case COLGROUP:
+ case HTML:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TD_OR_TH:
+ case TR:
+ errStrayEndTag(name);
+ break endtagloop;
+ default:
+ // fall through to IN_BODY
+ }
+ case IN_CELL:
+ switch (group) {
+ case TD_OR_TH:
+ eltPos = findLastInTableScope(name);
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ generateImpliedEndTags();
+ if (errorHandler != null && !isCurrent(name)) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ clearTheListOfActiveFormattingElementsUpToTheLastMarker();
+ mode = IN_ROW;
+ break endtagloop;
+ case TABLE:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TR:
+ if (findLastInTableScope(name) == TreeBuilder.NOT_FOUND_ON_STACK) {
+ assert name == "tbody" || name == "tfoot" || name == "thead" || fragment || isTemplateContents();
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ closeTheCell(findLastInTableScopeTdTh());
+ continue;
+ case BODY:
+ case CAPTION:
+ case COL:
+ case COLGROUP:
+ case HTML:
+ errStrayEndTag(name);
+ break endtagloop;
+ default:
+ // fall through to IN_BODY
+ }
+ case FRAMESET_OK:
+ case IN_BODY:
+ switch (group) {
+ case BODY:
+ if (!isSecondOnStackBody()) {
+ assert fragment || isTemplateContents();
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ assert currentPtr >= 1;
+ // The scan below only decides whether to report an error;
+ // it does not modify the stack.
+ if (errorHandler != null) {
+ uncloseloop1: for (int i = 2; i <= currentPtr; i++) {
+ switch (stack[i].getGroup()) {
+ case DD_OR_DT:
+ case LI:
+ case OPTGROUP:
+ case OPTION: // is this possible?
+ case P:
+ case RB_OR_RTC:
+ case RT_OR_RP:
+ case TD_OR_TH:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ break;
+ default:
+ errEndWithUnclosedElements(name);
+ break uncloseloop1;
+ }
+ }
+ }
+ mode = AFTER_BODY;
+ break endtagloop;
+ case HTML:
+ if (!isSecondOnStackBody()) {
+ assert fragment || isTemplateContents();
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ if (errorHandler != null) {
+ uncloseloop2: for (int i = 0; i <= currentPtr; i++) {
+ switch (stack[i].getGroup()) {
+ case DD_OR_DT:
+ case LI:
+ case P:
+ case RB_OR_RTC:
+ case RT_OR_RP:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TD_OR_TH:
+ case BODY:
+ case HTML:
+ break;
+ default:
+ errEndWithUnclosedElements(name);
+ break uncloseloop2;
+ }
+ }
+ }
+ mode = AFTER_BODY;
+ continue;
+ case DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU:
+ case UL_OR_OL_OR_DL:
+ case PRE_OR_LISTING:
+ case FIELDSET:
+ case BUTTON:
+ case ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY:
+ eltPos = findLastInScope(name);
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errStrayEndTag(name);
+ } else {
+ generateImpliedEndTags();
+ if (errorHandler != null && !isCurrent(name)) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ }
+ break endtagloop;
+ case FORM:
+ // Outside template contents the form pointer is consulted and
+ // cleared, and only the form's own stack entry is removed;
+ // inside template contents the stack is popped through the form.
+ if (!isTemplateContents()) {
+ if (formPointer == null) {
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ formPointer = null;
+ eltPos = findLastInScope(name);
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ generateImpliedEndTags();
+ if (errorHandler != null && !isCurrent(name)) {
+ errUnclosedElements(eltPos, name);
+ }
+ removeFromStack(eltPos);
+ break endtagloop;
+ } else {
+ eltPos = findLastInScope(name);
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ generateImpliedEndTags();
+ if (errorHandler != null && !isCurrent(name)) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ break endtagloop;
+ }
+ case P:
+ eltPos = findLastInButtonScope("p");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errNoElementToCloseButEndTagSeen("p");
+ // XXX Can the 'in foreign' case happen anymore?
+ if (isInForeign()) {
+ errHtmlStartTagInForeignContext(name);
+ // Check for currentPtr for the fragment
+ // case.
+ while (currentPtr >= 0 && stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") {
+ pop();
+ }
+ }
+ // No p element to close: an empty p element is inserted
+ // instead.
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ HtmlAttributes.EMPTY_ATTRIBUTES);
+ break endtagloop;
+ }
+ generateImpliedEndTagsExceptFor("p");
+ assert eltPos != TreeBuilder.NOT_FOUND_ON_STACK;
+ if (errorHandler != null && eltPos != currentPtr) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ break endtagloop;
+ case LI:
+ eltPos = findLastInListScope(name);
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errNoElementToCloseButEndTagSeen(name);
+ } else {
+ generateImpliedEndTagsExceptFor(name);
+ if (errorHandler != null
+ && eltPos != currentPtr) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ }
+ break endtagloop;
+ case DD_OR_DT:
+ eltPos = findLastInScope(name);
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errNoElementToCloseButEndTagSeen(name);
+ } else {
+ generateImpliedEndTagsExceptFor(name);
+ if (errorHandler != null
+ && eltPos != currentPtr) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ }
+ break endtagloop;
+ case H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6:
+ eltPos = findLastInScopeHn();
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errStrayEndTag(name);
+ } else {
+ generateImpliedEndTags();
+ if (errorHandler != null && !isCurrent(name)) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ }
+ break endtagloop;
+ case OBJECT:
+ case MARQUEE_OR_APPLET:
+ eltPos = findLastInScope(name);
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ errStrayEndTag(name);
+ } else {
+ generateImpliedEndTags();
+ if (errorHandler != null && !isCurrent(name)) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ clearTheListOfActiveFormattingElementsUpToTheLastMarker();
+ }
+ break endtagloop;
+ case BR:
+ errEndTagBr();
+ if (isInForeign()) {
+ // XXX can this happen anymore?
+ errHtmlStartTagInForeignContext(name);
+ // Check for currentPtr for the fragment
+ // case.
+ while (currentPtr >= 0 && stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") {
+ pop();
+ }
+ }
+ reconstructTheActiveFormattingElements();
+ // The end tag is reported as an error above and a br element
+ // without attributes is inserted.
+ appendVoidElementToCurrentMayFoster(
+ elementName,
+ HtmlAttributes.EMPTY_ATTRIBUTES);
+ break endtagloop;
+ case TEMPLATE:
+ // fall through to IN_HEAD;
+ break;
+ case AREA_OR_WBR:
+ // CPPONLY: case MENUITEM:
+ case PARAM_OR_SOURCE_OR_TRACK:
+ case EMBED:
+ case IMG:
+ case IMAGE:
+ case INPUT:
+ case KEYGEN: // XXX??
+ case HR:
+ case ISINDEX:
+ case IFRAME:
+ case NOEMBED: // XXX???
+ case NOFRAMES: // XXX??
+ case SELECT:
+ case TABLE:
+ case TEXTAREA: // XXX??
+ errStrayEndTag(name);
+ break endtagloop;
+ case NOSCRIPT:
+ if (scriptingEnabled) {
+ errStrayEndTag(name);
+ break endtagloop;
+ } else {
+ // fall through
+ }
+ case A:
+ case B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U:
+ case FONT:
+ case NOBR:
+ if (adoptionAgencyEndTag(name)) {
+ break endtagloop;
+ }
+ // else handle like any other tag
+ default:
+ if (isCurrent(name)) {
+ pop();
+ break endtagloop;
+ }
+
+ // Walk down the stack: close through the first HTML element
+ // with this name, or give up at the root or at the first
+ // special element.
+ eltPos = currentPtr;
+ for (;;) {
+ StackNode<T> node = stack[eltPos];
+ if (node.ns == "http://www.w3.org/1999/xhtml" && node.name == name) {
+ generateImpliedEndTags();
+ if (errorHandler != null
+ && !isCurrent(name)) {
+ errUnclosedElements(eltPos, name);
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ break endtagloop;
+ } else if (eltPos == 0 || node.isSpecial()) {
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ eltPos--;
+ }
+ }
+ case IN_HEAD:
+ switch (group) {
+ case HEAD:
+ pop();
+ mode = AFTER_HEAD;
+ break endtagloop;
+ case BR:
+ case HTML:
+ case BODY:
+ pop();
+ mode = AFTER_HEAD;
+ continue;
+ case TEMPLATE:
+ endTagTemplateInHead();
+ break endtagloop;
+ default:
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case IN_HEAD_NOSCRIPT:
+ switch (group) {
+ case NOSCRIPT:
+ pop();
+ mode = IN_HEAD;
+ break endtagloop;
+ case BR:
+ errStrayEndTag(name);
+ pop();
+ mode = IN_HEAD;
+ continue;
+ default:
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case IN_COLUMN_GROUP:
+ switch (group) {
+ case COLGROUP:
+ if (currentPtr == 0 || stack[currentPtr].getGroup() ==
+ TreeBuilder.TEMPLATE) {
+ assert fragment || isTemplateContents();
+ errGarbageInColgroup();
+ break endtagloop;
+ }
+ pop();
+ mode = IN_TABLE;
+ break endtagloop;
+ case COL:
+ errStrayEndTag(name);
+ break endtagloop;
+ case TEMPLATE:
+ endTagTemplateInHead();
+ break endtagloop;
+ default:
+ if (currentPtr == 0 || stack[currentPtr].getGroup() ==
+ TreeBuilder.TEMPLATE) {
+ assert fragment || isTemplateContents();
+ errGarbageInColgroup();
+ break endtagloop;
+ }
+ pop();
+ mode = IN_TABLE;
+ continue;
+ }
+ case IN_SELECT_IN_TABLE:
+ switch (group) {
+ case CAPTION:
+ case TABLE:
+ case TBODY_OR_THEAD_OR_TFOOT:
+ case TR:
+ case TD_OR_TH:
+ errEndTagSeenWithSelectOpen(name);
+ if (findLastInTableScope(name) != TreeBuilder.NOT_FOUND_ON_STACK) {
+ eltPos = findLastInTableScope("select");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ assert fragment;
+ break endtagloop; // http://www.w3.org/Bugs/Public/show_bug.cgi?id=8375
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ resetTheInsertionMode();
+ continue;
+ } else {
+ break endtagloop;
+ }
+ default:
+ // fall through to IN_SELECT
+ }
+ case IN_SELECT:
+ switch (group) {
+ case OPTION:
+ if (isCurrent("option")) {
+ pop();
+ break endtagloop;
+ } else {
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case OPTGROUP:
+ // An option directly inside an optgroup is closed first so
+ // that the optgroup becomes the current node.
+ if (isCurrent("option")
+ && "optgroup" == stack[currentPtr - 1].name) {
+ pop();
+ }
+ if (isCurrent("optgroup")) {
+ pop();
+ } else {
+ errStrayEndTag(name);
+ }
+ break endtagloop;
+ case SELECT:
+ eltPos = findLastInTableScope("select");
+ if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
+ assert fragment;
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ while (currentPtr >= eltPos) {
+ pop();
+ }
+ resetTheInsertionMode();
+ break endtagloop;
+ case TEMPLATE:
+ endTagTemplateInHead();
+ break endtagloop;
+ default:
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case AFTER_BODY:
+ switch (group) {
+ case HTML:
+ if (fragment) {
+ errStrayEndTag(name);
+ break endtagloop;
+ } else {
+ mode = AFTER_AFTER_BODY;
+ break endtagloop;
+ }
+ default:
+ errEndTagAfterBody();
+ mode = framesetOk ? FRAMESET_OK : IN_BODY;
+ continue;
+ }
+ case IN_FRAMESET:
+ switch (group) {
+ case FRAMESET:
+ if (currentPtr == 0) {
+ assert fragment;
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ pop();
+ if ((!fragment) && !isCurrent("frameset")) {
+ mode = AFTER_FRAMESET;
+ }
+ break endtagloop;
+ default:
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case AFTER_FRAMESET:
+ switch (group) {
+ case HTML:
+ mode = AFTER_AFTER_FRAMESET;
+ break endtagloop;
+ default:
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case INITIAL:
+ /*
+ * Parse error.
+ */
+ // [NOCPP[
+ switch (doctypeExpectation) {
+ case AUTO:
+ err("End tag seen without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
+ break;
+ case HTML:
+ // ]NOCPP]
+ errEndTagSeenWithoutDoctype();
+ // [NOCPP[
+ break;
+ case HTML401_STRICT:
+ err("End tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
+ break;
+ case HTML401_TRANSITIONAL:
+ err("End tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
+ break;
+ case NO_DOCTYPE_ERRORS:
+ }
+ // ]NOCPP]
+ /*
+ *
+ * Set the document to quirks mode.
+ */
+ documentModeInternal(DocumentMode.QUIRKS_MODE, null, null,
+ false);
+ /*
+ * Then, switch to the root element mode of the tree
+ * construction stage
+ */
+ mode = BEFORE_HTML;
+ /*
+ * and reprocess the current token.
+ */
+ continue;
+ case BEFORE_HTML:
+ switch (group) {
+ case HEAD:
+ case BR:
+ case HTML:
+ case BODY:
+ /*
+ * Create an HTMLElement node with the tag name
+ * html, in the HTML namespace. Append it to the
+ * Document object.
+ */
+ appendHtmlElementToDocumentAndPush();
+ /* Switch to the main mode */
+ mode = BEFORE_HEAD;
+ /*
+ * reprocess the current token.
+ */
+ continue;
+ default:
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case BEFORE_HEAD:
+ switch (group) {
+ case HEAD:
+ case BR:
+ case HTML:
+ case BODY:
+ appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES);
+ mode = IN_HEAD;
+ continue;
+ default:
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case AFTER_HEAD:
+ switch (group) {
+ case TEMPLATE:
+ endTagTemplateInHead();
+ break endtagloop;
+ case HTML:
+ case BODY:
+ case BR:
+ appendToCurrentNodeAndPushBodyElement();
+ mode = FRAMESET_OK;
+ continue;
+ default:
+ errStrayEndTag(name);
+ break endtagloop;
+ }
+ case AFTER_AFTER_BODY:
+ errStrayEndTag(name);
+ mode = framesetOk ? FRAMESET_OK : IN_BODY;
+ continue;
+ case AFTER_AFTER_FRAMESET:
+ errStrayEndTag(name);
+ break endtagloop;
+ case TEXT:
+ // XXX need to manage insertion point here
+ pop();
+ // In AFTER_HEAD, startTag() pushes the head pointer onto the stack
+ // before entering TEXT; presumably this drops that extra entry.
+ if (originalMode == AFTER_HEAD) {
+ silentPop();
+ }
+ // Return to the mode that was saved when TEXT was entered.
+ mode = originalMode;
+ break endtagloop;
+ }
+ } // endtagloop
+ }
+
+    /**
+     * Handles a {@code template} end tag.
+     *
+     * When {@code findLast("template")} finds nothing, a stray end tag is
+     * reported and nothing else happens. Otherwise implied end tags are
+     * generated, the stack is popped down to and including the template, the
+     * list of active formatting elements is cleared up to the last marker,
+     * the template mode is popped and the insertion mode is reset.
+     *
+     * @throws SAXException if a callee throws it
+     */
+    private void endTagTemplateInHead() throws SAXException {
+        int templatePos = findLast("template");
+        if (templatePos != TreeBuilder.NOT_FOUND_ON_STACK) {
+            generateImpliedEndTags();
+            if (errorHandler != null) {
+                // The current-node check is only needed when errors are reported.
+                if (!isCurrent("template")) {
+                    errUnclosedElements(templatePos, "template");
+                }
+            }
+            while (templatePos <= currentPtr) {
+                pop();
+            }
+            clearTheListOfActiveFormattingElementsUpToTheLastMarker();
+            popTemplateMode();
+            resetTheInsertionMode();
+        } else {
+            errStrayEndTag("template");
+        }
+    }
+
+    /**
+     * Scans the stack from the current node down to index 1 for a node whose
+     * group is {@code TBODY_OR_THEAD_OR_TFOOT} or {@code TEMPLATE}.
+     *
+     * @return the index of the first such node found, or 0 when there is none
+     */
+    private int findLastInTableScopeOrRootTemplateTbodyTheadTfoot() {
+        int pos = currentPtr;
+        while (pos > 0) {
+            int group = stack[pos].getGroup();
+            if (group == TreeBuilder.TBODY_OR_THEAD_OR_TFOOT
+                    || group == TreeBuilder.TEMPLATE) {
+                return pos;
+            }
+            pos--;
+        }
+        return 0;
+    }
+
+    /**
+     * Finds the topmost HTML-namespace element with the given local name,
+     * looking at stack indices {@code currentPtr} down to 1 (index 0 is not
+     * examined).
+     *
+     * Names and namespaces are compared by reference, as everywhere else in
+     * this file.
+     *
+     * @param name the local name to look for
+     * @return the stack index, or {@code TreeBuilder.NOT_FOUND_ON_STACK}
+     */
+    private int findLast(@Local String name) {
+        int pos = currentPtr;
+        while (pos > 0) {
+            StackNode<T> candidate = stack[pos];
+            if (candidate.ns == "http://www.w3.org/1999/xhtml") {
+                if (candidate.name == name) {
+                    return pos;
+                }
+            }
+            pos--;
+        }
+        return TreeBuilder.NOT_FOUND_ON_STACK;
+    }
+
+    /**
+     * Looks for an HTML-namespace element with the given local name, walking
+     * from the current node down to index 1 and giving up as soon as an HTML
+     * {@code table} or {@code template} element is met.
+     *
+     * Nodes in other namespaces are skipped without ending the search.
+     *
+     * @param name the local name to look for
+     * @return the stack index, or {@code TreeBuilder.NOT_FOUND_ON_STACK}
+     */
+    private int findLastInTableScope(@Local String name) {
+        for (int pos = currentPtr; pos > 0; pos--) {
+            StackNode<T> node = stack[pos];
+            if (node.ns != "http://www.w3.org/1999/xhtml") {
+                continue;
+            }
+            if (node.name == name) {
+                return pos;
+            }
+            if (node.name == "table" || node.name == "template") {
+                // Scope boundary reached before a match.
+                break;
+            }
+        }
+        return TreeBuilder.NOT_FOUND_ON_STACK;
+    }
+
+    /**
+     * Looks for an HTML-namespace element with the given local name, walking
+     * from the current node down to index 1. The search ends without a match
+     * at an HTML {@code button} element or at any node for which
+     * {@code isScoping()} is true.
+     *
+     * @param name the local name to look for
+     * @return the stack index, or {@code TreeBuilder.NOT_FOUND_ON_STACK}
+     */
+    private int findLastInButtonScope(@Local String name) {
+        int pos = currentPtr;
+        while (pos > 0) {
+            StackNode<T> node = stack[pos];
+            boolean html = node.ns == "http://www.w3.org/1999/xhtml";
+            if (html && node.name == name) {
+                return pos;
+            }
+            if ((html && node.name == "button") || node.isScoping()) {
+                break;
+            }
+            pos--;
+        }
+        return TreeBuilder.NOT_FOUND_ON_STACK;
+    }
+
+    /**
+     * Looks for an HTML-namespace element with the given local name, walking
+     * from the current node down to index 1 and stopping without a match at
+     * the first non-matching node for which {@code isScoping()} is true.
+     *
+     * @param name the local name to look for
+     * @return the stack index, or {@code TreeBuilder.NOT_FOUND_ON_STACK}
+     */
+    private int findLastInScope(@Local String name) {
+        int pos = currentPtr;
+        while (pos > 0) {
+            StackNode<T> node = stack[pos];
+            if (node.ns == "http://www.w3.org/1999/xhtml" && node.name == name) {
+                return pos;
+            }
+            if (node.isScoping()) {
+                break;
+            }
+            pos--;
+        }
+        return TreeBuilder.NOT_FOUND_ON_STACK;
+    }
+
+    /**
+     * Looks for an HTML-namespace element with the given local name, walking
+     * from the current node down to index 1. The search ends without a match
+     * at an HTML {@code ul} or {@code ol} element or at any node for which
+     * {@code isScoping()} is true.
+     *
+     * @param name the local name to look for
+     * @return the stack index, or {@code TreeBuilder.NOT_FOUND_ON_STACK}
+     */
+    private int findLastInListScope(@Local String name) {
+        int pos = currentPtr;
+        while (pos > 0) {
+            StackNode<T> node = stack[pos];
+            boolean html = node.ns == "http://www.w3.org/1999/xhtml";
+            if (html && node.name == name) {
+                return pos;
+            }
+            if (html && (node.name == "ul" || node.name == "ol")) {
+                break;
+            }
+            if (node.isScoping()) {
+                break;
+            }
+            pos--;
+        }
+        return TreeBuilder.NOT_FOUND_ON_STACK;
+    }
+
+    /**
+     * Looks for a node in the {@code H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6} group,
+     * walking from the current node down to index 1 and stopping without a
+     * match at the first other node for which {@code isScoping()} is true.
+     *
+     * @return the stack index, or {@code TreeBuilder.NOT_FOUND_ON_STACK}
+     */
+    private int findLastInScopeHn() {
+        int pos = currentPtr;
+        while (pos > 0) {
+            StackNode<T> node = stack[pos];
+            if (node.getGroup() == TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6) {
+                return pos;
+            }
+            if (node.isScoping()) {
+                break;
+            }
+            pos--;
+        }
+        return TreeBuilder.NOT_FOUND_ON_STACK;
+    }
+
+    /**
+     * Pops current nodes for as long as the current node belongs to one of
+     * the groups listed in the switch below, but stops (without popping) when
+     * the current node is the HTML-namespace element called {@code name}.
+     *
+     * @param name local name of the element that must not be popped
+     * @throws SAXException if {@code pop()} throws it
+     */
+    private void generateImpliedEndTagsExceptFor(@Local String name)
+            throws SAXException {
+        for (;;) {
+            StackNode<T> current = stack[currentPtr];
+            switch (current.getGroup()) {
+                case P:
+                case LI:
+                case DD_OR_DT:
+                case OPTION:
+                case OPTGROUP:
+                case RB_OR_RTC:
+                case RT_OR_RP:
+                    // Candidate for implied closing; decide below.
+                    break;
+                default:
+                    return;
+            }
+            if (current.ns == "http://www.w3.org/1999/xhtml" && current.name == name) {
+                return;
+            }
+            pop();
+        }
+    }
+
+    /**
+     * Pops current nodes for as long as the current node belongs to one of
+     * the groups listed in the switch below.
+     *
+     * @throws SAXException if {@code pop()} throws it
+     */
+    private void generateImpliedEndTags() throws SAXException {
+        for (;;) {
+            switch (stack[currentPtr].getGroup()) {
+                case P:
+                case LI:
+                case DD_OR_DT:
+                case OPTION:
+                case OPTGROUP:
+                case RB_OR_RTC:
+                case RT_OR_RP:
+                    break;
+                default:
+                    return;
+            }
+            pop();
+        }
+    }
+
+    /**
+     * Tells whether stack index 1 exists and holds a node of the
+     * {@code BODY} group.
+     *
+     * @return {@code true} iff {@code currentPtr >= 1} and {@code stack[1]}
+     *         is in the {@code BODY} group
+     */
+    private boolean isSecondOnStackBody() {
+        if (currentPtr < 1) {
+            return false;
+        }
+        return stack[1].getGroup() == TreeBuilder.BODY;
+    }
+
+    /**
+     * Records the document mode and notifies the document mode handler, if
+     * there is one.
+     *
+     * For a srcdoc document the requested mode is ignored: {@code quirks} is
+     * cleared and the handler is told {@code STANDARDS_MODE}. Otherwise
+     * {@code quirks} reflects whether {@code m} is {@code QUIRKS_MODE}, the
+     * handler receives {@code m}, and (Java only) {@code documentMode} is
+     * invoked as well.
+     *
+     * @param m the requested document mode
+     * @param publicIdentifier the doctype public identifier, passed through
+     * @param systemIdentifier the doctype system identifier, passed through
+     * @param html4SpecificAdditionalErrorChecks passed through to the callees
+     * @throws SAXException if a callee throws it
+     */
+    private void documentModeInternal(DocumentMode m, String publicIdentifier,
+            String systemIdentifier, boolean html4SpecificAdditionalErrorChecks)
+            throws SAXException {
+
+        if (isSrcdocDocument) {
+            // Srcdoc documents are always rendered in standards mode.
+            quirks = false;
+            if (documentModeHandler != null) {
+                documentModeHandler.documentMode(
+                        DocumentMode.STANDARDS_MODE
+                        // [NOCPP[
+                        , null, null, false
+                        // ]NOCPP]
+                );
+            }
+        } else {
+            quirks = (m == DocumentMode.QUIRKS_MODE);
+            if (documentModeHandler != null) {
+                documentModeHandler.documentMode(
+                        m
+                        // [NOCPP[
+                        , publicIdentifier, systemIdentifier,
+                        html4SpecificAdditionalErrorChecks
+                        // ]NOCPP]
+                );
+            }
+            // [NOCPP[
+            documentMode(m, publicIdentifier, systemIdentifier,
+                    html4SpecificAdditionalErrorChecks);
+            // ]NOCPP]
+        }
+    }
+
+    /**
+     * Decides whether a doctype's identifiers select the almost-standards
+     * mode.
+     *
+     * The XHTML 1.0 Transitional and Frameset public identifiers always
+     * match; the HTML 4.01 Transitional and Frameset public identifiers
+     * match only when a system identifier is present.
+     *
+     * @param publicIdentifier the doctype public identifier
+     * @param systemIdentifier the doctype system identifier, may be
+     *        {@code null}
+     * @return {@code true} iff one of the listed identifiers matches
+     */
+    private boolean isAlmostStandards(String publicIdentifier,
+            String systemIdentifier) {
+        if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                "-//w3c//dtd xhtml 1.0 transitional//en", publicIdentifier)
+                || Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                        "-//w3c//dtd xhtml 1.0 frameset//en", publicIdentifier)) {
+            return true;
+        }
+        if (systemIdentifier == null) {
+            return false;
+        }
+        return Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                "-//w3c//dtd html 4.01 transitional//en", publicIdentifier)
+                || Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                        "-//w3c//dtd html 4.01 frameset//en", publicIdentifier);
+    }
+
+    /**
+     * Decides whether a doctype selects quirks mode.
+     *
+     * The answer is {@code true} when quirks are forced, when the doctype
+     * name is not {@code HTML_LOCAL}, when the public identifier starts with
+     * an entry of {@code TreeBuilder.QUIRKY_PUBLIC_IDS} or equals one of
+     * three fixed identifiers, when the system identifier is missing and the
+     * public identifier is HTML 4.01 Transitional or Frameset, or when the
+     * system identifier is the IBM XHTML transitional DTD URL.
+     *
+     * @param name the doctype name
+     * @param publicIdentifier the public identifier, may be {@code null}
+     * @param systemIdentifier the system identifier, may be {@code null}
+     * @param forceQuirks whether quirks mode is forced
+     * @return {@code true} iff any of the conditions above holds
+     */
+    private boolean isQuirky(@Local String name, String publicIdentifier,
+            String systemIdentifier, boolean forceQuirks) {
+        if (forceQuirks || name != HTML_LOCAL) {
+            return true;
+        }
+        if (publicIdentifier != null) {
+            int i = 0;
+            while (i < TreeBuilder.QUIRKY_PUBLIC_IDS.length) {
+                if (Portability.lowerCaseLiteralIsPrefixOfIgnoreAsciiCaseString(
+                        TreeBuilder.QUIRKY_PUBLIC_IDS[i], publicIdentifier)) {
+                    return true;
+                }
+                i++;
+            }
+            if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                    "-//w3o//dtd w3 html strict 3.0//en//", publicIdentifier)) {
+                return true;
+            }
+            if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                    "-/w3c/dtd html 4.0 transitional/en", publicIdentifier)) {
+                return true;
+            }
+            if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                    "html", publicIdentifier)) {
+                return true;
+            }
+        }
+        if (systemIdentifier != null) {
+            return Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                    "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd",
+                    systemIdentifier);
+        }
+        // No system identifier: these two public identifiers are quirky.
+        return Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                "-//w3c//dtd html 4.01 transitional//en", publicIdentifier)
+                || Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+                        "-//w3c//dtd html 4.01 frameset//en", publicIdentifier);
+    }
+
+    /**
+     * Closes the cell at stack index {@code eltPos}: generates implied end
+     * tags, reports unclosed elements when the cell is not the current node,
+     * pops down to and including the cell, clears the active formatting
+     * elements up to the last marker and switches to {@code IN_ROW}.
+     *
+     * @param eltPos stack index of the cell element
+     * @throws SAXException if a callee throws it
+     */
+    private void closeTheCell(int eltPos) throws SAXException {
+        generateImpliedEndTags();
+        if (errorHandler != null) {
+            if (currentPtr != eltPos) {
+                errUnclosedElementsCell(eltPos);
+            }
+        }
+        while (eltPos <= currentPtr) {
+            pop();
+        }
+        clearTheListOfActiveFormattingElementsUpToTheLastMarker();
+        mode = IN_ROW;
+    }
+
+    /**
+     * Looks for an HTML-namespace {@code td} or {@code th} element, walking
+     * from the current node down to index 1 and giving up as soon as an HTML
+     * {@code table} or {@code template} element is met.
+     *
+     * @return the stack index, or {@code TreeBuilder.NOT_FOUND_ON_STACK}
+     */
+    private int findLastInTableScopeTdTh() {
+        int pos = currentPtr;
+        while (pos > 0) {
+            StackNode<T> node = stack[pos];
+            if (node.ns == "http://www.w3.org/1999/xhtml") {
+                @Local String local = node.name;
+                if (local == "td" || local == "th") {
+                    return pos;
+                }
+                if (local == "table" || local == "template") {
+                    break;
+                }
+            }
+            pos--;
+        }
+        return TreeBuilder.NOT_FOUND_ON_STACK;
+    }
+
+    /**
+     * Pops nodes until the node at {@code eltPos} is the current node; that
+     * node itself is kept.
+     *
+     * Popping stops early at an HTML {@code TEMPLATE} node when the target is
+     * in the {@code TABLE}, {@code TBODY_OR_THEAD_OR_TFOOT} or {@code TR}
+     * group, or when {@code eltPos} is 0.
+     *
+     * @param eltPos stack index to clear back to
+     * @throws SAXException if {@code pop()} throws it
+     */
+    private void clearStackBackTo(int eltPos) throws SAXException {
+        int targetGroup = stack[eltPos].getGroup();
+        // Loop-invariant part of the early-exit condition.
+        boolean stopAtTemplate = targetGroup == TABLE
+                || targetGroup == TBODY_OR_THEAD_OR_TFOOT
+                || targetGroup == TR
+                || eltPos == 0;
+        while (currentPtr > eltPos) { // > not >= intentional
+            StackNode<T> top = stack[currentPtr];
+            if (stopAtTemplate
+                    && top.ns == "http://www.w3.org/1999/xhtml"
+                    && top.getGroup() == TEMPLATE) {
+                return;
+            }
+            pop();
+        }
+    }
+
+ /**
+ * Recomputes {@code mode} from the stack of open elements.
+ *
+ * The stack is walked from the current node down to index 0 and the first
+ * node whose name matches one of the branches below decides the mode. At
+ * index 0 the context element's name and namespace replace the root's when
+ * {@code fragment} is set; if the context element is an HTML {@code td} or
+ * {@code th}, the mode is set to FRAMESET_OK/IN_BODY right away.
+ */
+ private void resetTheInsertionMode() {
+ StackNode<T> node;
+ @Local String name;
+ @NsUri String ns;
+ for (int i = currentPtr; i >= 0; i--) {
+ node = stack[i];
+ name = node.name;
+ ns = node.ns;
+ if (i == 0) {
+ // Bottom of the stack: consult the context element.
+ if (!(contextNamespace == "http://www.w3.org/1999/xhtml" && (contextName == "td" || contextName == "th"))) {
+ if (fragment) {
+ // Make sure we are parsing a fragment otherwise the context element doesn't make sense.
+ name = contextName;
+ ns = contextNamespace;
+ }
+ } else {
+ mode = framesetOk ? FRAMESET_OK : IN_BODY; // XXX from Hixie's email
+ return;
+ }
+ }
+ // NOTE(review): the name tests from "select" through "table" run before
+ // the namespace test further down, so they match on local name alone.
+ if ("select" == name) {
+ // Scan from index i down to index 1 (index 0 is never examined): an
+ // HTML template ends the scan, an HTML table selects
+ // IN_SELECT_IN_TABLE.
+ int ancestorIndex = i;
+ while (ancestorIndex > 0) {
+ StackNode<T> ancestor = stack[ancestorIndex--];
+ if ("http://www.w3.org/1999/xhtml" == ancestor.ns) {
+ if ("template" == ancestor.name) {
+ break;
+ }
+ if ("table" == ancestor.name) {
+ mode = IN_SELECT_IN_TABLE;
+ return;
+ }
+ }
+ }
+ mode = IN_SELECT;
+ return;
+ } else if ("td" == name || "th" == name) {
+ mode = IN_CELL;
+ return;
+ } else if ("tr" == name) {
+ mode = IN_ROW;
+ return;
+ } else if ("tbody" == name || "thead" == name || "tfoot" == name) {
+ mode = IN_TABLE_BODY;
+ return;
+ } else if ("caption" == name) {
+ mode = IN_CAPTION;
+ return;
+ } else if ("colgroup" == name) {
+ mode = IN_COLUMN_GROUP;
+ return;
+ } else if ("table" == name) {
+ mode = IN_TABLE;
+ return;
+ } else if ("http://www.w3.org/1999/xhtml" != ns) {
+ // Any other non-HTML node ends the walk here.
+ mode = framesetOk ? FRAMESET_OK : IN_BODY;
+ return;
+ } else if ("template" == name) {
+ // Use the mode on top of the template mode stack.
+ assert templateModePtr >= 0;
+ mode = templateModeStack[templateModePtr];
+ return;
+ } else if ("head" == name) {
+ if (name == contextName) {
+ mode = framesetOk ? FRAMESET_OK : IN_BODY; // really
+ } else {
+ mode = IN_HEAD;
+ }
+ return;
+ } else if ("body" == name) {
+ mode = framesetOk ? FRAMESET_OK : IN_BODY;
+ return;
+ } else if ("frameset" == name) {
+ // TODO: Fragment case. Add error reporting.
+ mode = IN_FRAMESET;
+ return;
+ } else if ("html" == name) {
+ if (headPointer == null) {
+ // TODO: Fragment case. Add error reporting.
+ mode = BEFORE_HEAD;
+ } else {
+ mode = AFTER_HEAD;
+ }
+ return;
+ } else if (i == 0) {
+ // Nothing matched all the way down to the root.
+ mode = framesetOk ? FRAMESET_OK : IN_BODY;
+ return;
+ }
+ }
+ }
+
+    /**
+     * Closes a {@code p} element that is in button scope, if there is one.
+     *
+     * When {@code findLastInButtonScope("p")} finds nothing, this is a
+     * no-op. Otherwise implied end tags other than {@code p} are generated,
+     * unclosed elements are reported when the {@code p} is not the current
+     * node, and the stack is popped down to and including the {@code p}.
+     *
+     * @throws SAXException if a callee throws it
+     */
+    private void implicitlyCloseP() throws SAXException {
+        int pPos = findLastInButtonScope("p");
+        if (pPos != TreeBuilder.NOT_FOUND_ON_STACK) {
+            generateImpliedEndTagsExceptFor("p");
+            if (errorHandler != null) {
+                if (currentPtr != pPos) {
+                    errUnclosedElementsImplied(pPos, "p");
+                }
+            }
+            while (pPos <= currentPtr) {
+                pop();
+            }
+        }
+    }
+
+ /**
+ * Nulls out the stack slot at index {@code currentPtr}.
+ *
+ * The visible call sites invoke this from inside an {@code assert}
+ * statement, which is why it returns a boolean.
+ *
+ * @return {@code true}, always
+ */
+ private boolean debugOnlyClearLastStackSlot() {
+ stack[currentPtr] = null;
+ return true;
+ }
+
+ /**
+ * Nulls out the slot at index {@code listPtr} of the list of active
+ * formatting elements.
+ *
+ * The visible call sites invoke this from inside an {@code assert}
+ * statement, which is why it returns a boolean.
+ *
+ * @return {@code true}, always
+ */
+ private boolean debugOnlyClearLastListSlot() {
+ listOfActiveFormattingElements[listPtr] = null;
+ return true;
+ }
+
+    /**
+     * Pushes an insertion mode onto the template mode stack, growing the
+     * backing array by 64 slots when it is full.
+     *
+     * @param mode the insertion mode to push
+     */
+    private void pushTemplateMode(int mode) {
+        templateModePtr++;
+        int capacity = templateModeStack.length;
+        if (templateModePtr == capacity) {
+            int[] grown = new int[capacity + 64];
+            System.arraycopy(templateModeStack, 0, grown, 0, capacity);
+            templateModeStack = grown;
+        }
+        templateModeStack[templateModePtr] = mode;
+    }
+
+ @SuppressWarnings("unchecked") private void push(StackNode<T> node) throws SAXException {
+ currentPtr++;
+ if (currentPtr == stack.length) {
+ StackNode<T>[] newStack = new StackNode[stack.length + 64];
+ System.arraycopy(stack, 0, newStack, 0, stack.length);
+ stack = newStack;
+ }
+ stack[currentPtr] = node;
+ elementPushed(node.ns, node.popName, node.node);
+ }
+
+ @SuppressWarnings("unchecked") private void silentPush(StackNode<T> node) throws SAXException {
+ currentPtr++;
+ if (currentPtr == stack.length) {
+ StackNode<T>[] newStack = new StackNode[stack.length + 64];
+ System.arraycopy(stack, 0, newStack, 0, stack.length);
+ stack = newStack;
+ }
+ stack[currentPtr] = node;
+ }
+
+ @SuppressWarnings("unchecked") private void append(StackNode<T> node) {
+ listPtr++;
+ if (listPtr == listOfActiveFormattingElements.length) {
+ StackNode<T>[] newList = new StackNode[listOfActiveFormattingElements.length + 64];
+ System.arraycopy(listOfActiveFormattingElements, 0, newList, 0,
+ listOfActiveFormattingElements.length);
+ listOfActiveFormattingElements = newList;
+ }
+ listOfActiveFormattingElements[listPtr] = node;
+ }
+
+ /**
+ * Appends a marker to the list of active formatting elements. A marker is
+ * represented by a {@code null} entry.
+ */
+ @Inline private void insertMarker() {
+ append(null);
+ }
+
+    /**
+     * Removes entries from the end of the list of active formatting elements
+     * up to and including the last marker ({@code null} entry), releasing
+     * each non-marker entry. When there is no marker the whole list is
+     * emptied.
+     */
+    private void clearTheListOfActiveFormattingElementsUpToTheLastMarker() {
+        for (;;) {
+            if (listPtr <= -1) {
+                return;
+            }
+            StackNode<T> entry = listOfActiveFormattingElements[listPtr];
+            if (entry == null) {
+                // Marker: drop it and stop.
+                --listPtr;
+                return;
+            }
+            entry.release();
+            --listPtr;
+        }
+    }
+
+ /**
+ * Tells whether the current node is the HTML-namespace element with the
+ * given local name. Both comparisons are by reference.
+ *
+ * @param name the local name to test for
+ * @return {@code true} iff the current node matches
+ */
+ @Inline private boolean isCurrent(@Local String name) {
+ return stack[currentPtr].ns == "http://www.w3.org/1999/xhtml" &&
+ name == stack[currentPtr].name;
+ }
+
+    /**
+     * Removes the node at the given stack index.
+     *
+     * The current node is removed with an ordinary {@code pop()}. Any other
+     * position first calls {@code fatal()}, then releases the node and
+     * closes the gap by shifting the entries above it down by one.
+     *
+     * @param pos stack index of the node to remove
+     * @throws SAXException if {@code pop()} or {@code fatal()} throws it
+     */
+    private void removeFromStack(int pos) throws SAXException {
+        if (pos == currentPtr) {
+            pop();
+            return;
+        }
+        fatal();
+        stack[pos].release();
+        int shifted = currentPtr - pos;
+        System.arraycopy(stack, pos + 1, stack, pos, shifted);
+        assert debugOnlyClearLastStackSlot();
+        currentPtr--;
+    }
+
+    /**
+     * Removes the given node from the stack.
+     *
+     * The current node is removed with an ordinary {@code pop()}. Otherwise
+     * the node is searched for below the current node; when it is not on the
+     * stack nothing happens, and when it is, {@code fatal()} is called, the
+     * node is released and the entries above it are shifted down by one.
+     *
+     * @param node the node to remove
+     * @throws SAXException if {@code pop()} or {@code fatal()} throws it
+     */
+    private void removeFromStack(StackNode<T> node) throws SAXException {
+        if (stack[currentPtr] == node) {
+            pop();
+            return;
+        }
+        int pos;
+        for (pos = currentPtr - 1; pos >= 0; pos--) {
+            if (stack[pos] == node) {
+                break;
+            }
+        }
+        if (pos == -1) {
+            // dead code?
+            return;
+        }
+        fatal();
+        node.release();
+        int shifted = currentPtr - pos;
+        System.arraycopy(stack, pos + 1, stack, pos, shifted);
+        currentPtr--;
+    }
+
+    /**
+     * Releases and removes the entry at {@code pos} from the list of active
+     * formatting elements, shifting later entries down by one.
+     *
+     * @param pos index of a non-marker entry, at most {@code listPtr}
+     */
+    private void removeFromListOfActiveFormattingElements(int pos) {
+        assert listOfActiveFormattingElements[pos] != null;
+        listOfActiveFormattingElements[pos].release();
+        if (pos != listPtr) {
+            assert pos < listPtr;
+            // Close the gap left by the removed entry.
+            System.arraycopy(listOfActiveFormattingElements, pos + 1,
+                    listOfActiveFormattingElements, pos, listPtr - pos);
+        }
+        assert debugOnlyClearLastListSlot();
+        listPtr--;
+    }
+
+ /**
+ * Adoption agency algorithm.
+ *
+ * The outer loop below runs at most eight times; each pass looks up the
+ * formatting element named {@code name} in the list of active formatting
+ * elements and re-parents the nodes between it and the furthest block.
+ *
+ * @param name subject as described in the specified algorithm.
+ * @return Returns true if the algorithm has completed and there is nothing remaining to
+ * be done. Returns false if the algorithm needs to "act as described in the 'any other
+ * end tag' entry" as described in the specified algorithm.
+ * @throws SAXException
+ */
+ private boolean adoptionAgencyEndTag(@Local String name) throws SAXException {
+ // This check intends to ensure that for properly nested tags, closing tags will match
+ // against the stack instead of the listOfActiveFormattingElements.
+ if (stack[currentPtr].ns == "http://www.w3.org/1999/xhtml" &&
+ stack[currentPtr].name == name &&
+ findInListOfActiveFormattingElements(stack[currentPtr]) == -1) {
+ // If the current element matches the name but isn't on the list of active
+ // formatting elements, then it is possible that the list was mangled by the Noah's Ark
+ // clause. In this case, we want to match the end tag against the stack instead of
+ // proceeding with the AAA algorithm that may match against the list of
+ // active formatting elements (and possibly mangle the tree in unexpected ways).
+ pop();
+ return true;
+ }
+
+ // If you crash around here, perhaps some stack node variable claimed to
+ // be a weak ref isn't.
+ for (int i = 0; i < 8; ++i) {
+ // Search the list backwards for an entry called `name`; a marker
+ // (null entry) ends the search without a match.
+ int formattingEltListPos = listPtr;
+ while (formattingEltListPos > -1) {
+ StackNode<T> listNode = listOfActiveFormattingElements[formattingEltListPos]; // weak ref
+ if (listNode == null) {
+ formattingEltListPos = -1;
+ break;
+ } else if (listNode.name == name) {
+ break;
+ }
+ formattingEltListPos--;
+ }
+ if (formattingEltListPos == -1) {
+ return false;
+ }
+ // this *looks* like a weak ref to the list of formatting elements
+ StackNode<T> formattingElt = listOfActiveFormattingElements[formattingEltListPos];
+ // Locate the formatting element on the stack, noting whether a
+ // scoping node sits above it.
+ int formattingEltStackPos = currentPtr;
+ boolean inScope = true;
+ while (formattingEltStackPos > -1) {
+ StackNode<T> node = stack[formattingEltStackPos]; // weak ref
+ if (node == formattingElt) {
+ break;
+ } else if (node.isScoping()) {
+ inScope = false;
+ }
+ formattingEltStackPos--;
+ }
+ if (formattingEltStackPos == -1) {
+ // In the list but not on the stack: report and drop the list entry.
+ errNoElementToCloseButEndTagSeen(name);
+ removeFromListOfActiveFormattingElements(formattingEltListPos);
+ return true;
+ }
+ if (!inScope) {
+ errNoElementToCloseButEndTagSeen(name);
+ return true;
+ }
+ // stackPos now points to the formatting element and it is in scope
+ if (formattingEltStackPos != currentPtr) {
+ errEndTagViolatesNestingRules(name);
+ }
+ // The furthest block is the first special node above the formatting
+ // element on the stack.
+ int furthestBlockPos = formattingEltStackPos + 1;
+ while (furthestBlockPos <= currentPtr) {
+ StackNode<T> node = stack[furthestBlockPos]; // weak ref
+ assert furthestBlockPos > 0: "How is formattingEltStackPos + 1 not > 0?";
+ if (node.isSpecial()) {
+ break;
+ }
+ furthestBlockPos++;
+ }
+ if (furthestBlockPos > currentPtr) {
+ // no furthest block
+ while (currentPtr >= formattingEltStackPos) {
+ pop();
+ }
+ removeFromListOfActiveFormattingElements(formattingEltListPos);
+ return true;
+ }
+ StackNode<T> commonAncestor = stack[formattingEltStackPos - 1]; // weak ref
+ StackNode<T> furthestBlock = stack[furthestBlockPos]; // weak ref
+ // detachFromParent(furthestBlock.node); XXX AAA CHANGE
+ int bookmark = formattingEltListPos;
+ int nodePos = furthestBlockPos;
+ StackNode<T> lastNode = furthestBlock; // weak ref
+ // Inner loop: walk down the stack from the furthest block towards the
+ // formatting element; j counts the iterations.
+ int j = 0;
+ for (;;) {
+ ++j;
+ nodePos--;
+ if (nodePos == formattingEltStackPos) {
+ break;
+ }
+ StackNode<T> node = stack[nodePos]; // weak ref
+ int nodeListPos = findInListOfActiveFormattingElements(node);
+
+ // From the fourth iteration on, a node that is still in the list is
+ // removed from the list.
+ if (j > 3 && nodeListPos != -1) {
+ removeFromListOfActiveFormattingElements(nodeListPos);
+
+ // Adjust the indices into the list to account
+ // for the removal of nodeListPos.
+ if (nodeListPos <= formattingEltListPos) {
+ formattingEltListPos--;
+ }
+ if (nodeListPos <= bookmark) {
+ bookmark--;
+ }
+
+ // Update position to reflect removal from list.
+ nodeListPos = -1;
+ }
+
+ if (nodeListPos == -1) {
+ // Not in the list: take the node off the stack and move on.
+ assert formattingEltStackPos < nodePos;
+ assert bookmark < nodePos;
+ assert furthestBlockPos > nodePos;
+ removeFromStack(nodePos); // node is now a bad pointer in C++
+ furthestBlockPos--;
+ continue;
+ }
+ // now node is both on stack and in the list
+ if (nodePos == furthestBlockPos) {
+ bookmark = nodeListPos + 1;
+ }
+ // if (hasChildren(node.node)) { XXX AAA CHANGE
+ assert node == listOfActiveFormattingElements[nodeListPos];
+ assert node == stack[nodePos];
+ // Replace node with a clone, both on the stack and in the list.
+ T clone = createElement("http://www.w3.org/1999/xhtml",
+ node.name, node.attributes.cloneAttributes(null), commonAncestor.node);
+ StackNode<T> newNode = new StackNode<T>(node.getFlags(), node.ns,
+ node.name, clone, node.popName, node.attributes
+ // [NOCPP[
+ , node.getLocator()
+ // ]NOCPP]
+ ); // creation ownership goes to stack
+ node.dropAttributes(); // adopt ownership to newNode
+ stack[nodePos] = newNode;
+ newNode.retain(); // retain for list
+ listOfActiveFormattingElements[nodeListPos] = newNode;
+ node.release(); // release from stack
+ node.release(); // release from list
+ node = newNode;
+ // } XXX AAA CHANGE
+ // Move lastNode under the clone.
+ detachFromParent(lastNode.node);
+ appendElement(lastNode.node, node.node);
+ lastNode = node;
+ }
+ // Attach lastNode to the common ancestor, foster parenting if required.
+ if (commonAncestor.isFosterParenting()) {
+ fatal();
+ detachFromParent(lastNode.node);
+ insertIntoFosterParent(lastNode.node);
+ } else {
+ detachFromParent(lastNode.node);
+ appendElement(lastNode.node, commonAncestor.node);
+ }
+ // Clone the formatting element, move the furthest block's children into
+ // the clone and make the clone a child of the furthest block.
+ T clone = createElement("http://www.w3.org/1999/xhtml",
+ formattingElt.name,
+ formattingElt.attributes.cloneAttributes(null), furthestBlock.node);
+ StackNode<T> formattingClone = new StackNode<T>(
+ formattingElt.getFlags(), formattingElt.ns,
+ formattingElt.name, clone, formattingElt.popName,
+ formattingElt.attributes
+ // [NOCPP[
+ , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+ // ]NOCPP]
+ ); // Ownership transfers to stack below
+ formattingElt.dropAttributes(); // transfer ownership to
+ // formattingClone
+ appendChildrenToNewParent(furthestBlock.node, clone);
+ appendElement(clone, furthestBlock.node);
+ // In the list, the clone takes the bookmarked position; on the stack it
+ // goes directly above the furthest block.
+ removeFromListOfActiveFormattingElements(formattingEltListPos);
+ insertIntoListOfActiveFormattingElements(formattingClone, bookmark);
+ assert formattingEltStackPos < furthestBlockPos;
+ removeFromStack(formattingEltStackPos);
+ // furthestBlockPos is now off by one and points to the slot after
+ // it
+ insertIntoStack(formattingClone, furthestBlockPos);
+ }
+ return true;
+ }
+
+    /**
+     * Inserts a node into the stack at the given index.
+     *
+     * Inserting just past the current node is an ordinary {@code push};
+     * otherwise the entries from {@code position} upwards are shifted up by
+     * one and {@code elementPushed} is not called. The array is not grown
+     * here; the first assertion expects a free slot to exist.
+     *
+     * @param node the node to insert
+     * @param position target stack index, at most {@code currentPtr + 1}
+     * @throws SAXException if {@code push} throws it
+     */
+    private void insertIntoStack(StackNode<T> node, int position)
+            throws SAXException {
+        assert currentPtr + 1 < stack.length;
+        assert position <= currentPtr + 1;
+        if (position == currentPtr + 1) {
+            push(node);
+            return;
+        }
+        int moved = (currentPtr - position) + 1;
+        System.arraycopy(stack, position, stack, position + 1, moved);
+        currentPtr++;
+        stack[position] = node;
+    }
+
+    /**
+     * Retains the given node and inserts it into the list of active
+     * formatting elements at index {@code bookmark}, shifting the entries
+     * from that index onwards up by one. The array is not grown here; the
+     * assertion expects a free slot to exist.
+     *
+     * @param formattingClone the node to insert
+     * @param bookmark the target list index
+     */
+    private void insertIntoListOfActiveFormattingElements(
+            StackNode<T> formattingClone, int bookmark) {
+        formattingClone.retain();
+        assert listPtr + 1 < listOfActiveFormattingElements.length;
+        int moved = (listPtr - bookmark) + 1;
+        if (moved > 0) {
+            System.arraycopy(listOfActiveFormattingElements, bookmark,
+                    listOfActiveFormattingElements, bookmark + 1, moved);
+        }
+        listPtr++;
+        listOfActiveFormattingElements[bookmark] = formattingClone;
+    }
+
+    /**
+     * Finds the given node in the list of active formatting elements by
+     * reference, searching from the end of the list to index 0.
+     *
+     * @param node the node to look for
+     * @return the list index, or -1 when the node is not in the list
+     */
+    private int findInListOfActiveFormattingElements(StackNode<T> node) {
+        int pos = listPtr;
+        while (pos >= 0) {
+            if (listOfActiveFormattingElements[pos] == node) {
+                return pos;
+            }
+            pos--;
+        }
+        return -1;
+    }
+
+    /**
+     * Finds the last entry with the given name in the list of active
+     * formatting elements, looking only at the entries after the last
+     * marker ({@code null} entry).
+     *
+     * @param name the name to look for, compared by reference
+     * @return the list index, or -1 when a marker or the start of the list
+     *         is reached first
+     */
+    private int findInListOfActiveFormattingElementsContainsBetweenEndAndLastMarker(
+            @Local String name) {
+        int pos = listPtr;
+        while (pos >= 0) {
+            StackNode<T> entry = listOfActiveFormattingElements[pos];
+            if (entry == null) {
+                break;
+            }
+            if (entry.name == name) {
+                return pos;
+            }
+            pos--;
+        }
+        return -1;
+    }
+
+
+    /**
+     * Counts the entries after the last marker in the list of active
+     * formatting elements that have the given name and attributes equal to
+     * {@code attributes}; when there are three or more, the earliest such
+     * entry is removed from the list.
+     *
+     * @param name the name to match, compared by reference
+     * @param attributes the attributes to compare with {@code equalsAnother}
+     * @throws SAXException declared by the signature
+     */
+    private void maybeForgetEarlierDuplicateFormattingElement(
+            @Local String name, HtmlAttributes attributes) throws SAXException {
+        int earliest = -1;
+        int matches = 0;
+        int pos = listPtr;
+        while (pos >= 0) {
+            StackNode<T> entry = listOfActiveFormattingElements[pos];
+            if (entry == null) {
+                // Marker: earlier entries do not count.
+                break;
+            }
+            if (entry.name == name && entry.attributes.equalsAnother(attributes)) {
+                earliest = pos;
+                matches++;
+            }
+            pos--;
+        }
+        if (matches >= 3) {
+            removeFromListOfActiveFormattingElements(earliest);
+        }
+    }
+
+    /**
+     * Finds the topmost HTML-namespace element with the given local name,
+     * looking at stack indices {@code currentPtr} down to 1.
+     *
+     * @param name the local name to look for, compared by reference
+     * @return the stack index, or 0 (the root) when there is no match
+     */
+    private int findLastOrRoot(@Local String name) {
+        int pos = currentPtr;
+        while (pos > 0) {
+            StackNode<T> candidate = stack[pos];
+            if (candidate.ns == "http://www.w3.org/1999/xhtml" && candidate.name == name) {
+                break;
+            }
+            pos--;
+        }
+        // Either the matching index, or 0 when the loop ran out.
+        return pos;
+    }
+
+    /**
+     * Finds the topmost node of the given group, looking at stack indices
+     * {@code currentPtr} down to 1.
+     *
+     * @param group the element group to look for
+     * @return the stack index, or 0 (the root) when there is no match
+     */
+    private int findLastOrRoot(int group) {
+        int pos = currentPtr;
+        while (pos > 0) {
+            if (stack[pos].getGroup() == group) {
+                break;
+            }
+            pos--;
+        }
+        // Either the matching index, or 0 when the loop ran out.
+        return pos;
+    }
+
+    /**
+     * Adds the given attributes to the body element, provided the node at
+     * stack index 1 is in the {@code BODY} group.
+     *
+     * @param attributes the attributes to add
+     * @return <code>true</code> iff the attributes were added
+     * @throws SAXException if a callee throws it
+     */
+    private boolean addAttributesToBody(HtmlAttributes attributes)
+            throws SAXException {
+        // [NOCPP[
+        checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+        // ]NOCPP]
+        if (!isSecondOnStackBody()) {
+            return false;
+        }
+        addAttributesToElement(stack[1].node, attributes);
+        return true;
+    }
+
+    /**
+     * Adds the given attributes to the element at the bottom of the stack
+     * (index 0).
+     *
+     * @param attributes the attributes to add
+     * @throws SAXException if a callee throws it
+     */
+    private void addAttributesToHtml(HtmlAttributes attributes)
+            throws SAXException {
+        // [NOCPP[
+        checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+        // ]NOCPP]
+        T root = stack[0].node;
+        addAttributesToElement(root, attributes);
+    }
+
+    /**
+     * Puts the element referenced by {@code headPointer} back onto the
+     * stack, wrapped in a new {@code HEAD} stack node, without calling
+     * {@code elementPushed}. Asserts that the mode is {@code AFTER_HEAD} and
+     * calls {@code fatal()} before pushing.
+     *
+     * @throws SAXException if {@code fatal()} or {@code silentPush} throws it
+     */
+    private void pushHeadPointerOntoStack() throws SAXException {
+        assert headPointer != null;
+        assert mode == AFTER_HEAD;
+        fatal();
+        StackNode<T> headNode = new StackNode<T>(ElementName.HEAD, headPointer
+                // [NOCPP[
+                , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+                // ]NOCPP]
+        );
+        silentPush(headNode);
+    }
+
+ /**
+ * Reopens the formatting elements at the end of the list of active
+ * formatting elements that are no longer on the stack.
+ *
+ * Nothing happens when the list is empty, or when its last entry is a
+ * marker or is still on the stack. Otherwise each trailing entry that is
+ * not on the stack is cloned, the clone is inserted into the tree and
+ * pushed, and the list entry is replaced by the clone.
+ *
+ * @throws SAXException
+ *
+ */
+ private void reconstructTheActiveFormattingElements() throws SAXException {
+ if (listPtr == -1) {
+ return;
+ }
+ StackNode<T> mostRecent = listOfActiveFormattingElements[listPtr];
+ if (mostRecent == null || isInStack(mostRecent)) {
+ return;
+ }
+ // Rewind to just after the nearest marker or on-stack entry, or to the
+ // start of the list.
+ int entryPos = listPtr;
+ for (;;) {
+ entryPos--;
+ if (entryPos == -1) {
+ break;
+ }
+ if (listOfActiveFormattingElements[entryPos] == null) {
+ break;
+ }
+ if (isInStack(listOfActiveFormattingElements[entryPos])) {
+ break;
+ }
+ }
+ // Advance again, recreating each entry under the then-current node.
+ while (entryPos < listPtr) {
+ entryPos++;
+ StackNode<T> entry = listOfActiveFormattingElements[entryPos];
+ StackNode<T> currentNode = stack[currentPtr];
+
+ T clone;
+ if (currentNode.isFosterParenting()) {
+ clone = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", entry.name,
+ entry.attributes.cloneAttributes(null));
+ } else {
+ clone = createElement("http://www.w3.org/1999/xhtml", entry.name,
+ entry.attributes.cloneAttributes(null), currentNode.node);
+ appendElement(clone, currentNode.node);
+ }
+
+ StackNode<T> entryClone = new StackNode<T>(entry.getFlags(),
+ entry.ns, entry.name, clone, entry.popName,
+ entry.attributes
+ // [NOCPP[
+ , entry.getLocator()
+ // ]NOCPP]
+ );
+
+ entry.dropAttributes(); // transfer ownership to entryClone
+
+ push(entryClone);
+ // stack takes ownership of the local variable
+ listOfActiveFormattingElements[entryPos] = entryClone;
+ // overwriting the old entry on the list, so release & retain
+ entry.release();
+ entryClone.retain();
+ }
+ }
+
+    /**
+     * Inserts a child node at the foster parent location.
+     *
+     * When the topmost {@code TEMPLATE} node is at or above the topmost
+     * {@code TABLE} node on the stack (either index defaults to 0 when the
+     * group is absent), the child is appended to that template node.
+     * Otherwise it is handed to {@code insertFosterParentedChild} together
+     * with the table node and the node directly below it on the stack.
+     *
+     * @param child the node to insert
+     * @throws SAXException if a callee throws it
+     */
+    private void insertIntoFosterParent(T child) throws SAXException {
+        int tablePos = findLastOrRoot(TreeBuilder.TABLE);
+        int templatePos = findLastOrRoot(TreeBuilder.TEMPLATE);
+
+        if (templatePos < tablePos) {
+            StackNode<T> table = stack[tablePos];
+            insertFosterParentedChild(child, table.node, stack[tablePos - 1].node);
+        } else {
+            appendElement(child, stack[templatePos].node);
+        }
+    }
+
+ /**
+ * Convenience overload that delegates to the four-argument version with a
+ * {@code null} form.
+ *
+ * @param ns the namespace of the element to create
+ * @param name the local name of the element to create
+ * @param attributes the attributes for the new element
+ * @return the value returned by the four-argument overload
+ * @throws SAXException if the delegate throws it
+ */
+ private T createAndInsertFosterParentedElement(@NsUri String ns, @Local String name,
+ HtmlAttributes attributes) throws SAXException {
+ return createAndInsertFosterParentedElement(ns, name, attributes, null);
+ }
+
+    /**
+     * Creates an element and inserts it at the foster parent location.
+     *
+     * When the topmost {@code TEMPLATE} node is at or above the topmost
+     * {@code TABLE} node on the stack (either index defaults to 0 when the
+     * group is absent), the element is created for and appended to that
+     * template node. Otherwise the work is delegated to the six-argument
+     * overload with the table node and the node directly below it.
+     *
+     * @param ns the namespace of the element to create
+     * @param name the local name of the element to create
+     * @param attributes the attributes for the new element
+     * @param form the form node passed through to element creation
+     * @return the newly created element
+     * @throws SAXException if a callee throws it
+     */
+    private T createAndInsertFosterParentedElement(@NsUri String ns, @Local String name,
+            HtmlAttributes attributes, T form) throws SAXException {
+        int tablePos = findLastOrRoot(TreeBuilder.TABLE);
+        int templatePos = findLastOrRoot(TreeBuilder.TEMPLATE);
+
+        if (templatePos < tablePos) {
+            StackNode<T> table = stack[tablePos];
+            return createAndInsertFosterParentedElement(ns, name, attributes, form, table.node, stack[tablePos - 1].node);
+        }
+
+        T child = createElement(ns, name, attributes, form, stack[templatePos].node);
+        appendElement(child, stack[templatePos].node);
+        return child;
+    }
+
+    /**
+     * Tells whether the given node is on the stack of open elements,
+     * comparing by reference over indices {@code currentPtr} down to 0.
+     *
+     * @param node the node to look for
+     * @return {@code true} iff the node is on the stack
+     */
+    private boolean isInStack(StackNode<T> node) {
+        int pos = currentPtr;
+        while (pos >= 0) {
+            if (stack[pos] == node) {
+                return true;
+            }
+            pos--;
+        }
+        return false;
+    }
+
+ /**
+ * Pops the template mode stack by decrementing {@code templateModePtr}.
+ * No bounds check is performed here.
+ */
+ private void popTemplateMode() {
+ templateModePtr--;
+ }
+
+    /**
+     * Pops the current node off the stack, reports it through
+     * {@code elementPopped} and then releases it.
+     *
+     * @throws SAXException if {@code elementPopped} throws it
+     */
+    private void pop() throws SAXException {
+        StackNode<T> popped = stack[currentPtr];
+        assert debugOnlyClearLastStackSlot();
+        currentPtr--;
+        elementPopped(popped.ns, popped.popName, popped.node);
+        popped.release();
+    }
+
+    /**
+     * Pops the current node off the stack and releases it, without calling
+     * {@code elementPopped}.
+     *
+     * @throws SAXException declared for parity with {@code pop}
+     */
+    private void silentPop() throws SAXException {
+        StackNode<T> popped = stack[currentPtr];
+        assert debugOnlyClearLastStackSlot();
+        currentPtr--;
+        popped.release();
+    }
+
+    /**
+     * Pops the current node off the stack like {@code pop}, but first hands
+     * the node to {@code markMalformedIfScript}.
+     *
+     * @throws SAXException if a callee throws it
+     */
+    private void popOnEof() throws SAXException {
+        StackNode<T> popped = stack[currentPtr];
+        assert debugOnlyClearLastStackSlot();
+        currentPtr--;
+        markMalformedIfScript(popped.node);
+        elementPopped(popped.ns, popped.popName, popped.node);
+        popped.release();
+    }
+
+ // [NOCPP[
+    /**
+     * Reports problems with namespace-declaration attributes and then lets
+     * the attribute holder process its non-NCName attribute names.
+     *
+     * The xmlns checks only run when an error handler is installed:
+     * <ul>
+     * <li>{@code xmlns} is an error in HTML4 mode; otherwise its value must
+     * equal {@code ns}.</li>
+     * <li>{@code xmlns:xlink} is accepted on non-HTML elements only, and its
+     * value must be the XLink namespace.</li>
+     * <li>Any other attribute in the xmlns list is an error.</li>
+     * </ul>
+     * After each reported error the name policy decides whether a warning or
+     * a fatal error about XML 1.0 serializability follows.
+     *
+     * @param attributes the attributes to check
+     * @param ns the namespace of the element carrying the attributes
+     * @throws SAXException if a reporting callee throws it
+     */
+    private void checkAttributes(HtmlAttributes attributes, @NsUri String ns)
+            throws SAXException {
+        if (errorHandler != null) {
+            int len = attributes.getXmlnsLength();
+            for (int i = 0; i < len; i++) {
+                AttributeName name = attributes.getXmlnsAttributeName(i);
+                if (name == AttributeName.XMLNS) {
+                    if (html4) {
+                        err("Attribute \u201Cxmlns\u201D not allowed here. (HTML4-only error.)");
+                    } else {
+                        String xmlns = attributes.getXmlnsValue(i);
+                        if (!ns.equals(xmlns)) {
+                            err("Bad value \u201C"
+                                    + xmlns
+                                    + "\u201D for the attribute \u201Cxmlns\u201D (only \u201C"
+                                    + ns + "\u201D permitted here).");
+                            switch (namePolicy) {
+                                case ALTER_INFOSET:
+                                    // fall through
+                                case ALLOW:
+                                    warn("Attribute \u201Cxmlns\u201D is not serializable as XML 1.0.");
+                                    break;
+                                case FATAL:
+                                    fatal("Attribute \u201Cxmlns\u201D is not serializable as XML 1.0.");
+                                    break;
+                            }
+                        }
+                    }
+                } else if (ns != "http://www.w3.org/1999/xhtml"
+                        && name == AttributeName.XMLNS_XLINK) {
+                    String xmlns = attributes.getXmlnsValue(i);
+                    if (!"http://www.w3.org/1999/xlink".equals(xmlns)) {
+                        // The attribute checked in this branch is xmlns:xlink,
+                        // so the message must name it that way.
+                        err("Bad value \u201C"
+                                + xmlns
+                                + "\u201D for the attribute \u201Cxmlns:xlink\u201D (only \u201Chttp://www.w3.org/1999/xlink\u201D permitted here).");
+                        switch (namePolicy) {
+                            case ALTER_INFOSET:
+                                // fall through
+                            case ALLOW:
+                                warn("Attribute \u201Cxmlns:xlink\u201D with a value other than \u201Chttp://www.w3.org/1999/xlink\u201D is not serializable as XML 1.0 without changing document semantics.");
+                                break;
+                            case FATAL:
+                                fatal("Attribute \u201Cxmlns:xlink\u201D with a value other than \u201Chttp://www.w3.org/1999/xlink\u201D is not serializable as XML 1.0 without changing document semantics.");
+                                break;
+                        }
+                    }
+                } else {
+                    err("Attribute \u201C" + attributes.getXmlnsLocalName(i)
+                            + "\u201D not allowed here.");
+                    switch (namePolicy) {
+                        case ALTER_INFOSET:
+                            // fall through
+                        case ALLOW:
+                            warn("Attribute with the local name \u201C"
+                                    + attributes.getXmlnsLocalName(i)
+                                    + "\u201D is not serializable as XML 1.0.");
+                            break;
+                        case FATAL:
+                            fatal("Attribute with the local name \u201C"
+                                    + attributes.getXmlnsLocalName(i)
+                                    + "\u201D is not serializable as XML 1.0.");
+                            break;
+                    }
+                }
+            }
+        }
+        // Runs whether or not an error handler is installed.
+        attributes.processNonNcNames(this, namePolicy);
+    }
+
+    /**
+     * Returns the name under which an element should be reported when
+     * popped, taking the name policy into account.
+     *
+     * An NCName is returned unchanged. For any other name, {@code ALLOW}
+     * warns and returns the name as is, {@code ALTER_INFOSET} warns and
+     * returns the escaped name, and {@code FATAL} calls {@code fatal}.
+     *
+     * @param name the element name to check
+     * @return the name to report, or {@code null} on the path after
+     *         {@code fatal}
+     * @throws SAXException if {@code warn} or {@code fatal} throws it
+     */
+    private String checkPopName(@Local String name) throws SAXException {
+        if (NCName.isNCName(name)) {
+            return name;
+        }
+        String message = "Element name \u201C" + name
+                + "\u201D cannot be represented as XML 1.0.";
+        switch (namePolicy) {
+            case ALLOW:
+                warn(message);
+                return name;
+            case ALTER_INFOSET:
+                warn(message);
+                return NCName.escapeName(name);
+            case FATAL:
+                fatal(message);
+                break;
+        }
+        return null; // keep compiler happy
+    }
+
+ // ]NOCPP]
+
+    /**
+     * Creates the root {@code html} element with the given attributes via
+     * {@code createHtmlElementSetAsRoot} and pushes a stack node for it.
+     *
+     * @param attributes the attributes for the root element
+     * @throws SAXException if a callee throws it
+     */
+    private void appendHtmlElementToDocumentAndPush(HtmlAttributes attributes)
+            throws SAXException {
+        // [NOCPP[
+        checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+        // ]NOCPP]
+        T root = createHtmlElementSetAsRoot(attributes);
+        push(new StackNode<T>(ElementName.HTML,
+                root
+                // [NOCPP[
+                , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+                // ]NOCPP]
+        ));
+    }
+
+ /**
+ * Convenience overload that creates and pushes the root element using the
+ * attributes object returned by {@code tokenizer.emptyAttributes()}.
+ *
+ * @throws SAXException if the delegate throws it
+ */
+ private void appendHtmlElementToDocumentAndPush() throws SAXException {
+ appendHtmlElementToDocumentAndPush(tokenizer.emptyAttributes());
+ }
+
+    /**
+     * Creates a {@code head} element, appends it to the current node,
+     * records it in {@code headPointer} and pushes a stack node for it.
+     *
+     * @param attributes the attributes for the head element
+     * @throws SAXException if a callee throws it
+     */
+    private void appendToCurrentNodeAndPushHeadElement(HtmlAttributes attributes)
+            throws SAXException {
+        // [NOCPP[
+        checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+        // ]NOCPP]
+        T parent = stack[currentPtr].node;
+        T head = createElement("http://www.w3.org/1999/xhtml", "head", attributes, parent);
+        appendElement(head, parent);
+        headPointer = head;
+        push(new StackNode<T>(ElementName.HEAD,
+                head
+                // [NOCPP[
+                , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+                // ]NOCPP]
+        ));
+    }
+
+ /**
+  * Appends a body element to the current node and pushes it, by delegating
+  * to appendToCurrentNodeAndPushElement with ElementName.BODY.
+  *
+  * @param attributes the attributes for the body element
+  * @throws SAXException if the delegate throws
+  */
+ private void appendToCurrentNodeAndPushBodyElement(HtmlAttributes attributes)
+         throws SAXException {
+     appendToCurrentNodeAndPushElement(ElementName.BODY, attributes);
+ }
+
+ /**
+  * Appends and pushes a body element using the tokenizer's empty attribute
+  * holder.
+  *
+  * @throws SAXException if the delegate throws
+  */
+ private void appendToCurrentNodeAndPushBodyElement() throws SAXException {
+     HtmlAttributes noAttributes = tokenizer.emptyAttributes();
+     appendToCurrentNodeAndPushBodyElement(noAttributes);
+ }
+
+ /**
+  * Creates a form element, inserts it (foster parenting it when the current
+  * stack node is foster parenting), records it as the form pointer unless
+  * isTemplateContents() is true, and pushes it onto the stack.
+  *
+  * @param attributes the attributes for the form element
+  * @throws SAXException if attribute checking or a tree operation throws
+  */
+ private void appendToCurrentNodeAndPushFormElementMayFoster(
+         HtmlAttributes attributes) throws SAXException {
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     // ]NOCPP]
+
+     T form;
+     StackNode<T> top = stack[currentPtr];
+     if (!top.isFosterParenting()) {
+         form = createElement("http://www.w3.org/1999/xhtml", "form", attributes, top.node);
+         appendElement(form, top.node);
+     } else {
+         fatal();
+         form = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", "form", attributes);
+     }
+
+     if (!isTemplateContents()) {
+         formPointer = form;
+     }
+
+     StackNode<T> formNode = new StackNode<T>(ElementName.FORM,
+             form
+             // [NOCPP[
+             , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+             // ]NOCPP]
+     );
+     push(formNode);
+ }
+
+ /**
+  * Creates an HTML formatting element, inserts it (foster parenting it when
+  * the current stack node is foster parenting), pushes it onto the stack
+  * and appends the same stack node via append(node), retaining it once
+  * more for that second reference.
+  * <p>
+  * This method can't be called for custom elements.
+  *
+  * @param elementName the element to create
+  * @param attributes the attributes for the element; a clone is stored on
+  *        the stack node
+  * @throws SAXException if attribute checking or a tree operation throws
+  */
+ private void appendToCurrentNodeAndPushFormattingElementMayFoster(
+         ElementName elementName, HtmlAttributes attributes)
+         throws SAXException {
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     // ]NOCPP]
+     // Clone first: attributes must not be read after calling
+     // createElement, because createElement may delete attributes in C++.
+     HtmlAttributes attributesCopy = attributes.cloneAttributes(null);
+     T created;
+     StackNode<T> top = stack[currentPtr];
+     if (!top.isFosterParenting()) {
+         created = createElement("http://www.w3.org/1999/xhtml", elementName.name, attributes, top.node);
+         appendElement(created, top.node);
+     } else {
+         fatal();
+         created = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", elementName.name, attributes);
+     }
+     StackNode<T> formattingNode = new StackNode<T>(elementName, created, attributesCopy
+             // [NOCPP[
+             , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+             // ]NOCPP]
+     );
+     push(formattingNode);
+     append(formattingNode);
+     formattingNode.retain(); // append doesn't retain itself
+ }
+
+ /**
+  * Creates an HTML element, appends it to the current node and pushes it
+  * onto the stack, without foster parenting. For template elements the
+  * stack node holds the result of getDocumentFragmentForTemplate instead
+  * of the element itself.
+  * <p>
+  * This method can't be called for custom elements.
+  *
+  * @param elementName the element to create
+  * @param attributes the attributes for the element
+  * @throws SAXException if attribute checking or a tree operation throws
+  */
+ private void appendToCurrentNodeAndPushElement(ElementName elementName,
+         HtmlAttributes attributes)
+         throws SAXException {
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     // ]NOCPP]
+     T parent = stack[currentPtr].node;
+     T created = createElement("http://www.w3.org/1999/xhtml", elementName.name, attributes, parent);
+     appendElement(created, parent);
+     if (elementName == ElementName.TEMPLATE) {
+         created = getDocumentFragmentForTemplate(created);
+     }
+     StackNode<T> pushed = new StackNode<T>(elementName, created
+             // [NOCPP[
+             , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+             // ]NOCPP]
+     );
+     push(pushed);
+ }
+
+ /**
+  * Creates an HTML element, inserts it (foster parenting it when the
+  * current stack node is foster parenting) and pushes it onto the stack.
+  * For custom element names the pop name is first passed through
+  * checkPopName.
+  *
+  * @param elementName the element to create
+  * @param attributes the attributes for the element
+  * @throws SAXException if a check or a tree operation throws
+  */
+ private void appendToCurrentNodeAndPushElementMayFoster(ElementName elementName,
+         HtmlAttributes attributes)
+         throws SAXException {
+     @Local String popName = elementName.name;
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     if (elementName.isCustom()) {
+         popName = checkPopName(popName);
+     }
+     // ]NOCPP]
+     T created;
+     StackNode<T> top = stack[currentPtr];
+     if (!top.isFosterParenting()) {
+         created = createElement("http://www.w3.org/1999/xhtml", popName, attributes, top.node);
+         appendElement(created, top.node);
+     } else {
+         fatal();
+         created = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", popName, attributes);
+     }
+     StackNode<T> pushed = new StackNode<T>(elementName, created, popName
+             // [NOCPP[
+             , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+             // ]NOCPP]
+     );
+     push(pushed);
+ }
+
+ /**
+  * Creates a MathML element, inserts it (foster parenting it when the
+  * current stack node is foster parenting) and pushes it onto the stack.
+  * An annotation-xml element whose encoding attribute permits HTML is
+  * flagged as an HTML integration point on its stack node.
+  *
+  * @param elementName the element to create
+  * @param attributes the attributes for the element
+  * @throws SAXException if a check or a tree operation throws
+  */
+ private void appendToCurrentNodeAndPushElementMayFosterMathML(
+         ElementName elementName, HtmlAttributes attributes)
+         throws SAXException {
+     @Local String popName = elementName.name;
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1998/Math/MathML");
+     if (elementName.isCustom()) {
+         popName = checkPopName(popName);
+     }
+     // ]NOCPP]
+     // The encoding attribute is inspected here, before createElement():
+     // attributes must not be read after that call, since createElement
+     // may delete the object in C++.
+     boolean htmlIntegrationPoint = ElementName.ANNOTATION_XML == elementName
+             && annotationXmlEncodingPermitsHtml(attributes);
+     T created;
+     StackNode<T> top = stack[currentPtr];
+     if (!top.isFosterParenting()) {
+         created = createElement("http://www.w3.org/1998/Math/MathML", popName, attributes, top.node);
+         appendElement(created, top.node);
+     } else {
+         fatal();
+         created = createAndInsertFosterParentedElement("http://www.w3.org/1998/Math/MathML", popName, attributes);
+     }
+     StackNode<T> pushed = new StackNode<T>(elementName, created, popName,
+             htmlIntegrationPoint
+             // [NOCPP[
+             , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+             // ]NOCPP]
+     );
+     push(pushed);
+ }
+
+ // [NOCPP[
+ /**
+  * Returns the node that should act as the stack node for a template
+  * element. This default implementation returns the template itself.
+  *
+  * @param template the template element
+  * @return the given template
+  */
+ T getDocumentFragmentForTemplate(T template) {
+     return template;
+ }
+
+ /**
+  * Returns the form pointer to use for a fragment context node. This
+  * default implementation always returns null.
+  *
+  * @param context the context node
+  * @return null
+  */
+ T getFormPointerForContext(T context) {
+     return null;
+ }
+ // ]NOCPP]
+
+ /**
+  * Tells whether the encoding attribute equals, ignoring ASCII case,
+  * "application/xhtml+xml" or "text/html".
+  *
+  * @param attributes the attributes to inspect
+  * @return true if the encoding attribute is present and matches one of
+  *         the two values; false otherwise
+  */
+ private boolean annotationXmlEncodingPermitsHtml(HtmlAttributes attributes) {
+     String value = attributes.getValue(AttributeName.ENCODING);
+     if (value == null) {
+         return false;
+     }
+     if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+             "application/xhtml+xml", value)) {
+         return true;
+     }
+     return Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
+             "text/html", value);
+ }
+
+ /**
+  * Creates an SVG element under its camel-case name, inserts it (foster
+  * parenting it when the current stack node is foster parenting) and
+  * pushes it onto the stack.
+  *
+  * @param elementName the element to create
+  * @param attributes the attributes for the element
+  * @throws SAXException if a check or a tree operation throws
+  */
+ private void appendToCurrentNodeAndPushElementMayFosterSVG(
+         ElementName elementName, HtmlAttributes attributes)
+         throws SAXException {
+     @Local String popName = elementName.camelCaseName;
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/2000/svg");
+     if (elementName.isCustom()) {
+         popName = checkPopName(popName);
+     }
+     // ]NOCPP]
+     T created;
+     StackNode<T> top = stack[currentPtr];
+     if (!top.isFosterParenting()) {
+         created = createElement("http://www.w3.org/2000/svg", popName, attributes, top.node);
+         appendElement(created, top.node);
+     } else {
+         fatal();
+         created = createAndInsertFosterParentedElement("http://www.w3.org/2000/svg", popName, attributes);
+     }
+     // The (elementName, popName, node) argument order is kept exactly as
+     // in the original call.
+     StackNode<T> pushed = new StackNode<T>(elementName, popName, created
+             // [NOCPP[
+             , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+             // ]NOCPP]
+     );
+     push(pushed);
+ }
+
+ /**
+  * Creates an HTML element with an optional form owner, inserts it (foster
+  * parenting it when the current stack node is foster parenting) and
+  * pushes it onto the stack. The form owner is dropped (null) when form is
+  * null, when parsing a fragment, or when isTemplateContents() is true.
+  * <p>
+  * Can't be called for custom elements.
+  *
+  * @param elementName the element to create
+  * @param attributes the attributes for the element
+  * @param form the candidate form owner, possibly null
+  * @throws SAXException if attribute checking or a tree operation throws
+  */
+ private void appendToCurrentNodeAndPushElementMayFoster(ElementName elementName,
+         HtmlAttributes attributes, T form)
+         throws SAXException {
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     // ]NOCPP]
+     T created;
+     T owner = (form == null || fragment || isTemplateContents()) ? null : form;
+     StackNode<T> top = stack[currentPtr];
+     if (!top.isFosterParenting()) {
+         created = createElement("http://www.w3.org/1999/xhtml", elementName.name,
+                 attributes, owner, top.node);
+         appendElement(created, top.node);
+     } else {
+         fatal();
+         created = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", elementName.name,
+                 attributes, owner);
+     }
+     StackNode<T> pushed = new StackNode<T>(elementName, created
+             // [NOCPP[
+             , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
+             // ]NOCPP]
+     );
+     push(pushed);
+ }
+
+ /**
+  * Creates a void HTML element with an optional form owner and inserts it
+  * (foster parenting it when the current stack node is foster parenting)
+  * without pushing it onto the stack; elementPushed and elementPopped are
+  * notified back to back. The form owner is dropped (null) when form is
+  * null, when parsing a fragment, or when isTemplateContents() is true.
+  * <p>
+  * Can't be called for custom elements.
+  *
+  * @param name the local name of the element
+  * @param attributes the attributes for the element
+  * @param form the candidate form owner, possibly null
+  * @throws SAXException if attribute checking or a tree operation throws
+  */
+ private void appendVoidElementToCurrentMayFoster(
+         @Local String name, HtmlAttributes attributes, T form) throws SAXException {
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     // ]NOCPP]
+     T created;
+     T owner = (form == null || fragment || isTemplateContents()) ? null : form;
+     StackNode<T> top = stack[currentPtr];
+     if (!top.isFosterParenting()) {
+         created = createElement("http://www.w3.org/1999/xhtml", name,
+                 attributes, owner, top.node);
+         appendElement(created, top.node);
+     } else {
+         fatal();
+         created = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", name,
+                 attributes, owner);
+     }
+     elementPushed("http://www.w3.org/1999/xhtml", name, created);
+     elementPopped("http://www.w3.org/1999/xhtml", name, created);
+ }
+
+ /**
+  * Creates a void HTML element and inserts it (foster parenting it when
+  * the current stack node is foster parenting) without pushing it onto the
+  * stack; elementPushed and elementPopped are notified back to back. For
+  * custom element names the pop name is first passed through checkPopName.
+  *
+  * @param elementName the element to create
+  * @param attributes the attributes for the element
+  * @throws SAXException if a check or a tree operation throws
+  */
+ private void appendVoidElementToCurrentMayFoster(
+         ElementName elementName, HtmlAttributes attributes)
+         throws SAXException {
+     @Local String popName = elementName.name;
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     if (elementName.isCustom()) {
+         popName = checkPopName(popName);
+     }
+     // ]NOCPP]
+     T created;
+     StackNode<T> top = stack[currentPtr];
+     if (!top.isFosterParenting()) {
+         created = createElement("http://www.w3.org/1999/xhtml", popName, attributes, top.node);
+         appendElement(created, top.node);
+     } else {
+         fatal();
+         created = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", popName, attributes);
+     }
+     elementPushed("http://www.w3.org/1999/xhtml", popName, created);
+     elementPopped("http://www.w3.org/1999/xhtml", popName, created);
+ }
+
+ /**
+  * Creates a void SVG element under its camel-case name and inserts it
+  * (foster parenting it when the current stack node is foster parenting)
+  * without pushing it onto the stack; elementPushed and elementPopped are
+  * notified back to back.
+  *
+  * @param elementName the element to create
+  * @param attributes the attributes for the element
+  * @throws SAXException if a check or a tree operation throws
+  */
+ private void appendVoidElementToCurrentMayFosterSVG(
+         ElementName elementName, HtmlAttributes attributes)
+         throws SAXException {
+     @Local String popName = elementName.camelCaseName;
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/2000/svg");
+     if (elementName.isCustom()) {
+         popName = checkPopName(popName);
+     }
+     // ]NOCPP]
+     T created;
+     StackNode<T> top = stack[currentPtr];
+     if (!top.isFosterParenting()) {
+         created = createElement("http://www.w3.org/2000/svg", popName, attributes, top.node);
+         appendElement(created, top.node);
+     } else {
+         fatal();
+         created = createAndInsertFosterParentedElement("http://www.w3.org/2000/svg", popName, attributes);
+     }
+     elementPushed("http://www.w3.org/2000/svg", popName, created);
+     elementPopped("http://www.w3.org/2000/svg", popName, created);
+ }
+
+ /**
+  * Creates a void MathML element and inserts it (foster parenting it when
+  * the current stack node is foster parenting) without pushing it onto the
+  * stack; elementPushed and elementPopped are notified back to back.
+  *
+  * @param elementName the element to create
+  * @param attributes the attributes for the element
+  * @throws SAXException if a check or a tree operation throws
+  */
+ private void appendVoidElementToCurrentMayFosterMathML(
+         ElementName elementName, HtmlAttributes attributes)
+         throws SAXException {
+     @Local String popName = elementName.name;
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1998/Math/MathML");
+     if (elementName.isCustom()) {
+         popName = checkPopName(popName);
+     }
+     // ]NOCPP]
+     T created;
+     StackNode<T> top = stack[currentPtr];
+     if (!top.isFosterParenting()) {
+         created = createElement("http://www.w3.org/1998/Math/MathML", popName, attributes, top.node);
+         appendElement(created, top.node);
+     } else {
+         fatal();
+         created = createAndInsertFosterParentedElement("http://www.w3.org/1998/Math/MathML", popName, attributes);
+     }
+     elementPushed("http://www.w3.org/1998/Math/MathML", popName, created);
+     elementPopped("http://www.w3.org/1998/Math/MathML", popName, created);
+ }
+
+ /**
+  * Creates a void HTML element with an optional form owner and appends it
+  * to the current node, never foster parenting; elementPushed and
+  * elementPopped are notified back to back. The form owner is dropped
+  * (null) when form is null, when parsing a fragment, or when
+  * isTemplateContents() is true.
+  * <p>
+  * Can't be called for custom elements.
+  *
+  * @param name the local name of the element
+  * @param attributes the attributes for the element
+  * @param form the candidate form owner, possibly null
+  * @throws SAXException if attribute checking or a tree operation throws
+  */
+ private void appendVoidElementToCurrent(
+         @Local String name, HtmlAttributes attributes, T form) throws SAXException {
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     // ]NOCPP]
+     T parent = stack[currentPtr].node;
+     T owner = (form == null || fragment || isTemplateContents()) ? null : form;
+     T created = createElement("http://www.w3.org/1999/xhtml", name, attributes,
+             owner, parent);
+     appendElement(created, parent);
+     elementPushed("http://www.w3.org/1999/xhtml", name, created);
+     elementPopped("http://www.w3.org/1999/xhtml", name, created);
+ }
+
+ /**
+  * Creates a form element, records it as the form pointer and appends it
+  * to the current node without pushing it onto the stack; elementPushed
+  * and elementPopped are notified back to back.
+  *
+  * @param attributes the attributes for the form element
+  * @throws SAXException if attribute checking or a tree operation throws
+  */
+ private void appendVoidFormToCurrent(HtmlAttributes attributes) throws SAXException {
+     // [NOCPP[
+     checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
+     // ]NOCPP]
+     T parent = stack[currentPtr].node;
+     T form = createElement("http://www.w3.org/1999/xhtml", "form",
+             attributes, parent);
+     // ownership transferred to form pointer
+     formPointer = form;
+     appendElement(form, parent);
+     elementPushed("http://www.w3.org/1999/xhtml", "form", form);
+     elementPopped("http://www.w3.org/1999/xhtml", "form", form);
+ }
+
+ // [NOCPP[
+
+ /**
+  * Copies characters to the end of charBuffer and advances charBufferLen.
+  * No capacity check is made here.
+  *
+  * @param buf the source buffer
+  * @param start the index of the first character to copy
+  * @param length the number of characters to copy
+  * @throws SAXException declared by the signature; not thrown by this body
+  */
+ private final void accumulateCharactersForced(@Const @NoLength char[] buf,
+         int start, int length) throws SAXException {
+     int writeAt = charBufferLen;
+     System.arraycopy(buf, start, charBuffer, writeAt, length);
+     charBufferLen = writeAt + length;
+ }
+
+ /**
+  * Makes sure charBuffer can hold its current content plus inputLength
+  * more characters, allocating or growing the buffer as needed.
+  *
+  * @param inputLength the number of additional characters to make room for
+  * @throws SAXException declared by the signature; not thrown by this body
+  */
+ @Override public void ensureBufferSpace(int inputLength)
+         throws SAXException {
+     // TODO: Unify Tokenizer.strBuf and TreeBuilder.charBuffer so that
+     // this method becomes unnecessary.
+     int required = charBufferLen + inputLength;
+     if (charBuffer == null) {
+         // Pad with an arbitrary small value to avoid immediate
+         // reallocation once there are a few characters in the buffer.
+         charBuffer = new char[required + 128];
+         return;
+     }
+     if (required <= charBuffer.length) {
+         return;
+     }
+     // HotSpot reportedly allocates memory with 8-byte accuracy, so
+     // there's no point in trying to do math here to avoid slop.
+     // Maybe a small constant should be added to the size here, but not
+     // without profiling. In C++ with jemalloc, the corresponding method
+     // should do math to round up here to avoid slop.
+     char[] grown = new char[required];
+     System.arraycopy(charBuffer, 0, grown, 0, charBufferLen);
+     charBuffer = grown;
+ }
+
+ // ]NOCPP]
+
+ /**
+  * Forwards characters to appendCharacters with the current stack node's
+  * DOM node as the parent.
+  *
+  * @param buf the source buffer
+  * @param start the index of the first character
+  * @param length the number of characters
+  * @throws SAXException if appendCharacters throws
+  */
+ protected void accumulateCharacters(@Const @NoLength char[] buf, int start,
+         int length) throws SAXException {
+     T parent = stack[currentPtr].node;
+     appendCharacters(parent, buf, start, length);
+ }
+
+ // ------------------------------- //
+
+ /**
+  * Relays a suspension request to the tokenizer.
+  */
+ protected final void requestSuspension() {
+     tokenizer.requestSuspension();
+ }
+
+ /**
+  * Creates an element. Callers in this class append the returned element
+  * to intendedParent themselves via appendElement.
+  *
+  * @param ns the namespace URI of the element
+  * @param name the local name of the element
+  * @param attributes the attributes for the element
+  * @param intendedParent the node the element is about to be appended to
+  * @return the new element
+  * @throws SAXException if creation fails
+  */
+ protected abstract T createElement(@NsUri String ns,
+         @Local String name, HtmlAttributes attributes,
+         T intendedParent) throws SAXException;
+
+ /**
+  * Creates an element that may be associated with a form owner. This
+  * default implementation ignores form and delegates to the four-argument
+  * createElement; subclasses that track form ownership can override it.
+  *
+  * @param ns the namespace URI of the element
+  * @param name the local name of the element
+  * @param attributes the attributes for the element
+  * @param form the form owner, possibly null; unused here
+  * @param intendedParent the node the element is about to be appended to
+  * @return the new element
+  * @throws SAXException if creation fails
+  */
+ protected T createElement(@NsUri String ns, @Local String name,
+         HtmlAttributes attributes, T form, T intendedParent) throws SAXException {
+     // Forward the caller's namespace instead of hard-coding XHTML, as the
+     // form-owner overload of createAndInsertFosterParentedElement does.
+     // Every caller visible in this class passes the XHTML namespace, so
+     // the result is unchanged for them.
+     return createElement(ns, name, attributes, intendedParent);
+ }
+
+ protected abstract T createHtmlElementSetAsRoot(HtmlAttributes attributes)
+ throws SAXException;
+
+ protected abstract void detachFromParent(T element) throws SAXException;
+
+ protected abstract boolean hasChildren(T element) throws SAXException;
+
+ protected abstract void appendElement(T child, T newParent)
+ throws SAXException;
+
+ protected abstract void appendChildrenToNewParent(T oldParent, T newParent)
+ throws SAXException;
+
+ /**
+  * Tree-mutation hook for foster parenting an existing child node.
+  *
+  * @param child the node to foster parent
+  * @param table the table element relative to which the child is placed
+  * @param stackParent presumably the node below the table on the stack,
+  *        mirroring the insertFosterParentedCharacters call in
+  *        flushCharacters (TODO confirm)
+  * @throws SAXException if the operation fails
+  */
+ protected abstract void insertFosterParentedChild(T child,
+         T table, T stackParent) throws SAXException;
+
+ // We don't generate CPP code for this method because it is not used in generated CPP
+ // code. Instead, the form owner version of this method is called with a null form owner.
+ // [NOCPP[
+
+ /**
+  * Creates an element and inserts it by foster parenting, without a form
+  * owner.
+  *
+  * @param ns the namespace URI of the element
+  * @param name the local name of the element
+  * @param attributes the attributes for the element
+  * @param table the table element relative to which the element is placed
+  * @param stackParent presumably the node below the table on the stack
+  *        (TODO confirm)
+  * @return the new element
+  * @throws SAXException if the operation fails
+  */
+ protected abstract T createAndInsertFosterParentedElement(
+         @NsUri String ns, @Local String name, HtmlAttributes attributes,
+         T table, T stackParent) throws SAXException;
+
+ // ]NOCPP]
+
+ /**
+  * Creates an element that may have a form owner and inserts it by foster
+  * parenting. This default implementation ignores form and delegates to
+  * the overload without a form owner.
+  *
+  * @param ns the namespace URI of the element
+  * @param name the local name of the element
+  * @param attributes the attributes for the element
+  * @param form the form owner, possibly null; unused here
+  * @param table the table element relative to which the element is placed
+  * @param stackParent passed through to the delegate
+  * @return the new element
+  * @throws SAXException if the delegate throws
+  */
+ protected T createAndInsertFosterParentedElement(@NsUri String ns,
+         @Local String name, HtmlAttributes attributes, T form, T table,
+         T stackParent) throws SAXException {
+     return createAndInsertFosterParentedElement(ns, name, attributes, table, stackParent);
+ }
+
+ /**
+  * Tree-mutation hook for foster parenting text. flushCharacters calls it
+  * with the table's node and the node one position below the table on the
+  * stack.
+  *
+  * @param buf the buffer holding the characters
+  * @param start the index of the first character
+  * @param length the number of characters
+  * @param table the table element
+  * @param stackParent the node below the table on the stack
+  * @throws SAXException if the operation fails
+  */
+ protected abstract void insertFosterParentedCharacters(@NoLength char[] buf,
+         int start, int length, T table, T stackParent) throws SAXException;
+
+ /**
+  * Tree-mutation hook for appending text to a parent node.
+  *
+  * @param parent the node receiving the text
+  * @param buf the buffer holding the characters
+  * @param start the index of the first character
+  * @param length the number of characters
+  * @throws SAXException if the operation fails
+  */
+ protected abstract void appendCharacters(T parent,
+         @NoLength char[] buf, int start, int length) throws SAXException;
+
+ protected abstract void appendIsindexPrompt(T parent) throws SAXException;
+
+ /**
+  * Tree-mutation hook for appending a comment to a parent node.
+  *
+  * @param parent the node receiving the comment
+  * @param buf the buffer holding the comment text
+  * @param start the index of the first character
+  * @param length the number of characters
+  * @throws SAXException if the operation fails
+  */
+ protected abstract void appendComment(T parent,
+         @NoLength char[] buf, int start, int length) throws SAXException;
+
+ /**
+  * Tree-mutation hook for appending a comment at document level.
+  *
+  * @param buf the buffer holding the comment text
+  * @param start the index of the first character
+  * @param length the number of characters
+  * @throws SAXException if the operation fails
+  */
+ protected abstract void appendCommentToDocument(
+         @NoLength char[] buf, int start, int length) throws SAXException;
+
+ /**
+  * Tree-mutation hook for adding attributes to an existing element. How
+  * attributes already present on the element are treated is up to the
+  * implementation (not visible here).
+  *
+  * @param element the element to modify
+  * @param attributes the attributes to add
+  * @throws SAXException if the operation fails
+  */
+ protected abstract void addAttributesToElement(
+         T element, HtmlAttributes attributes) throws SAXException;
+
+ /**
+  * Optional hook; the default implementation does nothing.
+  *
+  * @param elt the element that may be a script
+  * @throws SAXException if an overriding implementation throws
+  */
+ protected void markMalformedIfScript(T elt) throws SAXException {
+     // intentionally empty
+ }
+
+ /**
+  * Optional hook; the default implementation does nothing.
+  *
+  * @param fragmentMode whether a fragment is being parsed
+  * @throws SAXException if an overriding implementation throws
+  */
+ protected void start(boolean fragmentMode) throws SAXException {
+     // intentionally empty
+ }
+
+ /**
+  * Optional hook; the default implementation does nothing.
+  *
+  * @throws SAXException if an overriding implementation throws
+  */
+ protected void end() throws SAXException {
+     // intentionally empty
+ }
+
+ /**
+  * Optional hook for recording a doctype; the default implementation does
+  * nothing.
+  *
+  * @param name the doctype name
+  * @param publicIdentifier the public identifier
+  * @param systemIdentifier the system identifier
+  * @throws SAXException if an overriding implementation throws
+  */
+ protected void appendDoctypeToDocument(@Local String name,
+         String publicIdentifier, String systemIdentifier)
+         throws SAXException {
+     // intentionally empty
+ }
+
+ /**
+  * Optional notification hook; the default implementation does nothing.
+  * The void-element insertion methods of this class call it immediately
+  * before elementPopped for the same element.
+  *
+  * @param ns the namespace URI of the element
+  * @param name the local name of the element
+  * @param node the element
+  * @throws SAXException if an overriding implementation throws
+  */
+ protected void elementPushed(@NsUri String ns, @Local String name, T node)
+         throws SAXException {
+     // intentionally empty
+ }
+
+ /**
+  * Optional notification hook; the default implementation does nothing.
+  * The void-element insertion methods of this class call it immediately
+  * after elementPushed for the same element.
+  *
+  * @param ns the namespace URI of the element
+  * @param name the local name of the element
+  * @param node the element
+  * @throws SAXException if an overriding implementation throws
+  */
+ protected void elementPopped(@NsUri String ns, @Local String name, T node)
+         throws SAXException {
+     // intentionally empty
+ }
+
+ // [NOCPP[
+
+ /**
+  * Optional hook for receiving the document mode; the default
+  * implementation does nothing.
+  *
+  * @param m the document mode
+  * @param publicIdentifier the doctype public identifier
+  * @param systemIdentifier the doctype system identifier
+  * @param html4SpecificAdditionalErrorChecks passed through to overriding
+  *        implementations; unused here
+  * @throws SAXException if an overriding implementation throws
+  */
+ protected void documentMode(DocumentMode m, String publicIdentifier,
+         String systemIdentifier, boolean html4SpecificAdditionalErrorChecks)
+         throws SAXException {
+     // intentionally empty
+ }
+
+ /**
+  * Tells whether comments are wanted, as last configured through
+  * setIgnoringComments.
+  *
+  * @return the current value of wantingComments
+  * @see nu.validator.htmlparser.common.TokenHandler#wantsComments()
+  */
+ public boolean wantsComments() {
+     return wantingComments;
+ }
+
+ /**
+  * Configures comment handling; wantsComments() returns the negation of
+  * the value passed here.
+  *
+  * @param ignoreComments true to ignore comments
+  */
+ public void setIgnoringComments(boolean ignoreComments) {
+     wantingComments = !ignoreComments;
+ }
+
+ /**
+  * Installs the error handler. Several methods in this class skip error
+  * reporting and locator creation when the handler is null.
+  *
+  * @param errorHandler the handler to use, or null
+  */
+ public final void setErrorHandler(ErrorHandler errorHandler) {
+     this.errorHandler = errorHandler;
+ }
+
+ /**
+  * Gives access to the installed error handler.
+  *
+  * @return the error handler, or null if none is set
+  */
+ public ErrorHandler getErrorHandler() {
+     return errorHandler;
+ }
+
+ /**
+  * Sets an HTML-namespace fragment context by local name only. The
+  * context node is cleared, quirks is reset to false, and fragment mode is
+  * enabled exactly when the context is non-null.
+  * <p>
+  * The argument MUST be an interned string or <code>null</code>.
+  *
+  * @param context the interned local name of the context element, or null
+  */
+ public final void setFragmentContext(@Local String context) {
+     this.contextName = context;
+     this.contextNamespace = "http://www.w3.org/1999/xhtml";
+     this.contextNode = null;
+     this.fragment = (context != null);
+     this.quirks = false;
+ }
+
+ // ]NOCPP]
+
+ /**
+  * Tells whether a CDATA section is allowed at this point, which is the
+  * case exactly when isInForeign() is true.
+  *
+  * @return the result of isInForeign()
+  * @see nu.validator.htmlparser.common.TokenHandler#cdataSectionAllowed()
+  */
+ @Inline public boolean cdataSectionAllowed() throws SAXException {
+     return isInForeign();
+ }
+
+ /**
+  * Tells whether the stack is non-empty and its top node is outside the
+  * HTML namespace.
+  *
+  * @return true if the current node's namespace is not the XHTML one
+  */
+ private boolean isInForeign() {
+     if (currentPtr < 0) {
+         return false;
+     }
+     // Namespace strings are compared by reference, as in the original.
+     return stack[currentPtr].ns != "http://www.w3.org/1999/xhtml";
+ }
+
+ /**
+  * Tells whether the stack is non-empty and isSpecialParentInForeign
+  * rejects its top node.
+  *
+  * @return false for an empty stack; otherwise the negation of
+  *         isSpecialParentInForeign(stack[currentPtr])
+  */
+ private boolean isInForeignButNotHtmlOrMathTextIntegrationPoint() {
+     return currentPtr >= 0 && !isSpecialParentInForeign(stack[currentPtr]);
+ }
+
+ /**
+  * Sets the fragment context with an explicit namespace, context node and
+  * quirks flag. Fragment mode is enabled exactly when the context name is
+  * non-null.
+  * <p>
+  * The context argument MUST be an interned string or <code>null</code>.
+  * The namespace is compared by reference against the HTML, SVG and MathML
+  * namespace literals.
+  *
+  * @param context the interned local name of the context element, or null
+  * @param ns the namespace of the context element; may be null only when
+  *        context is null
+  * @param node the context node
+  * @param quirks whether quirks mode applies
+  * @throws IllegalArgumentException if ns is not an accepted namespace
+  */
+ public final void setFragmentContext(@Local String context,
+         @NsUri String ns, T node, boolean quirks) {
+     // [NOCPP[
+     boolean noContext = (context == null && ns == null);
+     boolean knownNamespace = "http://www.w3.org/1999/xhtml" == ns
+             || "http://www.w3.org/2000/svg" == ns
+             || "http://www.w3.org/1998/Math/MathML" == ns;
+     if (!noContext && !knownNamespace) {
+         throw new IllegalArgumentException(
+                 "The namespace must be the HTML, SVG or MathML namespace (or null when the local name is null). Got: "
+                         + ns);
+     }
+     // ]NOCPP]
+     this.contextName = context;
+     this.contextNamespace = ns;
+     this.contextNode = node;
+     this.fragment = (context != null);
+     this.quirks = quirks;
+ }
+
+ /**
+  * Returns the DOM node held by the top entry of the stack. The stack must
+  * not be empty.
+  *
+  * @return the node of stack[currentPtr]
+  */
+ protected final T currentNode() {
+     return stack[currentPtr].node;
+ }
+
+ /**
+  * Tells whether scripting is flagged as enabled.
+  *
+  * @return the scriptingEnabled flag
+  */
+ public boolean isScriptingEnabled() {
+     return scriptingEnabled;
+ }
+
+ /**
+  * Stores the scripting flag.
+  *
+  * @param scriptingEnabled the new value of the flag
+  */
+ public void setScriptingEnabled(boolean scriptingEnabled) {
+     this.scriptingEnabled = scriptingEnabled;
+ }
+
+ /**
+  * Stores the srcdoc flag. When it is set, the doctype-related error
+  * reporters of this class stay silent.
+  *
+  * @param isSrcdocDocument the new value of the flag
+  */
+ public void setIsSrcdocDocument(boolean isSrcdocDocument) {
+     this.isSrcdocDocument = isSrcdocDocument;
+ }
+
+ // [NOCPP[
+
+ /**
+  * Stores the doctype expectation.
+  *
+  * @param doctypeExpectation the expectation to use
+  */
+ public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
+     this.doctypeExpectation = doctypeExpectation;
+ }
+
+ /**
+  * Stores the policy applied to names that are not XML 1.0 NCNames; it is
+  * consulted by checkPopName and passed to attribute processing.
+  *
+  * @param namePolicy the policy to use
+  */
+ public void setNamePolicy(XmlViolationPolicy namePolicy) {
+     this.namePolicy = namePolicy;
+ }
+
+ /**
+  * Stores the document mode handler.
+  *
+  * @param documentModeHandler the handler to use
+  */
+ public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
+     this.documentModeHandler = documentModeHandler;
+ }
+
+ /**
+  * Stores the reportingDoctype flag.
+  *
+  * @param reportingDoctype the new value of the flag
+  */
+ public void setReportingDoctype(boolean reportingDoctype) {
+     this.reportingDoctype = reportingDoctype;
+ }
+
+ // ]NOCPP]
+
+ /**
+ * Flushes the pending characters. Public for document.write use cases only.
+ * <p>
+ * Does nothing when the buffer is empty. In the IN_TABLE, IN_TABLE_BODY and
+ * IN_ROW modes, buffered text that contains a non-whitespace character is
+ * reported through errNonSpaceInTable() and the active formatting elements
+ * are reconstructed. If the current node is then not foster parenting, the
+ * text is appended to it. Otherwise the text goes to the template found by
+ * findLastOrRoot when its stack index is at or above the table's, and is
+ * foster parented relative to the table when it is not. In every other
+ * case the text is appended to the current node. Each path that emits text
+ * resets charBufferLen to zero.
+ *
+ * @throws SAXException if error reporting or a tree operation throws
+ */
+ public final void flushCharacters() throws SAXException {
+ if (charBufferLen > 0) { // nothing to flush otherwise
+ if ((mode == IN_TABLE || mode == IN_TABLE_BODY || mode == IN_ROW)
+ && charBufferContainsNonWhitespace()) {
+ errNonSpaceInTable();
+ reconstructTheActiveFormattingElements();
+ if (!stack[currentPtr].isFosterParenting()) {
+ // reconstructing gave us a new current node
+ appendCharacters(currentNode(), charBuffer, 0,
+ charBufferLen);
+ charBufferLen = 0;
+ return;
+ }
+
+ int tablePos = findLastOrRoot(TreeBuilder.TABLE); // presumably the index of the last table, or the root index when there is none
+ int templatePos = findLastOrRoot(TreeBuilder.TEMPLATE);
+
+ if (templatePos >= tablePos) { // template is at or above the table on the stack
+ appendCharacters(stack[templatePos].node, charBuffer, 0, charBufferLen);
+ charBufferLen = 0;
+ return;
+ }
+
+ StackNode<T> tableElt = stack[tablePos];
+ insertFosterParentedCharacters(charBuffer, 0, charBufferLen,
+ tableElt.node, stack[tablePos - 1].node); // second node is the entry just below the table
+ charBufferLen = 0;
+ return;
+ }
+ appendCharacters(currentNode(), charBuffer, 0, charBufferLen); // ordinary case: text goes to the current node
+ charBufferLen = 0;
+ }
+ }
+
+ /**
+  * Scans the first charBufferLen characters of charBuffer for anything
+  * other than space, tab, line feed, carriage return or form feed.
+  *
+  * @return true if such a character is found; false otherwise, including
+  *         for an empty buffer
+  */
+ private boolean charBufferContainsNonWhitespace() {
+     for (int pos = 0; pos < charBufferLen; pos++) {
+         char c = charBuffer[pos];
+         if (c != ' ' && c != '\t' && c != '\n' && c != '\r'
+                 && c != '\u000C') {
+             return true;
+         }
+     }
+     return false;
+ }
+
+ /**
+ * Creates a comparable snapshot of the tree builder state. Snapshot
+ * creation is only supported immediately after a script end tag has been
+ * processed. In C++ the caller is responsible for calling
+ * <code>delete</code> on the returned object.
+ * <p>
+ * The list of active formatting elements is copied first, with attributes
+ * cloned and null entries preserved. The stack is copied next: a stack
+ * node that is also in the list of active formatting elements reuses the
+ * already copied list node (retained once more), so the sharing between
+ * the two structures carries over into the snapshot. The template mode
+ * stack and the scalar state are copied as they are.
+ *
+ * @return a snapshot.
+ * @throws SAXException
+ */
+ @SuppressWarnings("unchecked") public TreeBuilderState<T> newSnapshot()
+ throws SAXException {
+ StackNode<T>[] listCopy = new StackNode[listPtr + 1]; // one slot per list entry in use
+ for (int i = 0; i < listCopy.length; i++) {
+ StackNode<T> node = listOfActiveFormattingElements[i];
+ if (node != null) {
+ StackNode<T> newNode = new StackNode<T>(node.getFlags(), node.ns,
+ node.name, node.node, node.popName,
+ node.attributes.cloneAttributes(null)
+ // [NOCPP[
+ , node.getLocator()
+ // ]NOCPP]
+ );
+ listCopy[i] = newNode;
+ } else {
+ listCopy[i] = null; // null entries are copied as null
+ }
+ }
+ StackNode<T>[] stackCopy = new StackNode[currentPtr + 1]; // one slot per stack entry in use
+ for (int i = 0; i < stackCopy.length; i++) {
+ StackNode<T> node = stack[i];
+ int listIndex = findInListOfActiveFormattingElements(node);
+ if (listIndex == -1) { // stack-only node: make a fresh copy without attributes
+ StackNode<T> newNode = new StackNode<T>(node.getFlags(), node.ns,
+ node.name, node.node, node.popName,
+ null
+ // [NOCPP[
+ , node.getLocator()
+ // ]NOCPP]
+ );
+ stackCopy[i] = newNode;
+ } else {
+ stackCopy[i] = listCopy[listIndex]; // share the copy made for the list
+ stackCopy[i].retain(); // second reference to the shared copy
+ }
+ }
+ int[] templateModeStackCopy = new int[templateModePtr + 1];
+ System.arraycopy(templateModeStack, 0, templateModeStackCopy, 0,
+ templateModeStackCopy.length);
+ return new StateSnapshot<T>(stackCopy, listCopy, templateModeStackCopy, formPointer,
+ headPointer, deepTreeSurrogateParent, mode, originalMode, framesetOk,
+ needToDropLF, quirks);
+ }
+
+ /**
+  * Compares the live tree builder state with a snapshot. Lengths, the
+  * pointer fields, the modes and the boolean flags must be equal; list and
+  * stack entries are compared by the identity of the DOM nodes they hold,
+  * and template modes by value.
+  *
+  * @param snapshot the snapshot to compare against
+  * @return true if the snapshot matches the current state
+  */
+ public boolean snapshotMatches(TreeBuilderState<T> snapshot) {
+     StackNode<T>[] savedStack = snapshot.getStack();
+     int savedStackLen = snapshot.getStackLength();
+     StackNode<T>[] savedList = snapshot.getListOfActiveFormattingElements();
+     int savedListLen = snapshot.getListOfActiveFormattingElementsLength();
+     int[] savedTemplateModes = snapshot.getTemplateModeStack();
+     int savedTemplateModesLen = snapshot.getTemplateModeStackLength();
+
+     if (savedStackLen != currentPtr + 1
+             || savedListLen != listPtr + 1
+             || savedTemplateModesLen != templateModePtr + 1) {
+         return false;
+     }
+     if (formPointer != snapshot.getFormPointer()
+             || headPointer != snapshot.getHeadPointer()
+             || deepTreeSurrogateParent != snapshot.getDeepTreeSurrogateParent()) {
+         return false;
+     }
+     if (mode != snapshot.getMode()
+             || originalMode != snapshot.getOriginalMode()
+             || framesetOk != snapshot.isFramesetOk()
+             || needToDropLF != snapshot.isNeedToDropLF()
+             || quirks != snapshot.isQuirks()) { // maybe just assert quirks
+         return false;
+     }
+     for (int i = savedListLen - 1; i >= 0; i--) {
+         StackNode<T> saved = savedList[i];
+         StackNode<T> live = listOfActiveFormattingElements[i];
+         if (saved == null && live == null) {
+             continue;
+         }
+         if (saved == null || live == null) {
+             return false;
+         }
+         if (saved.node != live.node) {
+             // it's possible that this condition is overly strict
+             return false;
+         }
+     }
+     for (int i = savedStackLen - 1; i >= 0; i--) {
+         if (savedStack[i].node != stack[i].node) {
+             return false;
+         }
+     }
+     for (int i = savedTemplateModesLen - 1; i >= 0; i--) {
+         if (savedTemplateModes[i] != templateModeStack[i]) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ @SuppressWarnings("unchecked") public void loadState( // replaces this builder's state with fresh copies of the snapshot's nodes
+ TreeBuilderState<T> snapshot, Interner interner) // interner is used to re-create the local names of copied nodes
+ throws SAXException {
+ StackNode<T>[] stackCopy = snapshot.getStack();
+ int stackLen = snapshot.getStackLength();
+ StackNode<T>[] listCopy = snapshot.getListOfActiveFormattingElements();
+ int listLen = snapshot.getListOfActiveFormattingElementsLength();
+ int[] templateModeStackCopy = snapshot.getTemplateModeStack();
+ int templateModeStackLen = snapshot.getTemplateModeStackLength();
+
+ for (int i = 0; i <= listPtr; i++) { // drop the references held by the current list
+ if (listOfActiveFormattingElements[i] != null) {
+ listOfActiveFormattingElements[i].release();
+ }
+ }
+ if (listOfActiveFormattingElements.length < listLen) { // grow only; old contents are not needed
+ listOfActiveFormattingElements = new StackNode[listLen];
+ }
+ listPtr = listLen - 1; // must be set before findInArray below, which scans from listPtr
+
+ for (int i = 0; i <= currentPtr; i++) { // drop the references held by the current stack
+ stack[i].release();
+ }
+ if (stack.length < stackLen) {
+ stack = new StackNode[stackLen];
+ }
+ currentPtr = stackLen - 1;
+
+ if (templateModeStack.length < templateModeStackLen) {
+ templateModeStack = new int[templateModeStackLen];
+ }
+ templateModePtr = templateModeStackLen - 1;
+
+ for (int i = 0; i < listLen; i++) { // rebuild the list from copies; null entries stay null
+ StackNode<T> node = listCopy[i];
+ if (node != null) {
+ StackNode<T> newNode = new StackNode<T>(node.getFlags(), node.ns,
+ Portability.newLocalFromLocal(node.name, interner), node.node,
+ Portability.newLocalFromLocal(node.popName, interner),
+ node.attributes.cloneAttributes(null)
+ // [NOCPP[
+ , node.getLocator()
+ // ]NOCPP]
+ );
+ listOfActiveFormattingElements[i] = newNode;
+ } else {
+ listOfActiveFormattingElements[i] = null;
+ }
+ }
+ for (int i = 0; i < stackLen; i++) { // rebuild the stack, sharing nodes that are also in the list
+ StackNode<T> node = stackCopy[i];
+ int listIndex = findInArray(node, listCopy);
+ if (listIndex == -1) { // stack-only node: fresh copy without attributes
+ StackNode<T> newNode = new StackNode<T>(node.getFlags(), node.ns,
+ Portability.newLocalFromLocal(node.name, interner), node.node,
+ Portability.newLocalFromLocal(node.popName, interner),
+ null
+ // [NOCPP[
+ , node.getLocator()
+ // ]NOCPP]
+ );
+ stack[i] = newNode;
+ } else {
+ stack[i] = listOfActiveFormattingElements[listIndex]; // reuse the copy created for the list
+ stack[i].retain(); // second reference to the shared copy
+ }
+ }
+ System.arraycopy(templateModeStackCopy, 0, templateModeStack, 0, templateModeStackLen);
+ formPointer = snapshot.getFormPointer();
+ headPointer = snapshot.getHeadPointer();
+ deepTreeSurrogateParent = snapshot.getDeepTreeSurrogateParent();
+ mode = snapshot.getMode();
+ originalMode = snapshot.getOriginalMode();
+ framesetOk = snapshot.isFramesetOk();
+ needToDropLF = snapshot.isNeedToDropLF();
+ quirks = snapshot.isQuirks();
+ }
+
+ /**
+  * Searches arr by reference for node, scanning downwards from index
+  * listPtr. The bound is this builder's listPtr rather than arr.length,
+  * so listPtr must already describe arr when this is called.
+  *
+  * @param node the node to look for
+  * @param arr the array to search
+  * @return the highest matching index at or below listPtr, or -1
+  */
+ private int findInArray(StackNode<T> node, StackNode<T>[] arr) {
+     int index = listPtr;
+     while (index >= 0) {
+         if (arr[index] == node) {
+             return index;
+         }
+         index--;
+     }
+     return -1;
+ }
+
+ /**
+  * Gives access to the current form pointer.
+  *
+  * @return the form pointer, possibly null
+  * @see nu.validator.htmlparser.impl.TreeBuilderState#getFormPointer()
+  */
+ public T getFormPointer() {
+     return formPointer;
+ }
+
+ /**
+  * Gives access to the current head pointer.
+  *
+  * @return the head pointer, possibly null
+  */
+ public T getHeadPointer() {
+     return headPointer;
+ }
+
+ /**
+  * Gives access to the deep tree surrogate parent.
+  *
+  * @return the deepTreeSurrogateParent field
+  */
+ public T getDeepTreeSurrogateParent() {
+     return deepTreeSurrogateParent;
+ }
+
+ /**
+  * Exposes the backing array of the list of active formatting elements
+  * (not a copy). Only the first
+  * getListOfActiveFormattingElementsLength() entries are in use.
+  *
+  * @return the backing array
+  * @see nu.validator.htmlparser.impl.TreeBuilderState#getListOfActiveFormattingElements()
+  */
+ public StackNode<T>[] getListOfActiveFormattingElements() {
+     return listOfActiveFormattingElements;
+ }
+
+ /**
+  * Exposes the backing array of the element stack (not a copy). Only the
+  * first getStackLength() entries are in use.
+  *
+  * @return the backing array
+  * @see nu.validator.htmlparser.impl.TreeBuilderState#getStack()
+  */
+ public StackNode<T>[] getStack() {
+     return stack;
+ }
+
+ /**
+  * Exposes the backing array of the template mode stack (not a copy).
+  * Only the first getTemplateModeStackLength() entries are in use.
+  *
+  * @return the backing array
+  * @see nu.validator.htmlparser.impl.TreeBuilderState#getTemplateModeStack()
+  */
+ public int[] getTemplateModeStack() {
+     return templateModeStack;
+ }
+
+ /**
+  * Gives access to the current insertion mode.
+  *
+  * @return the mode field
+  */
+ public int getMode() {
+     return mode;
+ }
+
+ /**
+  * Gives access to the saved original insertion mode.
+  *
+  * @return the originalMode field
+  */
+ public int getOriginalMode() {
+     return originalMode;
+ }
+
+ /**
+  * Gives access to the frameset-ok flag.
+  *
+  * @return the framesetOk field
+  */
+ public boolean isFramesetOk() {
+     return framesetOk;
+ }
+
+ /**
+  * Gives access to the needToDropLF flag.
+  *
+  * @return the needToDropLF field
+  */
+ public boolean isNeedToDropLF() {
+     return needToDropLF;
+ }
+
+ /**
+  * Gives access to the quirks flag.
+  *
+  * @return the quirks field
+  */
+ public boolean isQuirks() {
+     return quirks;
+ }
+
+ /**
+  * Reports how many entries of the list of active formatting elements
+  * are in use.
+  *
+  * @return listPtr + 1
+  * @see nu.validator.htmlparser.impl.TreeBuilderState#getListOfActiveFormattingElementsLength()
+  */
+ public int getListOfActiveFormattingElementsLength() {
+     return listPtr + 1;
+ }
+
+ /**
+  * Reports how many entries of the element stack are in use.
+  *
+  * @return currentPtr + 1
+  * @see nu.validator.htmlparser.impl.TreeBuilderState#getStackLength()
+  */
+ public int getStackLength() {
+     return currentPtr + 1;
+ }
+
+ /**
+  * Reports how many entries of the template mode stack are in use.
+  *
+  * @return templateModePtr + 1
+  * @see nu.validator.htmlparser.impl.TreeBuilderState#getTemplateModeStackLength()
+  */
+ public int getTemplateModeStackLength() {
+     return templateModePtr + 1;
+ }
+
+ /**
+  * Emits the "Stray start tag" error for the given tag name.
+  *
+  * @param name the name of the stray tag
+  * @throws SAXException if error reporting throws
+  */
+ private void errStrayStartTag(@Local String name) throws SAXException {
+     err("Stray start tag \u201C" + name + "\u201D.");
+ }
+
+ /**
+  * Emits the "Stray end tag" error for the given tag name.
+  *
+  * @param name the name of the stray tag
+  * @throws SAXException if error reporting throws
+  */
+ private void errStrayEndTag(@Local String name) throws SAXException {
+     err("Stray end tag \u201C" + name + "\u201D.");
+ }
+
+ /**
+  * Emits the error for an end tag seen while other elements were still
+  * open, then lists the unclosed start tags. The message goes through
+  * errNoCheck, so this method itself does not test for an error handler.
+  *
+  * @param eltPos the stack position of the start tag of the element being
+  *        closed
+  * @param name the name of the end tag
+  * @throws SAXException if error reporting throws
+  */
+ private void errUnclosedElements(int eltPos, @Local String name) throws SAXException {
+     errNoCheck("End tag \u201C" + name + "\u201D seen, but there were open elements.");
+     errListUnclosedStartTags(eltPos);
+ }
+
+ /**
+  * Emits the error for an implied end tag while other elements were still
+  * open, then lists the unclosed start tags. The message goes through
+  * errNoCheck, so this method itself does not test for an error handler.
+  *
+  * @param eltPos the stack position of the start tag of the element being
+  *        closed
+  * @param name the name of the implied end tag
+  * @throws SAXException if error reporting throws
+  */
+ private void errUnclosedElementsImplied(int eltPos, String name) throws SAXException {
+     errNoCheck("End tag \u201C" + name + "\u201D implied, but there were open elements.");
+     errListUnclosedStartTags(eltPos);
+ }
+
+ /**
+  * Emits the error for a table cell that was implicitly closed while other
+  * elements were still open, then lists the unclosed start tags. The
+  * message goes through errNoCheck, so this method itself does not test
+  * for an error handler.
+  *
+  * @param eltPos the stack position of the start tag of the element being
+  *        closed
+  * @throws SAXException if error reporting throws
+  */
+ private void errUnclosedElementsCell(int eltPos) throws SAXException {
+     errNoCheck("A table cell was implicitly closed, but there were open elements.");
+     errListUnclosedStartTags(eltPos);
+ }
+
+ /**
+  * Emits the "Stray doctype." error.
+  *
+  * @throws SAXException if error reporting throws
+  */
+ private void errStrayDoctype() throws SAXException {
+     err("Stray doctype.");
+ }
+
+ /**
+  * Emits the almost-standards-mode doctype error, unless the document is
+  * flagged as a srcdoc document.
+  *
+  * @throws SAXException if error reporting throws
+  */
+ private void errAlmostStandardsDoctype() throws SAXException {
+     if (isSrcdocDocument) {
+         return;
+     }
+     err("Almost standards mode doctype. Expected \u201C<!DOCTYPE html>\u201D.");
+ }
+
+ /**
+  * Emits the quirky doctype error, unless the document is flagged as a
+  * srcdoc document.
+  *
+  * @throws SAXException if error reporting throws
+  */
+ private void errQuirkyDoctype() throws SAXException {
+     if (isSrcdocDocument) {
+         return;
+     }
+     err("Quirky doctype. Expected \u201C<!DOCTYPE html>\u201D.");
+ }
+
+ /**
+  * Emits the error for a non-space character in the page trailer.
+  *
+  * @throws SAXException if error reporting throws
+  */
+ private void errNonSpaceInTrailer() throws SAXException {
+     err("Non-space character in page trailer.");
+ }
+
+ private void errNonSpaceAfterFrameset() throws SAXException {
+ err("Non-space after \u201Cframeset\u201D.");
+ }
+
+ private void errNonSpaceInFrameset() throws SAXException {
+ err("Non-space in \u201Cframeset\u201D.");
+ }
+
+ private void errNonSpaceAfterBody() throws SAXException {
+ err("Non-space character after body.");
+ }
+
+ private void errNonSpaceInColgroupInFragment() throws SAXException {
+ err("Non-space in \u201Ccolgroup\u201D when parsing fragment.");
+ }
+
+ private void errNonSpaceInNoscriptInHead() throws SAXException {
+ err("Non-space character inside \u201Cnoscript\u201D inside \u201Chead\u201D.");
+ }
+
+ /**
+  * Reports an element found between head and body. Does nothing when no
+  * error handler is set.
+  *
+  * @param name the name of the offending element
+  * @throws SAXException propagated from <code>errNoCheck</code>
+  */
+ private void errFooBetweenHeadAndBody(@Local String name) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("\u201C" + name + "\u201D element between \u201Chead\u201D and \u201Cbody\u201D.");
+     }
+ }
+
+ /**
+  * Reports a start tag seen before any doctype. Nothing is reported for
+  * srcdoc documents.
+  *
+  * @throws SAXException propagated from <code>err</code>
+  */
+ private void errStartTagWithoutDoctype() throws SAXException {
+     if (isSrcdocDocument) {
+         return;
+     }
+     err("Start tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D.");
+ }
+
+ private void errNoSelectInTableScope() throws SAXException {
+ err("No \u201Cselect\u201D in table scope.");
+ }
+
+ private void errStartSelectWhereEndSelectExpected() throws SAXException {
+ err("\u201Cselect\u201D start tag where end tag expected.");
+ }
+
+ /**
+  * Reports a start tag seen while a select element is open. Does nothing
+  * when no error handler is set.
+  *
+  * @param name the name of the start tag
+  * @throws SAXException propagated from <code>errNoCheck</code>
+  */
+ private void errStartTagWithSelectOpen(@Local String name)
+         throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("\u201C" + name + "\u201D start tag with \u201Cselect\u201D open.");
+     }
+ }
+
+ /**
+  * Reports a bad start tag in head. Does nothing when no error handler is
+  * set.
+  *
+  * @param name the name quoted in the message
+  * @throws SAXException propagated from <code>errNoCheck</code>
+  */
+ private void errBadStartTagInHead(@Local String name) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("Bad start tag in \u201C" + name + "\u201D in \u201Chead\u201D.");
+     }
+ }
+
+ private void errImage() throws SAXException {
+ err("Saw a start tag \u201Cimage\u201D.");
+ }
+
+ private void errIsindex() throws SAXException {
+ err("\u201Cisindex\u201D seen.");
+ }
+
+ /**
+  * Reports a start tag seen while an element of the same type is already
+  * open. Does nothing when no error handler is set.
+  *
+  * @param name the name of the start tag
+  * @throws SAXException propagated from <code>errNoCheck</code>
+  */
+ private void errFooSeenWhenFooOpen(@Local String name) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("An \u201C" + name + "\u201D start tag seen but an element of the same type was already open.");
+     }
+ }
+
+ private void errHeadingWhenHeadingOpen() throws SAXException {
+ err("Heading cannot be a child of another heading.");
+ }
+
+ private void errFramesetStart() throws SAXException {
+ err("\u201Cframeset\u201D start tag seen.");
+ }
+
+ private void errNoCellToClose() throws SAXException {
+ err("No cell to close.");
+ }
+
+ /**
+  * Reports a start tag seen in a table. Does nothing when no error handler
+  * is set.
+  *
+  * @param name the name of the start tag
+  * @throws SAXException propagated from <code>errNoCheck</code>
+  */
+ private void errStartTagInTable(@Local String name) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("Start tag \u201C" + name + "\u201D seen in \u201Ctable\u201D.");
+     }
+ }
+
+ private void errFormWhenFormOpen() throws SAXException {
+ err("Saw a \u201Cform\u201D start tag, but there was already an active \u201Cform\u201D element. Nested forms are not allowed. Ignoring the tag.");
+ }
+
+ private void errTableSeenWhileTableOpen() throws SAXException {
+ err("Start tag for \u201Ctable\u201D seen but the previous \u201Ctable\u201D is still open.");
+ }
+
+ /**
+  * Reports a start tag seen in a table body. Does nothing when no error
+  * handler is set.
+  *
+  * @param name the name of the start tag
+  * @throws SAXException propagated from <code>errNoCheck</code>
+  */
+ private void errStartTagInTableBody(@Local String name) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("\u201C" + name + "\u201D start tag in table body.");
+     }
+ }
+
+ /**
+  * Reports an end tag seen before any doctype. Nothing is reported for
+  * srcdoc documents.
+  *
+  * @throws SAXException propagated from <code>err</code>
+  */
+ private void errEndTagSeenWithoutDoctype() throws SAXException {
+     if (isSrcdocDocument) {
+         return;
+     }
+     err("End tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D.");
+ }
+
+ private void errEndTagAfterBody() throws SAXException {
+ err("Saw an end tag after \u201Cbody\u201D had been closed.");
+ }
+
+ /**
+  * Reports an end tag seen while a select element is open. Does nothing
+  * when no error handler is set.
+  *
+  * @param name the name of the end tag
+  * @throws SAXException propagated from <code>errNoCheck</code>
+  */
+ private void errEndTagSeenWithSelectOpen(@Local String name) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("\u201C" + name + "\u201D end tag with \u201Cselect\u201D open.");
+     }
+ }
+
+ private void errGarbageInColgroup() throws SAXException {
+ err("Garbage in \u201Ccolgroup\u201D fragment.");
+ }
+
+ private void errEndTagBr() throws SAXException {
+ err("End tag \u201Cbr\u201D.");
+ }
+
+ /**
+  * Reports an end tag for which no element of the same name is in scope.
+  * Does nothing when no error handler is set.
+  *
+  * @param name the name of the end tag
+  * @throws SAXException propagated from <code>errNoCheck</code>
+  */
+ private void errNoElementToCloseButEndTagSeen(@Local String name)
+         throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("No \u201C" + name + "\u201D element in scope but a \u201C" + name
+                 + "\u201D end tag seen.");
+     }
+ }
+
+ /**
+  * Reports an HTML start tag in a foreign namespace context. Does nothing
+  * when no error handler is set.
+  *
+  * @param name the name of the start tag
+  * @throws SAXException propagated from <code>errNoCheck</code>
+  */
+ private void errHtmlStartTagInForeignContext(@Local String name)
+         throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("HTML start tag \u201C" + name + "\u201D in a foreign namespace context.");
+     }
+ }
+
+ private void errTableClosedWhileCaptionOpen() throws SAXException {
+ err("\u201Ctable\u201D closed but \u201Ccaption\u201D was still open.");
+ }
+
+ private void errNoTableRowToClose() throws SAXException {
+ err("No table row to close.");
+ }
+
+ /**
+  * Reports misplaced non-space characters inside a table.
+  *
+  * @throws SAXException propagated from <code>err</code>
+  */
+ private void errNonSpaceInTable() throws SAXException {
+     // Message text corrected: the original read "insided".
+     err("Misplaced non-space characters inside a table.");
+ }
+
+ /**
+  * Reports unclosed children in a ruby element. Does nothing when no error
+  * handler is set.
+  *
+  * @throws SAXException propagated from <code>errNoCheck</code>
+  */
+ private void errUnclosedChildrenInRuby() throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("Unclosed children in \u201Cruby\u201D.");
+     }
+ }
+
+ /**
+  * Reports a start tag seen without a ruby element being open. Does
+  * nothing when no error handler is set.
+  *
+  * @param name the name of the start tag
+  * @throws SAXException propagated from <code>errNoCheck</code>
+  */
+ private void errStartTagSeenWithoutRuby(@Local String name) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("Start tag \u201C" + name
+                 + "\u201D seen without a \u201Cruby\u201D element being open.");
+     }
+ }
+
+ /**
+  * Reports self-closing syntax used on a non-void HTML element. Does
+  * nothing when no error handler is set.
+  *
+  * @throws SAXException propagated from <code>errNoCheck</code>
+  */
+ private void errSelfClosing() throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("Self-closing syntax (\u201C/>\u201D) used on a non-void HTML element. Ignoring the slash and treating as a start tag.");
+     }
+ }
+
+ private void errNoCheckUnclosedElementsOnStack() throws SAXException {
+ errNoCheck("Unclosed elements on stack.");
+ }
+
+ /**
+  * Reports an end tag whose name differs from the current open element.
+  * Does nothing when no error handler is set.
+  *
+  * @param name the name of the end tag
+  * @param currOpenName the name of the current open element
+  * @throws SAXException propagated from <code>errNoCheck</code>
+  */
+ private void errEndTagDidNotMatchCurrentOpenElement(@Local String name,
+         @Local String currOpenName) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("End tag \u201C" + name
+                 + "\u201D did not match the name of the current open element (\u201C"
+                 + currOpenName + "\u201D).");
+     }
+ }
+
+ /**
+  * Reports an end tag that violates nesting rules. Does nothing when no
+  * error handler is set.
+  *
+  * @param name the name of the end tag
+  * @throws SAXException propagated from <code>errNoCheck</code>
+  */
+ private void errEndTagViolatesNestingRules(@Local String name) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("End tag \u201C" + name + "\u201D violates nesting rules.");
+     }
+ }
+
+ /**
+  * Reports end of file with elements still open, then lists every unclosed
+  * start tag from stack position 0. Does nothing when no error handler is
+  * set.
+  *
+  * @throws SAXException propagated from the reporting helpers
+  */
+ private void errEofWithUnclosedElements() throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("End of file seen and there were open elements.");
+         // Position 0 means all remaining unclosed elements get listed.
+         errListUnclosedStartTags(0);
+     }
+ }
+
+ /**
+  * Reports an end tag seen while elements remain unclosed, then lists every
+  * unclosed start tag from stack position 0. Does nothing when no error
+  * handler is set.
+  *
+  * @param name
+  *            the name of the end tag
+  * @throws SAXException propagated from the reporting helpers
+  */
+ private void errEndWithUnclosedElements(@Local String name) throws SAXException {
+     if (errorHandler != null) {
+         errNoCheck("End tag for \u201C" + name
+                 + "\u201D seen, but there were unclosed elements.");
+         // Position 0 means all remaining unclosed elements get listed.
+         errListUnclosedStartTags(0);
+     }
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilderState.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilderState.java
new file mode 100644
index 000000000..c4e2d4afb
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilderState.java
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2009-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+/**
+ * Read-only view of the HTML5 tree builder's state. The interface exists so
+ * that both the tree builder itself and snapshots can implement it.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface TreeBuilderState<T> {
+
+    /**
+     * @return the stack
+     */
+    StackNode<T>[] getStack();
+
+    /**
+     * @return the list of active formatting elements
+     */
+    StackNode<T>[] getListOfActiveFormattingElements();
+
+    /**
+     * @return the stack of template insertion modes
+     */
+    int[] getTemplateModeStack();
+
+    /**
+     * @return the form pointer
+     */
+    T getFormPointer();
+
+    /**
+     * @return the head pointer
+     */
+    T getHeadPointer();
+
+    /**
+     * @return the deep tree surrogate parent
+     */
+    T getDeepTreeSurrogateParent();
+
+    /**
+     * @return the mode
+     */
+    int getMode();
+
+    /**
+     * @return the original mode
+     */
+    int getOriginalMode();
+
+    /**
+     * @return the framesetOk flag
+     */
+    boolean isFramesetOk();
+
+    /**
+     * @return the needToDropLF flag
+     */
+    boolean isNeedToDropLF();
+
+    /**
+     * @return the quirks flag
+     */
+    boolean isQuirks();
+
+    /**
+     * @return the length of the stack
+     */
+    int getStackLength();
+
+    /**
+     * @return the length of the list of active formatting elements
+     */
+    int getListOfActiveFormattingElementsLength();
+
+    /**
+     * @return the length of the stack of template insertion modes
+     */
+    int getTemplateModeStackLength();
+} \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/UTF16Buffer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/UTF16Buffer.java
new file mode 100644
index 000000000..ec79185ec
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/UTF16Buffer.java
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2008-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.NoLength;
+
+/**
+ * A UTF-16 buffer that knows the start and end indices of its unconsumed
+ * content.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class UTF16Buffer {
+
+    /**
+     * The backing store of the buffer. May be larger than the logical content
+     * of this <code>UTF16Buffer</code>.
+     */
+    private final @NoLength char[] buffer;
+
+    /**
+     * The index of the first unconsumed character in the backing buffer.
+     */
+    private int start;
+
+    /**
+     * The index of the slot immediately after the last character in the backing
+     * buffer that is part of the logical content of this
+     * <code>UTF16Buffer</code>.
+     */
+    private int end;
+
+    //[NOCPP[
+
+    /**
+     * Constructor for wrapping an existing UTF-16 code unit array.
+     *
+     * @param buffer
+     *            the backing buffer
+     * @param start
+     *            the index of the first character to consume
+     * @param end
+     *            the index immediately after the last character to consume
+     */
+    public UTF16Buffer(@NoLength char[] buffer, int start, int end) {
+        this.buffer = buffer;
+        this.start = start;
+        this.end = end;
+    }
+
+    // ]NOCPP]
+
+    /**
+     * Returns the start index.
+     *
+     * @return the start index
+     */
+    public int getStart() {
+        return start;
+    }
+
+    /**
+     * Sets the start index.
+     *
+     * @param start
+     *            the start index
+     */
+    public void setStart(int start) {
+        this.start = start;
+    }
+
+    /**
+     * Returns the backing buffer.
+     *
+     * @return the backing buffer
+     */
+    public @NoLength char[] getBuffer() {
+        return buffer;
+    }
+
+    /**
+     * Returns the end index.
+     *
+     * @return the end index
+     */
+    public int getEnd() {
+        return end;
+    }
+
+    /**
+     * Checks if the buffer has data left.
+     *
+     * @return <code>true</code> if there's data left
+     */
+    public boolean hasMore() {
+        return start < end;
+    }
+
+    /**
+     * Returns <code>end - start</code>.
+     *
+     * @return <code>end - start</code>
+     */
+    public int getLength() {
+        return end - start;
+    }
+
+    /**
+     * Adjusts the start index to skip over the first character if it is a line
+     * feed and the previous character was a carriage return. Does nothing
+     * when the buffer has no unconsumed content.
+     *
+     * @param lastWasCR
+     *            whether the previous character was a carriage return
+     */
+    public void adjust(boolean lastWasCR) {
+        // The backing array may be larger than the logical content, so only
+        // look at buffer[start] while start is still inside [start, end).
+        // Otherwise a stale '\n' beyond the content could push start past end.
+        if (lastWasCR && start < end && buffer[start] == '\n') {
+            start++;
+        }
+    }
+
+    /**
+     * Sets the end index.
+     *
+     * @param end
+     *            the end index
+     */
+    public void setEnd(int end) {
+        this.end = end;
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/package.html
new file mode 100644
index 000000000..6d029a13e
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/package.html
@@ -0,0 +1,30 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>This package contains the bulk of parser internals. Only implementors of
+additional tree builders or token handlers should look here.</p>
+</body>
+</html> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/BomSniffer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/BomSniffer.java
new file mode 100644
index 000000000..42d7a837f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/BomSniffer.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.io;
+
+import java.io.IOException;
+
+import nu.validator.htmlparser.common.ByteReadable;
+
+/**
+ * The BOM sniffing part of the HTML5 encoding sniffing algorithm.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class BomSniffer {
+
+ private final ByteReadable source;
+
+ /**
+ * @param source
+ */
+ public BomSniffer(final ByteReadable source) {
+ this.source = source;
+ }
+
+ Encoding sniff() throws IOException {
+ int b = source.readByte();
+ if (b == 0xEF) { // UTF-8
+ b = source.readByte();
+ if (b == 0xBB) {
+ b = source.readByte();
+ if (b == 0xBF) {
+ return Encoding.UTF8;
+ } else {
+ return null;
+ }
+ } else {
+ return null;
+ }
+ } else if (b == 0xFF) { // little-endian
+ b = source.readByte();
+ if (b == 0xFE) {
+ return Encoding.UTF16LE;
+ } else {
+ return null;
+ }
+ } else if (b == 0xFE) { // big-endian UTF-16
+ b = source.readByte();
+ if (b == 0xFF) {
+ return Encoding.UTF16BE;
+ } else {
+ return null;
+ }
+ } else {
+ return null;
+ }
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Confidence.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Confidence.java
new file mode 100644
index 000000000..1a2d49746
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Confidence.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.io;
+
+/**
+ * How sure the driver is about the character encoding in use.
+ */
+public enum Confidence {
+    /** The encoding may still change. */
+    TENTATIVE,
+    /** The encoding is settled. */
+    CERTAIN
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Driver.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Driver.java
new file mode 100644
index 000000000..f0b0cc55d
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Driver.java
@@ -0,0 +1,597 @@
+/*
+ * Copyright (c) 2005, 2006, 2007 Henri Sivonen
+ * Copyright (c) 2007-2013 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.io;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.nio.charset.UnsupportedCharsetException;
+
+import nu.validator.htmlparser.common.CharacterHandler;
+import nu.validator.htmlparser.common.EncodingDeclarationHandler;
+import nu.validator.htmlparser.common.Heuristics;
+import nu.validator.htmlparser.common.TransitionHandler;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.extra.NormalizationChecker;
+import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
+import nu.validator.htmlparser.impl.Tokenizer;
+import nu.validator.htmlparser.impl.UTF16Buffer;
+import nu.validator.htmlparser.rewindable.RewindableInputStream;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+public class Driver implements EncodingDeclarationHandler {
+
+ /**
+ * The input UTF-16 code unit stream. If a byte stream was given, this
+ * object is an instance of <code>HtmlInputStreamReader</code>.
+ */
+ private Reader reader;
+
+ /**
+ * The reference to the rewindable byte stream. <code>null</code> if
+ * prohibited or no longer needed.
+ */
+ private RewindableInputStream rewindableInputStream;
+
+ private boolean swallowBom;
+
+ private Encoding characterEncoding;
+
+ private boolean allowRewinding = true;
+
+ private Heuristics heuristics = Heuristics.NONE;
+
+ private final Tokenizer tokenizer;
+
+ private Confidence confidence;
+
+ /**
+ * Used for NFC checking if non-<code>null</code>, source code capture,
+ * etc.
+ */
+ private CharacterHandler[] characterHandlers = new CharacterHandler[0];
+
+ /**
+  * Creates a driver for the given tokenizer and registers this driver as
+  * the tokenizer's encoding declaration handler.
+  *
+  * @param tokenizer
+  *            the tokenizer to drive
+  */
+ public Driver(Tokenizer tokenizer) {
+     this.tokenizer = tokenizer;
+     this.tokenizer.setEncodingDeclarationHandler(this);
+ }
+
+ /**
+  * Tells whether rewinding the input byte stream is allowed.
+  *
+  * @return the allowRewinding flag
+  */
+ public boolean isAllowRewinding() {
+     return this.allowRewinding;
+ }
+
+ /**
+  * Sets whether rewinding the input byte stream is allowed. The flag is
+  * consulted by <code>tokenize</code> when it wraps a byte stream.
+  *
+  * @param allowRewinding
+  *            the allowRewinding flag to set
+  */
+ public void setAllowRewinding(boolean allowRewinding) {
+     this.allowRewinding = allowRewinding;
+ }
+
+ /**
+  * Turns NFC checking on or off.
+  * <p>
+  * Turning checking on registers a new <code>NormalizationChecker</code> as
+  * a character handler, unless one is already registered. Turning it off
+  * removes the first registered <code>NormalizationChecker</code>, if any.
+  *
+  * @param enable
+  *            <code>true</code> if checking on
+  */
+ public void setCheckingNormalization(boolean enable) {
+     if (enable == isCheckingNormalization()) {
+         // Already in the requested state.
+         return;
+     }
+     if (enable) {
+         NormalizationChecker normalizationChecker = new NormalizationChecker(tokenizer);
+         normalizationChecker.setErrorHandler(tokenizer.getErrorHandler());
+         // The checker was previously created and then dropped, so enabling
+         // had no effect. Register it so that it receives characters.
+         addCharacterHandler(normalizationChecker);
+         return;
+     }
+     CharacterHandler[] newHandlers = new CharacterHandler[characterHandlers.length - 1];
+     boolean skipped = false;
+     int j = 0;
+     for (int i = 0; i < characterHandlers.length; i++) {
+         CharacterHandler ch = characterHandlers[i];
+         if (!skipped && (ch instanceof NormalizationChecker)) {
+             // Drop exactly one checker. Without recording this, every
+             // checker was skipped and the shortened array kept null slots.
+             skipped = true;
+         } else {
+             newHandlers[j] = ch;
+             j++;
+         }
+     }
+     characterHandlers = newHandlers;
+ }
+
+ /**
+  * Appends a character handler to the list of handlers. The handler array
+  * is replaced by a copy that is one element longer.
+  *
+  * @param characterHandler
+  *            the handler to add; must not be <code>null</code>
+  * @throws IllegalArgumentException
+  *             if <code>characterHandler</code> is <code>null</code>
+  */
+ public void addCharacterHandler(CharacterHandler characterHandler) {
+     if (characterHandler == null) {
+         throw new IllegalArgumentException("Null argument.");
+     }
+     final int oldLength = characterHandlers.length;
+     final CharacterHandler[] grown = new CharacterHandler[oldLength + 1];
+     System.arraycopy(characterHandlers, 0, grown, 0, oldLength);
+     grown[oldLength] = characterHandler;
+     characterHandlers = grown;
+ }
+
+ /**
+  * Query if checking normalization, i.e. whether any registered character
+  * handler is a <code>NormalizationChecker</code>.
+  *
+  * @return <code>true</code> if checking on
+  */
+ public boolean isCheckingNormalization() {
+     for (CharacterHandler handler : characterHandlers) {
+         if (handler instanceof NormalizationChecker) {
+             return true;
+         }
+     }
+     return false;
+ }
+
+ /**
+ * Runs the tokenization. This is the main entry point.
+ * <p>
+ * The character stream of the input source is used when present; otherwise
+ * the byte stream is wrapped in an <code>HtmlInputStreamReader</code>. When
+ * no usable external encoding was given and rewinding is allowed, the byte
+ * stream is first wrapped in a <code>RewindableInputStream</code> so that a
+ * <code>ReparseException</code> can restart the run from the beginning of
+ * the stream with the newly chosen encoding.
+ * <p>
+ * The tokenizer, the character handlers and the reader are ended/closed in
+ * a <code>finally</code> block. If both the run and the cleanup throw, the
+ * throwable from the run is the one that is rethrown.
+ *
+ * @param is
+ * the input source
+ * @throws SAXException
+ * on fatal error (if configured to treat XML violations as
+ * fatal) or if the token handler threw, or if the input source
+ * has neither a character stream nor a byte stream
+ * @throws IOException
+ * if the stream threw
+ * @throws IllegalArgumentException
+ * if <code>is</code> is <code>null</code>
+ */
+ public void tokenize(InputSource is) throws SAXException, IOException {
+ if (is == null) {
+ throw new IllegalArgumentException("InputSource was null.");
+ }
+ tokenizer.start();
+ confidence = Confidence.TENTATIVE; // reset per-run state
+ swallowBom = true;
+ rewindableInputStream = null;
+ tokenizer.initLocation(is.getPublicId(), is.getSystemId());
+ this.reader = is.getCharacterStream();
+ this.characterEncoding = encodingFromExternalDeclaration(is.getEncoding()); // may also clear swallowBom
+ if (this.reader == null) {
+ InputStream inputStream = is.getByteStream();
+ if (inputStream == null) {
+ throw new SAXException("Both streams in InputSource were null.");
+ }
+ if (this.characterEncoding == null) { // no external encoding: construct the reader with the heuristics
+ if (allowRewinding) {
+ inputStream = rewindableInputStream = new RewindableInputStream(
+ inputStream);
+ }
+ this.reader = new HtmlInputStreamReader(inputStream,
+ tokenizer.getErrorHandler(), tokenizer, this, heuristics);
+ } else { // external encoding known: decode with it and treat it as certain
+ if (this.characterEncoding != Encoding.UTF8) {
+ warnWithoutLocation("Legacy encoding \u201C"
+ + this.characterEncoding.getCanonName()
+ + "\u201D used. Documents should use UTF-8.");
+ }
+ becomeConfident();
+ this.reader = new HtmlInputStreamReader(inputStream,
+ tokenizer.getErrorHandler(), tokenizer, this, this.characterEncoding);
+ }
+ } else { // caller supplied decoded characters; no encoding decision is left to make
+ becomeConfident();
+ }
+ Throwable t = null; // first throwable seen; rethrown at the end of the finally block
+ try {
+ for (;;) { // loops again only after a ReparseException
+ try {
+ for (int i = 0; i < characterHandlers.length; i++) {
+ CharacterHandler ch = characterHandlers[i];
+ ch.start();
+ }
+ runStates();
+ break;
+ } catch (ReparseException e) { // thrown by internalEncodingDeclaration when the encoding changes
+ if (rewindableInputStream == null) {
+ tokenizer.fatal("Changing encoding at this point would need non-streamable behavior."); // NOTE(review): presumably always throws; otherwise the loop would re-run on the same reader - confirm
+ } else {
+ rewindableInputStream.rewind();
+ becomeConfident(); // the second pass is final; also tells the stream it will not rewind again
+ this.reader = new HtmlInputStreamReader(
+ rewindableInputStream, tokenizer.getErrorHandler(), tokenizer,
+ this, this.characterEncoding);
+ }
+ continue;
+ }
+ }
+ } catch (Throwable tr) {
+ t = tr;
+ } finally {
+ try {
+ tokenizer.end(); // NOTE(review): if this or a handler's end() throws, reader.close() below is skipped
+ characterEncoding = null;
+ for (int i = 0; i < characterHandlers.length; i++) {
+ CharacterHandler ch = characterHandlers[i];
+ ch.end();
+ }
+ reader.close();
+ reader = null;
+ rewindableInputStream = null;
+ } catch (Throwable tr) {
+ if (t == null) {
+ t = tr;
+ } // else drop the later throwable
+ }
+ if (t != null) { // rethrow with the most specific type the signature permits
+ if (t instanceof IOException) {
+ throw (IOException) t;
+ } else if (t instanceof SAXException) {
+ throw (SAXException) t;
+ } else if (t instanceof RuntimeException) {
+ throw (RuntimeException) t;
+ } else if (t instanceof Error) {
+ throw (Error) t;
+ } else {
+ // impossible
+ throw new RuntimeException(t);
+ }
+ }
+ }
+ }
+
+ /**
+  * Clears the <code>swallowBom</code> flag, so that <code>runStates</code>
+  * no longer drops a leading U+FEFF from the first chunk.
+  */
+ void dontSwallowBom() {
+     this.swallowBom = false;
+ }
+
+ private void runStates() throws SAXException, IOException {
+ char[] buffer = new char[2048];
+ UTF16Buffer bufr = new UTF16Buffer(buffer, 0, 0);
+ boolean lastWasCR = false;
+ int len = -1;
+ if ((len = reader.read(buffer)) != -1) {
+ assert len > 0;
+ int streamOffset = 0;
+ int offset = 0;
+ int length = len;
+ if (swallowBom) {
+ if (buffer[0] == '\uFEFF') {
+ streamOffset = -1;
+ offset = 1;
+ length--;
+ }
+ }
+ if (length > 0) {
+ for (int i = 0; i < characterHandlers.length; i++) {
+ CharacterHandler ch = characterHandlers[i];
+ ch.characters(buffer, offset, length);
+ }
+ tokenizer.setTransitionBaseOffset(streamOffset);
+ bufr.setStart(offset);
+ bufr.setEnd(offset + length);
+ while (bufr.hasMore()) {
+ bufr.adjust(lastWasCR);
+ lastWasCR = false;
+ if (bufr.hasMore()) {
+ lastWasCR = tokenizer.tokenizeBuffer(bufr);
+ }
+ }
+ }
+ streamOffset = length;
+ while ((len = reader.read(buffer)) != -1) {
+ assert len > 0;
+ for (int i = 0; i < characterHandlers.length; i++) {
+ CharacterHandler ch = characterHandlers[i];
+ ch.characters(buffer, 0, len);
+ }
+ tokenizer.setTransitionBaseOffset(streamOffset);
+ bufr.setStart(0);
+ bufr.setEnd(len);
+ while (bufr.hasMore()) {
+ bufr.adjust(lastWasCR);
+ lastWasCR = false;
+ if (bufr.hasMore()) {
+ lastWasCR = tokenizer.tokenizeBuffer(bufr);
+ }
+ }
+ streamOffset += len;
+ }
+ }
+ tokenizer.eof();
+ }
+
+ /**
+  * Records the character encoding in use and, when the given confidence is
+  * <code>CERTAIN</code>, switches the driver to the confident state.
+  *
+  * @param encoding
+  *            the encoding to record
+  * @param confidence
+  *            how certain the caller is about the encoding
+  */
+ public void setEncoding(Encoding encoding, Confidence confidence) {
+     this.characterEncoding = encoding;
+     if (Confidence.CERTAIN == confidence) {
+         becomeConfident();
+     }
+ }
+
+ /**
+  * Handles an encoding declaration found inside the document.
+  * <p>
+  * A UTF-16 family label is reported and treated as UTF-8. A declaration
+  * naming an encoding that is unsupported or not an ASCII superset is
+  * reported and ignored. If the declared encoding matches the encoding in
+  * use, the driver becomes confident. If it differs while the encoding is
+  * still tentative, the new encoding is recorded and a
+  * <code>ReparseException</code> is thrown; <code>tokenize</code> catches it
+  * to restart. If it differs while the encoding is already certain, the
+  * disagreement is only reported.
+  *
+  * @param internalCharset
+  *            the declared encoding label
+  * @return <code>false</code> if the declaration was ignored because the
+  *         encoding is unsupported or not an ASCII superset;
+  *         <code>true</code> otherwise
+  * @throws SAXException
+  *             propagated from the tokenizer's error reporting
+  */
+ public boolean internalEncodingDeclaration(String internalCharset)
+         throws SAXException {
+     try {
+         internalCharset = Encoding.toAsciiLowerCase(internalCharset);
+         Encoding cs;
+         if ("utf-16".equals(internalCharset)
+                 || "utf-16be".equals(internalCharset)
+                 || "utf-16le".equals(internalCharset)) {
+             tokenizer.errTreeBuilder("Internal encoding declaration specified \u201C"
+                     + internalCharset
+                     + "\u201D which is not an ASCII superset. Continuing as if the encoding had been \u201Cutf-8\u201D.");
+             cs = Encoding.UTF8;
+             internalCharset = "utf-8";
+         } else {
+             cs = Encoding.forName(internalCharset);
+         }
+         Encoding actual = cs.getActualHtmlEncoding();
+         if (actual == null) {
+             actual = cs;
+         }
+         if (!actual.isAsciiSuperset()) {
+             tokenizer.errTreeBuilder("Internal encoding declaration specified \u201C"
+                     + internalCharset
+                     + "\u201D which is not an ASCII superset. Not changing the encoding.");
+             return false;
+         }
+         if (characterEncoding == null) {
+             // Reader case
+             return true;
+         }
+         if (characterEncoding == actual) {
+             becomeConfident();
+             return true;
+         }
+         // From here on the declared encoding is known to differ from the
+         // one in use, so the original re-test of that inequality is dropped.
+         if (confidence == Confidence.CERTAIN) {
+             tokenizer.errTreeBuilder("Internal encoding declaration \u201C"
+                     + internalCharset
+                     + "\u201D disagrees with the actual encoding of the document (\u201C"
+                     + characterEncoding.getCanonName() + "\u201D).");
+         } else {
+             Encoding newEnc = whineAboutEncodingAndReturnActual(
+                     internalCharset, cs);
+             tokenizer.errTreeBuilder("Changing character encoding \u201C"
+                     + internalCharset + "\u201D and reparsing.");
+             characterEncoding = newEnc;
+             throw new ReparseException();
+         }
+         return true;
+     } catch (UnsupportedCharsetException e) {
+         // Message text corrected: the original read "chararacter".
+         tokenizer.errTreeBuilder("Internal encoding declaration named an unsupported character encoding \u201C"
+                 + internalCharset + "\u201D.");
+         return false;
+     }
+ }
+
+ /**
+ * Commits to the current character encoding: tells the rewindable input
+ * stream (when there is one) that it will not be rewound, sets the
+ * confidence to <code>CERTAIN</code> and notifies the tokenizer.
+ */
+ private void becomeConfident() {
+ if (rewindableInputStream != null) {
+ rewindableInputStream.willNotRewind();
+ }
+ confidence = Confidence.CERTAIN;
+ tokenizer.becomeConfident();
+ }
+
+ /**
+ * Sets the encoding sniffing heuristics.
+ *
+ * @param heuristics
+ * the heuristics to set
+ */
+ public void setHeuristics(Heuristics heuristics) {
+ this.heuristics = heuristics;
+ }
+
+ /**
+  * Emits a warning that carries the tokenizer's system id but no position:
+  * both line and column are reported as -1. Does nothing when the tokenizer
+  * has no error handler.
+  *
+  * @param message
+  *            the warning text
+  * @throws SAXException
+  *             if the error handler throws
+  */
+ protected void warnWithoutLocation(String message) throws SAXException {
+     ErrorHandler handler = tokenizer.getErrorHandler();
+     if (handler != null) {
+         handler.warning(new SAXParseException(message, null,
+                 tokenizer.getSystemId(), -1, -1));
+     }
+ }
+
+ /**
+  * Resolves an externally declared encoding label to an
+  * <code>Encoding</code>, reporting problems with the label on the way.
+  *
+  * Side effect on <code>swallowBom</code>: cleared when the label resolves
+  * to "utf-16" or "utf-32", set when the label is not supported.
+  *
+  * @param encoding
+  *            the declared label; may be <code>null</code>
+  * @return the encoding to use, or <code>null</code> when no label was
+  *         given or the label is unsupported
+  * @throws SAXException
+  *             if error reporting fails
+  */
+ protected Encoding encodingFromExternalDeclaration(String encoding)
+         throws SAXException {
+     if (encoding == null) {
+         return null;
+     }
+     encoding = Encoding.toAsciiLowerCase(encoding);
+     try {
+         Encoding cs = Encoding.forName(encoding);
+         boolean utf16 = "utf-16".equals(cs.getCanonName());
+         if (utf16 || "utf-32".equals(cs.getCanonName())) {
+             swallowBom = false;
+         }
+         return whineAboutEncodingAndReturnActual(encoding, cs);
+     } catch (UnsupportedCharsetException e) {
+         tokenizer.err("Unsupported character encoding name: \u201C" + encoding
+                 + "\u201D. Will sniff.");
+         swallowBom = true;
+         return null;
+     }
+ }
+
+ /**
+  * Reports errors and warnings about the choice of encoding label and
+  * returns the encoding that should actually be used for decoding.
+  *
+  * @param encoding
+  *            the label as declared (already lowercased by callers)
+  * @param cs
+  *            the encoding the label resolved to
+  * @return the substitute from <code>cs.getActualHtmlEncoding()</code> when
+  *         there is one, otherwise <code>cs</code> itself
+  * @throws SAXException
+  *             if error reporting fails
+  */
+ protected Encoding whineAboutEncodingAndReturnActual(String encoding,
+         Encoding cs) throws SAXException {
+     String canonName = cs.getCanonName();
+     // Naming problems (errors).
+     if (cs.isRegistered()) {
+         if (!canonName.equals(encoding)) {
+             tokenizer.err("The encoding \u201C"
+                     + encoding
+                     + "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C"
+                     + canonName + "\u201D. (Charmod C024)");
+         }
+     } else if (encoding.startsWith("x-")) {
+         tokenizer.err("The encoding \u201C"
+                 + encoding
+                 + "\u201D is not an IANA-registered encoding. (Charmod C022)");
+     } else {
+         tokenizer.err("The encoding \u201C"
+                 + encoding
+                 + "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)");
+     }
+     // Suitability problems (warnings); at most one is emitted.
+     if (cs.isShouldNot()) {
+         tokenizer.warn("Authors should not use the character encoding \u201C"
+                 + encoding
+                 + "\u201D. It is recommended to use \u201CUTF-8\u201D.");
+     } else if (cs.isLikelyEbcdic()) {
+         tokenizer.warn("Authors should not use EBCDIC-based encodings. It is recommended to use \u201CUTF-8\u201D.");
+     } else if (cs.isObscure()) {
+         tokenizer.warn("The character encoding \u201C"
+                 + encoding
+                 + "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
+     }
+     Encoding actual = cs.getActualHtmlEncoding();
+     if (actual != null) {
+         tokenizer.warn("Using \u201C" + actual.getCanonName()
+                 + "\u201D instead of the declared encoding \u201C"
+                 + encoding + "\u201D.");
+         return actual;
+     }
+     return cs;
+ }
+
+ /**
+  * Thrown by <code>internalEncodingDeclaration</code> after it has switched
+  * <code>characterEncoding</code>. Declared static so that the exception
+  * does not hold a reference to the enclosing instance.
+  */
+ private static class ReparseException extends SAXException {
+
+     private static final long serialVersionUID = 1L;
+
+ }
+
+ void notifyAboutMetaBoundary() {
+ tokenizer.notifyAboutMetaBoundary();
+ }
+
+ /**
+ * @param commentPolicy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
+ tokenizer.setCommentPolicy(commentPolicy);
+ }
+
+ /**
+ * @param contentNonXmlCharPolicy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setContentNonXmlCharPolicy(
+ XmlViolationPolicy contentNonXmlCharPolicy) {
+ tokenizer.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
+ }
+
+ /**
+ * @param contentSpacePolicy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
+ tokenizer.setContentSpacePolicy(contentSpacePolicy);
+ }
+
+ /**
+  * Installs the error handler on the tokenizer and on every registered
+  * character handler that is a <code>NormalizationChecker</code>.
+  *
+  * @param eh
+  *            the error handler to install
+  * @see nu.validator.htmlparser.impl.Tokenizer#setErrorHandler(org.xml.sax.ErrorHandler)
+  */
+ public void setErrorHandler(ErrorHandler eh) {
+     tokenizer.setErrorHandler(eh);
+     for (CharacterHandler handler : characterHandlers) {
+         if (handler instanceof NormalizationChecker) {
+             ((NormalizationChecker) handler).setErrorHandler(eh);
+         }
+     }
+ }
+
+ /**
+  * Passes the transition handler to the tokenizer when it is an
+  * <code>ErrorReportingTokenizer</code>.
+  *
+  * @param transitionHandler
+  *            the handler to install; may be <code>null</code>
+  * @throws IllegalStateException
+  *             if a non-null handler is given but the tokenizer is not an
+  *             <code>ErrorReportingTokenizer</code>
+  */
+ public void setTransitionHandler(TransitionHandler transitionHandler) {
+     if (!(tokenizer instanceof ErrorReportingTokenizer)) {
+         if (transitionHandler != null) {
+             throw new IllegalStateException("Attempt to set a transition handler on a plain tokenizer.");
+         }
+         return;
+     }
+     ((ErrorReportingTokenizer) tokenizer).setTransitionHandler(transitionHandler);
+ }
+
+ /**
+ * @param html4ModeCompatibleWithXhtml1Schemata
+ * @see nu.validator.htmlparser.impl.Tokenizer#setHtml4ModeCompatibleWithXhtml1Schemata(boolean)
+ */
+ public void setHtml4ModeCompatibleWithXhtml1Schemata(
+ boolean html4ModeCompatibleWithXhtml1Schemata) {
+ tokenizer.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+ }
+
+ /**
+ * @param mappingLangToXmlLang
+ * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
+ */
+ public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
+ tokenizer.setMappingLangToXmlLang(mappingLangToXmlLang);
+ }
+
+ /**
+ * @param namePolicy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setNamePolicy(XmlViolationPolicy namePolicy) {
+ tokenizer.setNamePolicy(namePolicy);
+ }
+
+ /**
+ * @param xmlnsPolicy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
+ tokenizer.setXmlnsPolicy(xmlnsPolicy);
+ }
+
+ /**
+  * Returns the canonical name of the character encoding currently in use.
+  *
+  * @return the canonical encoding name, or <code>null</code> when no
+  *         encoding has been recorded
+  * @throws SAXException
+  *             never thrown by this implementation
+  */
+ public String getCharacterEncoding() throws SAXException {
+     // characterEncoding is null in the "Reader case" that
+     // internalEncodingDeclaration() also guards against.
+     return characterEncoding == null ? null
+             : characterEncoding.getCanonName();
+ }
+
+ public Locator getDocumentLocator() {
+ return tokenizer;
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Encoding.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Encoding.java
new file mode 100644
index 000000000..3bbc606fa
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Encoding.java
@@ -0,0 +1,395 @@
+/*
+ * Copyright (c) 2006 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.io;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderMalfunctionError;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.UnsupportedCharsetException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+
+public class Encoding {
+
+ public static final Encoding UTF8;
+
+ public static final Encoding UTF16;
+
+ public static final Encoding UTF16LE;
+
+ public static final Encoding UTF16BE;
+
+ public static final Encoding WINDOWS1252;
+
+ private static String[] SHOULD_NOT = { "jisx02121990", "xjis0208" };
+
+ private static String[] BANNED = { "bocu1", "cesu8", "compoundtext",
+ "iscii91", "macarabic", "maccentraleurroman", "maccroatian",
+ "maccyrillic", "macdevanagari", "macfarsi", "macgreek",
+ "macgujarati", "macgurmukhi", "machebrew", "macicelandic",
+ "macroman", "macromanian", "macthai", "macturkish", "macukranian",
+ "scsu", "utf32", "utf32be", "utf32le", "utf7", "ximapmailboxname",
+ "xjisautodetect", "xutf16bebom", "xutf16lebom", "xutf32bebom",
+ "xutf32lebom", "xutf16oppositeendian", "xutf16platformendian",
+ "xutf32oppositeendian", "xutf32platformendian" };
+
+ private static String[] NOT_OBSCURE = { "big5", "big5hkscs", "eucjp",
+ "euckr", "gb18030", "gbk", "iso2022jp", "iso2022kr", "iso88591",
+ "iso885913", "iso885915", "iso88592", "iso88593", "iso88594",
+ "iso88595", "iso88596", "iso88597", "iso88598", "iso88599",
+ "koi8r", "shiftjis", "tis620", "usascii", "utf16", "utf16be",
+ "utf16le", "utf8", "windows1250", "windows1251", "windows1252",
+ "windows1253", "windows1254", "windows1255", "windows1256",
+ "windows1257", "windows1258" };
+
+ private static Map<String, Encoding> encodingByCookedName = new HashMap<String, Encoding>();
+
+ private final String canonName;
+
+ private final Charset charset;
+
+ private final boolean asciiSuperset;
+
+ private final boolean obscure;
+
+ private final boolean shouldNot;
+
+ private final boolean likelyEbcdic;
+
+ private Encoding actualHtmlEncoding = null;
+
+ static { // Builds the name-key -> Encoding table, then the well-known constants and substitutions.
+ byte[] testBuf = new byte[0x7F]; // probe input for asciiMapsToBasicLatin(): one byte per code 0x00..0x7E
+ for (int i = 0; i < 0x7F; i++) {
+ if (isAsciiSupersetnessSensitive(i)) {
+ testBuf[i] = (byte) i;
+ } else {
+ testBuf[i] = (byte) 0x20; // codes that are not checked are replaced by a space
+ }
+ }
+
+ Set<Encoding> encodings = new HashSet<Encoding>();
+
+ SortedMap<String, Charset> charsets = Charset.availableCharsets();
+ for (Map.Entry<String, Charset> entry : charsets.entrySet()) {
+ Charset cs = entry.getValue();
+ String name = toNameKey(cs.name()); // lookup key: lower case, whitespace and punctuation stripped
+ String canonName = toAsciiLowerCase(cs.name());
+ if (!isBanned(name)) { // banned charsets are never registered, so forName() rejects them
+ name = name.intern();
+ boolean asciiSuperset = asciiMapsToBasicLatin(testBuf, cs);
+ Encoding enc = new Encoding(canonName.intern(), cs,
+ asciiSuperset, isObscure(name), isShouldNot(name),
+ isLikelyEbcdic(name, asciiSuperset));
+ encodings.add(enc);
+ Set<String> aliases = cs.aliases();
+ for (String alias : aliases) {
+ encodingByCookedName.put(toNameKey(alias).intern(), enc);
+ }
+ }
+ }
+ // Overwrite possible overlapping aliases with the real things--just in
+ // case
+ for (Encoding encoding : encodings) {
+ encodingByCookedName.put(toNameKey(encoding.getCanonName()),
+ encoding);
+ }
+ UTF8 = forName("utf-8"); // forName() throws UnsupportedCharsetException if missing; not caught for these five
+ UTF16 = forName("utf-16");
+ UTF16BE = forName("utf-16be");
+ UTF16LE = forName("utf-16le");
+ WINDOWS1252 = forName("windows-1252");
+ try { // each block below records a substitute encoding, skipped when either charset is unavailable
+ forName("iso-8859-1").actualHtmlEncoding = forName("windows-1252");
+ } catch (UnsupportedCharsetException e) {
+ }
+ try {
+ forName("iso-8859-9").actualHtmlEncoding = forName("windows-1254");
+ } catch (UnsupportedCharsetException e) {
+ }
+ try {
+ forName("iso-8859-11").actualHtmlEncoding = forName("windows-874");
+ } catch (UnsupportedCharsetException e) {
+ }
+ try {
+ forName("x-iso-8859-11").actualHtmlEncoding = forName("windows-874");
+ } catch (UnsupportedCharsetException e) {
+ }
+ try {
+ forName("tis-620").actualHtmlEncoding = forName("windows-874");
+ } catch (UnsupportedCharsetException e) {
+ }
+ try {
+ forName("gb_2312-80").actualHtmlEncoding = forName("gbk");
+ } catch (UnsupportedCharsetException e) {
+ }
+ try {
+ forName("gb2312").actualHtmlEncoding = forName("gbk");
+ } catch (UnsupportedCharsetException e) {
+ }
+ try {
+ encodingByCookedName.put("x-x-big5", forName("big5")); // NOTE(review): key is not passed through toNameKey(), but forName() looks up toNameKey(name); this entry and the two below look unreachable via forName() - confirm intent
+ } catch (UnsupportedCharsetException e) {
+ }
+ try {
+ encodingByCookedName.put("euc-kr", forName("windows-949"));
+ } catch (UnsupportedCharsetException e) {
+ }
+ try {
+ encodingByCookedName.put("ks_c_5601-1987", forName("windows-949"));
+ } catch (UnsupportedCharsetException e) {
+ }
+ }
+
+ /**
+  * Tells whether the given code is one of those that
+  * <code>asciiMapsToBasicLatin</code> checks individually.
+  *
+  * @param c
+  *            the code to classify
+  * @return <code>true</code> for the ranges listed in the body
+  */
+ private static boolean isAsciiSupersetnessSensitive(int c) {
+     if (c >= 0x09 && c <= 0x0D) { // TAB .. CR
+         return true;
+     }
+     if (c >= 0x20 && c <= 0x22) { // space ! "
+         return true;
+     }
+     if (c >= 0x26 && c <= 0x27) { // & '
+         return true;
+     }
+     if (c >= 0x2C && c <= 0x3F) { // , - . / 0-9 : ; < = > ?
+         return true;
+     }
+     if (c >= 0x41 && c <= 0x5A) { // A-Z
+         return true;
+     }
+     return c >= 0x61 && c <= 0x7A; // a-z
+ }
+
+ /**
+  * @param lowerCasePreferredIanaName
+  *            the name key to look up (the static initializer passes the
+  *            result of <code>toNameKey</code>)
+  * @return <code>true</code> unless the key is listed in
+  *         <code>NOT_OBSCURE</code>
+  */
+ private static boolean isObscure(String lowerCasePreferredIanaName) {
+     int index = Arrays.binarySearch(NOT_OBSCURE, lowerCasePreferredIanaName);
+     return index < 0;
+ }
+
+ /**
+  * @param lowerCasePreferredIanaName
+  *            the name key to look up (the static initializer passes the
+  *            result of <code>toNameKey</code>)
+  * @return <code>true</code> if the key starts with "xibm" or is listed in
+  *         <code>BANNED</code>
+  */
+ private static boolean isBanned(String lowerCasePreferredIanaName) {
+     boolean ibmExtension = lowerCasePreferredIanaName.startsWith("xibm");
+     return ibmExtension
+             || Arrays.binarySearch(BANNED, lowerCasePreferredIanaName) >= 0;
+ }
+
+ /**
+  * @param lowerCasePreferredIanaName
+  *            the name key to look up (the static initializer passes the
+  *            result of <code>toNameKey</code>)
+  * @return <code>true</code> if the key is listed in
+  *         <code>SHOULD_NOT</code>
+  */
+ private static boolean isShouldNot(String lowerCasePreferredIanaName) {
+     int index = Arrays.binarySearch(SHOULD_NOT, lowerCasePreferredIanaName);
+     return index >= 0;
+ }
+
+ /**
+  * Decodes the probe buffer with the given charset and checks that every
+  * decoded character is the expected one: the code itself where
+  * <code>isAsciiSupersetnessSensitive</code> holds, a space (0x20)
+  * elsewhere.
+  *
+  * @param testBuf
+  *            the probe bytes prepared by the static initializer
+  * @param cs
+  *            the charset to test
+  * @return <code>true</code> if all 0x7F characters decode as expected;
+  *         <code>false</code> on the first mismatch or on any failure while
+  *         reading
+  */
+ private static boolean asciiMapsToBasicLatin(byte[] testBuf, Charset cs) {
+     CharsetDecoder decoder = cs.newDecoder();
+     decoder.onMalformedInput(CodingErrorAction.REPORT);
+     decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+     Reader decoded = new InputStreamReader(
+             new ByteArrayInputStream(testBuf), decoder);
+     try {
+         for (int code = 0; code < 0x7F; code++) {
+             int expected = isAsciiSupersetnessSensitive(code) ? code : 0x20;
+             if (decoded.read() != expected) {
+                 return false;
+             }
+         }
+         return true;
+     } catch (Exception e) {
+         // Covers IOException as well as runtime failures.
+         return false;
+     } catch (CoderMalfunctionError e) {
+         return false;
+     }
+ }
+
+ /**
+  * @param canonName
+  *            the name to inspect (the static initializer passes the name
+  *            key)
+  * @param asciiSuperset
+  *            whether the charset passed the ASCII-superset probe
+  * @return <code>true</code> only for a non-ASCII-superset whose name
+  *         starts with "cp", "ibm" or "xibm"
+  */
+ private static boolean isLikelyEbcdic(String canonName,
+         boolean asciiSuperset) {
+     if (asciiSuperset) {
+         return false;
+     }
+     return canonName.startsWith("cp") || canonName.startsWith("ibm")
+             || canonName.startsWith("xibm");
+ }
+
+ /**
+  * Looks up an encoding by label. The label is reduced with
+  * <code>toNameKey</code> before the lookup.
+  *
+  * @param name
+  *            the encoding label
+  * @return the registered encoding
+  * @throws UnsupportedCharsetException
+  *             if no encoding is registered under the reduced label
+  */
+ public static Encoding forName(String name) {
+     Encoding found = encodingByCookedName.get(toNameKey(name));
+     if (found != null) {
+         return found;
+     }
+     throw new UnsupportedCharsetException(name);
+ }
+
+ public static String toNameKey(String str) {
+ if (str == null) {
+ return null;
+ }
+ int j = 0;
+ char[] buf = new char[str.length()];
+ for (int i = 0; i < str.length(); i++) {
+ char c = str.charAt(i);
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ }
+ if (!((c >= '\t' && c <= '\r') || (c >= '\u0020' && c <= '\u002F')
+ || (c >= '\u003A' && c <= '\u0040')
+ || (c >= '\u005B' && c <= '\u0060') || (c >= '\u007B' && c <= '\u007E'))) {
+ buf[j] = c;
+ j++;
+ }
+ }
+ return new String(buf, 0, j);
+ }
+
+ /**
+  * Lowercases the ASCII letters A-Z of a string; every other character,
+  * including non-ASCII upper-case letters, is left as it is.
+  *
+  * @param str
+  *            the input; may be <code>null</code>
+  * @return the lowercased copy, or <code>null</code> for <code>null</code>
+  */
+ public static String toAsciiLowerCase(String str) {
+     if (str == null) {
+         return null;
+     }
+     char[] chars = str.toCharArray();
+     for (int idx = 0; idx < chars.length; idx++) {
+         if (chars[idx] >= 'A' && chars[idx] <= 'Z') {
+             chars[idx] = (char) (chars[idx] + 0x20);
+         }
+     }
+     return new String(chars);
+ }
+
+ /**
+ * @param canonName
+ * @param charset
+ * @param asciiSuperset
+ * @param obscure
+ * @param shouldNot
+ * @param likelyEbcdic
+ */
+ private Encoding(final String canonName, final Charset charset,
+ final boolean asciiSuperset, final boolean obscure,
+ final boolean shouldNot, final boolean likelyEbcdic) {
+ this.canonName = canonName;
+ this.charset = charset;
+ this.asciiSuperset = asciiSuperset;
+ this.obscure = obscure;
+ this.shouldNot = shouldNot;
+ this.likelyEbcdic = likelyEbcdic;
+ }
+
+ /**
+ * Returns the asciiSuperset.
+ *
+ * @return the asciiSuperset
+ */
+ public boolean isAsciiSuperset() {
+ return asciiSuperset;
+ }
+
+ /**
+ * Returns the canonName.
+ *
+ * @return the canonName
+ */
+ public String getCanonName() {
+ return canonName;
+ }
+
+ /**
+ * Returns the likelyEbcdic.
+ *
+ * @return the likelyEbcdic
+ */
+ public boolean isLikelyEbcdic() {
+ return likelyEbcdic;
+ }
+
+ /**
+ * Returns the obscure.
+ *
+ * @return the obscure
+ */
+ public boolean isObscure() {
+ return obscure;
+ }
+
+ /**
+ * Returns the shouldNot.
+ *
+ * @return the shouldNot
+ */
+ public boolean isShouldNot() {
+ return shouldNot;
+ }
+
+ public boolean isRegistered() {
+ return !canonName.startsWith("x-");
+ }
+
+ /**
+ * @return
+ * @see java.nio.charset.Charset#canEncode()
+ */
+ public boolean canEncode() {
+ return charset.canEncode();
+ }
+
+ /**
+ * @return
+ * @see java.nio.charset.Charset#newDecoder()
+ */
+ public CharsetDecoder newDecoder() {
+ return charset.newDecoder();
+ }
+
+ /**
+ * @return
+ * @see java.nio.charset.Charset#newEncoder()
+ */
+ public CharsetEncoder newEncoder() {
+ return charset.newEncoder();
+ }
+
+ /**
+ * Returns the actualHtmlEncoding.
+ *
+ * @return the actualHtmlEncoding
+ */
+ public Encoding getActualHtmlEncoding() {
+ return actualHtmlEncoding;
+ }
+
+ public static void main(String[] args) {
+ for (Map.Entry<String, Encoding> entry : encodingByCookedName.entrySet()) {
+ String name = entry.getKey();
+ Encoding enc = entry.getValue();
+ System.out.printf(
+ "%21s: canon %21s, obs %5s, reg %5s, asc %5s, ebc %5s\n",
+ name, enc.getCanonName(), enc.isObscure(),
+ enc.isRegistered(), enc.isAsciiSuperset(),
+ enc.isLikelyEbcdic());
+ }
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java
new file mode 100644
index 000000000..413f0d9e9
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java
@@ -0,0 +1,512 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2013 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.io;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+
+import nu.validator.htmlparser.common.ByteReadable;
+import nu.validator.htmlparser.common.Heuristics;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.extra.ChardetSniffer;
+import nu.validator.htmlparser.extra.IcuDetectorSniffer;
+import nu.validator.htmlparser.impl.Tokenizer;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+/**
+ * Be very careful with this class. It is not a general-purpose subclass of
+ * <code>Reader</code>. Instead, it is the minimal implementation that does
+ * what <code>Tokenizer</code> needs while being an instance of
+ * <code>Reader</code>.
+ *
+ * The only reason why this is a public class is that it needs to be visible to
+ * test code in another package.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class HtmlInputStreamReader extends Reader implements
+ ByteReadable, Locator {
+
+ private static final int SNIFFING_LIMIT = 1024;
+
+ private final InputStream inputStream;
+
+ private final ErrorHandler errorHandler;
+
+ private final Tokenizer tokenizer;
+
+ private final Driver driver;
+
+ private CharsetDecoder decoder = null;
+
+ private boolean sniffing = true;
+
+ private int limit = 0;
+
+ private int position = 0;
+
+ private int bytesRead = 0;
+
+ private boolean eofSeen = false;
+
+ private boolean shouldReadBytes = false;
+
+ private boolean charsetBoundaryPassed = false;
+
+ private final byte[] byteArray = new byte[4096]; // Length must be >=
+
+ // SNIFFING_LIMIT
+
+ private final ByteBuffer byteBuffer = ByteBuffer.wrap(byteArray);
+
+ private boolean needToNotifyTokenizer = false;
+
+ private boolean flushing = false;
+
+ private int line = -1;
+
+ private int col = -1;
+
+ private int lineColPos;
+
+ private boolean hasPendingReplacementCharacter = false;
+
+ private boolean nextCharOnNewLine;
+
+ private boolean prevWasCR;
+
+ /**
+ * Creates a reader that determines the encoding by sniffing: first the
+ * byte order mark, then (if there is none) a meta declaration, then the
+ * heuristic detectors selected by <code>heuristics</code>, and finally a
+ * windows-1252 fallback. The result is reported to the driver when one is
+ * given.
+ *
+ * @param inputStream the byte stream to decode
+ * @param errorHandler receives the warnings and errors; checked for
+ * <code>null</code> by <code>warn</code> and <code>err</code>
+ * @param tokenizer the tokenizer used for location information
+ * @param driver told about the chosen encoding; may be <code>null</code>
+ * @param heuristics which heuristic detectors may be used
+ * @throws IOException on read failure, or wrapping a
+ * <code>SAXException</code> thrown by the error handler
+ * @throws SAXException presumably from the sniffers - not raised directly
+ * by the code in this constructor
+ */
+ public HtmlInputStreamReader(InputStream inputStream,
+ ErrorHandler errorHandler, Tokenizer tokenizer, Driver driver,
+ Heuristics heuristics) throws SAXException, IOException {
+ this.inputStream = inputStream;
+ this.errorHandler = errorHandler;
+ this.tokenizer = tokenizer;
+ this.driver = driver;
+ this.sniffing = true; // readByte() is only legal while this is set
+ Encoding encoding = (new BomSniffer(this)).sniff();
+ if (encoding == null) {
+ position = 0; // rewind the sniffing cursor so the next sniffer re-reads the buffered bytes
+ encoding = (new MetaSniffer(errorHandler, this)).sniff(this);
+ boolean declared = true;
+ if (encoding == null) {
+ declared = false;
+ } else if (encoding != Encoding.UTF8) {
+ warn("Legacy encoding \u201C"
+ + encoding.getCanonName()
+ + "\u201D used. Documents should use UTF-8.");
+ }
+ if (encoding == null
+ && (heuristics == Heuristics.CHARDET || heuristics == Heuristics.ALL)) {
+ encoding = (new ChardetSniffer(byteArray, limit)).sniff();
+ }
+ if (encoding == null
+ && (heuristics == Heuristics.ICU || heuristics == Heuristics.ALL)) {
+ position = 0;
+ encoding = (new IcuDetectorSniffer(this)).sniff();
+ }
+ sniffing = false;
+ if (encoding == null) {
+ encoding = Encoding.WINDOWS1252; // last-resort default when nothing was detected
+ }
+ if (!declared) {
+ err("The character encoding was not declared. Proceeding using \u201C" + encoding.getCanonName() + "\u201D.");
+ }
+ if (driver != null) {
+ driver.setEncoding(encoding, Confidence.TENTATIVE);
+ }
+ } else { // a byte order mark decided the encoding
+ if (encoding == Encoding.UTF8) {
+ if (driver != null) {
+ driver.setEncoding(Encoding.UTF8, Confidence.CERTAIN);
+ }
+ } else {
+ warn("Legacy encoding \u201C"
+ + encoding.getCanonName()
+ + "\u201D used. Documents should use UTF-8.");
+ if (driver != null) {
+ driver.setEncoding(Encoding.UTF16, Confidence.CERTAIN); // NOTE(review): always reports UTF16 here while the decoder below uses the sniffed encoding - confirm intended
+ }
+ }
+ }
+ this.decoder = encoding.newDecoder();
+ sniffing = false;
+ position = 0;
+ bytesRead = 0;
+ byteBuffer.position(position); // expose the bytes buffered during sniffing (0..limit) to the decoder
+ byteBuffer.limit(limit);
+ initDecoder();
+ }
+
+ /**
+ * Configures the current decoder to report malformed input and unmappable
+ * characters as error results, which <code>read(char[])</code> then turns
+ * into error messages and U+FFFD.
+ */
+ private void initDecoder() {
+ this.decoder.onMalformedInput(CodingErrorAction.REPORT);
+ this.decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+ }
+
+ public HtmlInputStreamReader(InputStream inputStream,
+ ErrorHandler errorHandler, Tokenizer tokenizer, Driver driver,
+ Encoding encoding) throws SAXException, IOException {
+ this.inputStream = inputStream;
+ this.errorHandler = errorHandler;
+ this.tokenizer = tokenizer;
+ this.driver = driver;
+ this.decoder = encoding.newDecoder();
+ this.sniffing = false;
+ position = 0;
+ bytesRead = 0;
+ byteBuffer.position(0);
+ byteBuffer.limit(0);
+ shouldReadBytes = true;
+ initDecoder();
+ }
+
+ @Override public void close() throws IOException {
+ inputStream.close();
+ }
+
+ @Override public int read(char[] charArray) throws IOException { // Decodes bytes into charArray; returns the number of chars written, or -1 at the end.
+ lineColPos = 0; // makes calculateLineAndCol() restart from the tokenizer's position
+ assert !sniffing;
+ assert charArray.length >= 2;
+ if (needToNotifyTokenizer) { // set by the previous call once SNIFFING_LIMIT bytes were accounted for
+ if (driver != null) {
+ driver.notifyAboutMetaBoundary();
+ }
+ needToNotifyTokenizer = false;
+ }
+ CharBuffer charBuffer = CharBuffer.wrap(charArray);
+ charBuffer.limit(charArray.length);
+ charBuffer.position(0);
+ if (flushing) { // the final decode already ran; only decoder.flush() output is left
+ decoder.flush(charBuffer);
+ // return -1 if zero
+ int cPos = charBuffer.position();
+ return cPos == 0 ? -1 : cPos;
+ }
+ if (hasPendingReplacementCharacter) { // U+FFFD that did not fit into the previous call's buffer; NOTE(review): skipped when flushing is already set above - confirm it cannot be lost
+ charBuffer.put('\uFFFD');
+ hasPendingReplacementCharacter = false;
+ }
+ for (;;) { // outer loop: one pass per refill of the byte buffer
+ if (shouldReadBytes) {
+ int oldLimit = byteBuffer.limit();
+ int readLen;
+ if (charsetBoundaryPassed) {
+ readLen = byteArray.length - oldLimit;
+ } else {
+ readLen = SNIFFING_LIMIT - oldLimit; // before the boundary, never buffer past SNIFFING_LIMIT
+ }
+ int num = inputStream.read(byteArray, oldLimit, readLen);
+ if (num == -1) {
+ eofSeen = true;
+ inputStream.close();
+ } else {
+ byteBuffer.position(0);
+ byteBuffer.limit(oldLimit + num);
+ }
+ shouldReadBytes = false;
+ }
+ boolean finalDecode = false;
+ for (;;) { // inner loop: decode until output is full, input is exhausted, or the stream ends
+ int oldBytePos = byteBuffer.position();
+ CoderResult cr = decoder.decode(byteBuffer, charBuffer,
+ finalDecode);
+ bytesRead += byteBuffer.position() - oldBytePos;
+ if (cr == CoderResult.OVERFLOW) {
+ // Decoder will remember surrogates
+ return charBuffer.position();
+ } else if (cr == CoderResult.UNDERFLOW) {
+ int remaining = byteBuffer.remaining();
+ if (!charsetBoundaryPassed) {
+ if (bytesRead + remaining >= SNIFFING_LIMIT) {
+ needToNotifyTokenizer = true; // acted upon at the start of the next call
+ charsetBoundaryPassed = true;
+ }
+ }
+
+ // XXX what happens if the entire byte buffer consists of
+ // a pathologically long malformed sequence?
+
+ // If the buffer was not fully consumed, there may be an
+ // incomplete byte sequence that needs to seed the next
+ // buffer.
+ if (remaining > 0) {
+ System.arraycopy(byteArray, byteBuffer.position(),
+ byteArray, 0, remaining);
+ }
+ byteBuffer.position(0);
+ byteBuffer.limit(remaining);
+ if (flushing) {
+ // The final decode was successful. Not sure if this
+ // ever happens.
+ // Let's get out in any case.
+ int cPos = charBuffer.position();
+ return cPos == 0 ? -1 : cPos;
+ } else if (eofSeen) {
+ // If there's something left, it isn't something that
+ // would be
+ // consumed in the middle of the stream. Rerun the loop
+ // once
+ // in the final mode.
+ shouldReadBytes = false;
+ finalDecode = true;
+ flushing = true;
+ continue;
+ } else {
+ // The usual stuff. Want more bytes next time.
+ shouldReadBytes = true;
+ int cPos = charBuffer.position();
+ if (cPos == 0) {
+ // No output. Read more bytes right away
+ break;
+ }
+ return cPos;
+ }
+ } else {
+ // The result is in error. No need to test.
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < cr.length(); i++) { // consume the offending bytes and list them in hex for the message
+ if (i > 0) {
+ sb.append(", ");
+ }
+ sb.append('\u201C');
+ sb.append(Integer.toHexString(byteBuffer.get() & 0xFF));
+ bytesRead++;
+ sb.append('\u201D');
+ }
+ if (charBuffer.hasRemaining()) {
+ charBuffer.put('\uFFFD');
+ } else {
+ hasPendingReplacementCharacter = true; // emitted at the start of the next call
+ }
+ calculateLineAndCol(charBuffer); // refresh line/col before the error is reported with this object as locator
+ if (cr.isMalformed()) {
+ err("Malformed byte sequence: " + sb + ".");
+ } else if (cr.isUnmappable()) {
+ err("Unmappable byte sequence: " + sb + ".");
+ } else {
+ throw new RuntimeException(
+ "CoderResult was none of overflow, underflow, malformed or unmappable.");
+ }
+ if (finalDecode) {
+ // These were the last bytes of input. Return without
+ // relooping.
+ // return -1 if zero
+ int cPos = charBuffer.position();
+ return cPos == 0 ? -1 : cPos;
+ }
+ }
+ }
+ }
+ }
+
+ /**
+  * Advances the cached line/column over the characters written to
+  * <code>charBuffer</code> since the previous call within the current
+  * <code>read(char[])</code> invocation. On the first call after
+  * <code>lineColPos</code> was reset, the position state is taken from the
+  * tokenizer. Does nothing when there is no tokenizer.
+  *
+  * A CR starts a new line for the following character. An LF does the
+  * same unless it immediately follows a CR, in which case it belongs to
+  * that line break.
+  *
+  * @param charBuffer
+  *            the output buffer of the current read; must be array-backed
+  */
+ private void calculateLineAndCol(CharBuffer charBuffer) {
+     if (tokenizer == null) {
+         return;
+     }
+     if (lineColPos == 0) {
+         line = tokenizer.getLine();
+         col = tokenizer.getCol();
+         nextCharOnNewLine = tokenizer.isNextCharOnNewLine();
+         prevWasCR = tokenizer.isPrevCR();
+     }
+
+     char[] charArray = charBuffer.array();
+     int i = lineColPos;
+     while (i < charBuffer.position()) {
+         if (nextCharOnNewLine) {
+             line++;
+             col = 1;
+             nextCharOnNewLine = false;
+         } else {
+             col++;
+         }
+
+         switch (charArray[i]) {
+             case '\r':
+                 nextCharOnNewLine = true;
+                 prevWasCR = true;
+                 break;
+             case '\n':
+                 if (prevWasCR) {
+                     // Second half of CRLF: the CR already broke the line.
+                     col--;
+                 } else {
+                     nextCharOnNewLine = true;
+                 }
+                 prevWasCR = false;
+                 break;
+             default:
+                 // Any other character ends a pending CR; without this
+                 // reset a later lone LF was mistaken for part of CRLF.
+                 prevWasCR = false;
+                 break;
+         }
+         i++;
+     }
+     lineColPos = i;
+ }
+
+ /**
+  * Returns the next byte for an encoding sniffer, reading from the input
+  * stream into the internal buffer when the buffered bytes are used up.
+  * At most <code>SNIFFING_LIMIT</code> bytes are ever handed out this way.
+  *
+  * @return the byte as a value 0..255, or -1 at the sniffing limit or at
+  *         the end of the stream
+  * @throws IOException
+  *             if reading the stream fails
+  * @throws IllegalStateException
+  *             if called when not sniffing
+  */
+ public int readByte() throws IOException {
+     if (!sniffing) {
+         throw new IllegalStateException(
+                 "readByte() called when not in the sniffing state.");
+     }
+     if (position == SNIFFING_LIMIT) {
+         return -1;
+     }
+     if (position >= limit) {
+         // Buffer exhausted: top it up, but never beyond the sniffing limit.
+         int count = inputStream.read(byteArray, limit, SNIFFING_LIMIT - limit);
+         if (count == -1) {
+             return -1;
+         }
+         limit += count;
+     }
+     return byteArray[position++] & 0xFF;
+ }
+
+ public static void main(String[] args) {
+ CharsetDecoder dec = Charset.forName("UTF-8").newDecoder();
+ dec.onMalformedInput(CodingErrorAction.REPORT);
+ dec.onUnmappableCharacter(CodingErrorAction.REPORT);
+ byte[] bytes = { (byte) 0xF0, (byte) 0x9D, (byte) 0x80, (byte) 0x80 };
+ byte[] bytes2 = { (byte) 0xB8, (byte) 0x80, 0x63, 0x64, 0x65 };
+ ByteBuffer byteBuf = ByteBuffer.wrap(bytes);
+ ByteBuffer byteBuf2 = ByteBuffer.wrap(bytes2);
+ char[] chars = new char[1];
+ CharBuffer charBuf = CharBuffer.wrap(chars);
+
+ CoderResult cr = dec.decode(byteBuf, charBuf, false);
+ System.out.println(cr);
+ System.out.println(byteBuf);
+ // byteBuf.get();
+ cr = dec.decode(byteBuf2, charBuf, false);
+ System.out.println(cr);
+ System.out.println(byteBuf2);
+
+ }
+
+ /**
+  * @return the column last computed by <code>calculateLineAndCol</code>,
+  *         or -1 when there is no tokenizer
+  */
+ public int getColumnNumber() {
+     return (tokenizer != null) ? col : -1;
+ }
+
+ /**
+  * @return the line last computed by <code>calculateLineAndCol</code>, or
+  *         -1 when there is no tokenizer
+  */
+ public int getLineNumber() {
+     return (tokenizer != null) ? line : -1;
+ }
+
+ /**
+  * @return the tokenizer's public id, or <code>null</code> when there is no
+  *         tokenizer
+  */
+ public String getPublicId() {
+     return (tokenizer == null) ? null : tokenizer.getPublicId();
+ }
+
+ /**
+  * @return the tokenizer's system id, or <code>null</code> when there is no
+  *         tokenizer
+  */
+ public String getSystemId() {
+     return (tokenizer == null) ? null : tokenizer.getSystemId();
+ }
+
+ /**
+  * Reports an error to the error handler, using this reader as the
+  * locator. Does nothing when there is no error handler.
+  *
+  * @param message
+  *            the error text
+  * @throws IOException
+  *             wrapping any <code>SAXException</code> thrown by the handler
+  */
+ private void err(String message) throws IOException {
+     // TODO remove wrapping when changing read() to take a CharBuffer
+     if (errorHandler == null) {
+         return;
+     }
+     try {
+         errorHandler.error(new SAXParseException(message, this));
+     } catch (SAXException e) {
+         IOException wrapped = new IOException(e.getMessage());
+         wrapped.initCause(e);
+         throw wrapped;
+     }
+ }
+
+ /**
+  * Reports a warning to the error handler, using this reader as the
+  * locator. Does nothing when there is no error handler.
+  *
+  * @param message
+  *            the warning text
+  * @throws IOException
+  *             wrapping any <code>SAXException</code> thrown by the handler
+  */
+ private void warn(String message) throws IOException {
+     // TODO remove wrapping when changing read() to take a CharBuffer
+     if (errorHandler == null) {
+         return;
+     }
+     try {
+         errorHandler.warning(new SAXParseException(message, this));
+     } catch (SAXException e) {
+         IOException wrapped = new IOException(e.getMessage());
+         wrapped.initCause(e);
+         throw wrapped;
+     }
+ }
+
+ public Charset getCharset() {
+ return decoder.charset();
+ }
+
+ /**
+ * @see java.io.Reader#read()
+ */
+ @Override public int read() throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * @see java.io.Reader#read(char[], int, int)
+ */
+ @Override public int read(char[] cbuf, int off, int len) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * @see java.io.Reader#read(java.nio.CharBuffer)
+ */
+ @Override public int read(CharBuffer target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ public void switchEncoding(Encoding newEnc) {
+ this.decoder = newEnc.newDecoder();
+ initDecoder();
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/MetaSniffer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/MetaSniffer.java
new file mode 100644
index 000000000..baa04e44f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/MetaSniffer.java
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.io;
+
+import java.io.IOException;
+import java.nio.charset.UnsupportedCharsetException;
+
+import nu.validator.htmlparser.common.ByteReadable;
+import nu.validator.htmlparser.impl.MetaScanner;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+public class MetaSniffer extends MetaScanner implements Locator {
+
+ private Encoding characterEncoding = null;
+
+ private final ErrorHandler errorHandler;
+
+ private final Locator locator;
+
+ private int line = 1;
+
+ private int col = 0;
+
+ private boolean prevWasCR = false;
+
+ public MetaSniffer(ErrorHandler eh, Locator locator) {
+ this.errorHandler = eh;
+ this.locator = locator;
+ this.characterEncoding = null;
+ }
+
+ /**
+ * -1 means end.
+ * @return
+ * @throws IOException
+ */
+ protected int read() throws IOException {
+ int b = readable.readByte();
+ // [NOCPP[
+ switch (b) {
+ case '\n':
+ if (!prevWasCR) {
+ line++;
+ col = 0;
+ }
+ prevWasCR = false;
+ break;
+ case '\r':
+ line++;
+ col = 0;
+ prevWasCR = true;
+ break;
+ default:
+ col++;
+ prevWasCR = false;
+ break;
+ }
+ // ]NOCPP]
+ return b;
+ }
+
+ /**
+ * Main loop.
+ *
+ * @return
+ *
+ * @throws SAXException
+ * @throws IOException
+ * @throws
+ */
+ public Encoding sniff(ByteReadable readable) throws SAXException, IOException {
+ this.readable = readable;
+ stateLoop(stateSave);
+ return characterEncoding;
+ }
+
+
+ /**
+ * @param string
+ * @throws SAXException
+ */
+ private void err(String message) throws SAXException {
+ if (errorHandler != null) {
+ SAXParseException spe = new SAXParseException(message, this);
+ errorHandler.error(spe);
+ }
+ }
+
+ /**
+ * @param string
+ * @throws SAXException
+ */
+ private void warn(String message) throws SAXException {
+ if (errorHandler != null) {
+ SAXParseException spe = new SAXParseException(message, this);
+ errorHandler.warning(spe);
+ }
+ }
+
+ public int getColumnNumber() {
+ return col;
+ }
+
+ public int getLineNumber() {
+ return line;
+ }
+
+ public String getPublicId() {
+ if (locator != null) {
+ return locator.getPublicId();
+ }
+ return null;
+ }
+
+ public String getSystemId() {
+ if (locator != null) {
+ return locator.getSystemId();
+ }
+ return null;
+ }
+
+ protected boolean tryCharset(String encoding) throws SAXException {
+ encoding = Encoding.toAsciiLowerCase(encoding);
+ try {
+ // XXX spec says only UTF-16
+ if ("utf-16".equals(encoding) || "utf-16be".equals(encoding) || "utf-16le".equals(encoding) || "utf-32".equals(encoding) || "utf-32be".equals(encoding) || "utf-32le".equals(encoding)) {
+ this.characterEncoding = Encoding.UTF8;
+ err("The internal character encoding declaration specified \u201C" + encoding + "\u201D which is not a rough superset of ASCII. Using \u201CUTF-8\u201D instead.");
+ return true;
+ } else {
+ Encoding cs = Encoding.forName(encoding);
+ String canonName = cs.getCanonName();
+ if (!cs.isAsciiSuperset()) {
+ err("The encoding \u201C"
+ + encoding
+ + "\u201D is not an ASCII superset and, therefore, cannot be used in an internal encoding declaration. Continuing the sniffing algorithm.");
+ return false;
+ }
+ if (!cs.isRegistered()) {
+ if (encoding.startsWith("x-")) {
+ err("The encoding \u201C"
+ + encoding
+ + "\u201D is not an IANA-registered encoding. (Charmod C022)");
+ } else {
+ err("The encoding \u201C"
+ + encoding
+ + "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)");
+ }
+ } else if (!cs.getCanonName().equals(encoding)) {
+ err("The encoding \u201C" + encoding
+ + "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C"
+ + canonName + "\u201D. (Charmod C024)");
+ }
+ if (cs.isShouldNot()) {
+ warn("Authors should not use the character encoding \u201C"
+ + encoding
+ + "\u201D. It is recommended to use \u201CUTF-8\u201D.");
+ } else if (cs.isObscure()) {
+ warn("The character encoding \u201C" + encoding + "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
+ }
+ Encoding actual = cs.getActualHtmlEncoding();
+ if (actual == null) {
+ this.characterEncoding = cs;
+ } else {
+ warn("Using \u201C" + actual.getCanonName() + "\u201D instead of the declared encoding \u201C" + encoding + "\u201D.");
+ this.characterEncoding = actual;
+ }
+ return true;
+ }
+ } catch (UnsupportedCharsetException e) {
+ err("Unsupported character encoding name: \u201C" + encoding + "\u201D. Will continue sniffing.");
+ }
+ return false;
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/Rewindable.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/Rewindable.java
new file mode 100644
index 000000000..47a3d5eb0
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/Rewindable.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2001-2003 Thai Open Source Software Center Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ * * Neither the name of the Thai Open Source Software Center Ltd nor
+ * the names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package nu.validator.htmlparser.rewindable;
+
+/**
+ * Contract for an input source whose consumed content can be replayed from
+ * the start until the caller declares that no further rewinding will happen.
+ */
+public interface Rewindable {
+ /**
+  * Declares that {@link #rewind()} will not be called again, so the
+  * implementation no longer has to keep already-read data around.
+  */
+ void willNotRewind();
+
+ /**
+  * Moves the read position back to the start of the data.
+  */
+ void rewind();
+
+ /**
+  * Tells whether {@link #rewind()} may still be called.
+  *
+  * @return <code>true</code> if rewinding is still possible
+  */
+ boolean canRewind();
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/RewindableInputStream.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/RewindableInputStream.java
new file mode 100644
index 000000000..3a1cc1b91
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/RewindableInputStream.java
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2001-2003 Thai Open Source Software Center Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ * * Neither the name of the Thai Open Source Software Center Ltd nor
+ * the names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package nu.validator.htmlparser.rewindable;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * An <code>InputStream</code> wrapper that remembers every byte it has read
+ * from the underlying stream in a linked list of blocks, so that the data can
+ * be replayed after {@link #rewind()}. Saving stops once
+ * {@link #willNotRewind()} has been called.
+ */
+public class RewindableInputStream extends InputStream implements Rewindable {
+ /**
+  * One node of the singly linked list of saved bytes.
+  */
+ static class Block {
+ // Next block in the list, or null for the last block.
+ Block next;
+
+ final byte[] buf;
+
+ // Number of bytes of buf that hold saved data.
+ int used = 0;
+
+ static final int MIN_SIZE = 1024;
+
+ // Allocates a buffer of at least MIN_SIZE bytes.
+ Block(int minSize) {
+ buf = new byte[Math.max(MIN_SIZE, minSize)];
+ }
+
+ Block() {
+ this(0);
+ }
+
+ // Appends one byte; the caller must ensure there is free space.
+ void append(byte b) {
+ buf[used++] = b;
+ }
+
+ // Appends len bytes; the caller must ensure there is free space.
+ void append(byte[] b, int off, int len) {
+ System.arraycopy(b, off, buf, used, len);
+ used += len;
+ }
+ }
+
+ // First saved block; the replay start point for rewind().
+ private Block head;
+
+ /**
+ * If curBlockAvail > 0, then there are curBlockAvail bytes available to be
+ * returned starting at curBlockPos in curBlock.buf.
+ */
+ private int curBlockAvail;
+
+ // Block currently being replayed, or null when not replaying.
+ private Block curBlock;
+
+ private int curBlockPos;
+
+ // Block that newly read bytes are appended to while saving.
+ private Block lastBlock;
+
+ /**
+ * true unless willNotRewind has been called
+ */
+ private boolean saving = true;
+
+ private final InputStream in;
+
+ // true when close() was called while still saving, so the underlying
+ // stream was deliberately left open.
+ private boolean pretendClosed = false;
+
+ /**
+ * true if we have got an EOF from the underlying InputStream
+ */
+ private boolean eof;
+
+ /**
+  * Wraps the given stream.
+  *
+  * @param in the underlying stream
+  * @throws NullPointerException if <code>in</code> is <code>null</code>
+  */
+ public RewindableInputStream(InputStream in) {
+ if (in == null)
+ throw new NullPointerException();
+ this.in = in;
+ }
+
+ /**
+  * While still saving, only abandons the replay position and remembers
+  * that a close was requested; the underlying stream stays open. Otherwise
+  * drops all buffers and closes the underlying stream.
+  */
+ public void close() throws IOException {
+ if (saving) {
+ curBlockAvail = 0;
+ curBlock = null;
+ pretendClosed = true;
+ } else {
+ head = null;
+ curBlock = null;
+ lastBlock = null;
+ saving = false;
+ curBlockAvail = 0;
+ in.close();
+ }
+ }
+
+ /**
+  * Restarts reading from the first saved block and clears a pretended
+  * close.
+  *
+  * @throws IllegalStateException if {@link #willNotRewind()} was called
+  */
+ public void rewind() {
+ if (!saving)
+ throw new IllegalStateException("rewind() after willNotRewind()");
+ pretendClosed = false;
+ // Nothing has been saved yet, so there is nothing to replay.
+ if (head == null)
+ return;
+ curBlock = head;
+ curBlockPos = 0;
+ curBlockAvail = curBlock.used;
+ }
+
+ public boolean canRewind() {
+ return saving;
+ }
+
+ /**
+  * Stops saving and releases the references to the saved list. curBlock is
+  * left untouched, so a replay already in progress can continue. If
+  * close() was only pretended earlier, the underlying stream is closed now.
+  */
+ public void willNotRewind() {
+ saving = false;
+ head = null;
+ lastBlock = null;
+ if (pretendClosed) {
+ pretendClosed = false;
+ try {
+ in.close();
+ } catch (IOException e) {
+ // Ignored: this method cannot throw a checked exception.
+ }
+ }
+ }
+
+ /**
+  * Returns the next byte, replaying saved data first and otherwise reading
+  * from the underlying stream (saving the byte if still saving).
+  */
+ public int read() throws IOException {
+ if (curBlockAvail > 0) {
+ int c = curBlock.buf[curBlockPos++] & 0xFF;
+ --curBlockAvail;
+ // Current block exhausted: advance to the next saved block, if any.
+ if (curBlockAvail == 0) {
+ curBlock = curBlock.next;
+ if (curBlock != null) {
+ curBlockPos = 0;
+ curBlockAvail = curBlock.used;
+ }
+ }
+ return c;
+ }
+ int c = in.read();
+ if (saving && c != -1) {
+ if (lastBlock == null)
+ lastBlock = head = new Block();
+ else if (lastBlock.used == lastBlock.buf.length)
+ lastBlock = lastBlock.next = new Block();
+ lastBlock.append((byte) c);
+ }
+ return c;
+ }
+
+ /**
+  * Reads up to <code>len</code> bytes: first from the saved blocks being
+  * replayed, then, if room remains, from the underlying stream. Bytes read
+  * from the underlying stream are saved while saving is on.
+  *
+  * @return the number of bytes read, or -1 if nothing was read and the
+  *         underlying stream has reported end of file
+  */
+ public int read(byte b[], int off, int len) throws IOException {
+ // Fast path: nothing to replay and nothing to save.
+ if (curBlockAvail == 0 && !saving)
+ return in.read(b, off, len);
+ if (b == null)
+ throw new NullPointerException();
+ if (len < 0)
+ throw new IndexOutOfBoundsException();
+ int nRead = 0;
+ if (curBlockAvail != 0) {
+ // Replay saved bytes one at a time, hopping across blocks.
+ for (;;) {
+ if (len == 0)
+ return nRead;
+ b[off++] = curBlock.buf[curBlockPos++];
+ --len;
+ nRead++;
+ --curBlockAvail;
+ if (curBlockAvail == 0) {
+ curBlock = curBlock.next;
+ if (curBlock == null)
+ break;
+ curBlockAvail = curBlock.used;
+ curBlockPos = 0;
+ }
+ }
+ }
+ if (len == 0)
+ return nRead;
+ if (eof)
+ return nRead > 0 ? nRead : -1;
+ try {
+ int n = in.read(b, off, len);
+ if (n < 0) {
+ eof = true;
+ return nRead > 0 ? nRead : -1;
+ }
+ nRead += n;
+ if (saving) {
+ if (lastBlock == null)
+ lastBlock = head = new Block(n);
+ else if (lastBlock.buf.length - lastBlock.used < n) {
+ // Fill the remainder of the last block, then put the rest
+ // into a fresh block.
+ if (lastBlock.used != lastBlock.buf.length) {
+ int free = lastBlock.buf.length - lastBlock.used;
+ lastBlock.append(b, off, free);
+ off += free;
+ n -= free;
+ }
+ lastBlock = lastBlock.next = new Block(n);
+ }
+ lastBlock.append(b, off, n);
+ }
+ } catch (IOException e) {
+ // Treat a failure as end of file; only propagate it when no
+ // replayed bytes can be returned instead.
+ eof = true;
+ if (nRead == 0)
+ throw e;
+ }
+ return nRead;
+ }
+
+ /**
+  * Returns the saved bytes still to be replayed plus whatever the
+  * underlying stream reports as available.
+  */
+ public int available() throws IOException {
+ if (curBlockAvail == 0)
+ return in.available();
+ int n = curBlockAvail;
+ for (Block b = curBlock.next; b != null; b = b.next)
+ n += b.used;
+ return n + in.available();
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlParser.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlParser.java
new file mode 100644
index 000000000..714053e70
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlParser.java
@@ -0,0 +1,1097 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.HashMap;
+
+import nu.validator.htmlparser.common.CharacterHandler;
+import nu.validator.htmlparser.common.DoctypeExpectation;
+import nu.validator.htmlparser.common.DocumentModeHandler;
+import nu.validator.htmlparser.common.Heuristics;
+import nu.validator.htmlparser.common.TokenHandler;
+import nu.validator.htmlparser.common.TransitionHandler;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
+import nu.validator.htmlparser.impl.Tokenizer;
+import nu.validator.htmlparser.impl.TreeBuilder;
+import nu.validator.htmlparser.io.Driver;
+import nu.validator.saxtree.Document;
+import nu.validator.saxtree.DocumentFragment;
+import nu.validator.saxtree.TreeParser;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.DTDHandler;
+import org.xml.sax.EntityResolver;
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXNotRecognizedException;
+import org.xml.sax.SAXNotSupportedException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.ext.LexicalHandler;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * This class implements an HTML5 parser that exposes data through the SAX2
+ * interface.
+ *
+ * <p>By default, when using the constructor without arguments,
+ * this parser coerces XML 1.0-incompatible infosets into XML 1.0-compatible
+ * infosets. This corresponds to <code>ALTER_INFOSET</code> as the general
+ * XML violation policy. To make the parser support non-conforming HTML fully
+ * per the HTML 5 spec while on the other hand potentially violating the SAX2
+ * API contract, set the general XML violation policy to <code>ALLOW</code>.
+ * It is possible to treat XML 1.0 infoset violations as fatal by setting
+ * the general XML violation policy to <code>FATAL</code>.
+ *
+ * <p>By default, this parser doesn't do true streaming but buffers everything
+ * first. The parser can be made truly streaming by calling
+ * <code>setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL)</code>. This
+ * has the consequence that errors that require non-streamable recovery are
+ * treated as fatal.
+ *
+ * <p>By default, in order to make the parse events emulate the parse events
+ * for a DTDless XML document, the parser does not report the doctype through
+ * <code>LexicalHandler</code>. Doctype reporting through
+ * <code>LexicalHandler</code> can be turned on by calling
+ * <code>setReportingDoctype(true)</code>.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class HtmlParser implements XMLReader {
+
+ // Created on first use by lazyInit(); null until then.
+ private Driver driver = null;
+
+ // Whichever of saxStreamer / saxTreeBuilder lazyInit() selected.
+ private TreeBuilder<?> treeBuilder = null;
+
+ private SAXStreamer saxStreamer = null; // work around javac bug
+
+ private SAXTreeBuilder saxTreeBuilder = null; // work around javac bug
+
+ private ContentHandler contentHandler = null;
+
+ private LexicalHandler lexicalHandler = null;
+
+ private DTDHandler dtdHandler = null;
+
+ private EntityResolver entityResolver = null;
+
+ private ErrorHandler errorHandler = null;
+
+ private DocumentModeHandler documentModeHandler = null;
+
+ private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;
+
+ private boolean checkingNormalization = false;
+
+ private boolean scriptingEnabled = false;
+
+ // Handlers registered on the driver by lazyInit().
+ private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>();
+
+ private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL;
+
+ // ALLOW makes lazyInit() pick the buffering SAXTreeBuilder; any other
+ // value picks the SAXStreamer.
+ private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW;
+
+ private boolean html4ModeCompatibleWithXhtml1Schemata = false;
+
+ private boolean mappingLangToXmlLang = false;
+
+ private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL;
+
+ private boolean reportingDoctype = true;
+
+ // Error handler passed to the tree builder (separate from errorHandler,
+ // which is passed to the driver).
+ private ErrorHandler treeBuilderErrorHandler = null;
+
+ private Heuristics heuristics = Heuristics.NONE;
+
+ // Passed to ErrorReportingTokenizer.setErrorProfile() by newTokenizer().
+ private HashMap<String, String> errorProfileMap = null;
+
+ private TransitionHandler transitionHandler = null;
+
+ /**
+ * Instantiates the parser with a fatal XML violation policy.
+ *
+ * <p>Equivalent to <code>new HtmlParser(XmlViolationPolicy.FATAL)</code>.
+ */
+ public HtmlParser() {
+ this(XmlViolationPolicy.FATAL);
+ }
+
+ /**
+ * Instantiates the parser with a specific XML violation policy.
+ *
+ * <p>The policy is applied by passing it to <code>setXmlPolicy</code>.
+ *
+ * @param xmlPolicy the policy
+ */
+ public HtmlParser(XmlViolationPolicy xmlPolicy) {
+ setXmlPolicy(xmlPolicy);
+ }
+
+ private Tokenizer newTokenizer(TokenHandler handler, boolean newAttributesEachTime) {
+ if (errorHandler == null && transitionHandler == null &&
+ contentNonXmlCharPolicy == XmlViolationPolicy.ALLOW) {
+ return new Tokenizer(handler, newAttributesEachTime);
+ }
+ ErrorReportingTokenizer tokenizer =
+ new ErrorReportingTokenizer(handler, newAttributesEachTime);
+ tokenizer.setErrorProfile(errorProfileMap);
+ return tokenizer;
+ }
+
+ /**
+ * This class wraps different tree builders depending on configuration. This
+ * method does the work of hiding this from the user of the class.
+ */
+ private void lazyInit() {
+ if (driver == null) {
+ if (streamabilityViolationPolicy == XmlViolationPolicy.ALLOW) {
+ this.saxTreeBuilder = new SAXTreeBuilder();
+ this.treeBuilder = this.saxTreeBuilder;
+ this.saxStreamer = null;
+ this.driver = new Driver(newTokenizer(treeBuilder, true));
+ } else {
+ this.saxStreamer = new SAXStreamer();
+ this.treeBuilder = this.saxStreamer;
+ this.saxTreeBuilder = null;
+ this.driver = new Driver(newTokenizer(treeBuilder, false));
+ }
+ this.driver.setErrorHandler(errorHandler);
+ this.driver.setTransitionHandler(transitionHandler);
+ this.treeBuilder.setErrorHandler(treeBuilderErrorHandler);
+ this.driver.setCheckingNormalization(checkingNormalization);
+ this.driver.setCommentPolicy(commentPolicy);
+ this.driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
+ this.driver.setContentSpacePolicy(contentSpacePolicy);
+ this.driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+ this.driver.setMappingLangToXmlLang(mappingLangToXmlLang);
+ this.driver.setXmlnsPolicy(xmlnsPolicy);
+ this.driver.setHeuristics(heuristics);
+ for (CharacterHandler characterHandler : characterHandlers) {
+ this.driver.addCharacterHandler(characterHandler);
+ }
+ this.treeBuilder.setDoctypeExpectation(doctypeExpectation);
+ this.treeBuilder.setDocumentModeHandler(documentModeHandler);
+ this.treeBuilder.setIgnoringComments(lexicalHandler == null);
+ this.treeBuilder.setScriptingEnabled(scriptingEnabled);
+ this.treeBuilder.setReportingDoctype(reportingDoctype);
+ this.treeBuilder.setNamePolicy(namePolicy);
+ if (saxStreamer != null) {
+ saxStreamer.setContentHandler(contentHandler == null ? new DefaultHandler()
+ : contentHandler);
+ saxStreamer.setLexicalHandler(lexicalHandler);
+ driver.setAllowRewinding(false);
+ }
+ }
+ }
+
+ /**
+ * Returns the content handler currently held by this parser.
+ *
+ * @return the content handler (initially <code>null</code>)
+ * @see org.xml.sax.XMLReader#getContentHandler()
+ */
+ public ContentHandler getContentHandler() {
+ return contentHandler;
+ }
+
+ /**
+ * Returns the DTD handler currently held by this parser.
+ *
+ * @return the DTD handler (initially <code>null</code>)
+ * @see org.xml.sax.XMLReader#getDTDHandler()
+ */
+ public DTDHandler getDTDHandler() {
+ return dtdHandler;
+ }
+
+ /**
+ * Returns the entity resolver currently held by this parser.
+ *
+ * @return the entity resolver (initially <code>null</code>)
+ * @see org.xml.sax.XMLReader#getEntityResolver()
+ */
+ public EntityResolver getEntityResolver() {
+ return entityResolver;
+ }
+
+ /**
+ * Returns the error handler currently held by this parser.
+ *
+ * @return the error handler (initially <code>null</code>)
+ * @see org.xml.sax.XMLReader#getErrorHandler()
+ */
+ public ErrorHandler getErrorHandler() {
+ return errorHandler;
+ }
+
+ /**
+  * Exposes the configuration of the emulated XML parser as well as
+  * boolean-valued configuration without using non-<code>XMLReader</code>
+  * getters directly.
+  *
+  * <dl>
+  * <dt><code>http://xml.org/sax/features/external-general-entities</code></dt>
+  * <dd><code>false</code></dd>
+  * <dt><code>http://xml.org/sax/features/external-parameter-entities</code></dt>
+  * <dd><code>false</code></dd>
+  * <dt><code>http://xml.org/sax/features/is-standalone</code></dt>
+  * <dd><code>true</code></dd>
+  * <dt><code>http://xml.org/sax/features/lexical-handler/parameter-entities</code></dt>
+  * <dd><code>false</code></dd>
+  * <dt><code>http://xml.org/sax/features/namespaces</code></dt>
+  * <dd><code>true</code></dd>
+  * <dt><code>http://xml.org/sax/features/namespace-prefixes</code></dt>
+  * <dd><code>false</code></dd>
+  * <dt><code>http://xml.org/sax/features/resolve-dtd-uris</code></dt>
+  * <dd><code>true</code></dd>
+  * <dt><code>http://xml.org/sax/features/string-interning</code></dt>
+  * <dd><code>true</code></dd>
+  * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt>
+  * <dd><code>isCheckingNormalization</code></dd>
+  * <dt><code>http://xml.org/sax/features/use-attributes2</code></dt>
+  * <dd><code>false</code></dd>
+  * <dt><code>http://xml.org/sax/features/use-locator2</code></dt>
+  * <dd><code>false</code></dd>
+  * <dt><code>http://xml.org/sax/features/use-entity-resolver2</code></dt>
+  * <dd><code>false</code></dd>
+  * <dt><code>http://xml.org/sax/features/validation</code></dt>
+  * <dd><code>false</code></dd>
+  * <dt><code>http://xml.org/sax/features/xmlns-uris</code></dt>
+  * <dd><code>false</code></dd>
+  * <dt><code>http://xml.org/sax/features/xml-1.1</code></dt>
+  * <dd><code>false</code></dd>
+  * <dt><code>http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata</code></dt>
+  * <dd><code>isHtml4ModeCompatibleWithXhtml1Schemata</code></dd>
+  * <dt><code>http://validator.nu/features/mapping-lang-to-xml-lang</code></dt>
+  * <dd><code>isMappingLangToXmlLang</code></dd>
+  * <dt><code>http://validator.nu/features/scripting-enabled</code></dt>
+  * <dd><code>isScriptingEnabled</code></dd>
+  * </dl>
+  *
+  * @param name
+  *            feature URI string
+  * @return a value per the list above
+  * @throws SAXNotRecognizedException
+  *             if the feature URI is not in the list above
+  * @see org.xml.sax.XMLReader#getFeature(java.lang.String)
+  */
+ public boolean getFeature(String name) throws SAXNotRecognizedException,
+         SAXNotSupportedException {
+     // Features whose value depends on the parser configuration.
+     if ("http://xml.org/sax/features/unicode-normalization-checking".equals(name)) {
+         return isCheckingNormalization(); // the checks aren't really per
+         // XML 1.1
+     } else if ("http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata".equals(name)) {
+         return isHtml4ModeCompatibleWithXhtml1Schemata();
+     } else if ("http://validator.nu/features/mapping-lang-to-xml-lang".equals(name)) {
+         return isMappingLangToXmlLang();
+     } else if ("http://validator.nu/features/scripting-enabled".equals(name)) {
+         return isScriptingEnabled();
+     }
+     // Features that are always on.
+     if ("http://xml.org/sax/features/is-standalone".equals(name)
+             || "http://xml.org/sax/features/namespaces".equals(name)
+             // default value--applicable scenario never happens
+             || "http://xml.org/sax/features/resolve-dtd-uris".equals(name)
+             || "http://xml.org/sax/features/string-interning".equals(name)) {
+         return true;
+     }
+     // Features that are always off.
+     if ("http://xml.org/sax/features/external-general-entities".equals(name)
+             || "http://xml.org/sax/features/external-parameter-entities".equals(name)
+             || "http://xml.org/sax/features/lexical-handler/parameter-entities".equals(name)
+             || "http://xml.org/sax/features/namespace-prefixes".equals(name)
+             || "http://xml.org/sax/features/use-attributes2".equals(name)
+             || "http://xml.org/sax/features/use-locator2".equals(name)
+             || "http://xml.org/sax/features/use-entity-resolver2".equals(name)
+             || "http://xml.org/sax/features/validation".equals(name)
+             || "http://xml.org/sax/features/xmlns-uris".equals(name)
+             || "http://xml.org/sax/features/xml-1.1".equals(name)) {
+         return false;
+     }
+     // Name the offending URI so the caller can tell what was rejected.
+     throw new SAXNotRecognizedException("Unrecognized feature: " + name);
+ }
+
+ /**
+  * Allows <code>XMLReader</code>-level access to non-boolean valued
+  * getters.
+  *
+  * <p>
+  * The properties are mapped as follows:
+  *
+  * <dl>
+  * <dt><code>http://xml.org/sax/properties/document-xml-version</code></dt>
+  * <dd><code>"1.0"</code></dd>
+  * <dt><code>http://xml.org/sax/properties/lexical-handler</code></dt>
+  * <dd><code>getLexicalHandler</code></dd>
+  * <dt><code>http://validator.nu/properties/content-space-policy</code></dt>
+  * <dd><code>getContentSpacePolicy</code></dd>
+  * <dt><code>http://validator.nu/properties/content-non-xml-char-policy</code></dt>
+  * <dd><code>getContentNonXmlCharPolicy</code></dd>
+  * <dt><code>http://validator.nu/properties/comment-policy</code></dt>
+  * <dd><code>getCommentPolicy</code></dd>
+  * <dt><code>http://validator.nu/properties/xmlns-policy</code></dt>
+  * <dd><code>getXmlnsPolicy</code></dd>
+  * <dt><code>http://validator.nu/properties/name-policy</code></dt>
+  * <dd><code>getNamePolicy</code></dd>
+  * <dt><code>http://validator.nu/properties/streamability-violation-policy</code></dt>
+  * <dd><code>getStreamabilityViolationPolicy</code></dd>
+  * <dt><code>http://validator.nu/properties/document-mode-handler</code></dt>
+  * <dd><code>getDocumentModeHandler</code></dd>
+  * <dt><code>http://validator.nu/properties/doctype-expectation</code></dt>
+  * <dd><code>getDoctypeExpectation</code></dd>
+  * <dt><code>http://validator.nu/properties/heuristics</code></dt>
+  * <dd><code>getHeuristics</code></dd>
+  * </dl>
+  *
+  * @param name
+  *            property URI string
+  * @return a value per the list above
+  * @throws SAXNotSupportedException
+  *             for the declaration-handler, dom-node, xml-string and
+  *             xml-policy properties, which cannot be read
+  * @throws SAXNotRecognizedException
+  *             if the property URI is not known at all
+  * @see org.xml.sax.XMLReader#getProperty(java.lang.String)
+  */
+ public Object getProperty(String name) throws SAXNotRecognizedException,
+         SAXNotSupportedException {
+     if ("http://xml.org/sax/properties/declaration-handler".equals(name)) {
+         throw new SAXNotSupportedException(
+                 "This parser does not support DeclHandler.");
+     } else if ("http://xml.org/sax/properties/document-xml-version".equals(name)) {
+         return "1.0"; // Emulating an XML 1.1 parser is not supported.
+     } else if ("http://xml.org/sax/properties/dom-node".equals(name)) {
+         throw new SAXNotSupportedException(
+                 "This parser does not walk the DOM.");
+     } else if ("http://xml.org/sax/properties/lexical-handler".equals(name)) {
+         return getLexicalHandler();
+     } else if ("http://xml.org/sax/properties/xml-string".equals(name)) {
+         throw new SAXNotSupportedException(
+                 "This parser does not expose the source as a string.");
+     } else if ("http://validator.nu/properties/content-space-policy".equals(name)) {
+         return getContentSpacePolicy();
+     } else if ("http://validator.nu/properties/content-non-xml-char-policy".equals(name)) {
+         return getContentNonXmlCharPolicy();
+     } else if ("http://validator.nu/properties/comment-policy".equals(name)) {
+         return getCommentPolicy();
+     } else if ("http://validator.nu/properties/xmlns-policy".equals(name)) {
+         return getXmlnsPolicy();
+     } else if ("http://validator.nu/properties/name-policy".equals(name)) {
+         return getNamePolicy();
+     } else if ("http://validator.nu/properties/streamability-violation-policy".equals(name)) {
+         return getStreamabilityViolationPolicy();
+     } else if ("http://validator.nu/properties/document-mode-handler".equals(name)) {
+         return getDocumentModeHandler();
+     } else if ("http://validator.nu/properties/doctype-expectation".equals(name)) {
+         return getDoctypeExpectation();
+     } else if ("http://validator.nu/properties/xml-policy".equals(name)) {
+         // xml-policy only exists as a setter that fans out to several policies.
+         throw new SAXNotSupportedException(
+                 "Cannot get a convenience setter.");
+     } else if ("http://validator.nu/properties/heuristics".equals(name)) {
+         return getHeuristics();
+     } else {
+         // Name the offending URI so the caller can tell what was rejected.
+         throw new SAXNotRecognizedException("Unrecognized property: " + name);
+     }
+ }
+
+ /**
+  * Parses a complete document. When the buffering tree builder is in use,
+  * the built document (if any) is replayed to the content and lexical
+  * handlers afterwards, even if tokenization threw.
+  *
+  * @param input the input to parse
+  * @throws IOException if reading the input fails
+  * @throws SAXException if parsing or event reporting fails
+  * @see org.xml.sax.XMLReader#parse(org.xml.sax.InputSource)
+  */
+ public void parse(InputSource input) throws IOException, SAXException {
+     lazyInit();
+     try {
+         treeBuilder.setFragmentContext(null);
+         tokenize(input);
+     } finally {
+         final Document built =
+                 (saxTreeBuilder == null) ? null : saxTreeBuilder.getDocument();
+         if (built != null) {
+             new TreeParser(contentHandler, lexicalHandler).parse(built);
+         }
+     }
+ }
+
+ /**
+  * Parses a fragment with HTML context. When the buffering tree builder is
+  * in use, the built fragment is replayed to the content and lexical
+  * handlers afterwards, even if tokenization threw.
+  *
+  * @param input the input to parse
+  * @param context the name of the context element (HTML namespace assumed)
+  * @throws IOException if reading the input fails
+  * @throws SAXException if parsing or event reporting fails
+  */
+ public void parseFragment(InputSource input, String context)
+         throws IOException, SAXException {
+     lazyInit();
+     try {
+         final String internedContext = context.intern();
+         treeBuilder.setFragmentContext(internedContext);
+         tokenize(input);
+     } finally {
+         if (saxTreeBuilder != null) {
+             final DocumentFragment built = saxTreeBuilder.getDocumentFragment();
+             final TreeParser replayer = new TreeParser(contentHandler, lexicalHandler);
+             replayer.parse(built);
+         }
+     }
+ }
+
+    /**
+     * Parses a fragment whose context element is given by local name and
+     * namespace.
+     *
+     * @param input the input to parse
+     * @param contextLocal the local name of the context element (interned
+     *        before use, so must not be <code>null</code>)
+     * @param contextNamespace the namespace of the context element (interned
+     *        before use, so must not be <code>null</code>)
+     * @throws IOException if reading the input fails
+     * @throws SAXException if parsing fails
+     */
+    public void parseFragment(InputSource input, String contextLocal, String contextNamespace)
+            throws IOException, SAXException {
+        lazyInit();
+        try {
+            final String local = contextLocal.intern();
+            final String namespace = contextNamespace.intern();
+            treeBuilder.setFragmentContext(local, namespace, null, false);
+            tokenize(input);
+        } finally {
+            // Replay whatever fragment was built, even after a failure.
+            if (saxTreeBuilder != null) {
+                final DocumentFragment built = saxTreeBuilder.getDocumentFragment();
+                final TreeParser replayer = new TreeParser(contentHandler, lexicalHandler);
+                replayer.parse(built);
+            }
+        }
+    }
+
+    /**
+     * Makes sure the input source carries a stream and hands it to the
+     * driver for tokenization.
+     *
+     * <p>If the source has neither a byte stream nor a character stream,
+     * its system identifier is first offered to the entity resolver (if
+     * any). When no stream results from that, the system identifier is
+     * opened as a URL.
+     *
+     * @param is the input source; must not be <code>null</code>
+     * @throws IllegalArgumentException if <code>is</code> is
+     *         <code>null</code> or has no stream and no system identifier
+     * @throws SAXException if the resolver or the tokenizer fails
+     * @throws IOException if the input cannot be opened or read
+     * @throws MalformedURLException if the system identifier is not a valid URL
+     */
+    private void tokenize(InputSource is) throws SAXException, IOException, MalformedURLException {
+        if (is == null) {
+            throw new IllegalArgumentException("Null input.");
+        }
+        if (is.getByteStream() == null && is.getCharacterStream() == null) {
+            String systemId = is.getSystemId();
+            if (systemId == null) {
+                throw new IllegalArgumentException("No byte stream, no character stream nor URI.");
+            }
+            if (entityResolver != null) {
+                // A resolver may legitimately return null to request the
+                // default behavior of opening the system identifier.
+                InputSource resolved = entityResolver.resolveEntity(is.getPublicId(), systemId);
+                if (resolved != null) {
+                    is = resolved;
+                }
+            }
+            // Only fall back to opening the URL when there is still no
+            // stream at all; a source with just one kind of stream is usable.
+            if (is.getByteStream() == null && is.getCharacterStream() == null) {
+                if (is.getSystemId() != null) {
+                    // Honor a system identifier substituted by the resolver.
+                    systemId = is.getSystemId();
+                }
+                is = new InputSource();
+                is.setSystemId(systemId);
+                is.setByteStream(new URL(systemId).openStream());
+            }
+        }
+        driver.tokenize(is);
+    }
+
+    /**
+     * Parses the document identified by a system identifier by wrapping it
+     * in an <code>InputSource</code>.
+     *
+     * @param systemId the system identifier of the document
+     * @see org.xml.sax.XMLReader#parse(java.lang.String)
+     */
+    public void parse(String systemId) throws IOException, SAXException {
+        final InputSource source = new InputSource(systemId);
+        parse(source);
+    }
+
+    /**
+     * Registers the content handler and, when a SAX streamer exists,
+     * forwards it there (substituting a <code>DefaultHandler</code> for
+     * <code>null</code>).
+     *
+     * @param handler the content handler, possibly <code>null</code>
+     * @see org.xml.sax.XMLReader#setContentHandler(org.xml.sax.ContentHandler)
+     */
+    public void setContentHandler(ContentHandler handler) {
+        contentHandler = handler;
+        if (saxStreamer == null) {
+            return;
+        }
+        ContentHandler effective = contentHandler;
+        if (effective == null) {
+            effective = new DefaultHandler();
+        }
+        saxStreamer.setContentHandler(effective);
+    }
+
+    /**
+     * Sets the lexical handler. When a tree builder exists, comments are
+     * ignored exactly when the handler is <code>null</code>, and the handler
+     * is forwarded to the SAX streamer if there is one.
+     *
+     * @param handler the handler, possibly <code>null</code>
+     */
+    public void setLexicalHandler(LexicalHandler handler) {
+        lexicalHandler = handler;
+        if (treeBuilder == null) {
+            return;
+        }
+        final boolean dropComments = (handler == null);
+        treeBuilder.setIgnoringComments(dropComments);
+        if (saxStreamer != null) {
+            saxStreamer.setLexicalHandler(handler);
+        }
+    }
+
+    /**
+     * Stores the DTD handler.
+     *
+     * @param handler the DTD handler
+     * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
+     */
+    public void setDTDHandler(DTDHandler handler) {
+        this.dtdHandler = handler;
+    }
+
+    /**
+     * Stores the entity resolver consulted when an input source has no
+     * stream of its own.
+     *
+     * @param resolver the entity resolver
+     * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
+     */
+    public void setEntityResolver(EntityResolver resolver) {
+        this.entityResolver = resolver;
+    }
+
+    /**
+     * Sets the error handler for both the parser and the tree builder and
+     * drops the current driver.
+     *
+     * @param handler the error handler
+     * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
+     */
+    public void setErrorHandler(ErrorHandler handler) {
+        this.errorHandler = handler;
+        this.treeBuilderErrorHandler = handler;
+        // NOTE(review): presumably the driver is rebuilt by lazyInit() on the
+        // next parse; confirm against lazyInit().
+        this.driver = null;
+    }
+
+    /**
+     * Stores the transition handler and drops the current driver.
+     *
+     * @param handler the transition handler
+     */
+    public void setTransitionHandler(TransitionHandler handler) {
+        this.transitionHandler = handler;
+        this.driver = null;
+    }
+
+    /**
+     * Overrides the error handler used by the tree builder only. The
+     * override is pushed to the tree builder immediately when a driver
+     * already exists.
+     *
+     * @param handler the error handler for the tree builder
+     * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
+     * @deprecated For Validator.nu internal use
+     */
+    public void setTreeBuilderErrorHandlerOverride(ErrorHandler handler) {
+        treeBuilderErrorHandler = handler;
+        if (driver == null) {
+            return;
+        }
+        treeBuilder.setErrorHandler(handler);
+    }
+
+    /**
+     * Sets a boolean feature without having to use non-<code>XMLReader</code>
+     * setters directly.
+     *
+     * <p>
+     * The features that can actually be toggled are:
+     *
+     * <dl>
+     * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt>
+     * <dd><code>setCheckingNormalization</code></dd>
+     * <dt><code>http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata</code></dt>
+     * <dd><code>setHtml4ModeCompatibleWithXhtml1Schemata</code></dd>
+     * <dt><code>http://validator.nu/features/mapping-lang-to-xml-lang</code></dt>
+     * <dd><code>setMappingLangToXmlLang</code></dd>
+     * <dt><code>http://validator.nu/features/scripting-enabled</code></dt>
+     * <dd><code>setScriptingEnabled</code></dd>
+     * </dl>
+     *
+     * <p>
+     * The remaining recognized SAX features have a fixed value; setting one
+     * to its fixed value is a no-op and setting it to the other value fails.
+     *
+     * @param name the feature URI
+     * @param value the requested state
+     * @throws SAXNotRecognizedException if the feature name is unknown
+     * @throws SAXNotSupportedException if a fixed feature is set to the
+     *         value it cannot take
+     * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
+     */
+    public void setFeature(String name, boolean value)
+            throws SAXNotRecognizedException, SAXNotSupportedException {
+        // Features backed by a real setter.
+        if ("http://xml.org/sax/features/unicode-normalization-checking".equals(name)) {
+            setCheckingNormalization(value);
+            return;
+        }
+        if ("http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata".equals(name)) {
+            setHtml4ModeCompatibleWithXhtml1Schemata(value);
+            return;
+        }
+        if ("http://validator.nu/features/mapping-lang-to-xml-lang".equals(name)) {
+            setMappingLangToXmlLang(value);
+            return;
+        }
+        if ("http://validator.nu/features/scripting-enabled".equals(name)) {
+            setScriptingEnabled(value);
+            return;
+        }
+
+        // Features that are permanently off: only "false" is accepted.
+        final boolean fixedOff =
+                "http://xml.org/sax/features/external-general-entities".equals(name)
+                || "http://xml.org/sax/features/external-parameter-entities".equals(name)
+                || "http://xml.org/sax/features/lexical-handler/parameter-entities".equals(name)
+                || "http://xml.org/sax/features/namespace-prefixes".equals(name)
+                || "http://xml.org/sax/features/use-attributes2".equals(name)
+                || "http://xml.org/sax/features/use-locator2".equals(name)
+                || "http://xml.org/sax/features/use-entity-resolver2".equals(name)
+                || "http://xml.org/sax/features/validation".equals(name)
+                || "http://xml.org/sax/features/xmlns-uris".equals(name)
+                || "http://xml.org/sax/features/xml-1.1".equals(name);
+
+        // Features that are permanently on: only "true" is accepted.
+        final boolean fixedOn =
+                "http://xml.org/sax/features/is-standalone".equals(name)
+                || "http://xml.org/sax/features/namespaces".equals(name)
+                || "http://xml.org/sax/features/resolve-dtd-uris".equals(name)
+                || "http://xml.org/sax/features/string-interning".equals(name);
+
+        if (!fixedOff && !fixedOn) {
+            throw new SAXNotRecognizedException();
+        }
+        final boolean onlyAllowedValue = fixedOn;
+        if (value != onlyAllowedValue) {
+            throw new SAXNotSupportedException("Cannot set " + name + ".");
+        }
+    }
+
+    /**
+     * Sets a non-boolean property without having to use non-<code>XMLReader</code>
+     * setters directly.
+     *
+     * <dl>
+     * <dt><code>http://xml.org/sax/properties/lexical-handler</code></dt>
+     * <dd><code>setLexicalHandler</code></dd>
+     * <dt><code>http://validator.nu/properties/content-space-policy</code></dt>
+     * <dd><code>setContentSpacePolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/content-non-xml-char-policy</code></dt>
+     * <dd><code>setContentNonXmlCharPolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/comment-policy</code></dt>
+     * <dd><code>setCommentPolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/xmlns-policy</code></dt>
+     * <dd><code>setXmlnsPolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/name-policy</code></dt>
+     * <dd><code>setNamePolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/streamability-violation-policy</code></dt>
+     * <dd><code>setStreamabilityViolationPolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/document-mode-handler</code></dt>
+     * <dd><code>setDocumentModeHandler</code></dd>
+     * <dt><code>http://validator.nu/properties/doctype-expectation</code></dt>
+     * <dd><code>setDoctypeExpectation</code></dd>
+     * <dt><code>http://validator.nu/properties/xml-policy</code></dt>
+     * <dd><code>setXmlPolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/heuristics</code></dt>
+     * <dd><code>setHeuristics</code></dd>
+     * </dl>
+     *
+     * @param name the property URI
+     * @param value the new value; it is cast to the type the matching setter
+     *        expects, so a value of the wrong type raises
+     *        <code>ClassCastException</code>
+     * @throws SAXNotRecognizedException if the property name is unknown
+     * @throws SAXNotSupportedException if the property is recognized but
+     *         cannot be set
+     * @see org.xml.sax.XMLReader#setProperty(java.lang.String,
+     *      java.lang.Object)
+     */
+    public void setProperty(String name, Object value)
+            throws SAXNotRecognizedException, SAXNotSupportedException {
+        if ("http://xml.org/sax/properties/declaration-handler".equals(name)) {
+            throw new SAXNotSupportedException(
+                    "This parser does not support DeclHandler.");
+        } else if ("http://xml.org/sax/properties/document-xml-version".equals(name)) {
+            throw new SAXNotSupportedException(
+                    "Can't set document-xml-version.");
+        } else if ("http://xml.org/sax/properties/dom-node".equals(name)) {
+            throw new SAXNotSupportedException("Can't set dom-node.");
+        } else if ("http://xml.org/sax/properties/lexical-handler".equals(name)) {
+            setLexicalHandler((LexicalHandler) value);
+        } else if ("http://xml.org/sax/properties/xml-string".equals(name)) {
+            throw new SAXNotSupportedException("Can't set xml-string.");
+        } else if ("http://validator.nu/properties/content-space-policy".equals(name)) {
+            setContentSpacePolicy((XmlViolationPolicy) value);
+        } else if ("http://validator.nu/properties/content-non-xml-char-policy".equals(name)) {
+            setContentNonXmlCharPolicy((XmlViolationPolicy) value);
+        } else if ("http://validator.nu/properties/comment-policy".equals(name)) {
+            setCommentPolicy((XmlViolationPolicy) value);
+        } else if ("http://validator.nu/properties/xmlns-policy".equals(name)) {
+            setXmlnsPolicy((XmlViolationPolicy) value);
+        } else if ("http://validator.nu/properties/name-policy".equals(name)) {
+            setNamePolicy((XmlViolationPolicy) value);
+        } else if ("http://validator.nu/properties/streamability-violation-policy".equals(name)) {
+            setStreamabilityViolationPolicy((XmlViolationPolicy) value);
+        } else if ("http://validator.nu/properties/document-mode-handler".equals(name)) {
+            setDocumentModeHandler((DocumentModeHandler) value);
+        } else if ("http://validator.nu/properties/doctype-expectation".equals(name)) {
+            setDoctypeExpectation((DoctypeExpectation) value);
+        } else if ("http://validator.nu/properties/xml-policy".equals(name)) {
+            setXmlPolicy((XmlViolationPolicy) value);
+        } else if ("http://validator.nu/properties/heuristics".equals(name)) {
+            setHeuristics((Heuristics) value);
+        } else {
+            throw new SAXNotRecognizedException();
+        }
+    }
+
+    /**
+     * Tells whether the source is checked for NFC normalization.
+     *
+     * @return <code>true</code> if NFC normalization of source is being checked
+     * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization()
+     */
+    public boolean isCheckingNormalization() {
+        return this.checkingNormalization;
+    }
+
+    /**
+     * Turns checking of the NFC normalization of source on or off, updating
+     * an existing driver as well.
+     *
+     * @param enable <code>true</code> to check normalization
+     * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean)
+     */
+    public void setCheckingNormalization(boolean enable) {
+        checkingNormalization = enable;
+        if (driver == null) {
+            return;
+        }
+        driver.setCheckingNormalization(enable);
+    }
+
+    /**
+     * Sets the policy for consecutive hyphens in comments, updating an
+     * existing driver as well.
+     *
+     * @param commentPolicy the policy
+     * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
+        this.commentPolicy = commentPolicy;
+        if (driver == null) {
+            return;
+        }
+        driver.setCommentPolicy(commentPolicy);
+    }
+
+    /**
+     * Sets the policy for non-XML characters except white space. The
+     * current driver is dropped rather than updated in place.
+     *
+     * @param contentNonXmlCharPolicy the policy
+     * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setContentNonXmlCharPolicy(
+            XmlViolationPolicy contentNonXmlCharPolicy) {
+        this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
+        this.driver = null;
+    }
+
+    /**
+     * Sets the policy for non-XML white space, updating an existing driver
+     * as well.
+     *
+     * @param contentSpacePolicy the policy
+     * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
+        this.contentSpacePolicy = contentSpacePolicy;
+        if (driver == null) {
+            return;
+        }
+        driver.setContentSpacePolicy(contentSpacePolicy);
+    }
+
+    /**
+     * Tells whether the parser treats scripting as enabled for the purposes
+     * of noscript handling.
+     *
+     * @return <code>true</code> if enabled
+     * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled()
+     */
+    public boolean isScriptingEnabled() {
+        return this.scriptingEnabled;
+    }
+
+    /**
+     * Sets whether the parser treats scripting as enabled for the purposes
+     * of noscript handling, updating an existing tree builder as well.
+     *
+     * @param scriptingEnabled <code>true</code> to enable
+     * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean)
+     */
+    public void setScriptingEnabled(boolean scriptingEnabled) {
+        this.scriptingEnabled = scriptingEnabled;
+        if (treeBuilder == null) {
+            return;
+        }
+        treeBuilder.setScriptingEnabled(scriptingEnabled);
+    }
+
+    /**
+     * Gets the doctype expectation currently configured.
+     *
+     * @return the doctypeExpectation
+     */
+    public DoctypeExpectation getDoctypeExpectation() {
+        return this.doctypeExpectation;
+    }
+
+    /**
+     * Sets the doctype expectation, updating an existing tree builder as
+     * well.
+     *
+     * @param doctypeExpectation
+     *            the doctypeExpectation to set
+     * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation)
+     */
+    public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
+        this.doctypeExpectation = doctypeExpectation;
+        if (treeBuilder == null) {
+            return;
+        }
+        treeBuilder.setDoctypeExpectation(doctypeExpectation);
+    }
+
+    /**
+     * Gets the document mode handler currently configured.
+     *
+     * @return the documentModeHandler
+     */
+    public DocumentModeHandler getDocumentModeHandler() {
+        return this.documentModeHandler;
+    }
+
+    /**
+     * Sets the document mode handler, updating an existing tree builder as
+     * well so that the change takes effect without waiting for the driver
+     * to be re-created (consistent with <code>setDoctypeExpectation</code>
+     * and <code>setScriptingEnabled</code>).
+     *
+     * @param documentModeHandler
+     *            the documentModeHandler to set
+     * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler)
+     */
+    public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
+        this.documentModeHandler = documentModeHandler;
+        if (treeBuilder != null) {
+            treeBuilder.setDocumentModeHandler(documentModeHandler);
+        }
+    }
+
+    /**
+     * Gets the streamability violation policy currently configured.
+     *
+     * @return the streamabilityViolationPolicy
+     */
+    public XmlViolationPolicy getStreamabilityViolationPolicy() {
+        return this.streamabilityViolationPolicy;
+    }
+
+    /**
+     * Sets the streamability violation policy. The current driver is
+     * dropped rather than updated in place.
+     *
+     * @param streamabilityViolationPolicy
+     *            the streamabilityViolationPolicy to set
+     */
+    public void setStreamabilityViolationPolicy(
+            XmlViolationPolicy streamabilityViolationPolicy) {
+        this.streamabilityViolationPolicy = streamabilityViolationPolicy;
+        this.driver = null;
+    }
+
+    /**
+     * Sets whether the HTML 4 mode reports boolean attributes in a way that
+     * repeats the name in the value, updating an existing driver as well.
+     *
+     * @param html4ModeCompatibleWithXhtml1Schemata the new setting
+     */
+    public void setHtml4ModeCompatibleWithXhtml1Schemata(
+            boolean html4ModeCompatibleWithXhtml1Schemata) {
+        this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
+        if (driver == null) {
+            return;
+        }
+        driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+    }
+
+    /**
+     * Returns the <code>Locator</code> during parse.
+     *
+     * <p>Several setters discard the driver, and none exists before it is
+     * first created, so this guards against a missing driver instead of
+     * failing with a <code>NullPointerException</code>.
+     *
+     * @return the <code>Locator</code>, or <code>null</code> when no driver
+     *         is currently available
+     */
+    public Locator getDocumentLocator() {
+        if (driver == null) {
+            return null;
+        }
+        return driver.getDocumentLocator();
+    }
+
+    /**
+     * Tells whether the HTML 4 mode reports boolean attributes in a way that
+     * repeats the name in the value.
+     *
+     * @return the html4ModeCompatibleWithXhtml1Schemata
+     */
+    public boolean isHtml4ModeCompatibleWithXhtml1Schemata() {
+        return this.html4ModeCompatibleWithXhtml1Schemata;
+    }
+
+    /**
+     * Sets whether <code>lang</code> is mapped to <code>xml:lang</code>,
+     * updating an existing driver as well.
+     *
+     * @param mappingLangToXmlLang the new setting
+     * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
+     */
+    public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
+        this.mappingLangToXmlLang = mappingLangToXmlLang;
+        if (driver == null) {
+            return;
+        }
+        driver.setMappingLangToXmlLang(mappingLangToXmlLang);
+    }
+
+    /**
+     * Tells whether <code>lang</code> is mapped to <code>xml:lang</code>.
+     *
+     * @return the mappingLangToXmlLang
+     */
+    public boolean isMappingLangToXmlLang() {
+        return this.mappingLangToXmlLang;
+    }
+
+    /**
+     * Sets whether the <code>xmlns</code> attribute on the root element is
+     * passed through. (FATAL not allowed.) An existing driver is updated as
+     * well.
+     *
+     * @param xmlnsPolicy the policy; must not be <code>FATAL</code>
+     * @throws IllegalArgumentException if the policy is <code>FATAL</code>
+     * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
+        if (XmlViolationPolicy.FATAL == xmlnsPolicy) {
+            throw new IllegalArgumentException("Can't use FATAL here.");
+        }
+        this.xmlnsPolicy = xmlnsPolicy;
+        if (driver == null) {
+            return;
+        }
+        driver.setXmlnsPolicy(xmlnsPolicy);
+    }
+
+    /**
+     * Gets the xmlns policy currently configured.
+     *
+     * @return the xmlnsPolicy
+     */
+    public XmlViolationPolicy getXmlnsPolicy() {
+        return this.xmlnsPolicy;
+    }
+
+    /**
+     * Gets the lexical handler currently registered.
+     *
+     * @return the lexicalHandler
+     */
+    public LexicalHandler getLexicalHandler() {
+        return this.lexicalHandler;
+    }
+
+    /**
+     * Gets the comment policy currently configured.
+     *
+     * @return the commentPolicy
+     */
+    public XmlViolationPolicy getCommentPolicy() {
+        return this.commentPolicy;
+    }
+
+    /**
+     * Gets the policy for non-XML characters currently configured.
+     *
+     * @return the contentNonXmlCharPolicy
+     */
+    public XmlViolationPolicy getContentNonXmlCharPolicy() {
+        return this.contentNonXmlCharPolicy;
+    }
+
+    /**
+     * Gets the policy for non-XML white space currently configured.
+     *
+     * @return the contentSpacePolicy
+     */
+    public XmlViolationPolicy getContentSpacePolicy() {
+        return this.contentSpacePolicy;
+    }
+
+    /**
+     * Sets the doctype reporting flag, updating an existing tree builder as
+     * well.
+     *
+     * @param reportingDoctype the new setting
+     * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean)
+     */
+    public void setReportingDoctype(boolean reportingDoctype) {
+        this.reportingDoctype = reportingDoctype;
+        if (treeBuilder == null) {
+            return;
+        }
+        treeBuilder.setReportingDoctype(reportingDoctype);
+    }
+
+    /**
+     * Gets the doctype reporting flag.
+     *
+     * @return the reportingDoctype
+     */
+    public boolean isReportingDoctype() {
+        return this.reportingDoctype;
+    }
+
+ /**
+ * Stores the error profile map on this parser.
+ *
+ * NOTE(review): this only records the map; nothing here pushes it to an
+ * already-created driver or resets the driver. Confirm where it is consumed.
+ *
+ * @param errorProfileMap the error profile map to retain
+ * @see nu.validator.htmlparser.impl.ErrorReportingTokenizer
+ */
+ public void setErrorProfile(HashMap<String, String> errorProfileMap) {
+ this.errorProfileMap = errorProfileMap;
+ }
+
+    /**
+     * Sets the policy for non-NCName element and attribute names. When a
+     * driver exists, both the driver and the tree builder are updated.
+     *
+     * @param namePolicy the policy
+     * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setNamePolicy(XmlViolationPolicy namePolicy) {
+        this.namePolicy = namePolicy;
+        if (driver == null) {
+            return;
+        }
+        driver.setNamePolicy(namePolicy);
+        treeBuilder.setNamePolicy(namePolicy);
+    }
+
+    /**
+     * Sets the encoding sniffing heuristics, updating an existing driver as
+     * well.
+     *
+     * @param heuristics the heuristics to set
+     * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics)
+     */
+    public void setHeuristics(Heuristics heuristics) {
+        this.heuristics = heuristics;
+        if (driver == null) {
+            return;
+        }
+        driver.setHeuristics(heuristics);
+    }
+
+    /**
+     * Gets the encoding sniffing heuristics currently configured.
+     *
+     * @return the heuristics
+     */
+    public Heuristics getHeuristics() {
+        return heuristics;
+    }
+
+    /**
+     * Convenience method that applies one policy to the name, xmlns, content
+     * space, content non-XML char and comment policies in one go. The
+     * streamability policy and doctype reporting are left untouched.
+     *
+     * <p>Because the xmlns policy rejects <code>FATAL</code>, that value is
+     * replaced with <code>ALTER_INFOSET</code> for the xmlns policy only.
+     *
+     * @param xmlPolicy the policy to apply
+     */
+    public void setXmlPolicy(XmlViolationPolicy xmlPolicy) {
+        final XmlViolationPolicy policyForXmlns;
+        if (xmlPolicy == XmlViolationPolicy.FATAL) {
+            policyForXmlns = XmlViolationPolicy.ALTER_INFOSET;
+        } else {
+            policyForXmlns = xmlPolicy;
+        }
+        setNamePolicy(xmlPolicy);
+        setXmlnsPolicy(policyForXmlns);
+        setContentSpacePolicy(xmlPolicy);
+        setContentNonXmlCharPolicy(xmlPolicy);
+        setCommentPolicy(xmlPolicy);
+    }
+
+    /**
+     * Gets the policy for non-NCName element and attribute names.
+     *
+     * @return the namePolicy
+     */
+    public XmlViolationPolicy getNamePolicy() {
+        return this.namePolicy;
+    }
+
+ /**
+ * Does nothing; the argument is ignored.
+ *
+ * @param bogusXmlnsPolicy ignored
+ * @deprecated
+ */
+ public void setBogusXmlnsPolicy(
+ XmlViolationPolicy bogusXmlnsPolicy) {
+ // Intentionally empty.
+ }
+
+ /**
+ * Always returns <code>XmlViolationPolicy.ALTER_INFOSET</code>, regardless
+ * of any value passed to <code>setBogusXmlnsPolicy</code>.
+ *
+ * @deprecated
+ * @return <code>XmlViolationPolicy.ALTER_INFOSET</code>
+ */
+ public XmlViolationPolicy getBogusXmlnsPolicy() {
+ return XmlViolationPolicy.ALTER_INFOSET;
+ }
+
+    /**
+     * Registers a character handler with this parser and, when a driver
+     * already exists, with that driver too.
+     *
+     * @param characterHandler the handler to add
+     */
+    public void addCharacterHandler(CharacterHandler characterHandler) {
+        characterHandlers.add(characterHandler);
+        if (driver == null) {
+            return;
+        }
+        driver.addCharacterHandler(characterHandler);
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlSerializer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlSerializer.java
new file mode 100644
index 000000000..3312398d5
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlSerializer.java
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2011 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.util.Arrays;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+ * A SAX <code>ContentHandler</code> and <code>LexicalHandler</code> that
+ * writes the events it receives as HTML markup to a <code>Writer</code>.
+ *
+ * <p>Elements outside the XHTML, SVG and MathML namespaces are dropped
+ * together with their descendant elements. Text inside the elements listed
+ * in <code>NON_ESCAPING</code> is written without escaping.
+ */
+public class HtmlSerializer implements ContentHandler, LexicalHandler {
+
+    /** Elements that get no end tag. Must stay sorted for binary search. */
+    private static final String[] VOID_ELEMENTS = { "area", "base", "basefont",
+            "bgsound", "br", "col", "command", "embed", "frame", "hr", "img",
+            "input", "keygen", "link", "meta", "param", "source", "track",
+            "wbr" };
+
+    /** Elements whose text is written raw. Must stay sorted for binary search. */
+    private static final String[] NON_ESCAPING = { "iframe", "noembed",
+            "noframes", "noscript", "plaintext", "script", "style", "xmp" };
+
+    /** Wraps a byte stream in a UTF-8 writer. */
+    private static Writer wrap(OutputStream out) {
+        try {
+            return new OutputStreamWriter(out, "UTF-8");
+        } catch (UnsupportedEncodingException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /** Depth counter; while positive, start and end tags are not written. */
+    private int ignoreLevel = 0;
+
+    /** Depth counter; while positive, text is written without escaping. */
+    private int escapeLevel = 0;
+
+    private final Writer writer;
+
+    /**
+     * Creates a serializer writing UTF-8 to the given stream.
+     *
+     * @param out the destination stream
+     */
+    public HtmlSerializer(OutputStream out) {
+        this(wrap(out));
+    }
+
+    /**
+     * Creates a serializer writing to the given writer.
+     *
+     * @param out the destination writer
+     */
+    public HtmlSerializer(Writer out) {
+        this.writer = out;
+    }
+
+    /** Writes one text character, escaping markup-significant ones. */
+    private void writeTextChar(char c) throws IOException {
+        if (c == '<') {
+            writer.write("&lt;");
+        } else if (c == '>') {
+            writer.write("&gt;");
+        } else if (c == '&') {
+            writer.write("&amp;");
+        } else if (c == '\u00A0') {
+            writer.write("&nbsp;");
+        } else {
+            writer.write(c);
+        }
+    }
+
+    /** Writes an attribute value with quotes, ampersands and NBSP escaped. */
+    private void writeAttributeValue(String val) throws IOException {
+        final int len = val.length();
+        for (int pos = 0; pos < len; pos++) {
+            final char c = val.charAt(pos);
+            if (c == '"') {
+                writer.write("&quot;");
+            } else if (c == '&') {
+                writer.write("&amp;");
+            } else if (c == '\u00A0') {
+                writer.write("&nbsp;");
+            } else {
+                writer.write(c);
+            }
+        }
+    }
+
+    /**
+     * Picks the text written in front of an attribute's local name.
+     *
+     * @return the separator plus optional prefix, or <code>null</code> when
+     *         the attribute is to be left out
+     */
+    private static String attributeLead(boolean xhtml, String attUri,
+            String attLocal) {
+        if (attUri.length() == 0) {
+            return " ";
+        }
+        if (!xhtml && "http://www.w3.org/1999/xlink".equals(attUri)) {
+            return " xlink:";
+        }
+        if ("http://www.w3.org/XML/1998/namespace".equals(attUri)) {
+            if (!xhtml) {
+                return " xml:";
+            }
+            // On XHTML elements only xml:lang survives, written as lang.
+            return "lang".equals(attLocal) ? " " : null;
+        }
+        return null;
+    }
+
+    public void characters(char[] ch, int start, int length)
+            throws SAXException {
+        try {
+            if (escapeLevel > 0) {
+                writer.write(ch, start, length);
+                return;
+            }
+            final int end = start + length;
+            for (int pos = start; pos < end; pos++) {
+                writeTextChar(ch[pos]);
+            }
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /** Flushes and closes the underlying writer. */
+    public void endDocument() throws SAXException {
+        try {
+            writer.flush();
+            writer.close();
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    public void endElement(String uri, String localName, String qName)
+            throws SAXException {
+        if (escapeLevel > 0) {
+            escapeLevel--;
+        }
+        if (ignoreLevel > 0) {
+            // Closing an ignored or void element: no end tag.
+            ignoreLevel--;
+            return;
+        }
+        try {
+            writer.write("</");
+            writer.write(localName);
+            writer.write('>');
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    public void ignorableWhitespace(char[] ch, int start, int length)
+            throws SAXException {
+        characters(ch, start, length);
+    }
+
+    public void processingInstruction(String target, String data)
+            throws SAXException {
+    }
+
+    public void setDocumentLocator(Locator locator) {
+    }
+
+    /** Writes the HTML doctype line. */
+    public void startDocument() throws SAXException {
+        try {
+            writer.write("<!DOCTYPE html>\n");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    public void startElement(String uri, String localName, String qName,
+            Attributes atts) throws SAXException {
+        if (escapeLevel > 0) {
+            escapeLevel++;
+        }
+        final boolean xhtml = "http://www.w3.org/1999/xhtml".equals(uri);
+        final boolean serializable = xhtml
+                || "http://www.w3.org/2000/svg".equals(uri)
+                || "http://www.w3.org/1998/Math/MathML".equals(uri);
+        if (ignoreLevel > 0 || !serializable) {
+            ignoreLevel++;
+            return;
+        }
+        try {
+            writer.write('<');
+            writer.write(localName);
+            final int count = atts.getLength();
+            for (int idx = 0; idx < count; idx++) {
+                final String attUri = atts.getURI(idx);
+                final String attLocal = atts.getLocalName(idx);
+                final String lead = attributeLead(xhtml, attUri, attLocal);
+                if (lead == null) {
+                    continue;
+                }
+                writer.write(lead);
+                writer.write(attLocal);
+                writer.write('=');
+                writer.write('"');
+                writeAttributeValue(atts.getValue(idx));
+                writer.write('"');
+            }
+            writer.write('>');
+            if (Arrays.binarySearch(VOID_ELEMENTS, localName) >= 0) {
+                // The matching endElement call consumes this level silently.
+                ignoreLevel++;
+                return;
+            }
+            final boolean leadingNewline = "pre".equals(localName)
+                    || "textarea".equals(localName)
+                    || "listing".equals(localName);
+            if (leadingNewline) {
+                writer.write('\n');
+            }
+            if (escapeLevel == 0
+                    && Arrays.binarySearch(NON_ESCAPING, localName) >= 0) {
+                escapeLevel = 1;
+            }
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /** Writes a comment unless inside an ignored or raw-text element. */
+    public void comment(char[] ch, int start, int length) throws SAXException {
+        final boolean suppressed = ignoreLevel > 0 || escapeLevel > 0;
+        if (suppressed) {
+            return;
+        }
+        try {
+            writer.write("<!--");
+            writer.write(ch, start, length);
+            writer.write("-->");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    public void endCDATA() throws SAXException {
+    }
+
+    public void endDTD() throws SAXException {
+    }
+
+    public void endEntity(String name) throws SAXException {
+    }
+
+    public void startCDATA() throws SAXException {
+    }
+
+    public void startDTD(String name, String publicId, String systemId)
+            throws SAXException {
+    }
+
+    public void startEntity(String name) throws SAXException {
+    }
+
+    public void startPrefixMapping(String prefix, String uri)
+            throws SAXException {
+    }
+
+    public void endPrefixMapping(String prefix) throws SAXException {
+    }
+
+    public void skippedEntity(String name) throws SAXException {
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java
new file mode 100644
index 000000000..33e98dbe8
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+
+/**
+ * A subclass of <code>HtmlParser</code> whose only addition is a no-argument
+ * constructor that calls the superclass constructor with the
+ * <code>ALTER_INFOSET</code> policy. This is convenient when another Java
+ * component wants an implementation of <code>XMLReader</code> with a
+ * no-argument constructor and infoset coercion is the wanted behavior.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class InfosetCoercingHtmlParser extends HtmlParser {
+
+ /**
+ * Creates the parser by passing <code>ALTER_INFOSET</code> to the
+ * superclass constructor.
+ */
+ public InfosetCoercingHtmlParser() {
+ super(XmlViolationPolicy.ALTER_INFOSET);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java
new file mode 100644
index 000000000..b6cb2f872
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import java.io.OutputStream;
+import java.io.Writer;
+
+import nu.validator.htmlparser.impl.NCName;
+
+import org.xml.sax.SAXException;
+
+/**
+ * An <code>XmlSerializer</code> whose name hook rejects every name that
+ * <code>NCName.isNCName</code> does not accept.
+ */
+public class NameCheckingXmlSerializer extends XmlSerializer {
+
+    /**
+     * Creates a name-checking serializer; <code>out</code> is passed to the
+     * superclass constructor unchanged.
+     */
+    public NameCheckingXmlSerializer(OutputStream out) {
+        super(out);
+    }
+
+    /**
+     * Creates a name-checking serializer; <code>out</code> is passed to the
+     * superclass constructor unchanged.
+     */
+    public NameCheckingXmlSerializer(Writer out) {
+        super(out);
+    }
+
+    /**
+     * Accepts <code>name</code> only if it is an NCName.
+     *
+     * @param name the name to validate
+     * @throws SAXException if <code>NCName.isNCName(name)</code> is false
+     */
+    @Override
+    protected void checkNCName(String name) throws SAXException {
+        if (NCName.isNCName(name)) {
+            return;
+        }
+        throw new SAXException("Not an XML 1.0 4th ed. NCName: " + name);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXStreamer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXStreamer.java
new file mode 100644
index 000000000..07ff5da4a
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXStreamer.java
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import nu.validator.htmlparser.impl.HtmlAttributes;
+import nu.validator.htmlparser.impl.TreeBuilder;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+import org.xml.sax.ext.LexicalHandler;
+
+class SAXStreamer extends TreeBuilder<Attributes>{
+
+ private static final char[] ISINDEX_PROMPT = "This is a searchable index. Enter search keywords: ".toCharArray();
+
+ private ContentHandler contentHandler = null;
+ private LexicalHandler lexicalHandler = null;
+
+ SAXStreamer() {
+ super();
+ }
+
+ @Override
+ protected void addAttributesToElement(Attributes element, HtmlAttributes attributes) throws SAXException {
+ Attributes existingAttrs = element;
+ for (int i = 0; i < attributes.getLength(); i++) {
+ String qName = attributes.getQNameNoBoundsCheck(i);
+ if (existingAttrs.getIndex(qName) < 0) {
+ fatal();
+ }
+ }
+ }
+
+ @Override
+ protected void appendCharacters(Attributes parent, char[] buf, int start, int length) throws SAXException {
+ contentHandler.characters(buf, start, length);
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#appendIsindexPrompt(java.lang.Object)
+ */
+ @Override protected void appendIsindexPrompt(Attributes parent)
+ throws SAXException {
+ contentHandler.characters(ISINDEX_PROMPT, 0, ISINDEX_PROMPT.length);
+ }
+
+ @Override
+ protected void appendChildrenToNewParent(Attributes oldParent, Attributes newParent) throws SAXException {
+ fatal();
+ }
+
+ @Override
+ protected void appendComment(Attributes parent, char[] buf, int start, int length) throws SAXException {
+ if (lexicalHandler != null) {
+ lexicalHandler.comment(buf, start, length);
+ }
+ }
+
+ @Override
+ protected void appendCommentToDocument(char[] buf, int start, int length)
+ throws SAXException {
+ if (lexicalHandler != null) {
+ lexicalHandler.comment(buf, start, length);
+ }
+ }
+
+ @Override
+ protected Attributes createElement(String ns, String name, HtmlAttributes attributes, Attributes intendedParent) throws SAXException {
+ return attributes;
+ }
+
+ @Override
+ protected Attributes createHtmlElementSetAsRoot(HtmlAttributes attributes) throws SAXException {
+ return attributes;
+ }
+
+ @Override
+ protected void detachFromParent(Attributes element) throws SAXException {
+ fatal();
+ }
+
+ @Override
+ protected void appendElement(Attributes child, Attributes newParent) throws SAXException {
+ }
+
+ @Override
+ protected boolean hasChildren(Attributes element) throws SAXException {
+ return false;
+ }
+
+ public void setContentHandler(ContentHandler handler) {
+ contentHandler = handler;
+ }
+
+ public void setLexicalHandler(LexicalHandler handler) {
+ lexicalHandler = handler;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#appendDoctypeToDocument(java.lang.String, java.lang.String, java.lang.String)
+ */
+ @Override
+ protected void appendDoctypeToDocument(String name, String publicIdentifier, String systemIdentifier) throws SAXException {
+ if (lexicalHandler != null) {
+ lexicalHandler.startDTD(name, publicIdentifier, systemIdentifier);
+ lexicalHandler.endDTD();
+ }
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#elementPopped(String, java.lang.String, java.lang.Object)
+ */
+ @Override
+ protected void elementPopped(String ns, String name, Attributes node) throws SAXException {
+ contentHandler.endElement(ns, name, name);
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#elementPushed(String, java.lang.String, java.lang.Object)
+ */
+ @Override
+ protected void elementPushed(String ns, String name, Attributes node) throws SAXException {
+ contentHandler.startElement(ns, name, name, node);
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#end()
+ */
+ @Override
+ protected void end() throws SAXException {
+ contentHandler.endDocument();
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#start()
+ */
+ @Override
+ protected void start(boolean fragment) throws SAXException {
+ contentHandler.setDocumentLocator(tokenizer);
+ if (!fragment) {
+ contentHandler.startDocument();
+ }
+ }
+
+ protected void fatal() throws SAXException {
+ SAXParseException spe = new SAXParseException(
+ "Cannot recover after last error. Any further errors will be ignored.",
+ tokenizer);
+ if (errorHandler != null) {
+ errorHandler.fatalError(spe);
+ }
+ throw spe;
+ }
+
+ @Override
+ protected Attributes createAndInsertFosterParentedElement(String ns, String name,
+ HtmlAttributes attributes, Attributes table, Attributes stackParent) throws SAXException {
+ fatal();
+ throw new RuntimeException("Unreachable");
+ }
+
+ @Override protected void insertFosterParentedCharacters(char[] buf,
+ int start, int length, Attributes table, Attributes stackParent)
+ throws SAXException {
+ fatal();
+ }
+
+ @Override protected void insertFosterParentedChild(Attributes child,
+ Attributes table, Attributes stackParent) throws SAXException {
+ fatal();
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXTreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXTreeBuilder.java
new file mode 100644
index 000000000..ef51d2a51
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXTreeBuilder.java
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import nu.validator.htmlparser.impl.HtmlAttributes;
+import nu.validator.htmlparser.impl.TreeBuilder;
+import nu.validator.saxtree.Characters;
+import nu.validator.saxtree.Comment;
+import nu.validator.saxtree.DTD;
+import nu.validator.saxtree.Document;
+import nu.validator.saxtree.DocumentFragment;
+import nu.validator.saxtree.Element;
+import nu.validator.saxtree.Node;
+import nu.validator.saxtree.ParentNode;
+
+import org.xml.sax.SAXException;
+
+class SAXTreeBuilder extends TreeBuilder<Element> {
+
+ private static final char[] ISINDEX_PROMPT = "This is a searchable index. Enter search keywords: ".toCharArray();
+
+ private Document document;
+
+ private Node cachedTable = null;
+
+ private Node cachedTablePreviousSibling = null;
+
+ SAXTreeBuilder() {
+ super();
+ }
+
+ @Override
+ protected void appendComment(Element parent, char[] buf, int start, int length) {
+ parent.appendChild(new Comment(tokenizer, buf, start, length));
+ }
+
+ @Override
+ protected void appendCommentToDocument(char[] buf, int start, int length) {
+ document.appendChild(new Comment(tokenizer, buf, start, length));
+ }
+
+ @Override
+ protected void appendCharacters(Element parent, char[] buf, int start, int length) {
+ parent.appendChild(new Characters(tokenizer, buf, start, length));
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#appendIsindexPrompt(java.lang.Object)
+ */
+ @Override protected void appendIsindexPrompt(Element parent)
+ throws SAXException {
+ parent.appendChild(new Characters(tokenizer, ISINDEX_PROMPT, 0, ISINDEX_PROMPT.length));
+ }
+
+ @Override
+ protected boolean hasChildren(Element element) {
+ return element.getFirstChild() != null;
+ }
+
+ @Override
+ protected void appendElement(Element child, Element newParent) {
+ newParent.appendChild(child);
+ }
+
+ @Override
+ protected Element createHtmlElementSetAsRoot(HtmlAttributes attributes) {
+ Element newElt = new Element(tokenizer, "http://www.w3.org/1999/xhtml", "html", "html", attributes, true, null);
+ document.appendChild(newElt);
+ return newElt;
+ }
+
+ @Override
+ protected void addAttributesToElement(Element element, HtmlAttributes attributes) throws SAXException {
+ HtmlAttributes existingAttrs = (HtmlAttributes) element.getAttributes();
+ existingAttrs.merge(attributes);
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#appendDoctypeToDocument(java.lang.String, java.lang.String, java.lang.String)
+ */
+ @Override
+ protected void appendDoctypeToDocument(String name, String publicIdentifier, String systemIdentifier) {
+ DTD dtd = new DTD(tokenizer, name, publicIdentifier, systemIdentifier);
+ dtd.setEndLocator(tokenizer);
+ document.appendChild(dtd);
+ }
+
+ /**
+ * Returns the document.
+ *
+ * @return the document
+ */
+ Document getDocument() {
+ Document rv = document;
+ document = null;
+ return rv;
+ }
+
+ DocumentFragment getDocumentFragment() {
+ DocumentFragment rv = new DocumentFragment();
+ rv.appendChildren(document.getFirstChild());
+ document = null;
+ return rv;
+ }
+
+ /**
+ * @throws SAXException
+ * @see nu.validator.htmlparser.impl.TreeBuilder#end()
+ */
+ @Override
+ protected void end() throws SAXException {
+ document.setEndLocator(tokenizer);
+ cachedTable = null;
+ cachedTablePreviousSibling = null;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#start()
+ */
+ @Override
+ protected void start(boolean fragment) {
+ document = new Document(tokenizer);
+ cachedTable = null;
+ cachedTablePreviousSibling = null;
+ }
+
+ @Override
+ protected void appendChildrenToNewParent(Element oldParent, Element newParent) throws SAXException {
+ newParent.appendChildren(oldParent);
+ }
+
+ @Override
+ protected Element createElement(String ns, String name, HtmlAttributes attributes,
+ Element intendedParent) throws SAXException {
+ return new Element(tokenizer, ns, name, name, attributes, true, null);
+ }
+
+ @Override
+ protected Element createAndInsertFosterParentedElement(String ns, String name,
+ HtmlAttributes attributes, Element table, Element stackParent) throws SAXException {
+ ParentNode parent = table.getParentNode();
+ Element child = createElement(ns, name, attributes, parent != null ? (Element) parent : stackParent);
+ if (parent != null) { // always an element if not null
+ parent.insertBetween(child, previousSibling(table), table);
+ cachedTablePreviousSibling = child;
+ } else {
+ stackParent.appendChild(child);
+ }
+
+ return child;
+ }
+
+ @Override protected void insertFosterParentedCharacters(char[] buf,
+ int start, int length, Element table, Element stackParent) throws SAXException {
+ Node child = new Characters(tokenizer, buf, start, length);
+ ParentNode parent = table.getParentNode();
+ if (parent != null) { // always an element if not null
+ parent.insertBetween(child, previousSibling(table), table);
+ cachedTablePreviousSibling = child;
+ } else {
+ stackParent.appendChild(child);
+ }
+ }
+
+ @Override protected void insertFosterParentedChild(Element child,
+ Element table, Element stackParent) throws SAXException {
+ ParentNode parent = table.getParentNode();
+ if (parent != null) { // always an element if not null
+ parent.insertBetween(child, previousSibling(table), table);
+ cachedTablePreviousSibling = child;
+ } else {
+ stackParent.appendChild(child);
+ }
+ }
+
+ private Node previousSibling(Node table) {
+ if (table == cachedTable) {
+ return cachedTablePreviousSibling;
+ } else {
+ cachedTable = table;
+ return (cachedTablePreviousSibling = table.getPreviousSibling());
+ }
+ }
+
+ @Override protected void detachFromParent(Element element)
+ throws SAXException {
+ element.detach();
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/XmlSerializer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/XmlSerializer.java
new file mode 100644
index 000000000..5dccf5d3a
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/XmlSerializer.java
@@ -0,0 +1,737 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CodingErrorAction;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.Set;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+public class XmlSerializer implements ContentHandler, LexicalHandler {
+
+ /**
+  * An immutable prefix-to-namespace binding. Equality and hashing look at
+  * the prefix only, so a set holds at most one binding per prefix.
+  */
+ private final class PrefixMapping {
+     /** Namespace URI the prefix is bound to. */
+     public final String uri;
+
+     /** The bound prefix; the only field used by equals and hashCode. */
+     public final String prefix;
+
+     /**
+      * @param uri the namespace URI
+      * @param prefix the prefix bound to it
+      */
+     public PrefixMapping(String uri, String prefix) {
+         this.prefix = prefix;
+         this.uri = uri;
+     }
+
+     /**
+      * Two mappings are equal when their prefixes are equal; the URI is
+      * not compared.
+      */
+     @Override public final boolean equals(Object obj) {
+         return obj instanceof PrefixMapping
+                 && this.prefix.equals(((PrefixMapping) obj).prefix);
+     }
+
+     /** Hash of the prefix alone, consistent with equals. */
+     @Override public final int hashCode() {
+         return prefix.hashCode();
+     }
+
+ }
+
+ /**
+  * One entry of the serializer's stack: the namespace URI, prefix and
+  * qName it was pushed with, plus a mutable set of prefix mappings.
+  */
+ private final class StackNode {
+     public final String uri;
+
+     public final String prefix;
+
+     public final String qName;
+
+     /** Prefix mappings recorded on this node; starts out empty. */
+     public final Set<PrefixMapping> mappings = new HashSet<PrefixMapping>();
+
+     /**
+      * @param uri the namespace URI
+      * @param qName the qualified name
+      * @param prefix the prefix
+      */
+     public StackNode(String uri, String qName, String prefix) {
+         this.prefix = prefix;
+         this.qName = qName;
+         this.uri = uri;
+     }
+ }
+
+ private final static Map<String, String> WELL_KNOWN_ATTRIBUTE_PREFIXES = new HashMap<String, String>();
+
+ static {
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put("adobe:ns:meta/", "x");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+ "http://inkscape.sourceforge.net/DTD/sodipodi-0.dtd",
+ "sodipodi");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+ "http://ns.adobe.com/AdobeIllustrator/10.0/", "i");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+ "http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/", "a");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+ "http://ns.adobe.com/Extensibility/1.0/", "x");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+ "http://ns.adobe.com/illustrator/1.0/", "illustrator");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/pdf/1.3/", "pdf");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/photoshop/1.0/",
+ "photoshop");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/tiff/1.0/",
+ "tiff");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/", "xap");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/g/",
+ "xapG");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/mm/",
+ "xapMM");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+ "http://ns.adobe.com/xap/1.0/rights/", "xapRights");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+ "http://ns.adobe.com/xap/1.0/sType/Dimensions#", "stDim");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+ "http://ns.adobe.com/xap/1.0/sType/ResourceRef#", "stRef");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/t/pg/",
+ "xapTPg");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://purl.org/dc/elements/1.1/",
+ "dc");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+ "http://schemas.microsoft.com/visio/2003/SVGExtensions/", "v");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+ "http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd",
+ "sodipodi");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://w3.org/1999/xlink", "xlink");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://www.carto.net/attrib/",
+ "attrib");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+ "http://www.iki.fi/pav/software/textext/", "textext");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+ "http://www.inkscape.org/namespaces/inkscape", "inkscape");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+ "http://www.justsystem.co.jp/hanako13/svg", "jsh");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://www.w3.org/1999/xlink",
+ "xlink");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
+ "http://www.w3.org/2001/XMLSchema-instance", "xsi");
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://www.w3.org/1999/xlink",
+ "xlink");
+ }
+
+ private final static Map<String, String> WELL_KNOWN_ELEMENT_PREFIXES = new HashMap<String, String>();
+
+ static {
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.w3.org/1999/XSL/Transform",
+ "xsl");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://purl.org/dc/elements/1.1/",
+ "dc");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://www.w3.org/2001/XMLSchema-instance", "xsi");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.ascc.net/xml/schematron",
+ "sch");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://purl.oclc.org/dsdl/schematron",
+ "sch");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://www.inkscape.org/namespaces/inkscape", "inkscape");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd",
+ "sodipodi");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/", "a");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://ns.adobe.com/AdobeIllustrator/10.0/", "i");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("adobe:ns:meta/", "x");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/", "xap");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/pdf/1.3/", "pdf");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/tiff/1.0/", "tiff");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://creativecommons.org/ns#", "cc");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://inkscape.sourceforge.net/DTD/sodipodi-0.dtd",
+ "sodipodi");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/", "Iptc4xmpCore");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/exif/1.0/", "exif");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://ns.adobe.com/Extensibility/1.0/", "x");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/illustrator/1.0/",
+ "illustrator");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/pdfx/1.3/", "pdfx");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/photoshop/1.0/",
+ "photoshop");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/Variables/1.0/",
+ "v");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/g/",
+ "xapG");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/g/img/",
+ "xapGImg");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/mm/",
+ "xapMM");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/rights/",
+ "xapRights");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://ns.adobe.com/xap/1.0/sType/Dimensions#", "stDim");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://ns.adobe.com/xap/1.0/sType/Font#", "stFnt");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://ns.adobe.com/xap/1.0/sType/ResourceRef#", "stRef");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/t/pg/",
+ "xapTPg");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://product.corel.com/CGS/11/cddns/", "odm");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://schemas.microsoft.com/visio/2003/SVGExtensions/", "v");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://web.resource.org/cc/", "cc");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://www.freesoftware.fsf.org/bkchem/cdml", "cdml");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.opengis.net/gml", "gml");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.svgmaker.com/svgns",
+ "svgmaker");
+ WELL_KNOWN_ELEMENT_PREFIXES.put(
+ "http://www.w3.org/2000/01/rdf-schema#", "rdfs");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://xmlns.com/foaf/0.1/", "foaf");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.xml-cml.org/schema/stmml",
+ "stm");
+ WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.iupac.org/foo/ichi", "ichi");
+ }
+
+ /**
+  * Wraps a byte stream in a UTF-8 writer whose encoder replaces malformed
+  * or unmappable input with U+FFFD instead of failing.
+  */
+ private final static Writer wrap(OutputStream out) {
+     CharsetEncoder utf8Encoder = Charset.forName("utf-8").newEncoder();
+     utf8Encoder.onMalformedInput(CodingErrorAction.REPLACE);
+     utf8Encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+     byte[] replacement;
+     try {
+         replacement = "\uFFFD".getBytes("utf-8");
+     } catch (UnsupportedEncodingException e) {
+         throw new RuntimeException(e);
+     }
+     utf8Encoder.replaceWith(replacement);
+     return new OutputStreamWriter(out, utf8Encoder);
+ }
+
+ // Stack of nodes that grows from the head: push() adds with addFirst(), pop() removes with removeFirst().
+ private final LinkedList<StackNode> stack = new LinkedList<StackNode>();
+
+ private final Writer writer; // destination of all serialized output
+
+ /**
+  * Creates a serializer that writes to <code>out</code> through the
+  * writer produced by <code>wrap(out)</code>.
+  */
+ public XmlSerializer(OutputStream out) {
+     this(wrap(out));
+ }
+
+ /**
+  * Creates a serializer that writes to <code>out</code>. Note that
+  * endDocument() flushes and closes this writer.
+  */
+ public XmlSerializer(Writer out) {
+     this.writer = out;
+ }
+
+ /**
+  * Hook called with element local names, attribute local names and
+  * processing-instruction targets before they are written. This base
+  * implementation accepts every name; subclasses may override it to
+  * reject names by throwing.
+  *
+  * @param name the name about to be written
+  * @throws SAXException never thrown here; available to overriders
+  */
+ protected void checkNCName(String name) throws SAXException {
+     // intentionally empty: no checking in the base class
+ }
+
+ private final void push(String uri, String local, String prefix) {
+ stack.addFirst(new StackNode(uri, local, prefix));
+ }
+
+ /**
+  * Removes the head node and returns its qName. The mappings recorded on
+  * the node that becomes the new head are cleared as well (presumably
+  * these are the declarations of the element just closed, since
+  * startElement() reads the head's mappings before pushing).
+  */
+ private final String pop() {
+     StackNode closed = stack.removeFirst();
+     stack.getFirst().mappings.clear();
+     return closed.qName;
+ }
+
+ private final String lookupPrefixAttribute(String ns) {
+ if ("http://www.w3.org/XML/1998/namespace".equals(ns)) {
+ return "xml";
+ }
+ Set<String> hidden = new HashSet<String>();
+ for (StackNode node : stack) {
+ for (PrefixMapping mapping : node.mappings) {
+ if (mapping.prefix.length() != 0 && mapping.uri.equals(ns)
+ && !hidden.contains(mapping.prefix)) {
+ return mapping.prefix;
+ }
+ hidden.add(mapping.prefix);
+ }
+ }
+ return null;
+ }
+
+ /**
+  * Returns the URI of the first mapping for <code>prefix</code> found
+  * when scanning the stack from the head, or <code>null</code> if the
+  * prefix is not bound.
+  */
+ private final String lookupUri(String prefix) {
+     for (StackNode frame : stack) {
+         for (PrefixMapping binding : frame.mappings) {
+             if (!binding.prefix.equals(prefix)) {
+                 continue;
+             }
+             return binding.uri;
+         }
+     }
+     return null;
+ }
+
+ /**
+  * Tells whether a qName is a namespace declaration, i.e. exactly
+  * "xmlns" or starting with "xmlns:". A <code>null</code> name is not.
+  */
+ private final boolean xmlNsQname(String name) {
+     return name != null
+             && ("xmlns".equals(name) || name.startsWith("xmlns:"));
+ }
+
+ /**
+  * Writes <code>val</code> escaped for use inside a double-quoted
+  * attribute value. Markup characters and the double quote become
+  * entities, CR/TAB/LF become character references, U+FFFE, U+FFFF and
+  * other characters below U+0020 become U+FFFD. A space is written
+  * literally only when it is not the first character, not the last one
+  * and does not follow a literally written space; otherwise it is
+  * written as <code>&amp;#x20;</code>.
+  */
+ private final void writeAttributeValue(String val) throws IOException {
+     final int lastIndex = val.length() - 1;
+     // starts out true so that a leading space gets escaped
+     boolean prevWasSpace = true;
+     for (int pos = 0; pos <= lastIndex; pos++) {
+         final char ch = val.charAt(pos);
+         if (ch == ' ') {
+             if (prevWasSpace || pos == lastIndex) {
+                 writer.write("&#x20;");
+                 prevWasSpace = false;
+             } else {
+                 writer.write(' ');
+                 prevWasSpace = true;
+             }
+             continue;
+         }
+         // every character other than a space resets the run
+         prevWasSpace = false;
+         switch (ch) {
+             case '<':
+                 writer.write("&lt;");
+                 break;
+             case '>':
+                 writer.write("&gt;");
+                 break;
+             case '&':
+                 writer.write("&amp;");
+                 break;
+             case '"':
+                 writer.write("&quot;");
+                 break;
+             case '\r':
+                 writer.write("&#xD;");
+                 break;
+             case '\t':
+                 writer.write("&#x9;");
+                 break;
+             case '\n':
+                 writer.write("&#xA;");
+                 break;
+             case '\uFFFE':
+             case '\uFFFF':
+                 writer.write('\uFFFD');
+                 break;
+             default:
+                 if (ch < ' ') {
+                     writer.write('\uFFFD');
+                 } else {
+                     writer.write(ch);
+                 }
+                 break;
+         }
+     }
+ }
+
+ /**
+  * Declares a prefix for <code>uri</code>: the well-known attribute
+  * prefix if there is one, otherwise "p0", "p1", ... A candidate that
+  * lookupUri() reports as already bound is replaced by the next
+  * generated "p" + number until an unbound one is found.
+  */
+ private final void generatePrefix(String uri) throws SAXException {
+     int nextSuffix = 0;
+     String prefix = WELL_KNOWN_ATTRIBUTE_PREFIXES.get(uri);
+     if (prefix == null) {
+         prefix = "p" + nextSuffix;
+         nextSuffix++;
+     }
+     for (; lookupUri(prefix) != null; nextSuffix++) {
+         prefix = "p" + nextSuffix;
+     }
+     startPrefixMappingPrivate(prefix, uri);
+ }
+
+ /**
+  * Writes character data. <code>&lt;</code>, <code>&gt;</code> and
+  * <code>&amp;</code> become entities, CR becomes
+  * <code>&amp;#xD;</code>, TAB and LF pass through, and U+FFFE, U+FFFF
+  * and other characters below U+0020 are replaced with U+FFFD.
+  *
+  * @throws SAXException wrapping any <code>IOException</code> from the writer
+  */
+ public final void characters(char[] ch, int start, int length)
+         throws SAXException {
+     final int end = start + length;
+     try {
+         for (int pos = start; pos < end; pos++) {
+             final char c = ch[pos];
+             switch (c) {
+                 case '<':
+                     writer.write("&lt;");
+                     break;
+                 case '>':
+                     writer.write("&gt;");
+                     break;
+                 case '&':
+                     writer.write("&amp;");
+                     break;
+                 case '\r':
+                     writer.write("&#xD;");
+                     break;
+                 case '\t':
+                 case '\n':
+                     // tab and line feed are written unchanged
+                     writer.write(c);
+                     break;
+                 case '\uFFFE':
+                 case '\uFFFF':
+                     writer.write('\uFFFD');
+                     break;
+                 default:
+                     if (c < ' ') {
+                         writer.write('\uFFFD');
+                     } else {
+                         writer.write(c);
+                     }
+                     break;
+             }
+         }
+     } catch (IOException e) {
+         throw new SAXException(e);
+     }
+ }
+
+ /**
+  * Ends serialization: empties the stack, then flushes and closes the
+  * writer.
+  *
+  * @throws SAXException wrapping any <code>IOException</code> from the writer
+  */
+ public final void endDocument() throws SAXException {
+     stack.clear();
+     try {
+         writer.flush();
+         writer.close();
+     } catch (IOException e) {
+         throw new SAXException(e);
+     }
+ }
+
+ /**
+  * Writes an end tag. The name comes from the node popped off the stack;
+  * the three arguments are not consulted.
+  *
+  * @throws SAXException wrapping any <code>IOException</code> from the writer
+  */
+ public final void endElement(String uri, String localName, String qName)
+         throws SAXException {
+     try {
+         writer.write('<');
+         writer.write('/');
+         String closingName = pop();
+         writer.write(closingName);
+         writer.write('>');
+     } catch (IOException e) {
+         throw new SAXException(e);
+     }
+ }
+
+ /**
+  * Ignorable whitespace is serialized exactly like ordinary character
+  * data: the call is passed on to characters().
+  */
+ public final void ignorableWhitespace(char[] ch, int start, int length)
+         throws SAXException {
+     this.characters(ch, start, length);
+ }
+
+ /**
+  * Writes a processing instruction. The target is passed through
+  * checkNCName() first. In the data, a <code>&gt;</code> that directly
+  * follows a <code>?</code> is preceded by a space so the data cannot
+  * end the instruction early; CR and LF are both written as LF; U+FFFE,
+  * U+FFFF and other characters below U+0020 (except TAB) become U+FFFD.
+  *
+  * @param target the processing instruction target
+  * @param data the data; SAX allows <code>null</code> when none was
+  *        supplied, which is treated here as empty data
+  * @throws SAXException if the name check fails, or wrapping any
+  *         <code>IOException</code> from the writer
+  */
+ public final void processingInstruction(String target, String data)
+         throws SAXException {
+     try {
+         checkNCName(target);
+         writer.write("<?");
+         writer.write(target);
+         writer.write(' ');
+         // Without this guard a null data argument raised a
+         // NullPointerException after the target was already written.
+         final String content = (data == null) ? "" : data;
+         boolean prevWasQuestionmark = false;
+         for (int i = 0; i < content.length(); i++) {
+             char c = content.charAt(i);
+             switch (c) {
+                 case '?':
+                     writer.write('?');
+                     prevWasQuestionmark = true;
+                     break;
+                 case '>':
+                     if (prevWasQuestionmark) {
+                         // break up "?>" inside the data
+                         writer.write(" >");
+                     } else {
+                         writer.write('>');
+                     }
+                     prevWasQuestionmark = false;
+                     break;
+                 case '\t':
+                     writer.write('\t');
+                     prevWasQuestionmark = false;
+                     break;
+                 case '\r':
+                 case '\n':
+                     writer.write('\n');
+                     prevWasQuestionmark = false;
+                     break;
+                 case '\uFFFE':
+                 case '\uFFFF':
+                     writer.write('\uFFFD');
+                     prevWasQuestionmark = false;
+                     break;
+                 default:
+                     if (c < ' ') {
+                         writer.write('\uFFFD');
+                     } else {
+                         writer.write(c);
+                     }
+                     prevWasQuestionmark = false;
+                     break;
+             }
+         }
+         writer.write("?>");
+     } catch (IOException e) {
+         throw new SAXException(e);
+     }
+ }
+
+ /** Accepts the locator and ignores it. */
+ public final void setDocumentLocator(Locator locator) {
+     // intentionally empty: the locator is not stored
+ }
+
+ /**
+  * Writes the XML declaration, then resets the stack so that it holds a
+  * single node whose uri, qName and prefix are all <code>null</code>.
+  *
+  * @throws SAXException wrapping any <code>IOException</code> from the writer
+  */
+ public final void startDocument() throws SAXException {
+     try {
+         writer.write("<?xml version='1.0' encoding='utf-8'?>\n");
+     } catch (IOException ioFailure) {
+         throw new SAXException(ioFailure);
+     }
+     stack.clear();
+     // bottom-of-stack node; the first element's mappings are recorded on it
+     push(null, null, null);
+ }
+
+ /**
+  * Writes a start tag. The incoming qName (<code>q</code>) is ignored:
+  * the element prefix is the well-known one for <code>uri</code> unless
+  * that prefix is already bound to a different URI, in which case (and
+  * for unknown or empty URIs) the default namespace is used. Namespace
+  * declarations recorded on the head of the stack are written first,
+  * then the attributes; attributes in the XML or xmlns namespaces, with
+  * an empty local name, or with an xmlns qName are skipped.
+  *
+  * @throws SAXException from the name check, or wrapping any
+  *         <code>IOException</code> from the writer
+  */
+ public final void startElement(String uri, String localName, String q,
+         Attributes atts) throws SAXException {
+     checkNCName(localName);
+
+     // Pick the element prefix; "" means the default namespace.
+     String prefix = "";
+     if (uri.length() != 0) {
+         String wellKnown = WELL_KNOWN_ELEMENT_PREFIXES.get(uri);
+         if (wellKnown != null) {
+             prefix = wellKnown;
+         }
+         String boundUri = lookupUri(prefix);
+         if (boundUri != null && !boundUri.equals(uri)) {
+             // prefix is taken by another namespace
+             prefix = "";
+         }
+     }
+     startPrefixMappingPrivate(prefix, uri);
+     final String qName = prefix.length() == 0
+             ? localName
+             : prefix + ':' + localName;
+
+     // First pass: make sure every namespaced attribute has a prefix.
+     final int attLen = atts.getLength();
+     for (int idx = 0; idx < attLen; idx++) {
+         String attUri = atts.getURI(idx);
+         boolean needsPrefix = attUri.length() != 0
+                 && !"http://www.w3.org/XML/1998/namespace".equals(attUri)
+                 && !"http://www.w3.org/2000/xmlns/".equals(attUri)
+                 && atts.getLocalName(idx).length() != 0
+                 && !xmlNsQname(atts.getQName(idx));
+         if (needsPrefix && lookupPrefixAttribute(attUri) == null) {
+             generatePrefix(attUri);
+         }
+     }
+
+     try {
+         writer.write('<');
+         writer.write(qName);
+
+         // Namespace declarations recorded on the current head node.
+         for (PrefixMapping declaration : stack.getFirst().mappings) {
+             writer.write(' ');
+             if (declaration.prefix.length() != 0) {
+                 writer.write("xmlns:");
+                 writer.write(declaration.prefix);
+             } else {
+                 writer.write("xmlns");
+             }
+             writer.write('=');
+             writer.write('"');
+             writeAttributeValue(declaration.uri);
+             writer.write('"');
+         }
+
+         // Second pass: the attributes themselves.
+         for (int idx = 0; idx < attLen; idx++) {
+             String attUri = atts.getURI(idx);
+             boolean skipped = "http://www.w3.org/XML/1998/namespace".equals(attUri)
+                     || "http://www.w3.org/2000/xmlns/".equals(attUri)
+                     || atts.getLocalName(idx).length() == 0
+                     || xmlNsQname(atts.getQName(idx));
+             if (skipped) {
+                 continue;
+             }
+             writer.write(' ');
+             if (attUri.length() != 0) {
+                 writer.write(lookupPrefixAttribute(attUri));
+                 writer.write(':');
+             }
+             String attLocal = atts.getLocalName(idx);
+             checkNCName(attLocal);
+             writer.write(attLocal);
+             writer.write('=');
+             writer.write('"');
+             writeAttributeValue(atts.getValue(idx));
+             writer.write('"');
+         }
+         writer.write('>');
+     } catch (IOException e) {
+         throw new SAXException(e);
+     }
+     push(uri, qName, prefix);
+ }
+
+ /**
+  * Serializes a comment. A space is inserted between consecutive hyphens
+  * and after a trailing hyphen, CR and LF are both written as LF, tab is
+  * kept, and U+FFFE, U+FFFF and the remaining characters below U+0020 are
+  * replaced with U+FFFD.
+  *
+  * @param ch the buffer holding the comment text
+  * @param start index of the first character to write
+  * @param length number of characters to write
+  * @throws SAXException wrapping any <code>IOException</code> raised by the writer
+  */
+ public final void comment(char[] ch, int start, int length) throws SAXException {
+     try {
+         boolean lastWasHyphen = false;
+         writer.write("<!--");
+         final int end = start + length;
+         for (int pos = start; pos < end; pos++) {
+             final char current = ch[pos];
+             if (current == '-') {
+                 if (lastWasHyphen) {
+                     // Break up "--" so that it never appears in the output.
+                     writer.write(" -");
+                 } else {
+                     writer.write('-');
+                     lastWasHyphen = true;
+                 }
+                 continue;
+             }
+             if (current == '\t') {
+                 writer.write('\t');
+             } else if (current == '\r' || current == '\n') {
+                 writer.write('\n');
+             } else if (current == '\uFFFE' || current == '\uFFFF' || current < ' ') {
+                 writer.write('\uFFFD');
+             } else {
+                 writer.write(current);
+             }
+             lastWasHyphen = false;
+         }
+         if (lastWasHyphen) {
+             // Keep the final hyphen from fusing with the closing delimiter.
+             writer.write(' ');
+         }
+         writer.write("-->");
+     } catch (IOException ioe) {
+         throw new SAXException(ioe);
+     }
+ }
+
+ /**
+ * No-op: the body is empty, so the end of a CDATA section produces no output.
+ */
+ public final void endCDATA() throws SAXException {
+ }
+
+ /**
+ * No-op: the body is empty, so the end of a DTD produces no output.
+ */
+ public final void endDTD() throws SAXException {
+ }
+
+ /**
+ * No-op: the body is empty, so entity boundaries produce no output.
+ *
+ * @param name the entity name (unused)
+ */
+ public final void endEntity(String name) throws SAXException {
+ }
+
+ /**
+ * No-op: the body is empty, so the start of a CDATA section produces no output.
+ */
+ public final void startCDATA() throws SAXException {
+ }
+
+ /**
+ * No-op: the body is empty, so no doctype declaration is written.
+ *
+ * @param name the doctype name (unused)
+ * @param publicId the public identifier (unused)
+ * @param systemId the system identifier (unused)
+ */
+ public final void startDTD(String name, String publicId, String systemId)
+ throws SAXException {
+ }
+
+ /**
+ * No-op: the body is empty, so entity boundaries produce no output.
+ *
+ * @param name the entity name (unused)
+ */
+ public final void startEntity(String name) throws SAXException {
+ }
+
+ /**
+  * Records a caller-supplied namespace declaration on the current top
+  * stack entry.
+  *
+  * <p>Declarations for the empty prefix are ignored here (startElement
+  * derives the default namespace itself through
+  * <code>startPrefixMappingPrivate</code>), as are declarations that repeat
+  * the binding <code>lookupUri</code> already reports for the prefix and
+  * the built-in <code>xml</code> binding.
+  *
+  * @param prefix the prefix being declared
+  * @param uri the namespace URI the prefix is bound to
+  * @throws SAXException if a reserved namespace URI is declared, if a
+  *         prefix is bound to the empty namespace, if the prefix is
+  *         rejected by <code>checkNCName</code>, or if the mapping is
+  *         already present on the current entry
+  */
+ public final void startPrefixMapping(String prefix, String uri)
+         throws SAXException {
+     if (prefix.length() == 0 || uri.equals(lookupUri(prefix))) {
+         return;
+     }
+     if ("http://www.w3.org/XML/1998/namespace".equals(uri)) {
+         if ("xml".equals(prefix)) {
+             // The xml prefix is implicitly declared; nothing to record.
+             return;
+         }
+         throw new SAXException("Attempt to declare a reserved NS uri.");
+     }
+     if ("http://www.w3.org/2000/xmlns/".equals(uri)) {
+         throw new SAXException("Attempt to declare a reserved NS uri.");
+     }
+     // The prefix is known to be non-empty here (empty prefixes returned
+     // above), so an empty URI would un-declare a prefix.
+     if (uri.length() == 0) {
+         throw new SAXException("Can't bind a prefix to no namespace.");
+     }
+     checkNCName(prefix);
+     Set<PrefixMapping> mappings = stack.getFirst().mappings;
+     PrefixMapping mapping = new PrefixMapping(uri, prefix);
+     if (mappings.contains(mapping)) {
+         throw new SAXException(
+                 "Attempt to map one prefix to two URIs on one element.");
+     }
+     mappings.add(mapping);
+ }
+
+ public final void startPrefixMappingPrivate(String prefix, String uri)
+ throws SAXException {
+ if (uri.equals(lookupUri(prefix))) {
+ return;
+ }
+ stack.getFirst().mappings.add(new PrefixMapping(uri, prefix));
+ }
+
+ /**
+ * No-op: the body is empty, so nothing is done when a mapping goes out of scope.
+ *
+ * @param prefix the prefix going out of scope (unused)
+ */
+ public final void endPrefixMapping(String prefix) throws SAXException {
+ }
+
+ /**
+ * No-op: the body is empty, so skipped entities produce no output.
+ *
+ * @param name the name of the skipped entity (unused)
+ */
+ public final void skippedEntity(String name) throws SAXException {
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/package.html
new file mode 100644
index 000000000..60532962f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/package.html
@@ -0,0 +1,29 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>This package provides an HTML5 parser that exposes the document through the SAX API.</p>
+</body>
+</html> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPointer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPointer.java
new file mode 100644
index 000000000..6dcff5600
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPointer.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.xom;
+
+import nu.xom.Element;
+
+/**
+ * Implemented by elements that carry a reference to an associated form
+ * element.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface FormPointer {
+
+    /**
+     * Returns the associated form element.
+     *
+     * @return the form, as last set
+     */
+    Element getForm();
+
+    /**
+     * Replaces the associated form element.
+     *
+     * @param form the form to associate with this element
+     */
+    void setForm(Element form);
+
+} \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPtrElement.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPtrElement.java
new file mode 100644
index 000000000..2e2e18df7
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPtrElement.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.xom;
+
+import nu.xom.Element;
+
+/**
+ * An XOM <code>Element</code> that additionally remembers an associated
+ * form element.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class FormPtrElement extends Element implements FormPointer {
+
+    /** The associated form; <code>null</code> until one is set. */
+    private Element form;
+
+    /**
+     * Copy constructor. When the source element is itself a
+     * <code>FormPointer</code>, its form reference is carried over through
+     * {@link #setForm(Element)}.
+     *
+     * @param elt the element to copy
+     */
+    public FormPtrElement(Element elt) {
+        super(elt);
+        if (!(elt instanceof FormPointer)) {
+            return;
+        }
+        this.setForm(((FormPointer) elt).getForm());
+    }
+
+    /**
+     * Creates an element with no associated form.
+     *
+     * @param name the element name
+     * @param uri the namespace URI
+     */
+    public FormPtrElement(String name, String uri) {
+        super(name, uri);
+    }
+
+    /**
+     * Creates an element with the given associated form.
+     *
+     * @param name the element name
+     * @param uri the namespace URI
+     * @param form the form to associate
+     */
+    public FormPtrElement(String name, String uri, Element form) {
+        this(name, uri);
+        this.form = form;
+    }
+
+    /**
+     * Returns the associated form.
+     *
+     * @see nu.validator.htmlparser.xom.FormPointer#getForm()
+     */
+    public Element getForm() {
+        return this.form;
+    }
+
+    /**
+     * Replaces the associated form.
+     *
+     * @see nu.validator.htmlparser.xom.FormPointer#setForm(nu.xom.Element)
+     */
+    public void setForm(Element form) {
+        this.form = form;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/HtmlBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/HtmlBuilder.java
new file mode 100644
index 000000000..845ea15cf
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/HtmlBuilder.java
@@ -0,0 +1,773 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007-2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.xom;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringReader;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.LinkedList;
+import java.util.List;
+
+import nu.validator.htmlparser.common.CharacterHandler;
+import nu.validator.htmlparser.common.DoctypeExpectation;
+import nu.validator.htmlparser.common.DocumentModeHandler;
+import nu.validator.htmlparser.common.Heuristics;
+import nu.validator.htmlparser.common.TokenHandler;
+import nu.validator.htmlparser.common.TransitionHandler;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
+import nu.validator.htmlparser.impl.Tokenizer;
+import nu.validator.htmlparser.io.Driver;
+import nu.xom.Builder;
+import nu.xom.Document;
+import nu.xom.Nodes;
+import nu.xom.ParsingException;
+import nu.xom.ValidityException;
+
+import org.xml.sax.EntityResolver;
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+/**
+ * This class implements an HTML5 parser that exposes data through the XOM
+ * interface.
+ *
+ * <p>By default, when using the constructor without arguments, this
+ * parser treats XML 1.0-incompatible infosets as fatal errors. This
+ * corresponds to <code>FATAL</code> as the general XML violation policy.
+ * It is possible to have XML 1.0 infoset violations coerced into XML
+ * 1.0-compatible infosets instead by setting the general XML violation
+ * policy to <code>ALTER_INFOSET</code>.
+ *
+ * <p>The doctype is not represented in the tree.
+ *
+ * <p>The document mode is represented via the <code>Mode</code>
+ * interface on the <code>Document</code> node if the node implements
+ * that interface (depends on the used node factory).
+ *
+ * <p>The form pointer is stored if the node factory supports storing it.
+ *
+ * <p>This package has its own node factory class because the official
+ * XOM node factory may return multiple nodes instead of one confusing
+ * the assumptions of the DOM-oriented HTML5 parsing algorithm.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class HtmlBuilder extends Builder {
+
+ // Created on demand by lazyInit(); several setters assign null here so that
+ // the next parse builds a fresh driver.
+ private Driver driver;
+
+ private final XOMTreeBuilder treeBuilder;
+
+ private final SimpleNodeFactory simpleNodeFactory;
+
+ // Consulted by tokenize() when the input source has neither stream.
+ private EntityResolver entityResolver;
+
+ private ErrorHandler errorHandler = null;
+
+ private DocumentModeHandler documentModeHandler = null;
+
+ private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;
+
+ private boolean checkingNormalization = false;
+
+ private boolean scriptingEnabled = false;
+
+ // Re-registered on every driver that lazyInit() creates.
+ private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>();
+
+ // The policy initializers below are overwritten by the setXmlPolicy() call
+ // made from the constructor.
+ private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL;
+
+ // NOTE(review): only read by its getter in this class; lazyInit() does not
+ // hand it to the driver or the tree builder.
+ private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW;
+
+ private boolean html4ModeCompatibleWithXhtml1Schemata = false;
+
+ private boolean mappingLangToXmlLang = false;
+
+ // setXmlnsPolicy() rejects FATAL; setXmlPolicy() substitutes ALTER_INFOSET.
+ private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL;
+
+ private boolean reportingDoctype = true;
+
+ // Assigned together with errorHandler by setErrorHandler().
+ private ErrorHandler treeBuilderErrorHandler = null;
+
+ private Heuristics heuristics = Heuristics.NONE;
+
+ private TransitionHandler transitionHandler = null;
+
+ /**
+ * Constructor with a new <code>SimpleNodeFactory</code> and
+ * <code>XmlViolationPolicy.FATAL</code> as the XML violation policy.
+ */
+ public HtmlBuilder() {
+ this(new SimpleNodeFactory(), XmlViolationPolicy.FATAL);
+ }
+
+ /**
+ * Constructor with the given node factory and
+ * <code>XmlViolationPolicy.FATAL</code> as the XML violation policy.
+ * @param nodeFactory the factory used to create the tree nodes
+ */
+ public HtmlBuilder(SimpleNodeFactory nodeFactory) {
+ this(nodeFactory, XmlViolationPolicy.FATAL);
+ }
+
+ /**
+ * Constructor with a new <code>SimpleNodeFactory</code> and the given XML
+ * violation policy.
+ * @param xmlPolicy the policy, applied through <code>setXmlPolicy</code>
+ */
+ public HtmlBuilder(XmlViolationPolicy xmlPolicy) {
+ this(new SimpleNodeFactory(), xmlPolicy);
+ }
+
+ /**
+  * Constructor with the given node factory and the given XML violation
+  * policy. The tree builder is created immediately; the driver is left
+  * unset and is created by <code>lazyInit</code> on first use.
+  *
+  * @param nodeFactory the factory used to create the tree nodes
+  * @param xmlPolicy the policy, applied through <code>setXmlPolicy</code>
+  */
+ public HtmlBuilder(SimpleNodeFactory nodeFactory, XmlViolationPolicy xmlPolicy) {
+     simpleNodeFactory = nodeFactory;
+     treeBuilder = new XOMTreeBuilder(nodeFactory);
+     // driver keeps its default null value until lazyInit() runs.
+     setXmlPolicy(xmlPolicy);
+ }
+
+ /**
+  * Creates the tokenizer for a new driver. The plain <code>Tokenizer</code>
+  * is used only when there is no error handler, no transition handler and
+  * non-XML characters are allowed; otherwise an
+  * <code>ErrorReportingTokenizer</code> is returned.
+  *
+  * @param handler the token handler the tokenizer feeds
+  * @param newAttributesEachTime passed through to the tokenizer constructor
+  * @return the new tokenizer
+  */
+ private Tokenizer newTokenizer(TokenHandler handler, boolean newAttributesEachTime) {
+     boolean needsErrorReporting = errorHandler != null
+             || transitionHandler != null
+             || contentNonXmlCharPolicy != XmlViolationPolicy.ALLOW;
+     if (needsErrorReporting) {
+         return new ErrorReportingTokenizer(handler, newAttributesEachTime);
+     }
+     return new Tokenizer(handler, newAttributesEachTime);
+ }
+
+ /**
+ * Creates and configures the driver if there currently is none. The
+ * tokenizer is chosen by <code>newTokenizer</code>, and every setting
+ * stored on this builder is then copied to the new driver and to the tree
+ * builder. Does nothing when a driver already exists.
+ */
+ private void lazyInit() {
+ if (driver == null) {
+ this.driver = new Driver(newTokenizer(treeBuilder, false));
+ this.driver.setErrorHandler(errorHandler);
+ this.driver.setTransitionHandler(transitionHandler);
+ this.treeBuilder.setErrorHandler(treeBuilderErrorHandler);
+ this.driver.setCheckingNormalization(checkingNormalization);
+ this.driver.setCommentPolicy(commentPolicy);
+ this.driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
+ this.driver.setContentSpacePolicy(contentSpacePolicy);
+ this.driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+ this.driver.setMappingLangToXmlLang(mappingLangToXmlLang);
+ this.driver.setXmlnsPolicy(xmlnsPolicy);
+ this.driver.setHeuristics(heuristics);
+ for (CharacterHandler characterHandler : characterHandlers) {
+ this.driver.addCharacterHandler(characterHandler);
+ }
+ this.treeBuilder.setDoctypeExpectation(doctypeExpectation);
+ this.treeBuilder.setDocumentModeHandler(documentModeHandler);
+ this.treeBuilder.setScriptingEnabled(scriptingEnabled);
+ this.treeBuilder.setReportingDoctype(reportingDoctype);
+ // NOTE(review): the name policy is applied to the tree builder only;
+ // setNamePolicy() also calls driver.setNamePolicy() when a driver
+ // exists, but a freshly created driver is not given it here. Confirm
+ // whether that is intended.
+ this.treeBuilder.setNamePolicy(namePolicy);
+ }
+ }
+
+
+ /**
+  * Runs the driver over the given input source.
+  *
+  * <p>When the source carries neither a byte stream nor a character stream,
+  * its system identifier is resolved: the entity resolver (if any) is asked
+  * first, and when it returns <code>null</code> or a source without any
+  * stream, the system identifier is opened as a URL.
+  *
+  * @param is the input source; must not be <code>null</code>
+  * @throws IllegalArgumentException if <code>is</code> is <code>null</code>
+  *         or has no stream and no system identifier
+  * @throws ParsingException wrapping any <code>SAXException</code> raised
+  *         while resolving or tokenizing
+  * @throws IOException if reading the input fails
+  */
+ private void tokenize(InputSource is) throws ParsingException, IOException,
+         MalformedURLException {
+     try {
+         if (is == null) {
+             throw new IllegalArgumentException("Null input.");
+         }
+         if (is.getByteStream() == null && is.getCharacterStream() == null) {
+             String systemId = is.getSystemId();
+             if (systemId == null) {
+                 throw new IllegalArgumentException(
+                         "No byte stream, no character stream nor URI.");
+             }
+             InputSource resolved = null;
+             if (entityResolver != null) {
+                 // A resolver may return null to request default handling.
+                 resolved = entityResolver.resolveEntity(is.getPublicId(),
+                         systemId);
+             }
+             // Fall back to opening the URL only when the resolver supplied
+             // no stream at all; a source with either stream is usable.
+             if (resolved == null
+                     || (resolved.getByteStream() == null
+                             && resolved.getCharacterStream() == null)) {
+                 resolved = new InputSource();
+                 resolved.setSystemId(systemId);
+                 resolved.setByteStream(new URL(systemId).openStream());
+             }
+             is = resolved;
+         }
+         driver.tokenize(is);
+     } catch (SAXParseException e) {
+         throw new ParsingException(e.getMessage(), e.getSystemId(), e.getLineNumber(),
+                 e.getColumnNumber(), e);
+     } catch (SAXException e) {
+         throw new ParsingException(e.getMessage(), e);
+     }
+ }
+
+ /**
+ * Parse a whole document from a SAX <code>InputSource</code>.
+ * @param is the <code>InputSource</code>
+ * @return the document
+ * @throws ParsingException in case of an XML violation
+ * @throws IOException if IO goes wrong
+ */
+ public Document build(InputSource is) throws ParsingException, IOException {
+ lazyInit();
+ // A null fragment context selects whole-document parsing.
+ treeBuilder.setFragmentContext(null);
+ tokenize(is);
+ return treeBuilder.getDocument();
+ }
+
+ /**
+ * Parse a fragment from SAX <code>InputSource</code> assuming an HTML
+ * context.
+ * @param is the <code>InputSource</code>
+ * @param context the name of the context element (HTML namespace assumed);
+ * must not be <code>null</code> because it is interned
+ * @return the fragment
+ * @throws ParsingException in case of an XML violation
+ * @throws IOException if IO goes wrong
+ */
+ public Nodes buildFragment(InputSource is, String context)
+ throws IOException, ParsingException {
+ lazyInit();
+ treeBuilder.setFragmentContext(context.intern());
+ tokenize(is);
+ return treeBuilder.getDocumentFragment();
+ }
+
+ /**
+ * Parse a fragment from SAX <code>InputSource</code>.
+ * @param is the <code>InputSource</code>
+ * @param contextLocal the local name of the context element; must not be
+ * <code>null</code> because it is interned
+ * @param contextNamespace the namespace of the context element; must not be
+ * <code>null</code> because it is interned
+ * @return the fragment
+ * @throws ParsingException in case of an XML violation
+ * @throws IOException if IO goes wrong
+ */
+ public Nodes buildFragment(InputSource is, String contextLocal, String contextNamespace)
+ throws IOException, ParsingException {
+ lazyInit();
+ treeBuilder.setFragmentContext(contextLocal.intern(), contextNamespace.intern(), null, false);
+ tokenize(is);
+ return treeBuilder.getDocumentFragment();
+ }
+
+ /**
+ * Parse from <code>File</code>.
+ * @param file the file
+ * @return the document
+ * @throws ParsingException in case of an XML violation
+ * @throws IOException if IO goes wrang
+ * @see nu.xom.Builder#build(java.io.File)
+ */
+ @Override
+ public Document build(File file) throws ParsingException,
+ ValidityException, IOException {
+ return build(new FileInputStream(file), file.toURI().toASCIIString());
+ }
+
+ /**
+  * Parse from <code>InputStream</code> with a base URI.
+  * @param stream the byte stream to read
+  * @param uri the base URI, recorded as the system identifier of the input
+  * @return the document
+  * @throws ParsingException in case of an XML violation
+  * @throws IOException if IO goes wrong
+  * @see nu.xom.Builder#build(java.io.InputStream, java.lang.String)
+  */
+ @Override
+ public Document build(InputStream stream, String uri)
+         throws ParsingException, ValidityException, IOException {
+     final InputSource source = new InputSource(stream);
+     source.setSystemId(uri);
+     return build(source);
+ }
+
+ /**
+  * Parse from <code>InputStream</code> without a base URI.
+  * @param stream the byte stream to read
+  * @return the document
+  * @throws ParsingException in case of an XML violation
+  * @throws IOException if IO goes wrong
+  * @see nu.xom.Builder#build(java.io.InputStream)
+  */
+ @Override
+ public Document build(InputStream stream) throws ParsingException,
+         ValidityException, IOException {
+     final InputSource source = new InputSource(stream);
+     return build(source);
+ }
+
+ /**
+  * Parse from <code>Reader</code> with a base URI.
+  * @param stream the character stream to read
+  * @param uri the base URI, recorded as the system identifier of the input
+  * @return the document
+  * @throws ParsingException in case of an XML violation
+  * @throws IOException if IO goes wrong
+  * @see nu.xom.Builder#build(java.io.Reader, java.lang.String)
+  */
+ @Override
+ public Document build(Reader stream, String uri) throws ParsingException,
+         ValidityException, IOException {
+     final InputSource source = new InputSource(stream);
+     source.setSystemId(uri);
+     return build(source);
+ }
+
+ /**
+  * Parse from <code>Reader</code> without a base URI.
+  * @param stream the character stream to read
+  * @return the document
+  * @throws ParsingException in case of an XML violation
+  * @throws IOException if IO goes wrong
+  * @see nu.xom.Builder#build(java.io.Reader)
+  */
+ @Override
+ public Document build(Reader stream) throws ParsingException,
+         ValidityException, IOException {
+     final InputSource source = new InputSource(stream);
+     return build(source);
+ }
+
+ /**
+  * Parse HTML source held in a <code>String</code>.
+  * @param content the HTML source as string
+  * @param uri the base URI
+  * @return the document
+  * @throws ParsingException in case of an XML violation
+  * @throws IOException if IO goes wrong
+  * @see nu.xom.Builder#build(java.lang.String, java.lang.String)
+  */
+ @Override
+ public Document build(String content, String uri) throws ParsingException,
+         ValidityException, IOException {
+     final Reader contentReader = new StringReader(content);
+     return build(contentReader, uri);
+ }
+
+ /**
+  * Parse the document found at a URI. The URI becomes the system
+  * identifier of the input source.
+  * @param uri the URI of the document
+  * @return the document
+  * @throws ParsingException in case of an XML violation
+  * @throws IOException if IO goes wrong
+  * @see nu.xom.Builder#build(java.lang.String)
+  */
+ @Override
+ public Document build(String uri) throws ParsingException,
+         ValidityException, IOException {
+     final InputSource source = new InputSource(uri);
+     return build(source);
+ }
+
+ /**
+ * Gets the node factory that was passed to, or created by, the constructor.
+ *
+ * @return the node factory
+ */
+ public SimpleNodeFactory getSimpleNodeFactory() {
+ return simpleNodeFactory;
+ }
+
+ /**
+ * Sets the resolver that is consulted when an input source has a system
+ * identifier but no stream.
+ *
+ * @param resolver the resolver, or <code>null</code> for none
+ * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
+ */
+ public void setEntityResolver(EntityResolver resolver) {
+ entityResolver = resolver;
+ }
+
+ /**
+ * Sets the error handler for both the driver and the tree builder.
+ *
+ * @param handler the handler, or <code>null</code> for none
+ * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
+ */
+ public void setErrorHandler(ErrorHandler handler) {
+ errorHandler = handler;
+ treeBuilderErrorHandler = handler;
+ // Discard the driver: the tokenizer class chosen by newTokenizer()
+ // depends on whether an error handler is present.
+ driver = null;
+ }
+
+ /**
+ * Sets the transition handler. (The method name is spelled "Hander"; it is
+ * kept as is because it is part of the public interface.)
+ *
+ * @param handler the handler, or <code>null</code> for none
+ */
+ public void setTransitionHander(TransitionHandler handler) {
+ transitionHandler = handler;
+ // Discard the driver: the tokenizer class chosen by newTokenizer()
+ // depends on whether a transition handler is present.
+ driver = null;
+ }
+
+ /**
+ * Indicates whether NFC normalization of source is being checked.
+ * @return <code>true</code> if NFC normalization of source is being checked
+ * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization()
+ */
+ public boolean isCheckingNormalization() {
+ return checkingNormalization;
+ }
+
+ /**
+  * Turns checking of the NFC normalization of source on or off.
+  * @param enable <code>true</code> to check normalization
+  * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean)
+  */
+ public void setCheckingNormalization(boolean enable) {
+     checkingNormalization = enable;
+     if (driver == null) {
+         // No driver yet; lazyInit() applies the stored value later.
+         return;
+     }
+     driver.setCheckingNormalization(checkingNormalization);
+ }
+
+ /**
+  * Sets the policy for consecutive hyphens in comments.
+  * @param commentPolicy the policy
+  * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+  */
+ public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
+     this.commentPolicy = commentPolicy;
+     if (driver == null) {
+         // No driver yet; lazyInit() applies the stored value later.
+         return;
+     }
+     driver.setCommentPolicy(commentPolicy);
+ }
+
+ /**
+ * Sets the policy for non-XML characters except white space.
+ * @param contentNonXmlCharPolicy the policy
+ * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+ */
+ public void setContentNonXmlCharPolicy(
+ XmlViolationPolicy contentNonXmlCharPolicy) {
+ this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
+ // Discard the driver: the tokenizer class chosen by newTokenizer()
+ // depends on this policy.
+ driver = null;
+ }
+
+ /**
+  * Sets the policy for non-XML white space.
+  * @param contentSpacePolicy the policy
+  * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+  */
+ public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
+     this.contentSpacePolicy = contentSpacePolicy;
+     if (driver == null) {
+         // No driver yet; lazyInit() applies the stored value later.
+         return;
+     }
+     driver.setContentSpacePolicy(contentSpacePolicy);
+ }
+
+ /**
+ * Whether the parser considers scripting to be enabled for noscript treatment.
+ *
+ * @return <code>true</code> if enabled (the default is <code>false</code>)
+ * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled()
+ */
+ public boolean isScriptingEnabled() {
+ return scriptingEnabled;
+ }
+
+ /**
+  * Sets whether the parser considers scripting to be enabled for noscript
+  * treatment. The value is stored and also handed to the tree builder.
+  * @param scriptingEnabled <code>true</code> to enable
+  * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean)
+  */
+ public void setScriptingEnabled(boolean scriptingEnabled) {
+     this.scriptingEnabled = scriptingEnabled;
+     if (treeBuilder == null) {
+         return;
+     }
+     treeBuilder.setScriptingEnabled(scriptingEnabled);
+ }
+
+ /**
+ * Returns the doctype expectation.
+ *
+ * @return the doctype expectation (<code>DoctypeExpectation.HTML</code>
+ * unless changed)
+ */
+ public DoctypeExpectation getDoctypeExpectation() {
+ return doctypeExpectation;
+ }
+
+ /**
+  * Sets the doctype expectation. The value is stored and also handed to
+  * the tree builder.
+  *
+  * @param doctypeExpectation
+  *            the doctype expectation to set
+  * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation)
+  */
+ public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
+     this.doctypeExpectation = doctypeExpectation;
+     if (treeBuilder == null) {
+         return;
+     }
+     treeBuilder.setDoctypeExpectation(doctypeExpectation);
+ }
+
+ /**
+ * Returns the document mode handler.
+ *
+ * @return the document mode handler, or <code>null</code> if none is set
+ */
+ public DocumentModeHandler getDocumentModeHandler() {
+ return documentModeHandler;
+ }
+
+ /**
+  * Sets the document mode handler. The handler is stored and also handed
+  * to the tree builder right away, so that a change made after the driver
+  * has been created still takes effect (<code>lazyInit</code> only copies
+  * the stored value when it creates a new driver).
+  *
+  * @param documentModeHandler
+  *            the document mode handler to set, or <code>null</code> for none
+  * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler)
+  */
+ public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
+     this.documentModeHandler = documentModeHandler;
+     if (treeBuilder != null) {
+         treeBuilder.setDocumentModeHandler(documentModeHandler);
+     }
+ }
+
+ /**
+ * Returns the streamability violation policy.
+ *
+ * @return the policy (<code>XmlViolationPolicy.ALLOW</code> unless changed)
+ */
+ public XmlViolationPolicy getStreamabilityViolationPolicy() {
+ return streamabilityViolationPolicy;
+ }
+
+ /**
+ * Sets the streamability violation policy and discards the current driver.
+ *
+ * <p>NOTE(review): within this class the stored value is only returned by
+ * the getter; <code>lazyInit</code> does not pass it to the driver or the
+ * tree builder. Confirm whether it is meant to have an effect.
+ *
+ * @param streamabilityViolationPolicy
+ * the policy to set
+ */
+ public void setStreamabilityViolationPolicy(
+ XmlViolationPolicy streamabilityViolationPolicy) {
+ this.streamabilityViolationPolicy = streamabilityViolationPolicy;
+ driver = null;
+ }
+
+ /**
+  * Sets whether the HTML 4 mode reports boolean attributes in a way that
+  * repeats the name in the value.
+  * @param html4ModeCompatibleWithXhtml1Schemata <code>true</code> to enable
+  */
+ public void setHtml4ModeCompatibleWithXhtml1Schemata(
+         boolean html4ModeCompatibleWithXhtml1Schemata) {
+     this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
+     if (driver == null) {
+         // No driver yet; lazyInit() applies the stored value later.
+         return;
+     }
+     driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+ }
+
+ /**
+  * Returns the <code>Locator</code> during parse.
+  *
+  * <p>The locator belongs to the driver, which is created on first use and
+  * discarded by some setters; while no driver exists this method returns
+  * <code>null</code> instead of failing.
+  *
+  * @return the <code>Locator</code>, or <code>null</code> if there is
+  *         currently no driver
+  */
+ public Locator getDocumentLocator() {
+     if (driver == null) {
+         return null;
+     }
+     return driver.getDocumentLocator();
+ }
+
+ /**
+ * Whether the HTML 4 mode reports boolean attributes in a way that repeats
+ * the name in the value.
+ *
+ * @return <code>true</code> if enabled (the default is <code>false</code>)
+ */
+ public boolean isHtml4ModeCompatibleWithXhtml1Schemata() {
+ return html4ModeCompatibleWithXhtml1Schemata;
+ }
+
+ /**
+  * Sets whether <code>lang</code> is mapped to <code>xml:lang</code>.
+  * @param mappingLangToXmlLang <code>true</code> to map
+  * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
+  */
+ public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
+     this.mappingLangToXmlLang = mappingLangToXmlLang;
+     if (driver == null) {
+         // No driver yet; lazyInit() applies the stored value later.
+         return;
+     }
+     driver.setMappingLangToXmlLang(mappingLangToXmlLang);
+ }
+
+ /**
+ * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
+ *
+ * @return <code>true</code> if mapped (the default is <code>false</code>)
+ */
+ public boolean isMappingLangToXmlLang() {
+ return mappingLangToXmlLang;
+ }
+
+ /**
+  * Sets whether the <code>xmlns</code> attribute on the root element is
+  * passed through. <code>FATAL</code> is not an accepted value.
+  * @param xmlnsPolicy the policy; anything but <code>FATAL</code>
+  * @throws IllegalArgumentException if <code>xmlnsPolicy</code> is
+  *         <code>XmlViolationPolicy.FATAL</code>
+  * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+  */
+ public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
+     if (XmlViolationPolicy.FATAL == xmlnsPolicy) {
+         throw new IllegalArgumentException("Can't use FATAL here.");
+     }
+     this.xmlnsPolicy = xmlnsPolicy;
+     if (driver == null) {
+         // No driver yet; lazyInit() applies the stored value later.
+         return;
+     }
+     driver.setXmlnsPolicy(xmlnsPolicy);
+ }
+
+ /**
+ * Returns the policy for the <code>xmlns</code> attribute.
+ *
+ * @return the xmlns policy
+ */
+ public XmlViolationPolicy getXmlnsPolicy() {
+ return xmlnsPolicy;
+ }
+
+ /**
+ * Returns the policy for consecutive hyphens in comments.
+ *
+ * @return the comment policy
+ */
+ public XmlViolationPolicy getCommentPolicy() {
+ return commentPolicy;
+ }
+
+ /**
+ * Returns the policy for non-XML characters except white space.
+ *
+ * @return the non-XML character policy
+ */
+ public XmlViolationPolicy getContentNonXmlCharPolicy() {
+ return contentNonXmlCharPolicy;
+ }
+
+ /**
+ * Returns the policy for non-XML white space.
+ *
+ * @return the content space policy
+ */
+ public XmlViolationPolicy getContentSpacePolicy() {
+ return contentSpacePolicy;
+ }
+
+ /**
+  * Sets the doctype reporting flag. The value is stored and also handed to
+  * the tree builder.
+  * @param reportingDoctype the new value of the flag
+  * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean)
+  */
+ public void setReportingDoctype(boolean reportingDoctype) {
+     this.reportingDoctype = reportingDoctype;
+     if (treeBuilder == null) {
+         return;
+     }
+     treeBuilder.setReportingDoctype(reportingDoctype);
+ }
+
+ /**
+ * Returns the doctype reporting flag.
+ *
+ * @return the flag (the default is <code>true</code>)
+ */
+ public boolean isReportingDoctype() {
+ return reportingDoctype;
+ }
+
+ /**
+  * Sets the policy for non-NCName element and attribute names. When a
+  * driver exists, the policy is applied to both the driver and the tree
+  * builder at once; otherwise it is only stored.
+  * @param namePolicy the policy
+  * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+  */
+ public void setNamePolicy(XmlViolationPolicy namePolicy) {
+     this.namePolicy = namePolicy;
+     if (driver == null) {
+         return;
+     }
+     driver.setNamePolicy(namePolicy);
+     treeBuilder.setNamePolicy(namePolicy);
+ }
+
+ /**
+  * Sets the encoding sniffing heuristics.
+  *
+  * @param heuristics the heuristics to set
+  * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics)
+  */
+ public void setHeuristics(Heuristics heuristics) {
+     this.heuristics = heuristics;
+     if (driver == null) {
+         // No driver yet; lazyInit() applies the stored value later.
+         return;
+     }
+     driver.setHeuristics(heuristics);
+ }
+
+ /**
+ * Returns the encoding sniffing heuristics.
+ *
+ * @return the heuristics (<code>Heuristics.NONE</code> unless changed)
+ */
+ public Heuristics getHeuristics() {
+ return this.heuristics;
+ }
+
+ /**
+  * Convenience method that sets the name, xmlns, content space, content
+  * non-XML char and comment policies in one go. Because the xmlns policy
+  * cannot be <code>FATAL</code>, <code>ALTER_INFOSET</code> is used for it
+  * when <code>FATAL</code> is given. The streamability policy and doctype
+  * reporting are not affected.
+  *
+  * @param xmlPolicy the policy to apply
+  */
+ public void setXmlPolicy(XmlViolationPolicy xmlPolicy) {
+     final XmlViolationPolicy effectiveXmlnsPolicy;
+     if (xmlPolicy == XmlViolationPolicy.FATAL) {
+         effectiveXmlnsPolicy = XmlViolationPolicy.ALTER_INFOSET;
+     } else {
+         effectiveXmlnsPolicy = xmlPolicy;
+     }
+     setNamePolicy(xmlPolicy);
+     setXmlnsPolicy(effectiveXmlnsPolicy);
+     setContentSpacePolicy(xmlPolicy);
+     setContentNonXmlCharPolicy(xmlPolicy);
+     setCommentPolicy(xmlPolicy);
+ }
+
+ /**
+ * Returns the policy for non-NCName element and attribute names.
+ *
+ * @return the name policy
+ */
+ public XmlViolationPolicy getNamePolicy() {
+ return namePolicy;
+ }
+
+ /**
+ * Does nothing; the argument is ignored.
+ * @param bogusXmlnsPolicy ignored
+ * @deprecated this setting no longer has any effect
+ */
+ public void setBogusXmlnsPolicy(
+ XmlViolationPolicy bogusXmlnsPolicy) {
+ }
+
+ /**
+ * Always returns <code>XmlViolationPolicy.ALTER_INFOSET</code>, whatever
+ * was passed to <code>setBogusXmlnsPolicy</code>.
+ * @deprecated this setting no longer has any effect
+ * @return <code>XmlViolationPolicy.ALTER_INFOSET</code>
+ */
+ public XmlViolationPolicy getBogusXmlnsPolicy() {
+ return XmlViolationPolicy.ALTER_INFOSET;
+ }
+
+ /**
+  * Registers a character handler. The handler is remembered so that it can
+  * be re-registered on every driver created later, and it is also added to
+  * the current driver if there is one.
+  *
+  * @param characterHandler the handler to add
+  */
+ public void addCharacterHandler(CharacterHandler characterHandler) {
+     characterHandlers.add(characterHandler);
+     if (driver == null) {
+         return;
+     }
+     driver.addCharacterHandler(characterHandler);
+ }
+
+
+ /**
+ * Sets whether comment nodes appear in the tree. The value is passed
+ * straight to the tree builder and is not stored on this builder.
+ * @param ignoreComments <code>true</code> to ignore comments
+ * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean)
+ */
+ public void setIgnoringComments(boolean ignoreComments) {
+ treeBuilder.setIgnoringComments(ignoreComments);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/ModalDocument.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/ModalDocument.java
new file mode 100644
index 000000000..3b76b1421
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/ModalDocument.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.xom;
+
+import nu.validator.htmlparser.common.DocumentMode;
+import nu.xom.Document;
+import nu.xom.Element;
+
+/**
+ * A XOM <code>Document</code> that additionally carries a
+ * <code>DocumentMode</code>, exposed through the <code>Mode</code> interface.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class ModalDocument extends Document implements Mode {
+
+    /** The document mode; <code>null</code> until one has been set. */
+    private DocumentMode mode;
+
+    /**
+     * Copy constructor. When the source document also implements
+     * <code>Mode</code>, its mode is carried over to the copy.
+     *
+     * @param doc the document to copy
+     */
+    public ModalDocument(Document doc) {
+        super(doc);
+        if (!(doc instanceof Mode)) {
+            return;
+        }
+        setMode(((Mode) doc).getMode());
+    }
+
+    /**
+     * Creates a document with the given root element. The mode is left unset.
+     *
+     * @param elt the root element
+     */
+    public ModalDocument(Element elt) {
+        super(elt);
+    }
+
+    /**
+     * Returns the mode of this document.
+     *
+     * @return the mode, or <code>null</code> if none has been set
+     * @see nu.validator.htmlparser.xom.Mode#getMode()
+     */
+    public DocumentMode getMode() {
+        return this.mode;
+    }
+
+    /**
+     * Stores the mode of this document.
+     *
+     * @param mode the mode to set
+     * @see nu.validator.htmlparser.xom.Mode#setMode(nu.validator.htmlparser.common.DocumentMode)
+     */
+    public void setMode(DocumentMode mode) {
+        this.mode = mode;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/Mode.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/Mode.java
new file mode 100644
index 000000000..bd2dcbc26
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/Mode.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.xom;
+
+import nu.validator.htmlparser.common.DocumentMode;
+
+/**
+ * Implemented by document classes that can carry a <code>DocumentMode</code>.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface Mode {
+
+    /**
+     * Returns the document mode.
+     *
+     * @return the mode
+     */
+    DocumentMode getMode();
+
+    /**
+     * Sets the document mode.
+     *
+     * @param mode the mode to set
+     */
+    void setMode(DocumentMode mode);
+
+} \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/SimpleNodeFactory.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/SimpleNodeFactory.java
new file mode 100644
index 000000000..147b5d930
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/SimpleNodeFactory.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.xom;
+
+import nu.xom.Attribute;
+import nu.xom.Comment;
+import nu.xom.Document;
+import nu.xom.Element;
+import nu.xom.Text;
+import nu.xom.Attribute.Type;
+
+/**
+ * A simpler node factory that does not use <code>Nodes</code>..
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class SimpleNodeFactory {
+
+ /**
+ * <code>return new Attribute(localName, uri, value, type);</code>
+ * @param localName
+ * @param uri
+ * @param value
+ * @param type
+ * @return
+ */
+ public Attribute makeAttribute(String localName, String uri, String value, Type type) {
+ return new Attribute(localName, uri, value, type);
+ }
+
+ /**
+ * <code>return new Text(string);</code>
+ * @param string
+ * @return
+ */
+ public Text makeText(String string) {
+ return new Text(string);
+ }
+
+ /**
+ * <code>return new Comment(string);</code>
+ * @param string
+ * @return
+ */
+ public Comment makeComment(String string) {
+ return new Comment(string);
+ }
+
+ /**
+ * <code>return new Element(name, namespace);</code>
+ * @param name
+ * @param namespace
+ * @return
+ */
+ public Element makeElement(String name, String namespace) {
+ return new Element(name, namespace);
+ }
+
+ /**
+ * <code>return new FormPtrElement(name, namespace, form);</code>
+ * @param name
+ * @param namespace
+ * @param form
+ * @return
+ */
+ public Element makeElement(String name, String namespace, Element form) {
+ return new FormPtrElement(name, namespace, form);
+ }
+
+ /**
+ * <code>return new ModalDocument(new Element("root", "http://www.xom.nu/fakeRoot"));</code>
+ *
+ * <p>Subclasses adviced to return an instance of <code>Mode</code>. (Not required, though.)
+ *
+ * @return
+ */
+ public Document makeDocument() {
+ return new ModalDocument(new Element("root", "http://www.xom.nu/fakeRoot"));
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/XOMTreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/XOMTreeBuilder.java
new file mode 100644
index 000000000..623f31927
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/XOMTreeBuilder.java
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.xom;
+
+import nu.validator.htmlparser.common.DocumentMode;
+import nu.validator.htmlparser.impl.CoalescingTreeBuilder;
+import nu.validator.htmlparser.impl.HtmlAttributes;
+import nu.xom.Attribute;
+import nu.xom.Document;
+import nu.xom.Element;
+import nu.xom.Node;
+import nu.xom.Nodes;
+import nu.xom.ParentNode;
+import nu.xom.Text;
+import nu.xom.XMLException;
+
+import org.xml.sax.SAXException;
+
+/**
+ * Tree builder that assembles the parse result as a XOM tree. All nodes are
+ * created through the supplied <code>SimpleNodeFactory</code>, and any
+ * <code>XMLException</code> raised by XOM is passed to <code>fatal</code>.
+ */
+class XOMTreeBuilder extends CoalescingTreeBuilder<Element> {
+
+    /** Factory used for every node this builder creates. */
+    private final SimpleNodeFactory nodeFactory;
+
+    /** The document under construction; <code>null</code> once handed out. */
+    private Document document;
+
+    /**
+     * Index of <code>cachedTable</code> within its parent, as found by the
+     * last lookup and incremented for every node this class has inserted in
+     * front of the table since then; -1 when nothing is cached.
+     */
+    private int cachedTableIndex = -1;
+
+    /** The table whose index is cached, or <code>null</code>. */
+    private Element cachedTable = null;
+
+    /**
+     * Creates a builder that makes its nodes with the given factory.
+     *
+     * @param nodeFactory the node factory
+     */
+    protected XOMTreeBuilder(SimpleNodeFactory nodeFactory) {
+        super();
+        this.nodeFactory = nodeFactory;
+    }
+
+    /**
+     * Creates the XOM attribute for one entry of an attribute holder.
+     *
+     * @param attributes the attribute holder
+     * @param index index of the attribute; must be within bounds
+     * @return the new attribute, typed ID when the holder reports "ID" and
+     *         CDATA otherwise
+     */
+    private Attribute makeAttribute(HtmlAttributes attributes, int index) {
+        // equals() rather than == so the result does not depend on the type
+        // string being the very same String instance as the literal.
+        Attribute.Type type = "ID".equals(attributes.getTypeNoBoundsCheck(index))
+                ? Attribute.Type.ID
+                : Attribute.Type.CDATA;
+        return nodeFactory.makeAttribute(
+                attributes.getLocalNameNoBoundsCheck(index),
+                attributes.getURINoBoundsCheck(index),
+                attributes.getValueNoBoundsCheck(index),
+                type);
+    }
+
+    /**
+     * Adds every attribute of the holder to a newly created element.
+     *
+     * @param element the element to populate
+     * @param attributes the attributes to copy
+     */
+    private void copyAttributes(Element element, HtmlAttributes attributes) {
+        for (int i = 0; i < attributes.getLength(); i++) {
+            element.addAttribute(makeAttribute(attributes, i));
+        }
+    }
+
+    /**
+     * Adds to an existing element those attributes that it does not already
+     * have (matched by local name and namespace URI).
+     */
+    @Override
+    protected void addAttributesToElement(Element element, HtmlAttributes attributes)
+            throws SAXException {
+        try {
+            for (int i = 0; i < attributes.getLength(); i++) {
+                String localName = attributes.getLocalNameNoBoundsCheck(i);
+                String uri = attributes.getURINoBoundsCheck(i);
+                if (element.getAttribute(localName, uri) == null) {
+                    element.addAttribute(makeAttribute(attributes, i));
+                }
+            }
+        } catch (XMLException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Appends text to a parent, merging it into the last child when that
+     * child is already a text node.
+     */
+    @Override
+    protected void appendCharacters(Element parent, String text)
+            throws SAXException {
+        try {
+            int childCount = parent.getChildCount();
+            Node lastChild;
+            if (childCount != 0
+                    && ((lastChild = parent.getChild(childCount - 1)) instanceof Text)) {
+                Text lastAsText = (Text) lastChild;
+                lastAsText.setValue(lastAsText.getValue() + text);
+                return;
+            }
+            parent.appendChild(nodeFactory.makeText(text));
+        } catch (XMLException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Moves all children of one element to the end of another element,
+     * preserving their order.
+     */
+    @Override
+    protected void appendChildrenToNewParent(Element oldParent,
+            Element newParent) throws SAXException {
+        try {
+            Nodes children = oldParent.removeChildren();
+            for (int i = 0; i < children.size(); i++) {
+                newParent.appendChild(children.get(i));
+            }
+        } catch (XMLException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Appends a comment node to the given parent.
+     */
+    @Override
+    protected void appendComment(Element parent, String comment) throws SAXException {
+        try {
+            parent.appendChild(nodeFactory.makeComment(comment));
+        } catch (XMLException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Adds a comment as a child of the document. If the root element is in
+     * the fake-root namespace, the comment is inserted in front of the root;
+     * otherwise it is appended to the document.
+     */
+    @Override
+    protected void appendCommentToDocument(String comment)
+            throws SAXException {
+        try {
+            Element root = document.getRootElement();
+            if ("http://www.xom.nu/fakeRoot".equals(root.getNamespaceURI())) {
+                document.insertChild(nodeFactory.makeComment(comment), document.indexOf(root));
+            } else {
+                document.appendChild(nodeFactory.makeComment(comment));
+            }
+        } catch (XMLException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Creates an element with the given attributes. The intended parent is
+     * not used by this implementation.
+     */
+    @Override
+    protected Element createElement(String ns, String name,
+            HtmlAttributes attributes, Element intendedParent) throws SAXException {
+        try {
+            Element rv = nodeFactory.makeElement(name, ns);
+            copyAttributes(rv, attributes);
+            return rv;
+        } catch (XMLException e) {
+            fatal(e);
+            throw new RuntimeException("Unreachable");
+        }
+    }
+
+    /**
+     * Creates the XHTML-namespace <code>html</code> element with the given
+     * attributes and installs it as the root element of the document.
+     */
+    @Override
+    protected Element createHtmlElementSetAsRoot(
+            HtmlAttributes attributes) throws SAXException {
+        try {
+            Element rv = nodeFactory.makeElement("html",
+                    "http://www.w3.org/1999/xhtml");
+            copyAttributes(rv, attributes);
+            document.setRootElement(rv);
+            return rv;
+        } catch (XMLException e) {
+            fatal(e);
+            throw new RuntimeException("Unreachable");
+        }
+    }
+
+    /**
+     * Detaches the element from its parent.
+     */
+    @Override
+    protected void detachFromParent(Element element) throws SAXException {
+        try {
+            element.detach();
+        } catch (XMLException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Detaches the child from its current parent and appends it to the new
+     * parent.
+     */
+    @Override
+    protected void appendElement(Element child,
+            Element newParent) throws SAXException {
+        try {
+            child.detach();
+            newParent.appendChild(child);
+        } catch (XMLException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Tells whether the element has at least one child node.
+     */
+    @Override
+    protected boolean hasChildren(Element element) throws SAXException {
+        try {
+            return element.getChildCount() != 0;
+        } catch (XMLException e) {
+            fatal(e);
+            throw new RuntimeException("Unreachable");
+        }
+    }
+
+    /**
+     * Returns the document and drops this builder's reference to it.
+     *
+     * @return the document
+     */
+    Document getDocument() {
+        Document rv = document;
+        document = null;
+        return rv;
+    }
+
+    /**
+     * Removes all children from the document's root element, returns them
+     * and drops this builder's reference to the document.
+     *
+     * @return the former children of the root element
+     */
+    Nodes getDocumentFragment() {
+        Element rootElt = document.getRootElement();
+        Nodes rv = rootElt.removeChildren();
+        document = null;
+        return rv;
+    }
+
+    /**
+     * Creates an element that is associated with a form element, using the
+     * three-argument <code>makeElement</code> of the node factory. The
+     * intended parent is not used by this implementation.
+     */
+    @Override
+    protected Element createElement(String ns, String name,
+            HtmlAttributes attributes, Element form, Element intendedParent) throws SAXException {
+        try {
+            Element rv = nodeFactory.makeElement(name,
+                    ns, form);
+            copyAttributes(rv, attributes);
+            return rv;
+        } catch (XMLException e) {
+            fatal(e);
+            throw new RuntimeException("Unreachable");
+        }
+    }
+
+    /**
+     * Obtains a fresh document from the node factory and resets the table
+     * index cache. The <code>fragment</code> flag is not used here.
+     */
+    @Override
+    protected void start(boolean fragment) throws SAXException {
+        document = nodeFactory.makeDocument();
+        cachedTableIndex = -1;
+        cachedTable = null;
+    }
+
+    /**
+     * Records the document mode on the document when the document implements
+     * <code>Mode</code>; the other arguments are ignored.
+     */
+    @Override
+    protected void documentMode(DocumentMode mode, String publicIdentifier,
+            String systemIdentifier, boolean html4SpecificAdditionalErrorChecks)
+            throws SAXException {
+        if (document instanceof Mode) {
+            Mode modal = (Mode) document;
+            modal.setMode(mode);
+        }
+    }
+
+    /**
+     * Creates an element and foster-parents it: it is inserted immediately
+     * before the table in the table's parent, or appended to
+     * <code>stackParent</code> when the table has no parent.
+     */
+    @Override
+    protected Element createAndInsertFosterParentedElement(String ns, String name,
+            HtmlAttributes attributes, Element table, Element stackParent) throws SAXException {
+        try {
+            Node parent = table.getParent();
+            Element child = createElement(ns, name, attributes, parent != null ? (Element) parent : stackParent);
+            if (parent != null) { // always an element if not null
+                ParentNode tableParent = (ParentNode) parent;
+                // Look the table up in the node we insert into; searching
+                // stackParent would give -1 whenever the two differ.
+                tableParent.insertChild(child, indexOfTable(table, tableParent));
+                cachedTableIndex++;
+            } else {
+                stackParent.appendChild(child);
+            }
+            return child;
+        } catch (XMLException e) {
+            fatal(e);
+            throw new RuntimeException("Unreachable");
+        }
+    }
+
+    /**
+     * Foster-parents text: it goes immediately before the table in the
+     * table's parent, or at the end of <code>stackParent</code> when the
+     * table has no parent. In both cases the text is merged into an adjacent
+     * preceding text node when there is one.
+     */
+    @Override
+    protected void insertFosterParentedCharacters(String text,
+            Element table, Element stackParent) throws SAXException {
+        try {
+            Node parent = table.getParent();
+            if (parent != null) { // always an element if not null
+                Element parentAsElt = (Element) parent;
+                int tableIndex = indexOfTable(table, parentAsElt);
+                Node prevSibling;
+                if (tableIndex != 0
+                        && ((prevSibling = parentAsElt.getChild(tableIndex - 1)) instanceof Text)) {
+                    Text prevAsText = (Text) prevSibling;
+                    prevAsText.setValue(prevAsText.getValue() + text);
+                    return;
+                }
+                parentAsElt.insertChild(nodeFactory.makeText(text), tableIndex);
+                cachedTableIndex++;
+                return;
+            }
+            int childCount = stackParent.getChildCount();
+            Node lastChild;
+            if (childCount != 0
+                    && ((lastChild = stackParent.getChild(childCount - 1)) instanceof Text)) {
+                Text lastAsText = (Text) lastChild;
+                lastAsText.setValue(lastAsText.getValue() + text);
+                return;
+            }
+            stackParent.appendChild(nodeFactory.makeText(text));
+        } catch (XMLException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Foster-parents an existing element: it is inserted immediately before
+     * the table in the table's parent, or appended to
+     * <code>stackParent</code> when the table has no parent.
+     */
+    @Override
+    protected void insertFosterParentedChild(Element child,
+            Element table, Element stackParent) throws SAXException {
+        try {
+            Node parent = table.getParent();
+            if (parent != null) { // always an element if not null
+                ParentNode tableParent = (ParentNode) parent;
+                // Look the table up in the node we insert into; searching
+                // stackParent would give -1 whenever the two differ.
+                tableParent.insertChild(child, indexOfTable(table, tableParent));
+                cachedTableIndex++;
+            } else {
+                stackParent.appendChild(child);
+            }
+        } catch (XMLException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Returns the index of the table within the given parent, reusing the
+     * cached index when the same table was looked up last.
+     *
+     * @param table the table element
+     * @param parent the parent node of the table
+     * @return the (possibly cached) index of the table
+     */
+    private int indexOfTable(Element table, ParentNode parent) {
+        if (table == cachedTable) {
+            return cachedTableIndex;
+        }
+        cachedTable = table;
+        cachedTableIndex = parent.indexOf(table);
+        return cachedTableIndex;
+    }
+
+    /**
+     * Resets the table index cache.
+     */
+    @Override
+    protected void end() throws SAXException {
+        cachedTableIndex = -1;
+        cachedTable = null;
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/package.html
new file mode 100644
index 000000000..a936d5e3a
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/package.html
@@ -0,0 +1,29 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>This package provides an HTML5 parser that exposes the document through the XOM API.</p>
+</body>
+</html> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/CDATA.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/CDATA.java
new file mode 100644
index 000000000..f17ce3f89
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/CDATA.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * Tree node representing a CDATA section.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class CDATA extends ParentNode {
+
+    /**
+     * Creates a CDATA section node.
+     *
+     * @param locator the locator
+     */
+    public CDATA(Locator locator) {
+        super(locator);
+    }
+
+    /**
+     * Returns <code>NodeType.CDATA</code>.
+     *
+     * @see nu.validator.saxtree.Node#getNodeType()
+     */
+    @Override
+    public NodeType getNodeType() {
+        return NodeType.CDATA;
+    }
+
+    /**
+     * Reports the start of this CDATA section to the tree parser.
+     *
+     * @throws SAXException if the tree parser throws it
+     * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+     */
+    @Override
+    void visit(TreeParser treeParser) throws SAXException {
+        treeParser.startCDATA(this);
+    }
+
+    /**
+     * Reports the end of this CDATA section to the tree parser, passing the
+     * end locator.
+     *
+     * @throws SAXException if the tree parser throws it
+     * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser)
+     */
+    @Override
+    void revisit(TreeParser treeParser) throws SAXException {
+        treeParser.endCDATA(endLocator);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/CharBufferNode.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/CharBufferNode.java
new file mode 100644
index 000000000..55c7715f6
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/CharBufferNode.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+
+/**
+ * A common superclass for character buffer node classes.
+ * @version $Id$
+ * @author hsivonen
+ */
+public abstract class CharBufferNode extends Node {
+
+ /**
+ * The buffer.
+ */
+ protected final char[] buffer;
+
+ /**
+ * The constructor.
+ * @param locator the locator
+ * @param buf the buffer
+ * @param start the offset
+ * @param length the length
+ */
+ CharBufferNode(Locator locator, char[] buf, int start, int length) {
+ super(locator);
+ this.buffer = new char[length];
+ System.arraycopy(buf, start, buffer, 0, length);
+ }
+
+ /**
+ * Returns the wrapped buffer as a string.
+ *
+ * @see java.lang.Object#toString()
+ */
+ @Override
+ public String toString() {
+ return new String(buffer);
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Characters.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Characters.java
new file mode 100644
index 000000000..b8cc2d6d6
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Characters.java
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * Tree node representing a run of characters.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class Characters extends CharBufferNode {
+
+    /**
+     * Creates a character node from a section of a buffer.
+     *
+     * @param locator the locator
+     * @param buf the source buffer
+     * @param start offset of the first character in the buffer
+     * @param length number of characters
+     */
+    public Characters(Locator locator, char[] buf, int start, int length) {
+        super(locator, buf, start, length);
+    }
+
+    /**
+     * Returns <code>NodeType.CHARACTERS</code>.
+     *
+     * @see nu.validator.saxtree.Node#getNodeType()
+     */
+    @Override
+    public NodeType getNodeType() {
+        return NodeType.CHARACTERS;
+    }
+
+    /**
+     * Passes the whole buffer of this node to the tree parser as characters.
+     *
+     * @throws SAXException if the tree parser throws it
+     * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+     */
+    @Override
+    void visit(TreeParser treeParser) throws SAXException {
+        final int length = buffer.length;
+        treeParser.characters(buffer, 0, length, this);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Comment.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Comment.java
new file mode 100644
index 000000000..f010462fb
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Comment.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * Tree node representing a comment.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class Comment extends CharBufferNode {
+
+    /**
+     * Creates a comment node from a section of a buffer.
+     *
+     * @param locator the locator
+     * @param buf the source buffer
+     * @param start offset of the first character in the buffer
+     * @param length number of characters
+     */
+    public Comment(Locator locator, char[] buf, int start, int length) {
+        super(locator, buf, start, length);
+    }
+
+    /**
+     * Returns <code>NodeType.COMMENT</code>.
+     *
+     * @see nu.validator.saxtree.Node#getNodeType()
+     */
+    @Override
+    public NodeType getNodeType() {
+        return NodeType.COMMENT;
+    }
+
+    /**
+     * Passes the whole buffer of this node to the tree parser as a comment.
+     *
+     * @throws SAXException if the tree parser throws it
+     * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+     */
+    @Override
+    void visit(TreeParser treeParser) throws SAXException {
+        final int length = buffer.length;
+        treeParser.comment(buffer, 0, length, this);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/DTD.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/DTD.java
new file mode 100644
index 000000000..2169e0571
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/DTD.java
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * Tree node representing a doctype, holding its name, public identifier and
+ * system identifier.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class DTD extends ParentNode {
+
+    /** The doctype name. */
+    private final String name;
+
+    /** The public identifier. */
+    private final String publicIdentifier;
+
+    /** The system identifier. */
+    private final String systemIdentifier;
+
+    /**
+     * Creates a doctype node.
+     *
+     * @param locator the locator
+     * @param name the doctype name
+     * @param publicIdentifier the public identifier
+     * @param systemIdentifier the system identifier
+     */
+    public DTD(Locator locator, String name, String publicIdentifier, String systemIdentifier) {
+        super(locator);
+        this.systemIdentifier = systemIdentifier;
+        this.publicIdentifier = publicIdentifier;
+        this.name = name;
+    }
+
+    /**
+     * Returns the doctype name.
+     *
+     * @return the name
+     */
+    public String getName() {
+        return this.name;
+    }
+
+    /**
+     * Returns the public identifier.
+     *
+     * @return the public identifier
+     */
+    public String getPublicIdentifier() {
+        return this.publicIdentifier;
+    }
+
+    /**
+     * Returns the system identifier.
+     *
+     * @return the system identifier
+     */
+    public String getSystemIdentifier() {
+        return this.systemIdentifier;
+    }
+
+    /**
+     * Returns <code>NodeType.DTD</code>.
+     *
+     * @see nu.validator.saxtree.Node#getNodeType()
+     */
+    @Override
+    public NodeType getNodeType() {
+        return NodeType.DTD;
+    }
+
+    /**
+     * Reports the start of this doctype, with its name and identifiers, to
+     * the tree parser.
+     *
+     * @throws SAXException if the tree parser throws it
+     * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+     */
+    @Override
+    void visit(TreeParser treeParser) throws SAXException {
+        treeParser.startDTD(name, publicIdentifier, systemIdentifier, this);
+    }
+
+    /**
+     * Reports the end of this doctype to the tree parser, passing the end
+     * locator.
+     *
+     * @throws SAXException if the tree parser throws it
+     * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser)
+     */
+    @Override
+    void revisit(TreeParser treeParser) throws SAXException {
+        treeParser.endDTD(endLocator);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Document.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Document.java
new file mode 100644
index 000000000..3bb6f09c7
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Document.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * A document node.
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class Document extends ParentNode {
+
+    /**
+     * Creates a document node.
+     * @param locator the locator handed to the superclass constructor
+     */
+    public Document(Locator locator) {
+        super(locator);
+    }
+
+    /**
+     * Identifies this node as a document node.
+     * @see nu.validator.saxtree.Node#getNodeType()
+     */
+    @Override
+    public NodeType getNodeType() {
+        return NodeType.DOCUMENT;
+    }
+
+    /**
+     * Reports the start of the document to the tree parser, passing this node along.
+     * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+     */
+    @Override
+    void visit(TreeParser treeParser) throws SAXException {
+        treeParser.startDocument(this);
+    }
+
+    /**
+     * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser)
+     */
+    @Override
+    void revisit(TreeParser treeParser) throws SAXException {
+        treeParser.endDocument(this.endLocator);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/DocumentFragment.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/DocumentFragment.java
new file mode 100644
index 000000000..06816932f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/DocumentFragment.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.helpers.LocatorImpl;
+
+/**
+ * A document fragment node.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class DocumentFragment extends ParentNode {
+
+    /**
+     * Creates a fragment, handing a newly created locator to the superclass.
+     */
+    public DocumentFragment() {
+        super(new LocatorImpl());
+    }
+
+    /**
+     * Identifies this node as a document fragment.
+     * @see nu.validator.saxtree.Node#getNodeType()
+     */
+    @Override public NodeType getNodeType() {
+        return NodeType.DOCUMENT_FRAGMENT;
+    }
+
+    /**
+     * Reports nothing to the tree parser.
+     * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+     */
+    @Override void visit(TreeParser treeParser) {
+        // deliberately empty
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Element.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Element.java
new file mode 100644
index 000000000..3d33164e5
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Element.java
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import java.util.List;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+
+/**
+ * An element node.
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class Element extends ParentNode {
+
+    /**
+     * The namespace URI.
+     */
+    private final String uri;
+
+    /**
+     * The local name.
+     */
+    private final String localName;
+
+    /**
+     * The qualified name.
+     */
+    private final String qName;
+
+    /**
+     * The attributes (either the caller's object or a copy of it).
+     */
+    private final Attributes attributes;
+
+    /**
+     * The namespace prefix mappings; may be <code>null</code>.
+     */
+    private final List<PrefixMapping> prefixMappings;
+
+    /**
+     * The constructor.
+     * @param locator the locator.
+     * @param uri the namespace URI
+     * @param localName the local name
+     * @param qName the qualified name
+     * @param atts the attributes
+     * @param retainAttributes <code>true</code> to retain the attributes instead of copying
+     * @param prefixMappings the prefix mappings
+     */
+    public Element(Locator locator, String uri, String localName, String qName,
+            Attributes atts, boolean retainAttributes,
+            List<PrefixMapping> prefixMappings) {
+        super(locator);
+        this.attributes = retainAttributes
+                ? atts
+                : new AttributesImpl(atts);
+        this.prefixMappings = prefixMappings;
+        this.qName = qName;
+        this.localName = localName;
+        this.uri = uri;
+    }
+
+    /**
+     * Reports the prefix mappings (if any) and then the start of the element.
+     * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+     */
+    @Override
+    void visit(TreeParser treeParser) throws SAXException {
+        if (prefixMappings != null) {
+            for (PrefixMapping pm : prefixMappings) {
+                treeParser.startPrefixMapping(pm.getPrefix(), pm.getUri(), this);
+            }
+        }
+        treeParser.startElement(uri, localName, qName, attributes, this);
+    }
+
+    /**
+     * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser)
+     */
+    @Override
+    void revisit(TreeParser treeParser) throws SAXException {
+        treeParser.endElement(uri, localName, qName, endLocator);
+        if (prefixMappings == null) {
+            return;
+        }
+        for (PrefixMapping pm : prefixMappings) {
+            treeParser.endPrefixMapping(pm.getPrefix(), endLocator);
+        }
+    }
+
+    /**
+     * Returns the namespace URI.
+     *
+     * @return the uri
+     */
+    public String getUri() {
+        return this.uri;
+    }
+
+    /**
+     * Returns the local name.
+     *
+     * @return the localName
+     */
+    public String getLocalName() {
+        return this.localName;
+    }
+
+    /**
+     * Returns the qualified name.
+     *
+     * @return the qName
+     */
+    public String getQName() {
+        return this.qName;
+    }
+
+    /**
+     * Returns the attributes held by this element.
+     *
+     * @return the attributes
+     */
+    public Attributes getAttributes() {
+        return this.attributes;
+    }
+
+    /**
+     * Returns the prefix mappings given to the constructor.
+     *
+     * @return the prefixMappings
+     */
+    public List<PrefixMapping> getPrefixMappings() {
+        return this.prefixMappings;
+    }
+
+    /**
+     * Identifies this node as an element.
+     * @see nu.validator.saxtree.Node#getNodeType()
+     */
+    @Override
+    public NodeType getNodeType() {
+        return NodeType.ELEMENT;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Entity.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Entity.java
new file mode 100644
index 000000000..091013736
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Entity.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * An entity node.
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class Entity extends ParentNode {
+
+    /**
+     * The entity name.
+     */
+    private final String name;
+
+    /**
+     * Creates an entity node.
+     * @param locator the locator handed to the superclass constructor
+     * @param name the entity name
+     */
+    public Entity(Locator locator, String name) {
+        super(locator);
+        this.name = name;
+    }
+
+    /**
+     * Returns the entity name.
+     *
+     * @return the name
+     */
+    public String getName() {
+        return this.name;
+    }
+
+    /**
+     * Identifies this node as an entity.
+     * @see nu.validator.saxtree.Node#getNodeType()
+     */
+    @Override
+    public NodeType getNodeType() {
+        return NodeType.ENTITY;
+    }
+
+    /**
+     * Reports the start of the entity to the tree parser, passing this node along.
+     * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+     */
+    @Override
+    void visit(TreeParser treeParser) throws SAXException {
+        treeParser.startEntity(this.name, this);
+    }
+
+    /**
+     * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser)
+     */
+    @Override
+    void revisit(TreeParser treeParser) throws SAXException {
+        treeParser.endEntity(this.name, this.endLocator);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/IgnorableWhitespace.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/IgnorableWhitespace.java
new file mode 100644
index 000000000..e5fcf350f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/IgnorableWhitespace.java
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * A run of ignorable whitespace.
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class IgnorableWhitespace extends CharBufferNode {
+
+    /**
+     * Creates the node; all arguments are handed to the superclass constructor.
+     * @param locator the locator
+     * @param buf the buffer
+     * @param start the offset
+     * @param length the length
+     */
+    public IgnorableWhitespace(Locator locator, char[] buf, int start, int length) {
+        super(locator, buf, start, length);
+    }
+
+    /**
+     * Identifies this node as ignorable whitespace.
+     * @see nu.validator.saxtree.Node#getNodeType()
+     */
+    @Override
+    public NodeType getNodeType() {
+        return NodeType.IGNORABLE_WHITESPACE;
+    }
+
+    /**
+     * Reports the whole inherited buffer as ignorable whitespace.
+     * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+     */
+    @Override
+    void visit(TreeParser treeParser) throws SAXException {
+        treeParser.ignorableWhitespace(this.buffer, 0, this.buffer.length, this);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/LocatorImpl.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/LocatorImpl.java
new file mode 100644
index 000000000..37c0c6325
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/LocatorImpl.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007-2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+
+/**
+ * An immutable locator implementation.
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class LocatorImpl implements Locator {
+
+    /**
+     * The system id.
+     */
+    private final String systemId;
+
+    /**
+     * The public id.
+     */
+    private final String publicId;
+
+    /**
+     * The column.
+     */
+    private final int column;
+
+    /**
+     * The line.
+     */
+    private final int line;
+
+    /**
+     * Creates a snapshot of another locator.
+     * A <code>null</code> argument yields <code>null</code> ids and
+     * <code>-1</code> for both the line and the column.
+     *
+     * @param locator the locator to copy from, or <code>null</code>
+     */
+    public LocatorImpl(Locator locator) {
+        final boolean missing = (locator == null);
+
+        // The values are read here, once, and stored in final fields.
+        // Read order: system id, public id, column, line.
+        this.systemId = missing ? null : locator.getSystemId();
+        this.publicId = missing ? null : locator.getPublicId();
+        this.column = missing ? -1 : locator.getColumnNumber();
+        this.line = missing ? -1 : locator.getLineNumber();
+    }
+
+    /**
+     * Returns the stored system id.
+     * @see org.xml.sax.Locator#getSystemId()
+     */
+    public String getSystemId() {
+        return this.systemId;
+    }
+
+    /**
+     * Returns the stored public id.
+     * @see org.xml.sax.Locator#getPublicId()
+     */
+    public String getPublicId() {
+        return this.publicId;
+    }
+
+    /**
+     * Returns the stored line number.
+     * @see org.xml.sax.Locator#getLineNumber()
+     */
+    public int getLineNumber() {
+        return this.line;
+    }
+
+    /**
+     * Returns the stored column number.
+     * @see org.xml.sax.Locator#getColumnNumber()
+     */
+    public int getColumnNumber() {
+        return this.column;
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Node.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Node.java
new file mode 100644
index 000000000..7aed83b75
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Node.java
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007-2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import java.util.List;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * The common node superclass; it also reports the position it was created at.
+ * @version $Id$
+ * @author hsivonen
+ */
+public abstract class Node implements Locator {
+
+    /**
+     * The system id copied from the locator.
+     */
+    private final String systemId;
+
+    /**
+     * The public id copied from the locator.
+     */
+    private final String publicId;
+
+    /**
+     * The column, or -1 when the node was created without a locator.
+     */
+    private final int column;
+
+    /**
+     * The line, or -1 when the node was created without a locator.
+     */
+    private final int line;
+
+    /**
+     * The next sibling.
+     */
+    private Node nextSibling = null;
+
+    /**
+     * The parent.
+     */
+    private ParentNode parentNode = null;
+
+    /**
+     * The constructor. Copies the ids, column and line out of the locator.
+     *
+     * @param locator the locator to copy from, or <code>null</code>
+     */
+    Node(Locator locator) {
+        if (locator == null) {
+            this.systemId = null;
+            this.publicId = null;
+            this.column = -1;
+            this.line = -1;
+        } else {
+            this.systemId = locator.getSystemId();
+            this.publicId = locator.getPublicId();
+            this.column = locator.getColumnNumber();
+            this.line = locator.getLineNumber();
+        }
+    }
+
+    /**
+     * Returns the column recorded at construction.
+     * @see org.xml.sax.Locator#getColumnNumber()
+     */
+    public int getColumnNumber() {
+        return column;
+    }
+
+    /**
+     * Returns the line recorded at construction.
+     * @see org.xml.sax.Locator#getLineNumber()
+     */
+    public int getLineNumber() {
+        return line;
+    }
+
+    /**
+     * Returns the public id recorded at construction.
+     * @see org.xml.sax.Locator#getPublicId()
+     */
+    public String getPublicId() {
+        return publicId;
+    }
+
+    /**
+     * Returns the system id recorded at construction.
+     * @see org.xml.sax.Locator#getSystemId()
+     */
+    public String getSystemId() {
+        return systemId;
+    }
+
+    /**
+     * Visit the node.
+     *
+     * @param treeParser the visitor
+     * @throws SAXException if stuff goes wrong
+     */
+    abstract void visit(TreeParser treeParser) throws SAXException;
+
+    /**
+     * Revisit the node. This base implementation does nothing.
+     *
+     * @param treeParser the visitor
+     * @throws SAXException if stuff goes wrong
+     */
+    void revisit(TreeParser treeParser) throws SAXException {
+        // nothing to report by default
+    }
+
+    /**
+     * Return the first child. This base implementation has no children.
+     * @return the first child; always <code>null</code> here
+     */
+    public Node getFirstChild() {
+        return null;
+    }
+
+    /**
+     * Returns the nextSibling.
+     *
+     * @return the nextSibling
+     */
+    public final Node getNextSibling() {
+        return nextSibling;
+    }
+
+    /**
+     * Returns the previous sibling by walking the parent's child list.
+     * @return the previous sibling, or <code>null</code> for a first child or a parentless node
+     */
+    public final Node getPreviousSibling() {
+        // A node without a parent has no list to search and so no siblings.
+        Node cur = (parentNode == null) ? null : parentNode.getFirstChild();
+        Node prev = null;
+        while (cur != null && cur != this) {
+            prev = cur;
+            cur = cur.nextSibling;
+        }
+        // cur is null only if this node was not found among the children.
+        return (cur == this) ? prev : null;
+    }
+
+    /**
+     * Sets the nextSibling.
+     *
+     * @param nextSibling the nextSibling to set
+     */
+    void setNextSibling(Node nextSibling) {
+        this.nextSibling = nextSibling;
+    }
+
+
+    /**
+     * Returns the parentNode.
+     *
+     * @return the parentNode
+     */
+    public final ParentNode getParentNode() {
+        return parentNode;
+    }
+
+    /**
+     * Sets the parentNode.
+     *
+     * @param parentNode the parentNode to set
+     */
+    void setParentNode(ParentNode parentNode) {
+        this.parentNode = parentNode;
+    }
+
+    /**
+     * Return the node type.
+     * @return the node type
+     */
+    public abstract NodeType getNodeType();
+
+    /**
+     * Detach this node from its parent. Does nothing if there is no parent.
+     */
+    public void detach() {
+        if (parentNode != null) {
+            parentNode.removeChild(this);
+            parentNode = null;
+        }
+    }
+
+    // Subclass-specific accessors that are hoisted here to
+    // avoid casting.
+
+    /**
+     * Returns the name.
+     * @throws UnsupportedOperationException in this base implementation
+     * @return the name
+     */
+    public String getName() {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Returns the publicIdentifier.
+     * @throws UnsupportedOperationException in this base implementation
+     * @return the publicIdentifier
+     */
+    public String getPublicIdentifier() {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Returns the systemIdentifier.
+     * @throws UnsupportedOperationException in this base implementation
+     * @return the systemIdentifier
+     */
+    public String getSystemIdentifier() {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Returns the attributes.
+     * @throws UnsupportedOperationException in this base implementation
+     * @return the attributes
+     */
+    public Attributes getAttributes() {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Returns the localName.
+     * @throws UnsupportedOperationException in this base implementation
+     * @return the localName
+     */
+    public String getLocalName() {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Returns the prefixMappings.
+     * @throws UnsupportedOperationException in this base implementation
+     * @return the prefixMappings
+     */
+    public List<PrefixMapping> getPrefixMappings() {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Returns the qName.
+     * @throws UnsupportedOperationException in this base implementation
+     * @return the qName
+     */
+    public String getQName() {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Returns the uri.
+     * @throws UnsupportedOperationException in this base implementation
+     * @return the uri
+     */
+    public String getUri() {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Returns the data.
+     * @throws UnsupportedOperationException in this base implementation
+     * @return the data
+     */
+    public String getData() {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Returns the target.
+     * @throws UnsupportedOperationException in this base implementation
+     * @return the target
+     */
+    public String getTarget() {
+        throw new UnsupportedOperationException();
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/NodeType.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/NodeType.java
new file mode 100644
index 000000000..c3c927f0d
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/NodeType.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+/**
+ * The node type, as reported by <code>Node.getNodeType()</code>.
+ * @version $Id$
+ * @author hsivonen
+ */
+public enum NodeType {
+    /**
+     * A CDATA section.
+     */
+    CDATA,
+    /**
+     * A run of characters.
+     */
+    CHARACTERS,
+    /**
+     * A comment.
+     */
+    COMMENT,
+    /**
+     * A document.
+     */
+    DOCUMENT,
+    /**
+     * A document fragment.
+     */
+    DOCUMENT_FRAGMENT,
+    /**
+     * A DTD (document type declaration).
+     */
+    DTD,
+    /**
+     * An element.
+     */
+    ELEMENT,
+    /**
+     * An entity.
+     */
+    ENTITY,
+    /**
+     * A run of ignorable whitespace.
+     */
+    IGNORABLE_WHITESPACE,
+    /**
+     * A processing instruction.
+     */
+    PROCESSING_INSTRUCTION,
+    /**
+     * A skipped entity.
+     */
+    SKIPPED_ENTITY
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/NullLexicalHandler.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/NullLexicalHandler.java
new file mode 100644
index 000000000..de63f3b57
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/NullLexicalHandler.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+ * A lexical handler whose callbacks all have empty bodies.
+ * @version $Id$
+ * @author hsivonen
+ */
+final class NullLexicalHandler implements LexicalHandler {
+
+    /**
+     * Does nothing.
+     * @see org.xml.sax.ext.LexicalHandler#comment(char[], int, int)
+     */
+    public void comment(char[] arg0, int arg1, int arg2) throws SAXException {
+    }
+
+    /**
+     * Does nothing.
+     * @see org.xml.sax.ext.LexicalHandler#startCDATA()
+     */
+    public void startCDATA() throws SAXException {
+    }
+
+    /**
+     * Does nothing.
+     * @see org.xml.sax.ext.LexicalHandler#endCDATA()
+     */
+    public void endCDATA() throws SAXException {
+    }
+
+    /**
+     * Does nothing.
+     * @see org.xml.sax.ext.LexicalHandler#startDTD(java.lang.String, java.lang.String, java.lang.String)
+     */
+    public void startDTD(String arg0, String arg1, String arg2) throws SAXException {
+    }
+
+    /**
+     * Does nothing.
+     * @see org.xml.sax.ext.LexicalHandler#endDTD()
+     */
+    public void endDTD() throws SAXException {
+    }
+
+    /**
+     * Does nothing.
+     * @see org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String)
+     */
+    public void startEntity(String arg0) throws SAXException {
+    }
+
+    /**
+     * Does nothing.
+     * @see org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String)
+     */
+    public void endEntity(String arg0) throws SAXException {
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/ParentNode.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/ParentNode.java
new file mode 100644
index 000000000..6cc96003f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/ParentNode.java
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+
+/**
+ * Common superclass for parent nodes. The children form a singly linked
+ * list chained through each child's next-sibling reference.
+ * @version $Id$
+ * @author hsivonen
+ */
+public abstract class ParentNode extends Node {
+
+    /** The end locator. */
+    protected Locator endLocator;
+
+    /** The first child. */
+    private Node firstChild = null;
+
+    /** The last child (for efficiency). */
+    private Node lastChild = null;
+
+    /**
+     * The constructor.
+     * @param locator the locator
+     */
+    ParentNode(Locator locator) {
+        super(locator);
+    }
+
+    /**
+     * Sets the endLocator to a copy of the given locator.
+     * @param endLocator the endLocator to set
+     */
+    public void setEndLocator(Locator endLocator) {
+        this.endLocator = new LocatorImpl(endLocator);
+    }
+
+    /**
+     * Copies the endLocator from another node.
+     * @param another the node to take the endLocator from
+     */
+    public void copyEndLocator(ParentNode another) {
+        this.endLocator = another.endLocator;
+    }
+
+    /** @return the first child, or <code>null</code> if there are no children */
+    public final Node getFirstChild() {
+        return firstChild;
+    }
+
+    /** @return the last child, or <code>null</code> if there are no children */
+    public final Node getLastChild() {
+        return lastChild;
+    }
+
+    /**
+     * Insert a new child before a pre-existing child and return the newly inserted child.
+     * Inserting a node before itself leaves the tree unchanged.
+     * @param child the new child
+     * @param sibling the existing child before which to insert (must be a child of this node) or <code>null</code> to append
+     * @return <code>child</code>
+     */
+    public Node insertBefore(Node child, Node sibling) {
+        assert sibling == null || this == sibling.getParentNode();
+        if (sibling == null) {
+            return appendChild(child);
+        }
+        if (child == sibling) {
+            // Detaching the child would also remove the insertion point.
+            return child;
+        }
+        child.detach();
+        child.setParentNode(this);
+        child.setNextSibling(sibling);
+        if (firstChild == sibling) {
+            firstChild = child;
+        } else {
+            // Find the node that currently precedes the sibling.
+            Node prev = firstChild;
+            while (prev.getNextSibling() != sibling) {
+                prev = prev.getNextSibling();
+            }
+            prev.setNextSibling(child);
+        }
+        return child;
+    }
+
+    /**
+     * Insert a new child between two adjacent children and return the child.
+     * @param child the new child
+     * @param prev the child to insert after or <code>null</code> to insert first
+     * @param next the child to insert before or <code>null</code> to append
+     * @return <code>child</code>
+     */
+    public Node insertBetween(Node child, Node prev, Node next) {
+        assert prev == null || this == prev.getParentNode();
+        assert next == null || this == next.getParentNode();
+        assert prev != null || next == firstChild;
+        assert next != null || prev == lastChild;
+        assert prev == null || next == null || prev.getNextSibling() == next;
+        if (next == null) {
+            return appendChild(child);
+        }
+        if (child == prev || child == next) {
+            // Relinking a node next to itself would make it its own sibling.
+            return child;
+        }
+        child.detach();
+        child.setParentNode(this);
+        child.setNextSibling(next);
+        if (prev == null) {
+            firstChild = child;
+        } else {
+            prev.setNextSibling(child);
+        }
+        return child;
+    }
+
+    /**
+     * Append a child to this node and return the child.
+     *
+     * @param child the child to append.
+     * @return <code>child</code>
+     */
+    public Node appendChild(Node child) {
+        child.detach();
+        child.setParentNode(this);
+        // detach() keeps the old next-sibling link; the last child must end the list.
+        child.setNextSibling(null);
+        if (firstChild == null) {
+            firstChild = child;
+        } else {
+            lastChild.setNextSibling(child);
+        }
+        lastChild = child;
+        return child;
+    }
+
+    /**
+     * Append the children of another node to this node, removing them from the other node.
+     * Passing this node itself or a node without children does nothing.
+     * @param parent the other node whose children to append to this one
+     */
+    public void appendChildren(Node parent) {
+        Node child = parent.getFirstChild();
+        if (child == null || parent == this) {
+            return;
+        }
+        ParentNode another = (ParentNode) parent;
+        if (firstChild == null) {
+            firstChild = child;
+        } else {
+            lastChild.setNextSibling(child);
+        }
+        lastChild = another.lastChild;
+        do {
+            child.setParentNode(this);
+        } while ((child = child.getNextSibling()) != null);
+        another.firstChild = null;
+        another.lastChild = null;
+    }
+
+    /**
+     * Remove a child from this node.
+     * @param node the child to remove
+     */
+    void removeChild(Node node) {
+        assert this == node.getParentNode();
+        if (firstChild == node) {
+            firstChild = node.getNextSibling();
+            if (lastChild == node) {
+                lastChild = null;
+            }
+        } else {
+            Node prev = firstChild;
+            while (prev.getNextSibling() != node) {
+                prev = prev.getNextSibling();
+            }
+            prev.setNextSibling(node.getNextSibling());
+            if (lastChild == node) {
+                lastChild = prev;
+            }
+        }
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/PrefixMapping.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/PrefixMapping.java
new file mode 100644
index 000000000..8ffaf4a2c
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/PrefixMapping.java
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+/**
+ * A prefix mapping.
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class PrefixMapping {
+ /**
+ * The namespace prefix.
+ */
+ private final String prefix;
+ /**
+ * The namespace URI.
+ */
+ private final String uri;
+ /**
+ * Constructor.
+ * @param prefix the prefix
+ * @param uri the URI
+ */
+ public PrefixMapping(final String prefix, final String uri) {
+ this.prefix = prefix;
+ this.uri = uri;
+ }
+ /**
+ * Returns the prefix.
+ *
+ * @return the prefix
+ */
+ public String getPrefix() {
+ return prefix;
+ }
+ /**
+ * Returns the uri.
+ *
+ * @return the uri
+ */
+ public String getUri() {
+ return uri;
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/ProcessingInstruction.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/ProcessingInstruction.java
new file mode 100644
index 000000000..014e63821
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/ProcessingInstruction.java
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * A processing instruction.
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class ProcessingInstruction extends Node {
+
+ /**
+ * PI target.
+ */
+ private final String target;
+
+ /**
+ * PI data.
+ */
+ private final String data;
+
+ /**
+ * Constructor.
+ * @param locator the locator
+ * @param target PI target
+ * @param data PI data
+ */
+ public ProcessingInstruction(Locator locator, String target, String data) {
+ super(locator);
+ this.target = target;
+ this.data = data;
+ }
+
+ /**
+ *
+ * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+ */
+ @Override
+ void visit(TreeParser treeParser) throws SAXException {
+ treeParser.processingInstruction(target, data, this);
+ }
+
+ /**
+ *
+ * @see nu.validator.saxtree.Node#getNodeType()
+ */
+ @Override
+ public NodeType getNodeType() {
+ return NodeType.PROCESSING_INSTRUCTION;
+ }
+
+ /**
+ * Returns the data.
+ *
+ * @return the data
+ */
+ public String getData() {
+ return data;
+ }
+
+ /**
+ * Returns the target.
+ *
+ * @return the target
+ */
+ public String getTarget() {
+ return target;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/SkippedEntity.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/SkippedEntity.java
new file mode 100644
index 000000000..01ca61490
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/SkippedEntity.java
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * A skipped entity.
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class SkippedEntity extends Node {
+
+ /**
+ * The name.
+ */
+ private final String name;
+
+ /**
+ * Constructor.
+ * @param locator the locator
+ * @param name the name
+ */
+ public SkippedEntity(Locator locator, String name) {
+ super(locator);
+ this.name = name;
+ }
+
+ /**
+ *
+ * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser)
+ */
+ @Override
+ void visit(TreeParser treeParser) throws SAXException {
+ treeParser.skippedEntity(name, this);
+ }
+
+ /**
+ *
+ * @see nu.validator.saxtree.Node#getNodeType()
+ */
+ @Override
+ public NodeType getNodeType() {
+ return NodeType.SKIPPED_ENTITY;
+ }
+
+ /**
+ * Returns the name.
+ *
+ * @return the name
+ */
+ public String getName() {
+ return name;
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeBuilder.java
new file mode 100644
index 000000000..39fe236b3
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeBuilder.java
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+ * Builds a SAX Tree representation of a document or a fragment
+ * streamed as <code>ContentHandler</code> and
+ * <code>LexicalHandler</code> events. The start/end event matching
+ * is expected to adhere to the SAX API contract. Things will
+ * simply break if this is not the case. Fragments are expected to
+ * omit <code>startDocument()</code> and <code>endDocument()</code>
+ * calls.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class TreeBuilder implements ContentHandler, LexicalHandler {
+
+ /**
+ * The locator.
+ */
+ private Locator locator;
+
+ /**
+ * The current node.
+ */
+ private ParentNode current;
+
+ /**
+ * Whether to retain attribute objects.
+ */
+ private final boolean retainAttributes;
+
+ /**
+ * The prefix mappings for the next element to be inserted.
+ */
+ private List<PrefixMapping> prefixMappings;
+
+ /**
+ * Constructs a reusable <code>TreeBuilder</code> that builds
+ * <code>Document</code>s and copies attributes.
+ */
+ public TreeBuilder() {
+ this(false, false);
+ }
+
+ /**
+ * The constructor. The instance will be reusable if building a full
+ * document and not reusable if building a fragment.
+ *
+ * @param fragment whether this <code>TreeBuilder</code> should build
+ * a <code>DocumentFragment</code> instead of a <code>Document</code>.
+ * @param retainAttributes whether instances of the <code>Attributes</code>
+ * interface passed to <code>startElement</code> should be retained
+ * (the alternative is copying).
+ */
+ public TreeBuilder(boolean fragment, boolean retainAttributes) {
+ if (fragment) {
+ current = new DocumentFragment();
+ }
+ this.retainAttributes = retainAttributes;
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ContentHandler#characters(char[], int, int)
+ */
+ public void characters(char[] ch, int start, int length) throws SAXException {
+ current.appendChild(new Characters(locator, ch, start, length));
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ContentHandler#endDocument()
+ */
+ public void endDocument() throws SAXException {
+ current.setEndLocator(locator);
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
+ */
+ public void endElement(String uri, String localName, String qName) throws SAXException {
+ current.setEndLocator(locator);
+ current = current.getParentNode();
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
+ */
+ public void endPrefixMapping(String prefix) throws SAXException {
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
+ */
+ public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
+ current.appendChild(new IgnorableWhitespace(locator, ch, start, length));
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String)
+ */
+ public void processingInstruction(String target, String data) throws SAXException {
+ current.appendChild(new ProcessingInstruction(locator, target, data));
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
+ */
+ public void setDocumentLocator(Locator locator) {
+ this.locator = locator;
+ }
+
+ public void skippedEntity(String name) throws SAXException {
+ current.appendChild(new SkippedEntity(locator, name));
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ContentHandler#startDocument()
+ */
+ public void startDocument() throws SAXException {
+ current = new Document(locator);
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
+ */
+ public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
+ current = (ParentNode) current.appendChild(new Element(locator, uri, localName, qName, atts, retainAttributes, prefixMappings));
+ prefixMappings = null;
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String)
+ */
+ public void startPrefixMapping(String prefix, String uri) throws SAXException {
+ if (prefixMappings == null) {
+ prefixMappings = new LinkedList<PrefixMapping>();
+ }
+ prefixMappings.add(new PrefixMapping(prefix, uri));
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ext.LexicalHandler#comment(char[], int, int)
+ */
+ public void comment(char[] ch, int start, int length) throws SAXException {
+ current.appendChild(new Comment(locator, ch, start, length));
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ext.LexicalHandler#endCDATA()
+ */
+ public void endCDATA() throws SAXException {
+ current.setEndLocator(locator);
+ current = current.getParentNode();
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ext.LexicalHandler#endDTD()
+ */
+ public void endDTD() throws SAXException {
+ current.setEndLocator(locator);
+ current = current.getParentNode();
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String)
+ */
+ public void endEntity(String name) throws SAXException {
+ current.setEndLocator(locator);
+ current = current.getParentNode();
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ext.LexicalHandler#startCDATA()
+ */
+ public void startCDATA() throws SAXException {
+ current = (ParentNode) current.appendChild(new CDATA(locator));
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ext.LexicalHandler#startDTD(java.lang.String, java.lang.String, java.lang.String)
+ */
+ public void startDTD(String name, String publicId, String systemId) throws SAXException {
+ current = (ParentNode) current.appendChild(new DTD(locator, name, publicId, systemId));
+ }
+
+ /**
+ *
+ * @see org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String)
+ */
+ public void startEntity(String name) throws SAXException {
+ current = (ParentNode) current.appendChild(new Entity(locator, name));
+ }
+
+ /**
+ * Returns the root (<code>Document</code> if building a full document or
+ * <code>DocumentFragment</code> if building a fragment).
+ *
+ * @return the root
+ */
+ public ParentNode getRoot() {
+ return current;
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeParser.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeParser.java
new file mode 100644
index 000000000..a9d92deb0
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeParser.java
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+ * A tree visitor that replays a tree as SAX events.
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class TreeParser implements Locator {
+
+ /**
+ * The content handler.
+ */
+ private final ContentHandler contentHandler;
+
+ /**
+ * The lexical handler.
+ */
+ private final LexicalHandler lexicalHandler;
+
+ /**
+ * The current locator.
+ */
+ private Locator locatorDelegate;
+
+ /**
+ * The constructor.
+ *
+ * @param contentHandler
+ * must not be <code>null</code>
+ * @param lexicalHandler
+ * may be <code>null</code>
+ */
+ public TreeParser(final ContentHandler contentHandler,
+ final LexicalHandler lexicalHandler) {
+ if (contentHandler == null) {
+ throw new IllegalArgumentException("contentHandler was null.");
+ }
+ this.contentHandler = contentHandler;
+ if (lexicalHandler == null) {
+ this.lexicalHandler = new NullLexicalHandler();
+ } else {
+ this.lexicalHandler = lexicalHandler;
+ }
+ }
+
+ /**
+ * Causes SAX events for the tree rooted at the argument to be emitted.
+ * <code>startDocument()</code> and <code>endDocument()</code> are only
+ * emitted for a <code>Document</code> node.
+ *
+ * @param node
+ * the root
+ * @throws SAXException if thrown by a handler receiving the replayed events
+ */
+ public void parse(Node node) throws SAXException {
+ contentHandler.setDocumentLocator(this);
+ Node current = node;
+ Node next;
+ for (;;) {
+ current.visit(this);
+ if ((next = current.getFirstChild()) != null) {
+ current = next;
+ continue;
+ }
+ for (;;) {
+ current.revisit(this);
+ if (current == node) {
+ return;
+ }
+ if ((next = current.getNextSibling()) != null) {
+ current = next;
+ break;
+ }
+ current = current.getParentNode();
+ }
+ }
+ }
+
+ /**
+ * @see org.xml.sax.ContentHandler#characters(char[], int, int)
+ */
+ void characters(char[] ch, int start, int length, Locator locator)
+ throws SAXException {
+ this.locatorDelegate = locator;
+ contentHandler.characters(ch, start, length);
+ }
+
+ /**
+ * @see org.xml.sax.ContentHandler#endDocument()
+ */
+ void endDocument(Locator locator) throws SAXException {
+ this.locatorDelegate = locator;
+ contentHandler.endDocument();
+ }
+
+ /**
+ * @see org.xml.sax.ContentHandler#endElement(java.lang.String,
+ * java.lang.String, java.lang.String)
+ */
+ void endElement(String uri, String localName, String qName, Locator locator)
+ throws SAXException {
+ this.locatorDelegate = locator;
+ contentHandler.endElement(uri, localName, qName);
+ }
+
+ /**
+ * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
+ */
+ void endPrefixMapping(String prefix, Locator locator) throws SAXException {
+ this.locatorDelegate = locator;
+ contentHandler.endPrefixMapping(prefix);
+ }
+
+ /**
+ * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
+ */
+ void ignorableWhitespace(char[] ch, int start, int length, Locator locator)
+ throws SAXException {
+ this.locatorDelegate = locator;
+ contentHandler.ignorableWhitespace(ch, start, length);
+ }
+
+ /**
+ * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String,
+ * java.lang.String)
+ */
+ void processingInstruction(String target, String data, Locator locator)
+ throws SAXException {
+ this.locatorDelegate = locator;
+ contentHandler.processingInstruction(target, data);
+ }
+
+ /**
+ * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
+ */
+ void skippedEntity(String name, Locator locator) throws SAXException {
+ this.locatorDelegate = locator;
+ contentHandler.skippedEntity(name);
+ }
+
+ /**
+ * @see org.xml.sax.ContentHandler#startDocument()
+ */
+ void startDocument(Locator locator) throws SAXException {
+ this.locatorDelegate = locator;
+ contentHandler.startDocument();
+ }
+
+ /**
+ * @see org.xml.sax.ContentHandler#startElement(java.lang.String,
+ * java.lang.String, java.lang.String, org.xml.sax.Attributes)
+ */
+ void startElement(String uri, String localName, String qName,
+ Attributes atts, Locator locator) throws SAXException {
+ this.locatorDelegate = locator;
+ contentHandler.startElement(uri, localName, qName, atts);
+ }
+
+ /**
+ * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String,
+ * java.lang.String)
+ */
+ void startPrefixMapping(String prefix, String uri, Locator locator)
+ throws SAXException {
+ this.locatorDelegate = locator;
+ contentHandler.startPrefixMapping(prefix, uri);
+ }
+
+ /**
+ * @see org.xml.sax.ext.LexicalHandler#comment(char[], int, int)
+ */
+ void comment(char[] ch, int start, int length, Locator locator)
+ throws SAXException {
+ this.locatorDelegate = locator;
+ lexicalHandler.comment(ch, start, length);
+ }
+
+ /**
+ * @see org.xml.sax.ext.LexicalHandler#endCDATA()
+ */
+ void endCDATA(Locator locator) throws SAXException {
+ this.locatorDelegate = locator;
+ lexicalHandler.endCDATA();
+ }
+
+ /**
+ * @see org.xml.sax.ext.LexicalHandler#endDTD()
+ */
+ void endDTD(Locator locator) throws SAXException {
+ this.locatorDelegate = locator;
+ lexicalHandler.endDTD();
+ }
+
+ /**
+ * @see org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String)
+ */
+ void endEntity(String name, Locator locator) throws SAXException {
+ this.locatorDelegate = locator;
+ lexicalHandler.endEntity(name);
+ }
+
+ /**
+ * @see org.xml.sax.ext.LexicalHandler#startCDATA()
+ */
+ void startCDATA(Locator locator) throws SAXException {
+ this.locatorDelegate = locator;
+ lexicalHandler.startCDATA();
+ }
+
+ /**
+ * @see org.xml.sax.ext.LexicalHandler#startDTD(java.lang.String,
+ * java.lang.String, java.lang.String)
+ */
+ void startDTD(String name, String publicId, String systemId, Locator locator)
+ throws SAXException {
+ this.locatorDelegate = locator;
+ lexicalHandler.startDTD(name, publicId, systemId);
+ }
+
+ /**
+ * @see org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String)
+ */
+ void startEntity(String name, Locator locator) throws SAXException {
+ this.locatorDelegate = locator;
+ lexicalHandler.startEntity(name);
+ }
+
+ /**
+ * @see org.xml.sax.Locator#getColumnNumber()
+ */
+ public int getColumnNumber() {
+ if (locatorDelegate == null) {
+ return -1;
+ } else {
+ return locatorDelegate.getColumnNumber();
+ }
+ }
+
+ /**
+ * @see org.xml.sax.Locator#getLineNumber()
+ */
+ public int getLineNumber() {
+ if (locatorDelegate == null) {
+ return -1;
+ } else {
+ return locatorDelegate.getLineNumber();
+ }
+ }
+
+ /**
+ * @see org.xml.sax.Locator#getPublicId()
+ */
+ public String getPublicId() {
+ if (locatorDelegate == null) {
+ return null;
+ } else {
+
+ return locatorDelegate.getPublicId();
+ }
+ }
+
+ /**
+ * @see org.xml.sax.Locator#getSystemId()
+ */
+ public String getSystemId() {
+ if (locatorDelegate == null) {
+ return null;
+ } else {
+ return locatorDelegate.getSystemId();
+ }
+ }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/package.html b/parser/html/java/htmlparser/src/nu/validator/saxtree/package.html
new file mode 100644
index 000000000..0c34dad81
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/package.html
@@ -0,0 +1,46 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>This package provides SAX Tree: a tree model optimized for creation from SAX
+events and replay as SAX events.</p>
+<h2>Design Principles</h2>
+<ol>
+<li>Preserve information exposed through <code>ContentHandler</code>,
+<code>LexicalHandler</code> <em>and</em> <code>Locator</code>.</li>
+<li>Creation from SAX events or as part of the parse of a conforming
+HTML5 document should be <em>fast</em>.</li>
+<li>Emitting SAX events based on the tree should be <em>fast</em>.</li>
+<li>Mutations should be <em>possible</em> but should not make the above
+"fast" cases slower.</li>
+<li>Concurrent reads should work without locking when there are no
+concurrent mutations.</li>
+<li>The user of the API has the responsibility of using the API properly:
+for the sake of performance, the model does not check if it is being
+used properly. Improper use may, therefore, put the model in an
+inconsistent state.</li>
+</ol>
+</body>
+</html> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/java/io/IOException.java b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/java/io/IOException.java
new file mode 100644
index 000000000..f323f1e31
--- /dev/null
+++ b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/java/io/IOException.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package java.io;
+
+public class IOException extends Exception {
+
+ public IOException() {
+ }
+
+ public IOException(String arg0) {
+ super(arg0);
+ }
+
+ public IOException(Throwable arg0) {
+ super(arg0);
+ }
+
+ public IOException(String arg0, Throwable arg1) {
+ super(arg0, arg1);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/Attributes.java b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/Attributes.java
new file mode 100644
index 000000000..b25432d45
--- /dev/null
+++ b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/Attributes.java
@@ -0,0 +1,257 @@
+// Attributes.java - attribute list with Namespace support
+// http://www.saxproject.org
+// Written by David Megginson
+// NO WARRANTY! This class is in the public domain.
+// $Id: Attributes.java,v 1.13 2004/03/18 12:28:05 dmegginson Exp $
+
+package org.xml.sax;
+
+
+/**
+ * Interface for a list of XML attributes.
+ *
+ * <blockquote>
+ * <em>This module, both source code and documentation, is in the
+ * Public Domain, and comes with <strong>NO WARRANTY</strong>.</em>
+ * See <a href='http://www.saxproject.org'>http://www.saxproject.org</a>
+ * for further information.
+ * </blockquote>
+ *
+ * <p>This interface allows access to a list of attributes in
+ * three different ways:</p>
+ *
+ * <ol>
+ * <li>by attribute index;</li>
+ * <li>by Namespace-qualified name; or</li>
+ * <li>by qualified (prefixed) name.</li>
+ * </ol>
+ *
+ * <p>The list will not contain attributes that were declared
+ * #IMPLIED but not specified in the start tag. It will also not
+ * contain attributes used as Namespace declarations (xmlns*) unless
+ * the <code>http://xml.org/sax/features/namespace-prefixes</code>
+ * feature is set to <var>true</var> (it is <var>false</var> by
+ * default).
+ * Because SAX2 conforms to the original "Namespaces in XML"
+ * recommendation, it normally does not
+ * give namespace declaration attributes a namespace URI.
+ * </p>
+ *
+ * <p>Some SAX2 parsers may support using an optional feature flag
+ * (<code>http://xml.org/sax/features/xmlns-uris</code>) to request
+ * that those attributes be given URIs, conforming to a later
+ * backwards-incompatible revision of that recommendation. (The
+ * attribute's "local name" will be the prefix, or "xmlns" when
+ * defining a default element namespace.) For portability, handler
+ * code should always resolve that conflict, rather than requiring
+ * parsers that can change the setting of that feature flag. </p>
+ *
+ * <p>If the namespace-prefixes feature (see above) is
+ * <var>false</var>, access by qualified name may not be available; if
+ * the <code>http://xml.org/sax/features/namespaces</code> feature is
+ * <var>false</var>, access by Namespace-qualified names may not be
+ * available.</p>
+ *
+ * <p>This interface replaces the now-deprecated SAX1 {@link
+ * org.xml.sax.AttributeList AttributeList} interface, which does not
+ * contain Namespace support. In addition to Namespace support, it
+ * adds the <var>getIndex</var> methods (below).</p>
+ *
+ * <p>The order of attributes in the list is unspecified, and will
+ * vary from implementation to implementation.</p>
+ *
+ * @since SAX 2.0
+ * @author David Megginson
+ * @version 2.0.1 (sax2r2)
+ * @see org.xml.sax.helpers.AttributesImpl
+ * @see org.xml.sax.ext.DeclHandler#attributeDecl
+ */
+public interface Attributes
+{
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Indexed access.
+ ////////////////////////////////////////////////////////////////////
+
+
+ /**
+ * Return the number of attributes in the list.
+ *
+ * <p>Once you know the number of attributes, you can iterate
+ * through the list.</p>
+ *
+ * @return The number of attributes in the list.
+ * @see #getURI(int)
+ * @see #getLocalName(int)
+ * @see #getQName(int)
+ * @see #getType(int)
+ * @see #getValue(int)
+ */
+ public abstract int getLength ();
+
+
+ /**
+ * Look up an attribute's Namespace URI by index.
+ *
+ * @param index The attribute index (zero-based).
+ * @return The Namespace URI, or the empty string if none
+ * is available, or null if the index is out of
+ * range.
+ * @see #getLength
+ */
+ public abstract String getURI (int index);
+
+
+ /**
+ * Look up an attribute's local name by index.
+ *
+ * @param index The attribute index (zero-based).
+ * @return The local name, or the empty string if Namespace
+ * processing is not being performed, or null
+ * if the index is out of range.
+ * @see #getLength
+ */
+ public abstract String getLocalName (int index);
+
+
+ /**
+ * Look up an attribute's XML qualified (prefixed) name by index.
+ *
+ * @param index The attribute index (zero-based).
+ * @return The XML qualified name, or the empty string
+ * if none is available, or null if the index
+ * is out of range.
+ * @see #getLength
+ */
+ public abstract String getQName (int index);
+
+
+ /**
+ * Look up an attribute's type by index.
+ *
+ * <p>The attribute type is one of the strings "CDATA", "ID",
+ * "IDREF", "IDREFS", "NMTOKEN", "NMTOKENS", "ENTITY", "ENTITIES",
+ * or "NOTATION" (always in upper case).</p>
+ *
+ * <p>If the parser has not read a declaration for the attribute,
+ * or if the parser does not report attribute types, then it must
+ * return the value "CDATA" as stated in the XML 1.0 Recommendation
+ * (clause 3.3.3, "Attribute-Value Normalization").</p>
+ *
+ * <p>For an enumerated attribute that is not a notation, the
+ * parser will report the type as "NMTOKEN".</p>
+ *
+ * @param index The attribute index (zero-based).
+ * @return The attribute's type as a string, or null if the
+ * index is out of range.
+ * @see #getLength
+ */
+ public abstract String getType (int index);
+
+
+ /**
+ * Look up an attribute's value by index.
+ *
+ * <p>If the attribute value is a list of tokens (IDREFS,
+ * ENTITIES, or NMTOKENS), the tokens will be concatenated
+ * into a single string with each token separated by a
+ * single space.</p>
+ *
+ * @param index The attribute index (zero-based).
+ * @return The attribute's value as a string, or null if the
+ * index is out of range.
+ * @see #getLength
+ */
+ public abstract String getValue (int index);
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Name-based query.
+ ////////////////////////////////////////////////////////////////////
+
+
+ /**
+ * Look up the index of an attribute by Namespace name.
+ *
+ * @param uri The Namespace URI, or the empty string if
+ * the name has no Namespace URI.
+ * @param localName The attribute's local name.
+ * @return The index of the attribute, or -1 if it does not
+ * appear in the list.
+ */
+ public int getIndex (String uri, String localName);
+
+
+ /**
+ * Look up the index of an attribute by XML qualified (prefixed) name.
+ *
+ * @param qName The qualified (prefixed) name.
+ * @return The index of the attribute, or -1 if it does not
+ * appear in the list.
+ */
+ public int getIndex (String qName);
+
+
+ /**
+ * Look up an attribute's type by Namespace name.
+ *
+ * <p>See {@link #getType(int) getType(int)} for a description
+ * of the possible types.</p>
+ *
+ * @param uri The Namespace URI, or the empty String if the
+ * name has no Namespace URI.
+ * @param localName The local name of the attribute.
+ * @return The attribute type as a string, or null if the
+ * attribute is not in the list or if Namespace
+ * processing is not being performed.
+ */
+ public abstract String getType (String uri, String localName);
+
+
+ /**
+ * Look up an attribute's type by XML qualified (prefixed) name.
+ *
+ * <p>See {@link #getType(int) getType(int)} for a description
+ * of the possible types.</p>
+ *
+ * @param qName The XML qualified name.
+ * @return The attribute type as a string, or null if the
+ * attribute is not in the list or if qualified names
+ * are not available.
+ */
+ public abstract String getType (String qName);
+
+
+ /**
+ * Look up an attribute's value by Namespace name.
+ *
+ * <p>See {@link #getValue(int) getValue(int)} for a description
+ * of the possible values.</p>
+ *
+ * @param uri The Namespace URI, or the empty String if the
+ * name has no Namespace URI.
+ * @param localName The local name of the attribute.
+ * @return The attribute value as a string, or null if the
+ * attribute is not in the list.
+ */
+ public abstract String getValue (String uri, String localName);
+
+
+ /**
+ * Look up an attribute's value by XML qualified (prefixed) name.
+ *
+ * <p>See {@link #getValue(int) getValue(int)} for a description
+ * of the possible values.</p>
+ *
+ * @param qName The XML qualified name.
+ * @return The attribute value as a string, or null if the
+ * attribute is not in the list or if qualified names
+ * are not available.
+ */
+ public abstract String getValue (String qName);
+
+}
+
+// end of Attributes.java
diff --git a/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/ErrorHandler.java b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/ErrorHandler.java
new file mode 100644
index 000000000..37d250143
--- /dev/null
+++ b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/ErrorHandler.java
@@ -0,0 +1,139 @@
+// SAX error handler.
+// http://www.saxproject.org
+// No warranty; no copyright -- use this as you will.
+// $Id: ErrorHandler.java,v 1.10 2004/03/08 13:01:00 dmegginson Exp $
+
+package org.xml.sax;
+
+
+/**
+ * Basic interface for SAX error handlers.
+ *
+ * <blockquote>
+ * <em>This module, both source code and documentation, is in the
+ * Public Domain, and comes with <strong>NO WARRANTY</strong>.</em>
+ * See <a href='http://www.saxproject.org'>http://www.saxproject.org</a>
+ * for further information.
+ * </blockquote>
+ *
+ * <p>If a SAX application needs to implement customized error
+ * handling, it must implement this interface and then register an
+ * instance with the XML reader using the
+ * {@link org.xml.sax.XMLReader#setErrorHandler setErrorHandler}
+ * method. The parser will then report all errors and warnings
+ * through this interface.</p>
+ *
+ * <p><strong>WARNING:</strong> If an application does <em>not</em>
+ * register an ErrorHandler, XML parsing errors will go unreported,
+ * except that <em>SAXParseException</em>s will be thrown for fatal errors.
+ * In order to detect validity errors, an ErrorHandler that does something
+ * with {@link #error error()} calls must be registered.</p>
+ *
+ * <p>For XML processing errors, a SAX driver must use this interface
+ * in preference to throwing an exception: it is up to the application
+ * to decide whether to throw an exception for different types of
+ * errors and warnings. Note, however, that there is no requirement that
+ * the parser continue to report additional errors after a call to
+ * {@link #fatalError fatalError}. In other words, a SAX driver class
+ * may throw an exception after reporting any fatalError.
+ * Also parsers may throw appropriate exceptions for non-XML errors.
+ * For example, {@link XMLReader#parse XMLReader.parse()} would throw
+ * an IOException for errors accessing entities or the document.</p>
+ *
+ * @since SAX 1.0
+ * @author David Megginson
+ * @version 2.0.1+ (sax2r3pre1)
+ * @see org.xml.sax.XMLReader#setErrorHandler
+ * @see org.xml.sax.SAXParseException
+ */
+public interface ErrorHandler {
+
+
+ /**
+ * Receive notification of a warning.
+ *
+ * <p>SAX parsers will use this method to report conditions that
+ * are not errors or fatal errors as defined by the XML
+ * recommendation. The default behaviour is to take no
+ * action.</p>
+ *
+ * <p>The SAX parser must continue to provide normal parsing events
+ * after invoking this method: it should still be possible for the
+ * application to process the document through to the end.</p>
+ *
+ * <p>Filters may use this method to report other, non-XML warnings
+ * as well.</p>
+ *
+ * @param exception The warning information encapsulated in a
+ * SAX parse exception.
+ * @exception org.xml.sax.SAXException Any SAX exception, possibly
+ * wrapping another exception.
+ * @see org.xml.sax.SAXParseException
+ */
+ public abstract void warning (SAXParseException exception)
+ throws SAXException;
+
+
+ /**
+ * Receive notification of a recoverable error.
+ *
+ * <p>This corresponds to the definition of "error" in section 1.2
+ * of the W3C XML 1.0 Recommendation. For example, a validating
+ * parser would use this callback to report the violation of a
+ * validity constraint. The default behaviour is to take no
+ * action.</p>
+ *
+ * <p>The SAX parser must continue to provide normal parsing
+ * events after invoking this method: it should still be possible
+ * for the application to process the document through to the end.
+ * If the application cannot do so, then the parser should report
+ * a fatal error even if the XML recommendation does not require
+ * it to do so.</p>
+ *
+ * <p>Filters may use this method to report other, non-XML errors
+ * as well.</p>
+ *
+ * @param exception The error information encapsulated in a
+ * SAX parse exception.
+ * @exception org.xml.sax.SAXException Any SAX exception, possibly
+ * wrapping another exception.
+ * @see org.xml.sax.SAXParseException
+ */
+ public abstract void error (SAXParseException exception)
+ throws SAXException;
+
+
+ /**
+ * Receive notification of a non-recoverable error.
+ *
+ * <p><strong>There is an apparent contradiction between the
+ * documentation for this method and the documentation for {@link
+ * org.xml.sax.ContentHandler#endDocument}. Until this ambiguity
+ * is resolved in a future major release, clients should make no
+ * assumptions about whether endDocument() will or will not be
+ * invoked when the parser has reported a fatalError() or thrown
+ * an exception.</strong></p>
+ *
+ * <p>This corresponds to the definition of "fatal error" in
+ * section 1.2 of the W3C XML 1.0 Recommendation. For example, a
+ * parser would use this callback to report the violation of a
+ * well-formedness constraint.</p>
+ *
+ * <p>The application must assume that the document is unusable
+ * after the parser has invoked this method, and should continue
+ * (if at all) only for the sake of collecting additional error
+ * messages: in fact, SAX parsers are free to stop reporting any
+ * other events once this method has been invoked.</p>
+ *
+ * @param exception The error information encapsulated in a
+ * SAX parse exception.
+ * @exception org.xml.sax.SAXException Any SAX exception, possibly
+ * wrapping another exception.
+ * @see org.xml.sax.SAXParseException
+ */
+ public abstract void fatalError (SAXParseException exception)
+ throws SAXException;
+
+}
+
+// end of ErrorHandler.java
diff --git a/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/Locator.java b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/Locator.java
new file mode 100644
index 000000000..f8f3484c1
--- /dev/null
+++ b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/Locator.java
@@ -0,0 +1,136 @@
+// SAX locator interface for document events.
+// http://www.saxproject.org
+// No warranty; no copyright -- use this as you will.
+// $Id: Locator.java,v 1.8 2002/01/30 21:13:47 dbrownell Exp $
+
+package org.xml.sax;
+
+
+/**
+ * Interface for associating a SAX event with a document location.
+ *
+ * <blockquote>
+ * <em>This module, both source code and documentation, is in the
+ * Public Domain, and comes with <strong>NO WARRANTY</strong>.</em>
+ * See <a href='http://www.saxproject.org'>http://www.saxproject.org</a>
+ * for further information.
+ * </blockquote>
+ *
+ * <p>If a SAX parser provides location information to the SAX
+ * application, it does so by implementing this interface and then
+ * passing an instance to the application using the content
+ * handler's {@link org.xml.sax.ContentHandler#setDocumentLocator
+ * setDocumentLocator} method. The application can use the
+ * object to obtain the location of any other SAX event
+ * in the XML source document.</p>
+ *
+ * <p>Note that the results returned by the object will be valid only
+ * during the scope of each callback method: the application
+ * will receive unpredictable results if it attempts to use the
+ * locator at any other time, or after parsing completes.</p>
+ *
+ * <p>SAX parsers are not required to supply a locator, but they are
+ * very strongly encouraged to do so. If the parser supplies a
+ * locator, it must do so before reporting any other document events.
+ * If no locator has been set by the time the application receives
+ * the {@link org.xml.sax.ContentHandler#startDocument startDocument}
+ * event, the application should assume that a locator is not
+ * available.</p>
+ *
+ * @since SAX 1.0
+ * @author David Megginson
+ * @version 2.0.1 (sax2r2)
+ * @see org.xml.sax.ContentHandler#setDocumentLocator
+ */
+public interface Locator {
+
+
+ /**
+ * Return the public identifier for the current document event.
+ *
+ * <p>The return value is the public identifier of the document
+ * entity or of the external parsed entity in which the markup
+ * triggering the event appears.</p>
+ *
+ * @return A string containing the public identifier, or
+ * null if none is available.
+ * @see #getSystemId
+ */
+ public abstract String getPublicId ();
+
+
+ /**
+ * Return the system identifier for the current document event.
+ *
+ * <p>The return value is the system identifier of the document
+ * entity or of the external parsed entity in which the markup
+ * triggering the event appears.</p>
+ *
+ * <p>If the system identifier is a URL, the parser must resolve it
+ * fully before passing it to the application. For example, a file
+ * name must always be provided as a <em>file:...</em> URL, and other
+ * kinds of relative URI are also resolved against their bases.</p>
+ *
+ * @return A string containing the system identifier, or null
+ * if none is available.
+ * @see #getPublicId
+ */
+ public abstract String getSystemId ();
+
+
+ /**
+ * Return the line number where the current document event ends.
+ * Lines are delimited by line ends, which are defined in
+ * the XML specification.
+ *
+ * <p><strong>Warning:</strong> The return value from the method
+ * is intended only as an approximation for the sake of diagnostics;
+ * it is not intended to provide sufficient information
+ * to edit the character content of the original XML document.
+ * In some cases, these "line" numbers match what would be displayed
+ * as columns, and in others they may not match the source text
+ * due to internal entity expansion. </p>
+ *
+ * <p>The return value is an approximation of the line number
+ * in the document entity or external parsed entity where the
+ * markup triggering the event appears.</p>
+ *
+ * <p>If possible, the SAX driver should provide the line position
+ * of the first character after the text associated with the document
+ * event. The first line is line 1.</p>
+ *
+ * @return The line number, or -1 if none is available.
+ * @see #getColumnNumber
+ */
+ public abstract int getLineNumber ();
+
+
+ /**
+ * Return the column number where the current document event ends.
+ * This is a one-based number of Java <code>char</code> values since
+ * the last line end.
+ *
+ * <p><strong>Warning:</strong> The return value from the method
+ * is intended only as an approximation for the sake of diagnostics;
+ * it is not intended to provide sufficient information
+ * to edit the character content of the original XML document.
+ * For example, when lines contain combining character sequences, wide
+ * characters, surrogate pairs, or bi-directional text, the value may
+ * not correspond to the column in a text editor's display. </p>
+ *
+ * <p>The return value is an approximation of the column number
+ * in the document entity or external parsed entity where the
+ * markup triggering the event appears.</p>
+ *
+ * <p>If possible, the SAX driver should provide the line position
+ * of the first character after the text associated with the document
+ * event. The first column in each line is column 1.</p>
+ *
+ * @return The column number, or -1 if none is available.
+ * @see #getLineNumber
+ */
+ public abstract int getColumnNumber ();
+
+}
+
+// end of Locator.java
diff --git a/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXException.java b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXException.java
new file mode 100644
index 000000000..256719cef
--- /dev/null
+++ b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXException.java
@@ -0,0 +1,153 @@
+// SAX exception class.
+// http://www.saxproject.org
+// No warranty; no copyright -- use this as you will.
+// $Id: SAXException.java,v 1.7 2002/01/30 21:13:48 dbrownell Exp $
+
+package org.xml.sax;
+
+/**
+ * Encapsulate a general SAX error or warning.
+ *
+ * <blockquote>
+ * <em>This module, both source code and documentation, is in the
+ * Public Domain, and comes with <strong>NO WARRANTY</strong>.</em>
+ * See <a href='http://www.saxproject.org'>http://www.saxproject.org</a>
+ * for further information.
+ * </blockquote>
+ *
+ * <p>This class can contain basic error or warning information from
+ * either the XML parser or the application: a parser writer or
+ * application writer can subclass it to provide additional
+ * functionality. SAX handlers may throw this exception or
+ * any exception subclassed from it.</p>
+ *
+ * <p>If the application needs to pass through other types of
+ * exceptions, it must wrap those exceptions in a SAXException
+ * or an exception derived from a SAXException.</p>
+ *
+ * <p>If the parser or application needs to include information about a
+ * specific location in an XML document, it should use the
+ * {@link org.xml.sax.SAXParseException SAXParseException} subclass.</p>
+ *
+ * @since SAX 1.0
+ * @author David Megginson
+ * @version 2.0.1 (sax2r2)
+ * @see org.xml.sax.SAXParseException
+ */
+public class SAXException extends Exception {
+
+
+ /**
+ * Create a new SAXException.
+ */
+ public SAXException ()
+ {
+ super();
+ this.exception = null;
+ }
+
+
+ /**
+ * Create a new SAXException.
+ *
+ * @param message The error or warning message.
+ */
+ public SAXException (String message) {
+ super(message);
+ this.exception = null;
+ }
+
+
+ /**
+ * Create a new SAXException wrapping an existing exception.
+ *
+ * <p>The existing exception will be embedded in the new
+ * one, and its message will become the default message for
+ * the SAXException.</p>
+ *
+ * @param e The exception to be wrapped in a SAXException.
+ */
+ public SAXException (Exception e)
+ {
+ super();
+ this.exception = e;
+ }
+
+
+ /**
+ * Create a new SAXException from an existing exception.
+ *
+ * <p>The existing exception will be embedded in the new
+ * one, but the new exception will have its own message.</p>
+ *
+ * @param message The detail message.
+ * @param e The exception to be wrapped in a SAXException.
+ */
+ public SAXException (String message, Exception e)
+ {
+ super(message);
+ this.exception = e;
+ }
+
+
+ /**
+ * Return a detail message for this exception.
+ *
+ * <p>If there is an embedded exception, and if the SAXException
+ * has no detail message of its own, this method will return
+ * the detail message from the embedded exception.</p>
+ *
+ * @return The error or warning message.
+ */
+ public String getMessage ()
+ {
+ String message = super.getMessage();
+
+ if (message == null && exception != null) {
+ return exception.getMessage();
+ } else {
+ return message;
+ }
+ }
+
+
+ /**
+ * Return the embedded exception, if any.
+ *
+ * @return The embedded exception, or null if there is none.
+ */
+ public Exception getException ()
+ {
+ return exception;
+ }
+
+
+ /**
+ * Override toString to pick up any embedded exception.
+ *
+ * @return A string representation of this exception.
+ */
+ public String toString ()
+ {
+ if (exception != null) {
+ return exception.toString();
+ } else {
+ return super.toString();
+ }
+ }
+
+
+
+ //////////////////////////////////////////////////////////////////////
+ // Internal state.
+ //////////////////////////////////////////////////////////////////////
+
+
+ /**
+ * @serial The embedded exception if tunnelling, or null.
+ */
+ private Exception exception;
+
+}
+
+// end of SAXException.java
diff --git a/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXParseException.java b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXParseException.java
new file mode 100644
index 000000000..1df5e1423
--- /dev/null
+++ b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXParseException.java
@@ -0,0 +1,269 @@
+// SAX exception class.
+// http://www.saxproject.org
+// No warranty; no copyright -- use this as you will.
+// $Id: SAXParseException.java,v 1.11 2004/04/21 13:05:02 dmegginson Exp $
+
+package org.xml.sax;
+
+/**
+ * Encapsulate an XML parse error or warning.
+ *
+ * <blockquote>
+ * <em>This module, both source code and documentation, is in the
+ * Public Domain, and comes with <strong>NO WARRANTY</strong>.</em>
+ * See <a href='http://www.saxproject.org'>http://www.saxproject.org</a>
+ * for further information.
+ * </blockquote>
+ *
+ * <p>This exception may include information for locating the error
+ * in the original XML document, as if it came from a {@link Locator}
+ * object. Note that although the application
+ * will receive a SAXParseException as the argument to the handlers
+ * in the {@link org.xml.sax.ErrorHandler ErrorHandler} interface,
+ * the application is not actually required to throw the exception;
+ * instead, it can simply read the information in it and take a
+ * different action.</p>
+ *
+ * <p>Since this exception is a subclass of {@link org.xml.sax.SAXException
+ * SAXException}, it inherits the ability to wrap another exception.</p>
+ *
+ * @since SAX 1.0
+ * @author David Megginson
+ * @version 2.0.1 (sax2r2)
+ * @see org.xml.sax.SAXException
+ * @see org.xml.sax.Locator
+ * @see org.xml.sax.ErrorHandler
+ */
+public class SAXParseException extends SAXException {
+
+
+ //////////////////////////////////////////////////////////////////////
+ // Constructors.
+ //////////////////////////////////////////////////////////////////////
+
+
+ /**
+ * Create a new SAXParseException from a message and a Locator.
+ *
+ * <p>This constructor is especially useful when an application is
+ * creating its own exception from within a {@link org.xml.sax.ContentHandler
+ * ContentHandler} callback.</p>
+ *
+ * @param message The error or warning message.
+ * @param locator The locator object for the error or warning (may be
+ * null).
+ * @see org.xml.sax.Locator
+ */
+ public SAXParseException (String message, Locator locator) {
+ super(message);
+ if (locator != null) {
+ init(locator.getPublicId(), locator.getSystemId(),
+ locator.getLineNumber(), locator.getColumnNumber());
+ } else {
+ init(null, null, -1, -1);
+ }
+ }
+
+
+ /**
+ * Wrap an existing exception in a SAXParseException.
+ *
+ * <p>This constructor is especially useful when an application is
+ * creating its own exception from within a {@link org.xml.sax.ContentHandler
+ * ContentHandler} callback, and needs to wrap an existing exception that is not a
+ * subclass of {@link org.xml.sax.SAXException SAXException}.</p>
+ *
+ * @param message The error or warning message, or null to
+ * use the message from the embedded exception.
+ * @param locator The locator object for the error or warning (may be
+ * null).
+ * @param e Any exception.
+ * @see org.xml.sax.Locator
+ */
+ public SAXParseException (String message, Locator locator,
+ Exception e) {
+ super(message, e);
+ if (locator != null) {
+ init(locator.getPublicId(), locator.getSystemId(),
+ locator.getLineNumber(), locator.getColumnNumber());
+ } else {
+ init(null, null, -1, -1);
+ }
+ }
+
+
+ /**
+ * Create a new SAXParseException.
+ *
+ * <p>This constructor is most useful for parser writers.</p>
+ *
+ * <p>All parameters except the message are as if
+ * they were provided by a {@link Locator}. For example, if the
+ * system identifier is a URL (including relative filename), the
+ * caller must resolve it fully before creating the exception.</p>
+ *
+ *
+ * @param message The error or warning message.
+ * @param publicId The public identifier of the entity that generated
+ * the error or warning.
+ * @param systemId The system identifier of the entity that generated
+ * the error or warning.
+ * @param lineNumber The line number of the end of the text that
+ * caused the error or warning.
+ * @param columnNumber The column number of the end of the text that
+ * caused the error or warning.
+ */
+ public SAXParseException (String message, String publicId, String systemId,
+ int lineNumber, int columnNumber)
+ {
+ super(message);
+ init(publicId, systemId, lineNumber, columnNumber);
+ }
+
+
+ /**
+ * Create a new SAXParseException with an embedded exception.
+ *
+ * <p>This constructor is most useful for parser writers who
+ * need to wrap an exception that is not a subclass of
+ * {@link org.xml.sax.SAXException SAXException}.</p>
+ *
+ * <p>All parameters except the message and exception are as if
+ * they were provided by a {@link Locator}. For example, if the
+ * system identifier is a URL (including relative filename), the
+ * caller must resolve it fully before creating the exception.</p>
+ *
+ * @param message The error or warning message, or null to use
+ * the message from the embedded exception.
+ * @param publicId The public identifier of the entity that generated
+ * the error or warning.
+ * @param systemId The system identifier of the entity that generated
+ * the error or warning.
+ * @param lineNumber The line number of the end of the text that
+ * caused the error or warning.
+ * @param columnNumber The column number of the end of the text that
+ * caused the error or warning.
+ * @param e Another exception to embed in this one.
+ */
+ public SAXParseException (String message, String publicId, String systemId,
+ int lineNumber, int columnNumber, Exception e)
+ {
+ super(message, e);
+ init(publicId, systemId, lineNumber, columnNumber);
+ }
+
+
+ /**
+ * Internal initialization method.
+ *
+ * @param publicId The public identifier of the entity which generated the exception,
+ * or null.
+ * @param systemId The system identifier of the entity which generated the exception,
+ * or null.
+ * @param lineNumber The line number of the error, or -1.
+ * @param columnNumber The column number of the error, or -1.
+ */
+ private void init (String publicId, String systemId,
+ int lineNumber, int columnNumber)
+ {
+ this.publicId = publicId;
+ this.systemId = systemId;
+ this.lineNumber = lineNumber;
+ this.columnNumber = columnNumber;
+ }
+
+
+ /**
+ * Get the public identifier of the entity where the exception occurred.
+ *
+ * @return A string containing the public identifier, or null
+ * if none is available.
+ * @see org.xml.sax.Locator#getPublicId
+ */
+ public String getPublicId ()
+ {
+ return this.publicId;
+ }
+
+
+ /**
+ * Get the system identifier of the entity where the exception occurred.
+ *
+ * <p>If the system identifier is a URL, it will have been resolved
+ * fully.</p>
+ *
+ * @return A string containing the system identifier, or null
+ * if none is available.
+ * @see org.xml.sax.Locator#getSystemId
+ */
+ public String getSystemId ()
+ {
+ return this.systemId;
+ }
+
+
+ /**
+ * The line number of the end of the text where the exception occurred.
+ *
+ * <p>The first line is line 1.</p>
+ *
+ * @return An integer representing the line number, or -1
+ * if none is available.
+ * @see org.xml.sax.Locator#getLineNumber
+ */
+ public int getLineNumber ()
+ {
+ return this.lineNumber;
+ }
+
+
+ /**
+ * The column number of the end of the text where the exception occurred.
+ *
+ * <p>The first column in a line is position 1.</p>
+ *
+ * @return An integer representing the column number, or -1
+ * if none is available.
+ * @see org.xml.sax.Locator#getColumnNumber
+ */
+ public int getColumnNumber ()
+ {
+ return this.columnNumber;
+ }
+
+
+ //////////////////////////////////////////////////////////////////////
+ // Internal state.
+ //////////////////////////////////////////////////////////////////////
+
+
+ /**
+ * @serial The public identifier, or null.
+ * @see #getPublicId
+ */
+ private String publicId;
+
+
+ /**
+ * @serial The system identifier, or null.
+ * @see #getSystemId
+ */
+ private String systemId;
+
+
+ /**
+ * @serial The line number, or -1.
+ * @see #getLineNumber
+ */
+ private int lineNumber;
+
+
+ /**
+ * @serial The column number, or -1.
+ * @see #getColumnNumber
+ */
+ private int columnNumber;
+
+}
+
+// end of SAXParseException.java
diff --git a/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/package.html b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/package.html
new file mode 100644
index 000000000..dd7030e24
--- /dev/null
+++ b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/package.html
@@ -0,0 +1,297 @@
+<html><head>
+<!-- $Id: package.html,v 1.18 2004/04/21 13:06:01 dmegginson Exp $ -->
+</head><body>
+
+<p> This package provides the core SAX APIs.
+Some SAX1 APIs are deprecated to encourage integration of
+namespace-awareness into designs of new applications
+and into maintenance of existing infrastructure. </p>
+
+<p>See <a href='http://www.saxproject.org'>http://www.saxproject.org</a>
+for more information about SAX.</p>
+
+
+<h2> SAX2 Standard Feature Flags </h2>
+
+<p> One of the essential characteristics of SAX2 is that it added
+feature flags which can be used to examine and perhaps modify
+parser modes, in particular modes such as validation.
+Since features are identified by (absolute) URIs, anyone
+can define such features.
+Currently defined standard feature URIs have the prefix
+<code>http://xml.org/sax/features/</code> before an identifier such as
+<code>validation</code>. Turn features on or off using
+<em>setFeature</em>. Those standard identifiers are: </p>
+
+
+<table border="1" cellpadding="3" cellspacing="0" width="100%">
+ <tr align="center" bgcolor="#ccccff">
+ <th>Feature ID</th>
+ <th>Access</th>
+ <th>Default</th>
+ <th>Description</th>
+ </tr>
+
+ <tr>
+ <td>external-general-entities</td>
+ <td><em>read/write</em></td>
+ <td><em>unspecified</em></td>
+ <td> Reports whether this parser processes external
+ general entities; always true if validating.
+ </td>
+ </tr>
+
+ <tr>
+ <td>external-parameter-entities</td>
+ <td><em>read/write</em></td>
+ <td><em>unspecified</em></td>
+ <td> Reports whether this parser processes external
+ parameter entities; always true if validating.
+ </td>
+ </tr>
+
+ <tr>
+ <td>is-standalone</td>
+ <td>(parsing) <em>read-only</em>, (not parsing) <em>none</em></td>
+ <td>not applicable</td>
+ <td> May be examined only during a parse, after the
+ <em>startDocument()</em> callback has been completed; read-only.
+ The value is true if the document specified standalone="yes" in
+ its XML declaration, and otherwise is false.
+ </td>
+ </tr>
+
+ <tr>
+ <td>lexical-handler/parameter-entities</td>
+ <td><em>read/write</em></td>
+ <td><em>unspecified</em></td>
+ <td> A value of "true" indicates that the LexicalHandler will report
+ the beginning and end of parameter entities.
+ </td>
+ </tr>
+
+ <tr>
+ <td>namespaces</td>
+ <td><em>read/write</em></td>
+ <td>true</td>
+ <td> A value of "true" indicates namespace URIs and unprefixed local names
+ for element and attribute names will be available.
+ </td>
+ </tr>
+
+ <tr>
+ <td>namespace-prefixes</td>
+ <td><em>read/write</em></td>
+ <td>false</td>
+ <td> A value of "true" indicates that XML qualified names (with prefixes) and
+ attributes (including <em>xmlns*</em> attributes) will be available.
+ </td>
+ </tr>
+
+ <tr>
+ <td>resolve-dtd-uris</td>
+ <td><em>read/write</em></td>
+ <td><em>true</em></td>
+ <td> A value of "true" indicates that system IDs in declarations will
+ be absolutized (relative to their base URIs) before reporting.
+ (That is the default behavior for all SAX2 XML parsers.)
+ A value of "false" indicates those IDs will not be absolutized;
+ parsers will provide the base URI from
+ <em>Locator.getSystemId()</em>.
+ This applies to system IDs passed in <ul>
+ <li><em>DTDHandler.notationDecl()</em>,
+ <li><em>DTDHandler.unparsedEntityDecl()</em>, and
+ <li><em>DeclHandler.externalEntityDecl()</em>.
+ </ul>
+ It does not apply to <em>EntityResolver.resolveEntity()</em>,
+ which is not used to report declarations, or to
+ <em>LexicalHandler.startDTD()</em>, which already provides
+ the non-absolutized URI.
+ </td>
+ </tr>
+
+ <tr>
+ <td>string-interning</td>
+ <td><em>read/write</em></td>
+ <td><em>unspecified</em></td>
+ <td> Has a value of "true" if all XML names (for elements, prefixes,
+ attributes, entities, notations, and local names),
+ as well as Namespace URIs, will have been interned
+ using <em>java.lang.String.intern</em>. This supports fast
+ testing of equality/inequality against string constants,
+ rather than forcing slower calls to <em>String.equals()</em>.
+ </td>
+ </tr>
+
+ <tr>
+ <td>unicode-normalization-checking</td>
+ <td><em>read/write</em></td>
+ <td><em>false</em></td>
+ <td> Controls whether the parser reports Unicode normalization
+ errors as described in section 2.13 and Appendix B of the
+ XML 1.1 Recommendation. If true, Unicode normalization
+ errors are reported using the ErrorHandler.error() callback.
+ Such errors are not fatal in themselves (though, obviously,
+ other Unicode-related encoding errors may be).
+ </td>
+ </tr>
+
+ <tr>
+ <td>use-attributes2</td>
+ <td><em>read-only</em></td>
+ <td>not applicable</td>
+ <td> Returns "true" if the <em>Attributes</em> objects passed by
+ this parser in <em>ContentHandler.startElement()</em>
+ implement the <a href="ext/Attributes2.html"
+ ><em>org.xml.sax.ext.Attributes2</em></a> interface.
+ That interface exposes additional DTD-related information,
+ such as whether the attribute was specified in the
+ source text rather than defaulted.
+ </td>
+ </tr>
+
+ <tr>
+ <td>use-locator2</td>
+ <td><em>read-only</em></td>
+ <td>not applicable</td>
+ <td> Returns "true" if the <em>Locator</em> objects passed by
+ this parser in <em>ContentHandler.setDocumentLocator()</em>
+ implement the <a href="ext/Locator2.html"
+ ><em>org.xml.sax.ext.Locator2</em></a> interface.
+ That interface exposes additional entity information,
+ such as the character encoding and XML version used.
+ </td>
+ </tr>
+
+ <tr>
+ <td>use-entity-resolver2</td>
+ <td><em>read/write</em></td>
+ <td><em>true</em></td>
+ <td> Returns "true" if, when <em>setEntityResolver</em> is given
+ an object implementing the <a href="ext/EntityResolver2.html"
+ ><em>org.xml.sax.ext.EntityResolver2</em></a> interface,
+ those new methods will be used.
+ Returns "false" to indicate that those methods will not be used.
+ </td>
+ </tr>
+
+ <tr>
+ <td>validation</td>
+ <td><em>read/write</em></td>
+ <td><em>unspecified</em></td>
+ <td> Controls whether the parser is reporting all validity
+ errors; if true, all external entities will be read.
+ </td>
+ </tr>
+
+ <tr>
+ <td>xmlns-uris</td>
+ <td><em>read/write</em></td>
+ <td><em>false</em></td>
+ <td> Controls whether, when the <em>namespace-prefixes</em> feature
+ is set, the parser treats namespace declaration attributes as
+ being in the <em>http://www.w3.org/2000/xmlns/</em> namespace.
+ By default, SAX2 conforms to the original "Namespaces in XML"
+ Recommendation, which explicitly states that such attributes are
+ not in any namespace.
+ Setting this optional flag to "true" makes the SAX2 events conform to
+ a later backwards-incompatible revision of that recommendation,
+ placing those attributes in a namespace.
+ </td>
+ </tr>
+
+ <tr>
+ <td>xml-1.1</td>
+ <td><em>read-only</em></td>
+ <td>not applicable</td>
+ <td> Returns "true" if the parser supports both XML 1.1 and XML 1.0.
+ Returns "false" if the parser supports only XML 1.0.
+ </td>
+ </tr>
+
+</table>
+
+<p> Support for the default values of the
+<em>namespaces</em> and <em>namespace-prefixes</em>
+feature flags is required.
+Support for any other feature flags is entirely optional.
+</p>
+
+<p> For default values not specified by SAX2,
+each XMLReader implementation specifies its default,
+or may choose not to expose the feature flag.
+Unless otherwise specified here,
+implementations may support changing current values
+of these standard feature flags, but not while parsing.
+</p>
+
+<h2> SAX2 Standard Handler and Property IDs </h2>
+
+<p> For parser interface characteristics that are described
+as objects, a separate namespace is defined. The
+objects in this namespace are again identified by URI, and
+the standard property URIs have the prefix
+<code>http://xml.org/sax/properties/</code> before an identifier such as
+<code>lexical-handler</code> or
+<code>dom-node</code>. Manage those properties using
+<em>setProperty()</em>. Those identifiers are: </p>
+
+<table border="1" cellpadding="3" cellspacing="0" width="100%">
+ <tr align="center" bgcolor="#ccccff">
+ <th>Property ID</th>
+ <th>Description</th>
+ </tr>
+
+ <tr>
+ <td>declaration-handler</td>
+ <td> Used to see most DTD declarations except those treated
+ as lexical ("document element name is ...") or which are
+ mandatory for all SAX parsers (<em>DTDHandler</em>).
+ The Object must implement <a href="ext/DeclHandler.html"
+ ><em>org.xml.sax.ext.DeclHandler</em></a>.
+ </td>
+ </tr>
+
+ <tr>
+ <td>document-xml-version</td>
+ <td> May be examined only during a parse, after the startDocument()
+ callback has been completed; read-only. This property is a
+ literal string describing the actual XML version of the document,
+ such as "1.0" or "1.1".
+ </td>
+ </tr>
+
+ <tr>
+ <td>dom-node</td>
+ <td> For "DOM Walker" style parsers, which ignore their
+ <em>parser.parse()</em> parameters, this is used to
+ specify the DOM (sub)tree being walked by the parser.
+ The Object must implement the
+ <em>org.w3c.dom.Node</em> interface.
+ </td>
+ </tr>
+
+ <tr>
+ <td>lexical-handler</td>
+ <td> Used to see some syntax events that are essential in some
+ applications: comments, CDATA delimiters, selected general
+ entity inclusions, and the start and end of the DTD
+ (and declaration of document element name).
+ The Object must implement <a href="ext/LexicalHandler.html"
+ ><em>org.xml.sax.ext.LexicalHandler</em></a>.
+ </td>
+ </tr>
+
+ <tr>
+ <td>xml-string</td>
+ <td> Readable only during a parser callback, this exposes a <b>TBS</b>
+ chunk of characters responsible for the current event. </td>
+ </tr>
+
+</table>
+
+<p> All of these standard properties are optional;
+XMLReader implementations need not support them.
+</p>
+
+</body></html> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/encoding/test/Big5Tester.java b/parser/html/java/htmlparser/test-src/nu/validator/encoding/test/Big5Tester.java
new file mode 100644
index 000000000..395f9eb15
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/encoding/test/Big5Tester.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding.test;
+
+import nu.validator.encoding.Encoding;
+
+public class Big5Tester extends EncodingTester {
+
+ public static void main(String[] args) {
+ new Big5Tester().test();
+ }
+
+ private void test() {
+ // ASCII
+ decodeBig5("\u6162", "\u0061\u0062");
+ // Edge cases
+ decodeBig5("\u8740", "\u43F0");
+ decodeBig5("\uFEFE", "\u79D4");
+ decodeBig5("\uFEFD", "\uD864\uDD0D");
+ decodeBig5("\u8862", "\u00CA\u0304");
+ decodeBig5("\u8864", "\u00CA\u030C");
+ decodeBig5("\u8866", "\u00CA");
+ decodeBig5("\u88A3", "\u00EA\u0304");
+ decodeBig5("\u88A5", "\u00EA\u030C");
+ decodeBig5("\u88A7", "\u00EA");
+ decodeBig5("\u99D4", "\u8991");
+ decodeBig5("\u99D5", "\uD85E\uDD67");
+ decodeBig5("\u99D6", "\u8A29");
+ // Edge cases surrounded with ASCII
+ decodeBig5("\u6187\u4062", "\u0061\u43F0\u0062");
+ decodeBig5("\u61FE\uFE62", "\u0061\u79D4\u0062");
+ decodeBig5("\u61FE\uFD62", "\u0061\uD864\uDD0D\u0062");
+ decodeBig5("\u6188\u6262", "\u0061\u00CA\u0304\u0062");
+ decodeBig5("\u6188\u6462", "\u0061\u00CA\u030C\u0062");
+ decodeBig5("\u6188\u6662", "\u0061\u00CA\u0062");
+ decodeBig5("\u6188\uA362", "\u0061\u00EA\u0304\u0062");
+ decodeBig5("\u6188\uA562", "\u0061\u00EA\u030C\u0062");
+ decodeBig5("\u6188\uA762", "\u0061\u00EA\u0062");
+ decodeBig5("\u6199\uD462", "\u0061\u8991\u0062");
+ decodeBig5("\u6199\uD562", "\u0061\uD85E\uDD67\u0062");
+ decodeBig5("\u6199\uD662", "\u0061\u8A29\u0062");
+ // Bad sequences
+ decodeBig5("\u8061", "\uFFFD\u0061");
+ decodeBig5("\uFF61", "\uFFFD\u0061");
+ decodeBig5("\uFE39", "\uFFFD\u0039");
+ decodeBig5("\u8766", "\uFFFD\u0066");
+ decodeBig5("\u8140", "\uFFFD\u0040");
+ decodeBig5("\u6181", "\u0061\uFFFD");
+
+ // ASCII
+ encodeBig5("\u0061\u0062", "\u6162");
+ // Edge cases
+ encodeBig5("\u9EA6\u0061", "\u3F61");
+ encodeBig5("\uD858\uDE6B\u0061", "\u3F61");
+ encodeBig5("\u3000", "\uA140");
+ encodeBig5("\u20AC", "\uA3E1");
+ encodeBig5("\u4E00", "\uA440");
+ encodeBig5("\uD85D\uDE07", "\uC8A4");
+ encodeBig5("\uFFE2", "\uC8CD");
+ encodeBig5("\u79D4", "\uFEFE");
+ // Not in index
+ encodeBig5("\u2603\u0061", "\u3F61");
+ // duplicate low bits
+ encodeBig5("\uD840\uDFB5", "\uFD6A");
+ // prefer last
+ encodeBig5("\u2550", "\uF9F9");
+ }
+
+ private void decodeBig5(String input, String expectation) {
+ decode(input, expectation, Encoding.BIG5);
+ }
+
+ private void encodeBig5(String input, String expectation) {
+ encode(input, expectation, Encoding.BIG5);
+ }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/encoding/test/EncodingTester.java b/parser/html/java/htmlparser/test-src/nu/validator/encoding/test/EncodingTester.java
new file mode 100644
index 000000000..a910a01e9
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/encoding/test/EncodingTester.java
@@ -0,0 +1,491 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding.test;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+
+import nu.validator.encoding.Encoding;
+
+public class EncodingTester {
+
+ protected byte[] stringToBytes(String str) {
+ byte[] bytes = new byte[str.length() * 2];
+ for (int i = 0; i < str.length(); i++) {
+ int pair = (int) str.charAt(i);
+ bytes[i * 2] = (byte) (pair >> 8);
+ bytes[i * 2 + 1] = (byte) (pair & 0xFF);
+ }
+ return bytes;
+ }
+
+ protected void decode(String input, String expectation, Encoding encoding) {
+ // Use the convenience method from Charset
+
+ byte[] bytes = stringToBytes(input);
+ ByteBuffer byteBuf = ByteBuffer.wrap(bytes);
+ CharBuffer charBuf = encoding.decode(byteBuf);
+
+ if (charBuf.remaining() != expectation.length()) {
+ err("When decoding from a single long buffer, the output length was wrong. Expected: "
+ + expectation.length() + ", got: " + charBuf.remaining(),
+ bytes, expectation);
+ return;
+ }
+
+ for (int i = 0; i < expectation.length(); i++) {
+ char expect = expectation.charAt(i);
+ char actual = charBuf.get();
+ if (actual != expect) {
+ err("When decoding from a single long buffer, failed at position "
+ + i
+ + ", expected: "
+ + charToHex(expect)
+ + ", got: "
+ + charToHex(actual), bytes, expectation);
+ return;
+ }
+ }
+
+ // Decode with a 1-byte input buffer
+
+ byteBuf = ByteBuffer.allocate(1);
+ charBuf = CharBuffer.allocate(expectation.length() + 2);
+ CharsetDecoder decoder = encoding.newDecoder();
+ decoder.onMalformedInput(CodingErrorAction.REPLACE);
+ for (int i = 0; i < bytes.length; i++) {
+ byteBuf.position(0);
+ byteBuf.put(bytes[i]);
+ byteBuf.position(0);
+ CoderResult result = decoder.decode(byteBuf, charBuf,
+ (i + 1) == bytes.length);
+ if (result.isMalformed()) {
+ err("Decoder reported a malformed sequence when asked to replace at index: "
+ + i, bytes, expectation);
+ return;
+ } else if (result.isUnmappable()) {
+ err("Decoder claimed unmappable sequence, which none of these decoders should do.",
+ bytes, expectation);
+ return;
+ } else if (result.isOverflow()) {
+ err("Decoder claimed overflow when the output buffer is know to be large enough.",
+ bytes, expectation);
+ } else if (!result.isUnderflow()) {
+ err("Bogus coder result, expected underflow.", bytes,
+ expectation);
+ }
+ }
+ CoderResult result = decoder.flush(charBuf);
+ if (result.isMalformed()) {
+ err("Decoder reported a malformed sequence when asked to replace when flushing.",
+ bytes, expectation);
+ return;
+ } else if (result.isUnmappable()) {
+ err("Decoder claimed unmappable sequence when flushing, which none of these decoders should do.",
+ bytes, expectation);
+ return;
+ } else if (result.isOverflow()) {
+ err("Decoder claimed overflow when flushing when the output buffer is know to be large enough.",
+ bytes, expectation);
+ } else if (!result.isUnderflow()) {
+ err("Bogus coder result when flushing, expected underflow.", bytes,
+ expectation);
+ }
+
+ charBuf.limit(charBuf.position());
+ charBuf.position(0);
+
+ for (int i = 0; i < expectation.length(); i++) {
+ char expect = expectation.charAt(i);
+ char actual = charBuf.get();
+ if (actual != expect) {
+ err("When decoding one byte at a time in REPORT mode, failed at position "
+ + i
+ + ", expected: "
+ + charToHex(expect)
+ + ", got: "
+ + charToHex(actual), bytes, expectation);
+ return;
+ }
+ }
+
+ // Decode with 1-char output buffer
+
+ byteBuf = ByteBuffer.wrap(bytes);
+ charBuf = CharBuffer.allocate(1);
+
+ decoder.reset(); // Let's test this while at it
+ decoder.onMalformedInput(CodingErrorAction.REPLACE);
+ int codeUnitPos = 0;
+ while (byteBuf.hasRemaining()) {
+ charBuf.position(0);
+ charBuf.put('\u0000');
+ charBuf.position(0);
+ result = decoder.decode(byteBuf, charBuf, false);
+ if (result.isMalformed()) {
+ err("Decoder reported a malformed sequence when asked to replace at index (decoding one output code unit at a time): "
+ + byteBuf.position(), bytes, expectation);
+ return;
+ } else if (result.isUnmappable()) {
+ err("Decoder claimed unmappable sequence (decoding one output code unit at a time), which none of these decoders should do.",
+ bytes, expectation);
+ return;
+ } else if (result.isUnderflow()) {
+ if (byteBuf.hasRemaining()) {
+ err("When decoding one output code unit at a time, decoder claimed underflow when there was input remaining.",
+ bytes, expectation);
+ return;
+ }
+ } else if (!result.isOverflow()) {
+ err("Bogus coder result, expected overflow.", bytes,
+ expectation);
+ }
+ if (charBuf.position() == 1) {
+ charBuf.position(0);
+ char actual = charBuf.get();
+ char expect = expectation.charAt(codeUnitPos);
+ if (actual != expect) {
+ err("When decoding one output code unit at a time in REPLACE mode, failed at position "
+ + byteBuf.position()
+ + ", expected: "
+ + charToHex(expect) + ", got: " + charToHex(actual),
+ bytes, expectation);
+ return;
+ }
+ codeUnitPos++;
+ }
+ }
+
+ charBuf.position(0);
+ charBuf.put('\u0000');
+ charBuf.position(0);
+ result = decoder.decode(byteBuf, charBuf, true);
+
+ if (charBuf.position() == 1) {
+ charBuf.position(0);
+ char actual = charBuf.get();
+ char expect = expectation.charAt(codeUnitPos);
+ if (actual != expect) {
+ err("When decoding one output code unit at a time in REPLACE mode, failed at position "
+ + byteBuf.position()
+ + ", expected: "
+ + charToHex(expect) + ", got: " + charToHex(actual),
+ bytes, expectation);
+ return;
+ }
+ codeUnitPos++;
+ }
+
+ charBuf.position(0);
+ charBuf.put('\u0000');
+ charBuf.position(0);
+ result = decoder.flush(charBuf);
+ if (result.isMalformed()) {
+ err("Decoder reported a malformed sequence when asked to replace when flushing (one output at a time).",
+ bytes, expectation);
+ return;
+ } else if (result.isUnmappable()) {
+ err("Decoder claimed unmappable sequence when flushing, which none of these decoders should do (one output at a time).",
+ bytes, expectation);
+ return;
+ } else if (result.isOverflow()) {
+ err("Decoder claimed overflow when flushing when the output buffer is know to be large enough (one output at a time).",
+ bytes, expectation);
+ } else if (!result.isUnderflow()) {
+ err("Bogus coder result when flushing, expected underflow (one output at a time).",
+ bytes, expectation);
+ }
+
+ if (charBuf.position() == 1) {
+ charBuf.position(0);
+ char actual = charBuf.get();
+ char expect = expectation.charAt(codeUnitPos);
+ if (actual != expect) {
+ err("When decoding one output code unit at a time in REPLACE mode, failed when flushing, expected: "
+ + charToHex(expect) + ", got: " + charToHex(actual),
+ bytes, expectation);
+ return;
+ }
+ }
+
+ // TODO: 2 bytes at a time starting at 0 and 2 bytes at a time starting
+ // at 1
+ }
+
+ protected void encode(String input, String expectation, Encoding encoding) {
+ byte[] expectedBytes = stringToBytes(expectation);
+ CharBuffer charBuf = CharBuffer.wrap(input);
+
+ // Use the convenience method from Charset
+
+ ByteBuffer byteBuf = encoding.encode(charBuf);
+
+ if (byteBuf.remaining() != expectedBytes.length) {
+ err("When encoding from a single long buffer, the output length was wrong. Expected: "
+ + expectedBytes.length + ", got: " + byteBuf.remaining(),
+ input, expectedBytes);
+ return;
+ }
+
+ for (int i = 0; i < expectedBytes.length; i++) {
+ byte expect = expectedBytes[i];
+ byte actual = byteBuf.get();
+ if (actual != expect) {
+ err("When encoding from a single long buffer, failed at position "
+ + i
+ + ", expected: "
+ + byteToHex(expect)
+ + ", got: "
+ + byteToHex(actual), input, expectedBytes);
+ return;
+ }
+ }
+
+ // Encode with a 1-char input buffer
+
+ charBuf = CharBuffer.allocate(1);
+ byteBuf = ByteBuffer.allocate(expectedBytes.length + 2);
+ CharsetEncoder encoder = encoding.newEncoder();
+ encoder.onMalformedInput(CodingErrorAction.REPLACE);
+ encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+ for (int i = 0; i < input.length(); i++) {
+ charBuf.position(0);
+ charBuf.put(input.charAt(i));
+ charBuf.position(0);
+ CoderResult result = encoder.encode(charBuf, byteBuf,
+ (i + 1) == input.length());
+ if (result.isMalformed()) {
+ err("Encoder reported a malformed sequence when asked to replace at index: "
+ + i, input, expectedBytes);
+ return;
+ } else if (result.isUnmappable()) {
+ err("Encoder reported an upmappable sequence when asked to replace at index: "
+ + i, input, expectedBytes);
+ return;
+ } else if (result.isOverflow()) {
+ err("Encoder claimed overflow when the output buffer is know to be large enough.",
+ input, expectedBytes);
+ } else if (!result.isUnderflow()) {
+ err("Bogus coder result, expected underflow.", input,
+ expectedBytes);
+ }
+ }
+ CoderResult result = encoder.flush(byteBuf);
+ if (result.isMalformed()) {
+ err("Encoder reported a malformed sequence when asked to replace when flushing.",
+ input, expectedBytes);
+ return;
+ } else if (result.isUnmappable()) {
+ err("Encoder reported an unmappable sequence when asked to replace when flushing.",
+ input, expectedBytes);
+ return;
+ } else if (result.isOverflow()) {
+ err("Encoder claimed overflow when flushing when the output buffer is know to be large enough.",
+ input, expectedBytes);
+ } else if (!result.isUnderflow()) {
+ err("Bogus coder result when flushing, expected underflow.", input,
+ expectedBytes);
+
+ }
+
+ byteBuf.limit(byteBuf.position());
+ byteBuf.position(0);
+
+ for (int i = 0; i < expectedBytes.length; i++) {
+ byte expect = expectedBytes[i];
+ byte actual = byteBuf.get();
+ if (actual != expect) {
+ err("When encoding one char at a time in REPORT mode, failed at position "
+ + i
+ + ", expected: "
+ + byteToHex(expect)
+ + ", got: "
+ + byteToHex(actual), input, expectedBytes);
+ return;
+ }
+ }
+
+ // Decode with 1-byte output buffer
+
+ charBuf = CharBuffer.wrap(input);
+ byteBuf = ByteBuffer.allocate(1);
+
+ encoder.reset(); // Let's test this while at it
+ encoder.onMalformedInput(CodingErrorAction.REPLACE);
+ encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+ int bytePos = 0;
+ while (charBuf.hasRemaining()) {
+ byteBuf.position(0);
+ byteBuf.put((byte)0);
+ byteBuf.position(0);
+ result = encoder.encode(charBuf, byteBuf, false);
+ if (result.isMalformed()) {
+ err("Encoder reported a malformed sequence when asked to replace at index (decoding one output code unit at a time): "
+ + charBuf.position(), input, expectedBytes);
+ return;
+ } else if (result.isUnmappable()) {
+ err("Encoder reported an unmappable sequence when asked to replace at index (decoding one output code unit at a time): "
+ + charBuf.position(), input, expectedBytes);
+ return;
+ } else if (result.isUnderflow()) {
+ if (charBuf.hasRemaining()) {
+ err("When encoding one output byte at a time, encoder claimed underflow when there was input remaining.",
+ input, expectedBytes);
+ return;
+ }
+ } else if (!result.isOverflow()) {
+ err("Bogus coder result, expected overflow.", input, expectedBytes);
+ }
+ if (byteBuf.position() == 1) {
+ byteBuf.position(0);
+ byte actual = byteBuf.get();
+ byte expect = expectedBytes[bytePos];
+ if (actual != expect) {
+ err("When encoding one output byte at a time in REPLACE mode, failed at position "
+ + charBuf.position()
+ + ", expected: "
+ + byteToHex(expect) + ", got: " + byteToHex(actual),
+ input, expectedBytes);
+ return;
+ }
+ bytePos++;
+ }
+ }
+
+ byteBuf.position(0);
+ byteBuf.put((byte)0);
+ byteBuf.position(0);
+ result = encoder.encode(charBuf, byteBuf, true);
+
+ if (byteBuf.position() == 1) {
+ byteBuf.position(0);
+ byte actual = byteBuf.get();
+ byte expect = expectedBytes[bytePos];
+ if (actual != expect) {
+ err("When encoding one output byte at a time in REPLACE mode, failed at position "
+ + charBuf.position()
+ + ", expected: "
+ + byteToHex(expect) + ", got: " + byteToHex(actual),
+ input, expectedBytes);
+ return;
+ }
+ bytePos++;
+ }
+
+ byteBuf.position(0);
+ byteBuf.put((byte)0);
+ byteBuf.position(0);
+ result = encoder.flush(byteBuf);
+ if (result.isMalformed()) {
+ err("Encoder reported a malformed sequence when asked to replace when flushing (one output at a time).",
+ input, expectedBytes);
+ return;
+ } else if (result.isUnmappable()) {
+ err("Encoder reported an unmappable sequence when asked to replace when flushing (one output at a time).",
+ input, expectedBytes);
+ return;
+ } else if (result.isOverflow()) {
+ err("Encoder claimed overflow when flushing when the output buffer is know to be large enough (one output at a time).",
+ input, expectedBytes);
+ } else if (!result.isUnderflow()) {
+ err("Bogus coder result when flushing, expected underflow (one output at a time).",
+ input, expectedBytes);
+ }
+
+ if (byteBuf.position() == 1) {
+ byteBuf.position(0);
+ byte actual = byteBuf.get();
+ byte expect = expectedBytes[bytePos];
+ if (actual != expect) {
+ err("When encoding one output code unit at a time in REPLACE mode, failed when flushing, expected: "
+ + byteToHex(expect) + ", got: " + byteToHex(actual),
+ input, expectedBytes);
+ return;
+ }
+ }
+
+ // TODO: 2 bytes at a time starting at 0 and 2 bytes at a time starting
+ // at 1
+ }
+
+ /**
+  * Formats a UTF-16 code unit as lowercase hexadecimal, left-padded with
+  * zeros to four digits.
+  *
+  * @param c the code unit to format
+  * @return the four-digit hexadecimal form of {@code c}
+  */
+ private String charToHex(char c) {
+     StringBuilder padded = new StringBuilder(Integer.toHexString(c));
+     while (padded.length() < 4) {
+         padded.insert(0, '0');
+     }
+     return padded.toString();
+ }
+
+ /**
+  * Formats a byte, read as an unsigned value, as lowercase hexadecimal
+  * left-padded with a zero to two digits.
+  *
+  * @param b the byte to format
+  * @return the two-digit hexadecimal form of {@code b}
+  */
+ private String byteToHex(byte b) {
+     String digits = Integer.toHexString(b & 0xFF);
+     return digits.length() == 1 ? "0" + digits : digits;
+ }
+
+ /**
+  * Prints a decoder failure to standard error: the message, then the input
+  * bytes and the expected code units, both as space-separated hexadecimal.
+  *
+  * @param msg the description of the failure
+  * @param bytes the bytes that were fed to the decoder
+  * @param expectation the code units the decoder should have produced
+  */
+ private void err(String msg, byte[] bytes, String expectation) {
+     System.err.println(msg);
+
+     System.err.print("Input:");
+     for (byte inputByte : bytes) {
+         System.err.print(' ');
+         System.err.print(byteToHex(inputByte));
+     }
+     System.err.println();
+
+     System.err.print("Expect:");
+     for (char expectedUnit : expectation.toCharArray()) {
+         System.err.print(' ');
+         System.err.print(charToHex(expectedUnit));
+     }
+     System.err.println();
+ }
+
+ /**
+  * Prints an encoder failure to standard error: the message, then the input
+  * code units and the expected bytes, both as space-separated hexadecimal.
+  *
+  * @param msg the description of the failure
+  * @param chars the text that was fed to the encoder
+  * @param expectation the bytes the encoder should have produced
+  */
+ private void err(String msg, String chars, byte[] expectation) {
+     System.err.println(msg);
+
+     System.err.print("Input:");
+     for (char inputUnit : chars.toCharArray()) {
+         System.err.print(' ');
+         System.err.print(charToHex(inputUnit));
+     }
+     System.err.println();
+
+     System.err.print("Expect:");
+     for (byte expectedByte : expectation) {
+         System.err.print(' ');
+         System.err.print(byteToHex(expectedByte));
+     }
+     System.err.println();
+ }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java
new file mode 100644
index 000000000..3337a6555
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CodingErrorAction;
+
+import nu.validator.htmlparser.common.Heuristics;
+import nu.validator.htmlparser.io.Encoding;
+import nu.validator.htmlparser.io.HtmlInputStreamReader;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.SAXException;
+
+public class DecoderLoopTester {
+
+    /** Added to {@code codePoint >> 10} to obtain the UTF-16 lead surrogate. */
+    private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
+
+    /** Number of consecutive code points, starting at U+10000, in the test input. */
+    private static final int NUMBER_OF_ASTRAL_CHARS = 24500;
+
+    /**
+     * Builds a BOM, {@code padding} 'x' characters and a run of surrogate
+     * pairs, encodes them as UTF-8 and checks that
+     * {@link HtmlInputStreamReader} decodes the bytes back to the same UTF-16
+     * code units, once constructed with {@link Heuristics#NONE} and once
+     * constructed with the UTF-8 {@link Encoding}.
+     *
+     * @param padding number of 'x' characters placed after the BOM; presumably
+     *        used to shift where the four-byte sequences fall relative to the
+     *        reader's internal buffers (TODO confirm)
+     * @throws SAXException
+     * @throws IOException
+     */
+    private void runTest(int padding) throws SAXException, IOException {
+        Encoding utf8 = Encoding.forName("UTF-8");
+        char[] charArr = new char[1 + padding + 2 * NUMBER_OF_ASTRAL_CHARS];
+        int i = 0;
+        charArr[i++] = '\uFEFF';
+        for (int j = 0; j < padding; j++) {
+            charArr[i++] = 'x';
+        }
+        for (int j = 0; j < NUMBER_OF_ASTRAL_CHARS; j++) {
+            int value = 0x10000 + j;
+            charArr[i++] = (char) (LEAD_OFFSET + (value >> 10));
+            charArr[i++] = (char) (0xDC00 + (value & 0x3FF));
+        }
+        CharsetEncoder enc = utf8.newEncoder();
+        enc.onMalformedInput(CodingErrorAction.REPORT);
+        enc.onUnmappableCharacter(CodingErrorAction.REPORT);
+        ByteBuffer byteBuffer = enc.encode(CharBuffer.wrap(charArr));
+        byte[] byteArr = new byte[byteBuffer.limit()];
+        byteBuffer.get(byteArr);
+
+        ErrorHandler eh = new SystemErrErrorHandler();
+        compare(new HtmlInputStreamReader(new ByteArrayInputStream(byteArr), eh, null, null, Heuristics.NONE), padding, charArr, byteArr);
+        compare(new HtmlInputStreamReader(new ByteArrayInputStream(byteArr), eh, null, null, utf8), padding, charArr, byteArr);
+    }
+
+    /**
+     * Reads {@code reader} to the end and verifies that it yields exactly the
+     * code units in {@code charArr}: no mismatch, no surplus and no shortfall.
+     *
+     * @param reader the decoder under test
+     * @param padding not used by the comparison
+     * @param charArr the expected decoded UTF-16 code units
+     * @param byteArr not used by the comparison
+     * @throws RuntimeException if the decoded output differs from {@code charArr}
+     * @throws SAXException
+     * @throws IOException
+     */
+    private void compare(HtmlInputStreamReader reader, int padding, char[] charArr, byte[] byteArr) throws SAXException, IOException {
+        char[] readBuffer = new char[2048];
+        int offset = 0;
+        int num;
+        int readNum = 0;
+        while ((num = reader.read(readBuffer)) != -1) {
+            // Fail with a readable message instead of an index error when the
+            // decoder produces more than was encoded.
+            if (offset + num > charArr.length) {
+                throw new RuntimeException("Test failed. Decoder produced more than the expected " + charArr.length + " chars. readNum: " + readNum);
+            }
+            for (int j = 0; j < num; j++) {
+                if (readBuffer[j] != charArr[offset + j]) {
+                    throw new RuntimeException("Test failed. Char: " + Integer.toHexString(readBuffer[j]) + " j: " + j + " readNum: " + readNum);
+                }
+            }
+            offset += num;
+            readNum++;
+        }
+        // A stream that ends early would otherwise pass unnoticed.
+        if (offset != charArr.length) {
+            throw new RuntimeException("Test failed. Expected " + charArr.length + " chars but decoder produced only " + offset + ".");
+        }
+    }
+
+    /**
+     * Runs {@link #runTest(int)} with 0, 1, 2 and 3 characters of padding.
+     *
+     * @throws SAXException
+     * @throws IOException
+     */
+    void runTests() throws SAXException, IOException {
+        for (int i = 0; i < 4; i++) {
+            runTest(i);
+        }
+    }
+
+    /**
+     * Command-line entry point.
+     *
+     * @param args not used
+     * @throws IOException
+     * @throws SAXException
+     */
+    public static void main(String[] args) throws IOException, SAXException {
+        new DecoderLoopTester().runTests();
+    }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomIdTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomIdTester.java
new file mode 100644
index 000000000..a3866f5d9
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomIdTester.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import org.w3c.dom.Document;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import nu.validator.htmlparser.dom.HtmlDocumentBuilder;
+
+public class DomIdTester {
+
+    /** Markup in which the {@code p} element carries {@code id='foo'} and the {@code h1} carries {@code class='foo'}. */
+    private static final String testSrc = "<div><h1 id='bar' class='foo'>buoeoa</h1><p id='foo'>uoeuo</p></div>";
+
+    /**
+     * Parses {@link #testSrc} with {@link HtmlDocumentBuilder} and prints the
+     * local name of the element returned by {@code getElementById("foo")}.
+     *
+     * @param args not used
+     * @throws IOException
+     * @throws SAXException
+     */
+    public static void main(String[] args) throws SAXException, IOException {
+        HtmlDocumentBuilder docBuilder = new HtmlDocumentBuilder();
+        InputSource source = new InputSource(new StringReader(testSrc));
+        Document parsed = docBuilder.parse(source);
+        String localName = parsed.getElementById("foo").getLocalName();
+        System.out.println(localName);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomTest.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomTest.java
new file mode 100644
index 000000000..07d054b9e
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomTest.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+
+public class DomTest {
+    /**
+     * Creates an empty DOM document from a namespace-aware factory, creates an
+     * XHTML {@code html} element and sets an {@code xmlns:foo} attribute on it
+     * via {@code setAttribute}. Nothing is printed or asserted; the run only
+     * shows whether these calls complete without throwing.
+     *
+     * @param args not used
+     * @throws Exception if any of the DOM calls fails
+     */
+    public static void main(String[] args) throws Exception {
+        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+        // not setting this causes pain and suffering with SVG
+        factory.setNamespaceAware(true);
+        DocumentBuilder docBuilder = factory.newDocumentBuilder();
+        Document document = docBuilder.newDocument();
+        Element html = document.createElementNS("http://www.w3.org/1999/xhtml", "html");
+        html.setAttribute("xmlns:foo", "bar");
+    }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/EncodingTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/EncodingTester.java
new file mode 100644
index 000000000..95cd3018e
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/EncodingTester.java
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+
+import nu.validator.htmlparser.common.Heuristics;
+import nu.validator.htmlparser.io.Encoding;
+import nu.validator.htmlparser.io.HtmlInputStreamReader;
+
+import org.xml.sax.SAXException;
+
+public class EncodingTester {
+
+    /** Stream from which all test cases are read in sequence. */
+    private final InputStream aggregateStream;
+
+    /** Scratch buffer for the expected encoding name of the current test. */
+    private final StringBuilder builder = new StringBuilder();
+
+    /**
+     * @param aggregateStream the stream to read the test cases from
+     */
+    public EncodingTester(InputStream aggregateStream) {
+        this.aggregateStream = aggregateStream;
+    }
+
+    /**
+     * Runs test cases until {@link #runTest()} returns {@code false}.
+     *
+     * @throws IOException
+     * @throws SAXException
+     */
+    private void runTests() throws IOException, SAXException {
+        while (runTest()) {
+            // spin
+        }
+    }
+
+    /**
+     * Runs a single test case: skips a label line, lets
+     * {@link HtmlInputStreamReader} choose a charset for the data exposed by an
+     * {@link UntilHashInputStream}, skips a second label line, reads the
+     * expected encoding name up to the next line feed and prints "Success." or
+     * a failure message to standard error.
+     *
+     * @return {@code true} if a test case was evaluated; {@code false} if the
+     *         input ended (a premature end is also reported on standard error)
+     * @throws IOException
+     * @throws SAXException
+     */
+    private boolean runTest() throws IOException, SAXException {
+        if (skipLabel()) {
+            return false;
+        }
+        UntilHashInputStream stream = new UntilHashInputStream(aggregateStream);
+        HtmlInputStreamReader reader = new HtmlInputStreamReader(stream, null,
+                null, null, Heuristics.NONE);
+        Charset charset = reader.getCharset();
+        stream.close();
+        if (skipLabel()) {
+            System.err.println("Premature end of test data.");
+            return false;
+        }
+        builder.setLength(0);
+        loop: for (;;) {
+            int b = aggregateStream.read();
+            switch (b) {
+                case '\n':
+                    break loop;
+                case -1:
+                    System.err.println("Premature end of test data.");
+                    return false;
+                default:
+                    // Each byte of the name is taken as one character.
+                    builder.append(((char) b));
+            }
+        }
+        String sniffed = charset.name();
+        // Resolve the expected label through Encoding so that the comparison
+        // is between two charset names rather than a raw label and a name.
+        String expected = Encoding.forName(builder.toString()).newDecoder().charset().name();
+        if (expected.equalsIgnoreCase(sniffed)) {
+            System.err.println("Success.");
+            // System.err.println(stream);
+        } else {
+            System.err.println("Failure. Expected: " + expected + " got "
+                    + sniffed + ".");
+            System.err.println(stream);
+        }
+        return true;
+    }
+
+    /**
+     * Consumes one byte unconditionally and then everything up to and
+     * including the next line feed.
+     *
+     * @return {@code true} if the end of the stream was hit before a line feed
+     *         was found; {@code false} once the line feed has been consumed
+     * @throws IOException
+     */
+    private boolean skipLabel() throws IOException {
+        int b = aggregateStream.read();
+        if (b == -1) {
+            return true;
+        }
+        for (;;) {
+            b = aggregateStream.read();
+            if (b == -1) {
+                return true;
+            } else if (b == 0x0A) {
+                return false;
+            }
+        }
+    }
+
+    /**
+     * Runs the tests contained in each file named on the command line.
+     *
+     * @param args paths of the test data files
+     * @throws SAXException
+     * @throws IOException
+     */
+    public static void main(String[] args) throws IOException, SAXException {
+        for (int i = 0; i < args.length; i++) {
+            InputStream in = new FileInputStream(args[i]);
+            try {
+                EncodingTester tester = new EncodingTester(in);
+                tester.runTests();
+            } finally {
+                // Release the file handle even when a test throws.
+                in.close();
+            }
+        }
+    }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java
new file mode 100644
index 000000000..2fcfc4960
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import nu.validator.htmlparser.common.TokenHandler;
+import nu.validator.htmlparser.impl.ElementName;
+import nu.validator.htmlparser.impl.HtmlAttributes;
+import nu.validator.htmlparser.impl.Tokenizer;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+import com.sdicons.json.model.JSONArray;
+import com.sdicons.json.model.JSONBoolean;
+import com.sdicons.json.model.JSONNull;
+import com.sdicons.json.model.JSONObject;
+import com.sdicons.json.model.JSONString;
+
+/**
+ * Token handler that records each token it receives as a JSON array whose
+ * first item names the token type, and records each reported error as the
+ * bare string "ParseError". (Presumably this mirrors the expected-output
+ * format of the tokenizer test suite; confirm against the test data.)
+ */
+public class JSONArrayTokenHandler implements TokenHandler, ErrorHandler {
+
+    private static final JSONString DOCTYPE = new JSONString("DOCTYPE");
+
+    private static final JSONString START_TAG = new JSONString("StartTag");
+
+    private static final JSONString END_TAG = new JSONString("EndTag");
+
+    private static final JSONString COMMENT = new JSONString("Comment");
+
+    private static final JSONString CHARACTER = new JSONString("Character");
+
+    private static final JSONString PARSE_ERROR = new JSONString("ParseError");
+
+    private static final char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
+
+    /** Character data received since the last token was recorded. */
+    private final StringBuilder builder = new StringBuilder();
+
+    /** Tokens recorded so far; created in {@link #startTokenization(Tokenizer)}. */
+    private JSONArray array = null;
+
+    /** State to hand to the tokenizer when {@link #contentModelElement} is set. */
+    private int contentModelFlag;
+
+    /** End tag expectation to hand to the tokenizer, or {@code null} for none. */
+    private String contentModelElement;
+
+    /**
+     * Stores the state and end tag expectation that
+     * {@link #startTokenization(Tokenizer)} applies to the tokenizer.
+     */
+    public void setContentModelFlag(int contentModelFlag, String contentModelElement) {
+        this.contentModelFlag = contentModelFlag;
+        this.contentModelElement = contentModelElement;
+    }
+
+    /** Flushes pending character data, then starts a token array headed by {@code type}. */
+    private JSONArray beginToken(JSONString type) {
+        flushCharacters();
+        JSONArray token = new JSONArray();
+        token.getValue().add(type);
+        return token;
+    }
+
+    /** Buffers character data so that adjacent runs end up in one token. */
+    public void characters(char[] buf, int start, int length)
+            throws SAXException {
+        builder.append(buf, start, length);
+    }
+
+    /** Records buffered character data, if any, as a single Character token. */
+    private void flushCharacters() {
+        if (builder.length() == 0) {
+            return;
+        }
+        JSONArray token = new JSONArray();
+        token.getValue().add(CHARACTER);
+        token.getValue().add(new JSONString(builder.toString()));
+        array.getValue().add(token);
+        builder.setLength(0);
+    }
+
+    /** Records a Comment token holding the given text. */
+    public void comment(char[] buf, int start, int length) throws SAXException {
+        JSONArray token = beginToken(COMMENT);
+        String text = new String(buf, start, length);
+        token.getValue().add(new JSONString(text));
+        array.getValue().add(token);
+    }
+
+    /**
+     * Records a DOCTYPE token: name, public identifier, system identifier
+     * (JSON null when absent) and the negation of {@code forceQuirks}.
+     */
+    public void doctype(String name, String publicIdentifier, String systemIdentifier, boolean forceQuirks) throws SAXException {
+        JSONArray token = beginToken(DOCTYPE);
+        token.getValue().add(new JSONString(name));
+        if (publicIdentifier == null) {
+            token.getValue().add(JSONNull.NULL);
+        } else {
+            token.getValue().add(new JSONString(publicIdentifier));
+        }
+        if (systemIdentifier == null) {
+            token.getValue().add(JSONNull.NULL);
+        } else {
+            token.getValue().add(new JSONString(systemIdentifier));
+        }
+        token.getValue().add(new JSONBoolean(!forceQuirks));
+        array.getValue().add(token);
+    }
+
+    /** Records an EndTag token holding the element name. */
+    public void endTag(ElementName eltName) throws SAXException {
+        String name = eltName.name;
+        JSONArray token = beginToken(END_TAG);
+        token.getValue().add(new JSONString(name));
+        array.getValue().add(token);
+    }
+
+    /** Records any character data still buffered at end of input. */
+    public void eof() throws SAXException {
+        flushCharacters();
+    }
+
+    /**
+     * Starts a fresh token array and, when a content model element has been
+     * set, puts the tokenizer into the stored state.
+     */
+    public void startTokenization(Tokenizer self) throws SAXException {
+        array = new JSONArray();
+        if (contentModelElement == null) {
+            return;
+        }
+        self.setStateAndEndTagExpectation(contentModelFlag, contentModelElement);
+    }
+
+    /**
+     * Records a StartTag token: element name, an object mapping attribute
+     * qualified names to values and, only for self-closing tags, a trailing
+     * {@code true}.
+     */
+    public void startTag(ElementName eltName, HtmlAttributes attributes,
+            boolean selfClosing) throws SAXException {
+        String name = eltName.name;
+        JSONArray token = beginToken(START_TAG);
+        token.getValue().add(new JSONString(name));
+        JSONObject attrs = new JSONObject();
+        int attributeCount = attributes.getLength();
+        for (int index = 0; index < attributeCount; index++) {
+            String qName = attributes.getQNameNoBoundsCheck(index);
+            JSONString value = new JSONString(attributes.getValueNoBoundsCheck(index));
+            attrs.getValue().put(qName, value);
+        }
+        token.getValue().add(attrs);
+        if (selfClosing) {
+            token.getValue().add(JSONBoolean.TRUE);
+        }
+        array.getValue().add(token);
+    }
+
+    public boolean wantsComments() throws SAXException {
+        return true;
+    }
+
+    /** Records the error as a bare "ParseError" entry after flushing character data. */
+    public void error(SAXParseException exception) throws SAXException {
+        flushCharacters();
+        array.getValue().add(PARSE_ERROR);
+    }
+
+    public void fatalError(SAXParseException exception) throws SAXException {
+        throw new RuntimeException("Should never happen.");
+    }
+
+    /** Warnings are ignored. */
+    public void warning(SAXParseException exception) throws SAXException {
+    }
+
+    /**
+     * Returns the array.
+     *
+     * @return the array
+     */
+    public JSONArray getArray() {
+        return array;
+    }
+
+    public void endTokenization() throws SAXException {
+
+    }
+
+    /** Appends U+FFFD to the buffered character data. */
+    @Override public void zeroOriginatingReplacementCharacter()
+            throws SAXException {
+        builder.append(REPLACEMENT_CHARACTER, 0, 1);
+    }
+
+    @Override public boolean cdataSectionAllowed() throws SAXException {
+        return false;
+    }
+
+    @Override public void ensureBufferSpace(int inputLength)
+            throws SAXException {
+    }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/ListErrorHandler.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/ListErrorHandler.java
new file mode 100644
index 000000000..9a207f277
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/ListErrorHandler.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.util.LinkedList;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+/**
+ * SAX error handler that collects errors in a list and remembers whether a
+ * fatal error was reported. Warnings are ignored.
+ */
+public class ListErrorHandler implements ErrorHandler {
+
+    /** Set once {@link #fatalError(SAXParseException)} has been called. */
+    private boolean fatal = false;
+
+    /** One entry per reported error, in the order the errors arrived. */
+    private LinkedList<String> errors = new LinkedList<String>();
+
+    /** Records the error as its column number, ": " and its message. */
+    public void error(SAXParseException spe) throws SAXException {
+        StringBuilder entry = new StringBuilder();
+        entry.append(spe.getColumnNumber());
+        entry.append(": ");
+        entry.append(spe.getMessage());
+        errors.add(entry.toString());
+    }
+
+    /** Notes that a fatal error occurred; its details are not kept. */
+    public void fatalError(SAXParseException arg0) throws SAXException {
+        fatal = true;
+    }
+
+    /** Warnings are ignored. */
+    public void warning(SAXParseException arg0) throws SAXException {
+    }
+
+    /**
+     * Returns the errors.
+     *
+     * @return the live list of recorded errors (not a copy)
+     */
+    public LinkedList<String> getErrors() {
+        return errors;
+    }
+
+    /**
+     * Returns the fatal.
+     *
+     * @return {@code true} if a fatal error has been reported
+     */
+    public boolean isFatal() {
+        return fatal;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/SystemErrErrorHandler.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/SystemErrErrorHandler.java
new file mode 100644
index 000000000..9ee490b9e
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/SystemErrErrorHandler.java
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2005, 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+
+import javax.xml.transform.ErrorListener;
+import javax.xml.transform.SourceLocator;
+import javax.xml.transform.TransformerException;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+/**
+ * @version $Id$
+ * @author hsivonen
+ */
+public class SystemErrErrorHandler implements ErrorHandler, ErrorListener {
+
+    /** UTF-8 writer wrapped around {@code System.err}. */
+    private Writer out;
+
+    /** Whether an error or fatal error has been reported since the last reset. */
+    private boolean inError = false;
+
+    /** Creates a handler that writes its reports to standard error as UTF-8. */
+    public SystemErrErrorHandler() {
+        try {
+            out = new OutputStreamWriter(System.err, "UTF-8");
+        } catch (UnsupportedEncodingException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Writes one SAX report: heading, message, system id (or "Unknown"), line
+     * and column, followed by a blank line, and flushes the writer.
+     *
+     * @param heading the first line of the report, including its line feed
+     * @param e the exception to describe
+     * @throws SAXException wrapping any {@link IOException} from the writer
+     */
+    private void report(String heading, SAXParseException e) throws SAXException {
+        try {
+            out.write(heading);
+            out.write(e.getMessage());
+            out.write("\nFile: ");
+            String systemId = e.getSystemId();
+            out.write(systemId != null ? systemId : "Unknown");
+            out.write("\nLine: ");
+            out.write(String.valueOf(e.getLineNumber()));
+            out.write(" Col: ");
+            out.write(String.valueOf(e.getColumnNumber()));
+            out.write("\n\n");
+            out.flush();
+        } catch (IOException ioe) {
+            throw new SAXException(ioe);
+        }
+    }
+
+    /**
+     * Writes one transformer report: heading, message and, only when the
+     * exception has a locator, system id (or "Unknown"), line and column,
+     * followed by a blank line, and flushes the writer.
+     *
+     * @param heading the first line of the report, including its line feed
+     * @param e the exception to describe
+     * @throws TransformerException wrapping any {@link IOException} from the writer
+     */
+    private void report(String heading, TransformerException e) throws TransformerException {
+        try {
+            out.write(heading);
+            out.write(e.getMessage());
+            SourceLocator sourceLocator = e.getLocator();
+            if (sourceLocator != null) {
+                out.write("\nFile: ");
+                String systemId = sourceLocator.getSystemId();
+                out.write(systemId != null ? systemId : "Unknown");
+                out.write("\nLine: ");
+                out.write(String.valueOf(sourceLocator.getLineNumber()));
+                out.write(" Col: ");
+                out.write(String.valueOf(sourceLocator.getColumnNumber()));
+            }
+            out.write("\n\n");
+            out.flush();
+        } catch (IOException ioe) {
+            throw new TransformerException(ioe);
+        }
+    }
+
+    /**
+     * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException)
+     */
+    public void warning(SAXParseException e) throws SAXException {
+        report("Warning:\n", e);
+    }
+
+    /**
+     * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException)
+     */
+    public void error(SAXParseException e) throws SAXException {
+        inError = true;
+        report("Error:\n", e);
+    }
+
+    /**
+     * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException)
+     */
+    public void fatalError(SAXParseException e) throws SAXException {
+        inError = true;
+        report("Fatal Error:\n", e);
+    }
+
+    /**
+     * Returns the inError.
+     *
+     * @return {@code true} if an error or fatal error was reported since
+     *         construction or the last {@link #reset()}
+     */
+    public boolean isInError() {
+        return inError;
+    }
+
+    /** Clears the error flag. */
+    public void reset() {
+        inError = false;
+    }
+
+    /** Marks the handler as in error and reports the transformer error. */
+    public void error(TransformerException e) throws TransformerException {
+        inError = true;
+        report("Error:\n", e);
+    }
+
+    /** Marks the handler as in error and reports the fatal transformer error. */
+    public void fatalError(TransformerException e)
+            throws TransformerException {
+        inError = true;
+        report("Fatal Error:\n", e);
+    }
+
+    /** Reports the transformer warning; the error flag is left untouched. */
+    public void warning(TransformerException e)
+            throws TransformerException {
+        report("Warning:\n", e);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenPrinter.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenPrinter.java
new file mode 100644
index 000000000..0fa5972c8
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenPrinter.java
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+
+import nu.validator.htmlparser.common.TokenHandler;
+import nu.validator.htmlparser.impl.ElementName;
+import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
+import nu.validator.htmlparser.impl.HtmlAttributes;
+import nu.validator.htmlparser.impl.Tokenizer;
+import nu.validator.htmlparser.io.Driver;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+/**
+ * Token handler and error handler that writes one line per event to a
+ * {@link Writer}, each line starting with a marker that identifies the kind of
+ * event.
+ */
+public class TokenPrinter implements TokenHandler, ErrorHandler {
+
+    /** Destination of the printed lines; closed in {@link #endTokenization()}. */
+    private final Writer writer;
+
+    /**
+     * @param writer the writer to print the token lines to
+     */
+    public TokenPrinter(final Writer writer) {
+        this.writer = writer;
+    }
+
+    /**
+     * Prints character data on lines starting with '-'. Each line feed in the
+     * data is written as the two characters backslash and 'n'; data following
+     * it continues on a new '-' line.
+     */
+    public void characters(char[] buf, int start, int length)
+            throws SAXException {
+        try {
+            // False right after an escaped line feed: the next character must
+            // first open a new '-' line.
+            boolean lineStarted = true;
+            writer.write('-');
+            for (int i = start; i < start + length; i++) {
+                if (!lineStarted) {
+                    writer.write("\n-");
+                    lineStarted = true;
+                }
+                char c = buf[i];
+                if (c == '\n') {
+                    writer.write("\\n");
+                    lineStarted = false;
+                } else {
+                    writer.write(c);
+                }
+            }
+            writer.write('\n');
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /** Prints '!' followed by the comment text. */
+    public void comment(char[] buf, int start, int length) throws SAXException {
+        try {
+            writer.write('!');
+            writer.write(buf, start, length);
+            writer.write('\n');
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /**
+     * Prints 'D', the doctype name, a space and the {@code forceQuirks} flag.
+     * The public and system identifiers are not printed.
+     */
+    public void doctype(String name, String publicIdentifier, String systemIdentifier, boolean forceQuirks) throws SAXException {
+        try {
+            writer.write('D');
+            writer.write(name);
+            writer.write(' ');
+            writer.write("" + forceQuirks);
+            writer.write('\n');
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /** Prints ')' followed by the element name. */
+    public void endTag(ElementName eltName) throws SAXException {
+        try {
+            writer.write(')');
+            writer.write(eltName.name);
+            writer.write('\n');
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /** Prints an "E" line. */
+    public void eof() throws SAXException {
+        try {
+            writer.write("E\n");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    public void startTokenization(Tokenizer self) throws SAXException {
+
+    }
+
+    /**
+     * Prints '(' followed by the element name, then one 'A' line per
+     * attribute holding its qualified name, a space and its value. The
+     * {@code selfClosing} flag is not printed.
+     */
+    public void startTag(ElementName eltName, HtmlAttributes attributes, boolean selfClosing)
+            throws SAXException {
+        try {
+            writer.write('(');
+            writer.write(eltName.name);
+            writer.write('\n');
+            for (int i = 0; i < attributes.getLength(); i++) {
+                writer.write('A');
+                writer.write(attributes.getQNameNoBoundsCheck(i));
+                writer.write(' ');
+                writer.write(attributes.getValueNoBoundsCheck(i));
+                writer.write('\n');
+            }
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    public boolean wantsComments() throws SAXException {
+        return true;
+    }
+
+    /**
+     * Tokenizes the file named by the first argument and prints its tokens
+     * and errors to standard output as UTF-8.
+     *
+     * @param args {@code args[0]} is the path of the file to tokenize
+     * @throws SAXException
+     * @throws IOException
+     */
+    public static void main(String[] args) throws SAXException, IOException {
+        TokenPrinter printer = new TokenPrinter(new OutputStreamWriter(System.out, "UTF-8"));
+        Driver tokenizer = new Driver(new ErrorReportingTokenizer(printer));
+        tokenizer.setErrorHandler(printer);
+        File file = new File(args[0]);
+        FileInputStream in = new FileInputStream(file);
+        try {
+            InputSource is = new InputSource(in);
+            is.setSystemId(file.toURI().toASCIIString());
+            tokenizer.tokenize(is);
+        } finally {
+            // Release the file handle even when tokenization throws.
+            in.close();
+        }
+    }
+
+    /** Prints "R " followed by the error message. */
+    public void error(SAXParseException exception) throws SAXException {
+        try {
+            writer.write("R ");
+            writer.write(exception.getMessage());
+            writer.write("\n");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /** Prints "F " followed by the fatal error message. */
+    public void fatalError(SAXParseException exception) throws SAXException {
+        try {
+            writer.write("F ");
+            writer.write(exception.getMessage());
+            writer.write("\n");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /** Prints "W " followed by the warning message. */
+    public void warning(SAXParseException exception) throws SAXException {
+        try {
+            writer.write("W ");
+            writer.write(exception.getMessage());
+            writer.write("\n");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /** Flushes and closes the writer; nothing can be printed afterwards. */
+    public void endTokenization() throws SAXException {
+        try {
+            writer.flush();
+            writer.close();
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /** Prints a "0" line. */
+    @Override public void zeroOriginatingReplacementCharacter()
+            throws SAXException {
+        try {
+            writer.write("0\n");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    @Override public boolean cdataSectionAllowed() throws SAXException {
+        return false;
+    }
+
+    @Override public void ensureBufferSpace(int inputLength)
+            throws SAXException {
+    }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenizerTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenizerTester.java
new file mode 100644
index 000000000..76ea7543a
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenizerTester.java
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.io.StringReader;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
+import nu.validator.htmlparser.impl.Tokenizer;
+import nu.validator.htmlparser.io.Driver;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import antlr.RecognitionException;
+import antlr.TokenStreamException;
+
+import com.sdicons.json.model.JSONArray;
+import com.sdicons.json.model.JSONObject;
+import com.sdicons.json.model.JSONString;
+import com.sdicons.json.model.JSONValue;
+import com.sdicons.json.parser.JSONParser;
+
+ /**
+  * Command-line driver that runs JSON-described tokenizer tests and prints
+  * "Success" or a failure report for each test to standard output.
+  */
+ public class TokenizerTester {
+
+     private static final JSONString PLAINTEXT = new JSONString("PLAINTEXT state");
+
+     private static final JSONString PCDATA = new JSONString("DATA state");
+
+     private static final JSONString RCDATA = new JSONString("RCDATA state");
+
+     private static final JSONString RAWTEXT = new JSONString("RAWTEXT state");
+
+     /**
+      * Compares two JSON values: simple values via <code>equals</code>,
+      * arrays and objects via <code>equals</code> on their wrapped values.
+      *
+      * @param one the first value
+      * @param other the second value
+      * @return <code>true</code> if the values are considered equal
+      */
+     private static boolean jsonDeepEquals(JSONValue one, JSONValue other) {
+         if (one.isSimple()) {
+             return one.equals(other);
+         } else if (one.isArray()) {
+             if (other.isArray()) {
+                 JSONArray oneArr = (JSONArray) one;
+                 JSONArray otherArr = (JSONArray) other;
+                 return oneArr.getValue().equals(otherArr.getValue());
+             } else {
+                 return false;
+             }
+         } else if (one.isObject()) {
+             if (other.isObject()) {
+                 JSONObject oneObject = (JSONObject) one;
+                 JSONObject otherObject = (JSONObject) other;
+                 return oneObject.getValue().equals(otherObject.getValue());
+             } else {
+                 return false;
+             }
+         } else {
+             throw new RuntimeException("Should never happen.");
+         }
+     }
+
+     private JSONArray tests;
+
+     private final JSONArrayTokenHandler tokenHandler;
+
+     private final Driver driver;
+
+     private final Writer writer;
+
+     /**
+      * Sets up the tokenizer driver and parses the test descriptions.
+      *
+      * The driver starts with every XML violation policy set to ALLOW. The
+      * test list is taken from the "tests" member; if that is absent the
+      * "xmlViolationTests" member is used instead and the comment, non-XML
+      * character, name and xmlns policies are switched to ALTER_INFOSET.
+      *
+      * @param stream UTF-8 encoded JSON test data; not closed by this class
+      */
+     private TokenizerTester(InputStream stream) throws TokenStreamException,
+             RecognitionException, UnsupportedEncodingException {
+         tokenHandler = new JSONArrayTokenHandler();
+         driver = new Driver(new ErrorReportingTokenizer(tokenHandler));
+         driver.setCommentPolicy(XmlViolationPolicy.ALLOW);
+         driver.setContentNonXmlCharPolicy(XmlViolationPolicy.ALLOW);
+         driver.setContentSpacePolicy(XmlViolationPolicy.ALLOW);
+         driver.setNamePolicy(XmlViolationPolicy.ALLOW);
+         driver.setXmlnsPolicy(XmlViolationPolicy.ALLOW);
+         driver.setErrorHandler(tokenHandler);
+         writer = new OutputStreamWriter(System.out, "UTF-8");
+         JSONParser jsonParser = new JSONParser(new InputStreamReader(stream,
+                 "UTF-8"));
+         JSONObject obj = (JSONObject) jsonParser.nextValue();
+         tests = (JSONArray) obj.get("tests");
+         if (tests == null) {
+             tests = (JSONArray) obj.get("xmlViolationTests");
+             driver.setCommentPolicy(XmlViolationPolicy.ALTER_INFOSET);
+             driver.setContentNonXmlCharPolicy(XmlViolationPolicy.ALTER_INFOSET);
+             driver.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET);
+             driver.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET);
+         }
+     }
+
+     /**
+      * Runs every test in the parsed list and flushes the report writer.
+      */
+     private void runTests() throws SAXException, IOException {
+         for (JSONValue val : tests.getValue()) {
+             runTest((JSONObject) val);
+         }
+         writer.flush();
+     }
+
+     /**
+      * Runs one test description once per entry in its "initialStates"
+      * array, or once in the DATA state if that member is absent.
+      *
+      * @param test the JSON object describing the test
+      */
+     private void runTest(JSONObject test) throws SAXException, IOException {
+         String inputString = ((JSONString) test.get("input")).getValue();
+         JSONArray expectedTokens = (JSONArray) test.get("output");
+         String description = ((JSONString) test.get("description")).getValue();
+         JSONString lastStartTagJSON = ((JSONString) test.get("lastStartTag"));
+         String lastStartTag = lastStartTagJSON == null ? null
+                 : lastStartTagJSON.getValue();
+         JSONArray contentModelFlags = (JSONArray) test.get("initialStates");
+         if (contentModelFlags == null) {
+             runTestInner(inputString, expectedTokens, description,
+                     Tokenizer.DATA, null);
+         } else {
+             for (JSONValue value : contentModelFlags.getValue()) {
+                 if (PCDATA.equals(value)) {
+                     runTestInner(inputString, expectedTokens, description,
+                             Tokenizer.DATA, lastStartTag);
+                 } else if (RAWTEXT.equals(value)) {
+                     runTestInner(inputString, expectedTokens, description,
+                             Tokenizer.RAWTEXT, lastStartTag);
+                 } else if (RCDATA.equals(value)) {
+                     runTestInner(inputString, expectedTokens, description,
+                             Tokenizer.RCDATA, lastStartTag);
+                 } else if (PLAINTEXT.equals(value)) {
+                     runTestInner(inputString, expectedTokens, description,
+                             Tokenizer.PLAINTEXT, lastStartTag);
+                 } else {
+                     throw new RuntimeException("Broken test data.");
+                 }
+             }
+         }
+     }
+
+     /**
+      * Tokenizes one input in one initial state and reports the outcome.
+      *
+      * Anything thrown while tokenizing or comparing is reported as a
+      * failure with its stack trace rather than propagated.
+      *
+      * @param inputString the text to tokenize
+      * @param expectedTokens the tokens the test expects
+      * @param description the human-readable test name used in reports
+      * @param contentModelFlag the initial tokenizer state
+      * @param contentModelElement the last start tag name, or
+      *        <code>null</code>
+      * @throws SAXException declared for callers; see above
+      * @throws IOException if writing the report fails
+      */
+     private void runTestInner(String inputString, JSONArray expectedTokens,
+             String description, int contentModelFlag,
+             String contentModelElement) throws SAXException, IOException {
+         tokenHandler.setContentModelFlag(contentModelFlag, contentModelElement);
+         InputSource is = new InputSource(new StringReader(inputString));
+         try {
+             driver.tokenize(is);
+             JSONArray actualTokens = tokenHandler.getArray();
+             if (jsonDeepEquals(actualTokens, expectedTokens)) {
+                 writer.write("Success\n");
+             } else {
+                 writeFailureHeader(description, inputString);
+                 writer.write("\nExpected tokens:\n");
+                 writer.write(expectedTokens.render(false));
+                 writer.write("\nActual tokens:\n");
+                 writer.write(actualTokens.render(false));
+                 writer.write("\n");
+             }
+         } catch (Throwable t) {
+             writeFailureHeader(description, inputString);
+             writer.write("\n");
+             t.printStackTrace(new PrintWriter(writer, false));
+         }
+     }
+
+     /**
+      * Writes the part of a failure report shared by all failure kinds.
+      */
+     private void writeFailureHeader(String description, String inputString)
+             throws IOException {
+         writer.write("Failure\n");
+         writer.write(description);
+         writer.write("\nInput:\n");
+         writer.write(inputString);
+     }
+
+     /**
+      * Runs the tests in each file named on the command line.
+      *
+      * @param args paths of JSON test files
+      * @throws RecognitionException if a test file is not valid JSON
+      * @throws TokenStreamException if a test file is not valid JSON
+      * @throws IOException if a file cannot be read or the report cannot be
+      *         written
+      * @throws SAXException propagated from the test run
+      */
+     public static void main(String[] args) throws TokenStreamException,
+             RecognitionException, SAXException, IOException {
+         for (int i = 0; i < args.length; i++) {
+             InputStream stream = new FileInputStream(args[i]);
+             try {
+                 TokenizerTester tester = new TokenizerTester(stream);
+                 tester.runTests();
+             } finally {
+                 // Release the file handle even when a test file is broken.
+                 stream.close();
+             }
+         }
+     }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeDumpContentHandler.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeDumpContentHandler.java
new file mode 100644
index 000000000..9b95b763e
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeDumpContentHandler.java
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+ /**
+  * SAX handler that writes the events it receives as an indented tree dump:
+  * one "| "-prefixed line per element, attribute, text run, comment and
+  * doctype, with one extra space of indentation per nesting level.
+  */
+ public class TreeDumpContentHandler implements ContentHandler, LexicalHandler {
+
+     private static final String XHTML_NS = "http://www.w3.org/1999/xhtml";
+
+     private static final String MATHML_NS = "http://www.w3.org/1998/Math/MathML";
+
+     private static final String SVG_NS = "http://www.w3.org/2000/svg";
+
+     private static final String XLINK_NS = "http://www.w3.org/1999/xlink";
+
+     private static final String XML_NS = "http://www.w3.org/XML/1998/namespace";
+
+     private static final String XMLNS_NS = "http://www.w3.org/2000/xmlns/";
+
+     private final Writer writer;
+
+     private int level = 0;
+
+     private boolean inCharacters = false;
+
+     private final boolean close;
+
+     /**
+      * @param writer the destination of the dump
+      * @param close whether {@link #endDocument()} flushes and closes the
+      *        writer
+      */
+     public TreeDumpContentHandler(final Writer writer, boolean close) {
+         this.writer = writer;
+         this.close = close;
+     }
+
+     /**
+      * Creates a handler that closes the writer at the end of the document.
+      *
+      * @param writer the destination of the dump
+      */
+     public TreeDumpContentHandler(final Writer writer) {
+         this(writer, true);
+     }
+
+     /**
+      * Tells whether the given name denotes the XHTML template element.
+      * Uses <code>equals</code> so the result does not depend on the
+      * caller passing interned strings.
+      */
+     private static boolean isHtmlTemplate(String uri, String localName) {
+         return XHTML_NS.equals(uri) && "template".equals(localName);
+     }
+
+     /**
+      * Terminates a pending text run, if any, with a closing quote and
+      * newline.
+      */
+     private void finishCharacters() throws IOException {
+         if (inCharacters) {
+             writer.write("\"\n");
+             inCharacters = false;
+         }
+     }
+
+     /**
+      * Ends any pending text run and writes the line prefix for the
+      * current nesting level.
+      */
+     private void printLead() throws IOException {
+         finishCharacters();
+         writer.write("| ");
+         for (int i = 0; i < level; i++) {
+             writer.write(" ");
+         }
+     }
+
+     /**
+      * Appends text to the current quoted text run, opening one first if
+      * needed so that consecutive calls produce a single line.
+      */
+     public void characters(char[] ch, int start, int length)
+             throws SAXException {
+         try {
+             if (!inCharacters) {
+                 printLead();
+                 writer.write('"');
+                 inCharacters = true;
+             }
+             writer.write(ch, start, length);
+         } catch (IOException e) {
+             throw new SAXException(e);
+         }
+     }
+
+     /**
+      * Closes any pending text run and steps the nesting level back out.
+      */
+     public void endElement(String uri, String localName, String qName)
+             throws SAXException {
+         try {
+             finishCharacters();
+             level--;
+             if (isHtmlTemplate(uri, localName)) {
+                 // decrement level for the "content"
+                 level--;
+             }
+         } catch (IOException e) {
+             throw new SAXException(e);
+         }
+     }
+
+     /**
+      * Writes the element line followed by its attributes sorted by
+      * display name. MathML and SVG elements are prefixed with "math " and
+      * "svg ", any other non-XHTML element with "otherns ". An XHTML
+      * template element additionally gets a nested "content" line.
+      */
+     public void startElement(String uri, String localName, String qName,
+             Attributes atts) throws SAXException {
+         try {
+             printLead();
+             writer.write('<');
+             if (MATHML_NS.equals(uri)) {
+                 writer.write("math ");
+             } else if (SVG_NS.equals(uri)) {
+                 writer.write("svg ");
+             } else if (!XHTML_NS.equals(uri)) {
+                 writer.write("otherns ");
+             }
+             writer.write(localName);
+             writer.write(">\n");
+             level++;
+             TreeMap<String, String> map = new TreeMap<String, String>();
+             for (int i = 0; i < atts.getLength(); i++) {
+                 String ns = atts.getURI(i);
+                 String name;
+                 if (XLINK_NS.equals(ns)) {
+                     name = "xlink " + atts.getLocalName(i);
+                 } else if (XML_NS.equals(ns)) {
+                     name = "xml " + atts.getLocalName(i);
+                 } else if (XMLNS_NS.equals(ns)) {
+                     name = "xmlns " + atts.getLocalName(i);
+                 } else if (ns == null || ns.length() == 0) {
+                     // The decision depends on the attribute's own
+                     // namespace, not on the element's: no namespace
+                     // means the bare local name.
+                     name = atts.getLocalName(i);
+                 } else {
+                     name = "otherns " + atts.getLocalName(i);
+                 }
+                 map.put(name, atts.getValue(i));
+             }
+             for (Map.Entry<String, String> entry : map.entrySet()) {
+                 printLead();
+                 writer.write(entry.getKey());
+                 writer.write("=\"");
+                 writer.write(entry.getValue());
+                 writer.write("\"\n");
+             }
+             if (isHtmlTemplate(uri, localName)) {
+                 printLead();
+                 level++;
+                 writer.write("content\n");
+             }
+         } catch (IOException e) {
+             throw new SAXException(e);
+         }
+     }
+
+     /**
+      * Writes the comment on its own line between "&lt;!-- " and " --&gt;".
+      */
+     public void comment(char[] ch, int offset, int len) throws SAXException {
+         try {
+             printLead();
+             writer.write("<!-- ");
+             writer.write(ch, offset, len);
+             writer.write(" -->\n");
+         } catch (IOException e) {
+             throw new SAXException(e);
+         }
+     }
+
+     /**
+      * Writes the doctype line. The quoted public and system identifiers
+      * are included only if at least one of them is non-empty; a
+      * <code>null</code> identifier is treated as empty.
+      */
+     public void startDTD(String name, String publicIdentifier,
+             String systemIdentifier) throws SAXException {
+         // LexicalHandler allows null for undeclared identifiers.
+         String publicId = publicIdentifier == null ? "" : publicIdentifier;
+         String systemId = systemIdentifier == null ? "" : systemIdentifier;
+         try {
+             printLead();
+             writer.write("<!DOCTYPE ");
+             writer.write(name);
+             if (publicId.length() > 0 || systemId.length() > 0) {
+                 writer.write(' ');
+                 writer.write('\"');
+                 writer.write(publicId);
+                 writer.write('\"');
+                 writer.write(' ');
+                 writer.write('\"');
+                 writer.write(systemId);
+                 writer.write('\"');
+             }
+             writer.write(">\n");
+         } catch (IOException e) {
+             throw new SAXException(e);
+         }
+     }
+
+     /**
+      * Closes any pending text run and, if this handler was created with
+      * <code>close</code> set, flushes and closes the writer.
+      */
+     public void endDocument() throws SAXException {
+         try {
+             finishCharacters();
+             if (close) {
+                 writer.flush();
+                 writer.close();
+             }
+         } catch (IOException e) {
+             throw new SAXException(e);
+         }
+     }
+
+     // The remaining events do not contribute to the dump.
+
+     public void startPrefixMapping(String prefix, String uri)
+             throws SAXException {
+     }
+
+     public void startEntity(String arg0) throws SAXException {
+     }
+
+     public void endCDATA() throws SAXException {
+     }
+
+     public void endDTD() throws SAXException {
+     }
+
+     public void endEntity(String arg0) throws SAXException {
+     }
+
+     public void startCDATA() throws SAXException {
+     }
+
+     public void endPrefixMapping(String prefix) throws SAXException {
+     }
+
+     public void ignorableWhitespace(char[] ch, int start, int length)
+             throws SAXException {
+     }
+
+     public void processingInstruction(String target, String data)
+             throws SAXException {
+     }
+
+     public void setDocumentLocator(Locator locator) {
+     }
+
+     public void skippedEntity(String name) throws SAXException {
+     }
+
+     public void startDocument() throws SAXException {
+     }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreePrinter.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreePrinter.java
new file mode 100644
index 000000000..c09169383
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreePrinter.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.sax.HtmlParser;
+
+ /**
+  * Command-line tool that parses one HTML file and prints its tree dump
+  * to standard output as UTF-8.
+  */
+ public class TreePrinter {
+
+     /**
+      * @param args <code>args[0]</code> is the path of the file to parse
+      * @throws SAXException if parsing fails
+      * @throws IOException if the file cannot be read or output fails
+      */
+     public static void main(String[] args) throws SAXException, IOException {
+         TreeDumpContentHandler treeDumpContentHandler = new TreeDumpContentHandler(new OutputStreamWriter(System.out, "UTF-8"));
+         HtmlParser htmlParser = new HtmlParser();
+         htmlParser.setContentHandler(treeDumpContentHandler);
+         htmlParser.setLexicalHandler(treeDumpContentHandler);
+         htmlParser.setErrorHandler(new SystemErrErrorHandler());
+         htmlParser.setXmlPolicy(XmlViolationPolicy.ALLOW);
+         File file = new File(args[0]);
+         FileInputStream in = new FileInputStream(file);
+         try {
+             InputSource is = new InputSource(in);
+             is.setSystemId(file.toURI().toASCIIString());
+             htmlParser.parse(is);
+         } finally {
+             // Release the file handle whether or not parsing succeeded.
+             in.close();
+         }
+     }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeTester.java
new file mode 100644
index 000000000..62d3ab530
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeTester.java
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.StringWriter;
+import java.util.LinkedList;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.sax.HtmlParser;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXParseException;
+
+ /**
+  * Command-line driver that runs tree-construction tests from files made
+  * of "#"-labelled sections and prints the outcome of each test to
+  * standard error.
+  */
+ public class TreeTester {
+
+     private final BufferedInputStream aggregateStream;
+
+     private boolean streaming = false;
+
+     /**
+      * @param aggregateStream the test file contents; wrapped in a
+      *        buffered stream so one test can be scanned twice via
+      *        mark/reset
+      */
+     public TreeTester(InputStream aggregateStream) {
+         this.aggregateStream = new BufferedInputStream(aggregateStream);
+     }
+
+     /**
+      * Checks for the leading '#' and then runs tests until the data ends.
+      */
+     private void runTests() throws Throwable {
+         if (aggregateStream.read() != '#') {
+             System.err.println("No hash at start!");
+             return;
+         }
+         while (runTest()) {
+             // spin
+         }
+     }
+
+     /**
+      * Reads bytes up to, but not including, the next newline. Stops at
+      * the end of the stream as well, so truncated test data cannot make
+      * the caller loop forever.
+      *
+      * @return the bytes read, each converted to one char
+      */
+     private String readLine() throws IOException {
+         StringBuilder sb = new StringBuilder();
+         int c;
+         while ((c = aggregateStream.read()) != -1 && c != '\n') {
+             sb.append((char) c);
+         }
+         return sb.toString();
+     }
+
+     /**
+      * Runs a single test. A first pass scans ahead past the data and
+      * errors sections to learn the fragment context and scripting
+      * directive; the stream is then reset and the test is parsed and
+      * compared with the expected dump.
+      *
+      * @return <code>false</code> when the test data is exhausted or
+      *         truncated, <code>true</code> otherwise
+      */
+     private boolean runTest() throws Throwable {
+         UntilHashInputStream stream = null;
+         try {
+             String context = null;
+             boolean scriptingEnabled = true;
+             boolean hadScriptingDirective = false;
+             aggregateStream.mark(12288);
+             if (skipLabel()) { // #data
+                 return false;
+             }
+             stream = new UntilHashInputStream(aggregateStream);
+             while (stream.read() != -1) {
+                 // spin
+             }
+             if (skipLabel()) { // #errors
+                 System.err.println("Premature end of test data.");
+                 return false;
+             }
+             stream = new UntilHashInputStream(aggregateStream);
+             while (stream.read() != -1) {
+                 // spin
+             }
+
+             // The '#' of this label was consumed by the stream above.
+             String label = readLine();
+             if ("document-fragment".equals(label)) {
+                 context = readLine();
+                 // Now potentially gather #script-on/off. Nothing has
+                 // consumed the '#' of this label, so strip it to make the
+                 // comparisons below match.
+                 label = readLine();
+                 if (label.startsWith("#")) {
+                     label = label.substring(1);
+                 }
+             }
+             if ("script-on".equals(label)) {
+                 hadScriptingDirective = true;
+             } else if ("script-off".equals(label)) {
+                 hadScriptingDirective = true;
+                 scriptingEnabled = false;
+             }
+             aggregateStream.reset();
+             if (skipLabel()) { // #data
+                 System.err.println("Premature end of test data.");
+                 return false;
+             }
+             stream = new UntilHashInputStream(aggregateStream);
+             InputSource is = new InputSource(stream);
+             is.setEncoding("UTF-8");
+             StringWriter sw = new StringWriter();
+             ListErrorHandler leh = new ListErrorHandler();
+             TreeDumpContentHandler treeDumpContentHandler = new TreeDumpContentHandler(
+                     sw);
+             HtmlParser htmlParser = new HtmlParser(XmlViolationPolicy.ALLOW);
+             if (streaming) {
+                 htmlParser.setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL);
+             }
+             htmlParser.setContentHandler(treeDumpContentHandler);
+             htmlParser.setLexicalHandler(treeDumpContentHandler);
+             htmlParser.setErrorHandler(leh);
+             htmlParser.setScriptingEnabled(scriptingEnabled);
+             try {
+                 if (context == null) {
+                     htmlParser.parse(is);
+                 } else {
+                     String ns = "http://www.w3.org/1999/xhtml";
+                     if (context.startsWith("svg ")) {
+                         ns = "http://www.w3.org/2000/svg";
+                         context = context.substring(4);
+                     } else if (context.startsWith("math ")) {
+                         ns = "http://www.w3.org/1998/Math/MathML";
+                         context = context.substring(5);
+                     }
+                     htmlParser.parseFragment(is, context, ns);
+                     treeDumpContentHandler.endDocument();
+                 }
+             } catch (SAXParseException e) {
+                 // ignore
+             }
+             stream.close();
+
+             if (skipLabel()) { // #errors
+                 System.err.println("Premature end of test data.");
+                 return false;
+             }
+             LinkedList<String> expectedErrors = new LinkedList<String>();
+             BufferedReader br = new BufferedReader(new InputStreamReader(
+                     new UntilHashInputStream(aggregateStream), "UTF-8"));
+             String line = null;
+             while ((line = br.readLine()) != null) {
+                 expectedErrors.add(line);
+             }
+
+             if (context != null) {
+                 if (skipLabel()) { // #document-fragment
+                     System.err.println("Premature end of test data.");
+                     return false;
+                 }
+                 UntilHashInputStream stream2 = new UntilHashInputStream(aggregateStream);
+                 while (stream2.read() != -1) {
+                     // spin
+                 }
+             }
+             if (hadScriptingDirective && skipLabel()) { // #script-on/off
+                 System.err.println("Premature end of test data.");
+                 return false;
+             }
+
+             if (skipLabel()) { // #document
+                 System.err.println("Premature end of test data.");
+                 return false;
+             }
+
+             StringBuilder expectedBuilder = new StringBuilder();
+             br = new BufferedReader(new InputStreamReader(
+                     new UntilHashInputStream(aggregateStream), "UTF-8"));
+             int ch;
+             while ((ch = br.read()) != -1) {
+                 expectedBuilder.append((char)ch);
+             }
+             String expected = expectedBuilder.toString();
+             String actual = sw.toString();
+
+             LinkedList<String> actualErrors = leh.getErrors();
+
+             // Only the tree dump decides the outcome; the number of
+             // errors is reported on failure but not compared.
+             if (expected.equals(actual) || (streaming && leh.isFatal())) {
+                 System.err.println("Success.");
+             } else {
+                 System.err.print("Failure.\nData:\n" + stream + "\nExpected:\n"
+                         + expected + "Got: \n" + actual);
+                 System.err.println("Expected errors:");
+                 for (String err : expectedErrors) {
+                     System.err.println(err);
+                 }
+                 System.err.println("Actual errors:");
+                 for (String err : actualErrors) {
+                     System.err.println(err);
+                 }
+             }
+         } catch (Throwable t) {
+             System.err.println("Failure.\nData:\n" + stream);
+             throw t;
+         }
+         return true;
+     }
+
+     /**
+      * Skips the rest of the current label line. The first byte is
+      * consumed unconditionally, then everything up to and including the
+      * next line feed.
+      *
+      * @return <code>true</code> if the stream ended before a line feed
+      */
+     private boolean skipLabel() throws IOException {
+         int b = aggregateStream.read();
+         if (b == -1) {
+             return true;
+         }
+         for (;;) {
+             b = aggregateStream.read();
+             if (b == -1) {
+                 return true;
+             } else if (b == 0x0A) {
+                 return false;
+             }
+         }
+     }
+
+     /**
+      * Runs the tests in each file named on the command line.
+      *
+      * @param args paths of test files
+      * @throws Throwable whatever a test run throws
+      */
+     public static void main(String[] args) throws Throwable {
+         for (int i = 0; i < args.length; i++) {
+             InputStream in = new FileInputStream(args[i]);
+             try {
+                 TreeTester tester = new TreeTester(in);
+                 tester.runTests();
+             } finally {
+                 // Release the file handle even when a test throws.
+                 in.close();
+             }
+         }
+     }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/UntilHashInputStream.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/UntilHashInputStream.java
new file mode 100644
index 000000000..473a9f7f9
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/UntilHashInputStream.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+ /**
+  * Exposes one section of a delegate stream: bytes are passed through
+  * until a '#' that directly follows a line feed (or the very first byte
+  * being '#'), at which point this stream reports end of input. The
+  * terminating '#' is consumed from the delegate; the line feed before it
+  * is not returned.
+  *
+  * The bytes returned so far are also recorded in a printable form that
+  * {@link #toString()} exposes.
+  */
+ public class UntilHashInputStream extends InputStream {
+
+     private final StringBuilder builder = new StringBuilder();
+
+     private final InputStream delegate;
+
+     /** One byte of lookahead read from the delegate, or -1 at its end. */
+     private int buffer = -1;
+
+     private boolean closed = false;
+
+     /**
+      * @param delegate the stream to read the section from
+      * @throws IOException if reading the first byte fails
+      */
+     public UntilHashInputStream(final InputStream delegate) throws IOException {
+         this.delegate = delegate;
+         this.buffer = delegate.read();
+         if (buffer == '#') {
+             closed = true;
+         }
+     }
+
+     /**
+      * Returns the next byte of the section, or -1 once the section or the
+      * delegate has ended.
+      */
+     @Override
+     public int read() throws IOException {
+         if (closed) {
+             return -1;
+         }
+         int rv = buffer;
+         if (rv == -1) {
+             // The delegate is exhausted: stop here rather than recording
+             // a bogus "0xffffffff" and polling the delegate again.
+             closed = true;
+             return -1;
+         }
+         buffer = delegate.read();
+         if (buffer == '#' && rv == '\n') {
+             // end of stream
+             closed = true;
+             return -1;
+         }
+         if (rv >= 0x20 && rv < 0x80) {
+             builder.append(((char)rv));
+         } else {
+             builder.append("0x");
+             builder.append(Integer.toHexString(rv));
+         }
+         return rv;
+     }
+
+     /**
+      * Skips the unread remainder of the section so that the delegate is
+      * positioned just after the terminating '#'. The skipped bytes are
+      * not recorded for {@link #toString()}.
+      *
+      * @see java.io.InputStream#close()
+      */
+     @Override
+     public void close() throws IOException {
+         super.close();
+         if (closed) {
+             return;
+         }
+         // Use the same terminator as read(): a '#' only ends the section
+         // when it follows a line feed, so a '#' inside the data (for
+         // example in "&#x0a;") must not stop the skipping early.
+         int previous = buffer;
+         while (previous != -1) {
+             int b = delegate.read();
+             if (b == '#' && previous == '\n') {
+                 break;
+             }
+             previous = b;
+         }
+         closed = true;
+     }
+
+     /**
+      * Returns the bytes handed out so far, with bytes outside
+      * 0x20..0x7F rendered as "0x" followed by their hexadecimal value.
+      *
+      * @see java.lang.Object#toString()
+      */
+     @Override
+     public String toString() {
+         return builder.toString();
+     }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XmlSerializerTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XmlSerializerTester.java
new file mode 100644
index 000000000..0d23fda3c
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XmlSerializerTester.java
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+
+import nu.validator.htmlparser.sax.XmlSerializer;
+
+ /**
+  * Manual smoke test that feeds a fixed sequence of SAX events to an
+  * {@link XmlSerializer} writing to standard output.
+  */
+ public class XmlSerializerTester {
+
+     /**
+      * Emits nested elements in several namespaces, an attribute in the
+      * RDF namespace, two prefix mappings and a text run that contains
+      * both a surrogate pair and unpaired surrogates.
+      *
+      * @param args unused
+      * @throws SAXException if the serializer rejects an event
+      */
+     public static void main(String[] args) throws SAXException {
+         final String rdfNs = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
+         final char[] text = "a\uD834\uDD21a\uD834a\uDD21a".toCharArray();
+
+         AttributesImpl attributes = new AttributesImpl();
+         XmlSerializer out = new XmlSerializer(System.out);
+
+         out.startDocument();
+         out.startElement("1", "a", null, attributes);
+
+         out.startElement("1", "b", null, attributes);
+         out.endElement("1", "b", null);
+
+         out.startElement("2", "c", null, attributes);
+         out.endElement("2", "c", null);
+
+         // From here on every element carries the rdf:about attribute.
+         attributes.addAttribute(rdfNs, "about", null, "CDATA", "");
+         out.startElement(rdfNs, "d", null, attributes);
+         out.endElement(rdfNs, "d", null);
+
+         out.startPrefixMapping("rdf", "foo");
+         out.startElement(rdfNs, "e", null, attributes);
+         out.startPrefixMapping("p0", "bar");
+         out.startElement(rdfNs, "f", null, attributes);
+         out.characters(text, 0, 8);
+         out.endElement(rdfNs, "f", null);
+         out.endElement(rdfNs, "e", null);
+
+         out.endPrefixMapping("rdf");
+         out.endElement("1", "a", null);
+         out.endDocument();
+     }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XomTest.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XomTest.java
new file mode 100644
index 000000000..66d706ae9
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XomTest.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import nu.xom.Attribute;
+import nu.xom.Element;
+
+ /**
+  * Minimal experiment with the XOM API.
+  */
+ public class XomTest {
+     /**
+      * Creates an XHTML <code>html</code> element and adds an attribute
+      * named <code>xmlns:foo</code> to it.
+      * NOTE(review): presumably this probes how XOM treats xmlns-prefixed
+      * attribute names; confirm the intent.
+      *
+      * @param args unused
+      */
+     public static void main(String[] args) {
+         Element html = new Element("html", "http://www.w3.org/1999/xhtml");
+         Attribute xmlnsFoo = new Attribute("xmlns:foo", "bar");
+         html.addAttribute(xmlnsFoo);
+     }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/package.html b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/package.html
new file mode 100644
index 000000000..57809b84e
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/package.html
@@ -0,0 +1,29 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>Test drivers.</p>
+</body>
+</html>
\ No newline at end of file
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2HTML.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2HTML.java
new file mode 100644
index 000000000..5e2cf1f58
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2HTML.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.tools;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.MalformedURLException;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.TransformerException;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.sax.HtmlParser;
+import nu.validator.htmlparser.sax.HtmlSerializer;
+import nu.validator.htmlparser.sax.XmlSerializer;
+import nu.validator.htmlparser.test.SystemErrErrorHandler;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+public class HTML2HTML {
+
+ /** Parses HTML with <code>HtmlParser</code> and writes it back out through <code>HtmlSerializer</code>.
+ * @param args no arguments to use stdin/stdout; one to read the named file and write stdout; two to read the first file and write the second
+ */
+ public static void main(String[] args) throws SAXException,
+ ParserConfigurationException, MalformedURLException, IOException,
+ TransformerException {
+ InputStream in;
+ OutputStream out;
+
+ switch (args.length) {
+ case 0:
+ in = System.in;
+ out = System.out;
+ break;
+ case 1:
+ in = new FileInputStream(args[0]);
+ out = System.out;
+ break;
+ case 2:
+ in = new FileInputStream(args[0]);
+ out = new FileOutputStream(args[1]);
+ break;
+ default:
+ System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. Two arguments to read from first file and write to second.");
+ System.exit(1);
+ return; // not reached after exit; lets the compiler treat in/out as definitely assigned below
+ }
+
+ ContentHandler serializer = new HtmlSerializer(out);
+
+ HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALLOW);
+
+ parser.setErrorHandler(new SystemErrErrorHandler());
+ parser.setContentHandler(serializer);
+ parser.setProperty("http://xml.org/sax/properties/lexical-handler",
+ serializer); // the same serializer object is also registered as the lexical handler
+ parser.parse(new InputSource(in));
+ out.flush();
+ out.close(); // this also closes System.out when no output file was given
+ }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2XML.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2XML.java
new file mode 100644
index 000000000..57666f93b
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2XML.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.tools;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.MalformedURLException;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.TransformerException;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.sax.HtmlParser;
+import nu.validator.htmlparser.sax.XmlSerializer;
+import nu.validator.htmlparser.test.SystemErrErrorHandler;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+public class HTML2XML {
+
+ /** Parses HTML with <code>HtmlParser</code> (policy <code>ALTER_INFOSET</code>) and writes the events out through <code>XmlSerializer</code>.
+ * @param args no arguments to use stdin/stdout; one to read the named file and write stdout; two to read the first file and write the second
+ */
+ public static void main(String[] args) throws SAXException,
+ ParserConfigurationException, MalformedURLException, IOException,
+ TransformerException {
+ InputStream in;
+ OutputStream out;
+
+ switch (args.length) {
+ case 0:
+ in = System.in;
+ out = System.out;
+ break;
+ case 1:
+ in = new FileInputStream(args[0]);
+ out = System.out;
+ break;
+ case 2:
+ in = new FileInputStream(args[0]);
+ out = new FileOutputStream(args[1]);
+ break;
+ default:
+ System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. Two arguments to read from first file and write to second.");
+ System.exit(1);
+ return; // not reached after exit; lets the compiler treat in/out as definitely assigned below
+ }
+
+ ContentHandler serializer = new XmlSerializer(out);
+
+ HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET);
+
+ parser.setErrorHandler(new SystemErrErrorHandler());
+ parser.setContentHandler(serializer);
+ parser.setProperty("http://xml.org/sax/properties/lexical-handler",
+ serializer); // the same serializer object is also registered as the lexical handler
+ parser.parse(new InputSource(in));
+ out.flush();
+ out.close(); // this also closes System.out when no output file was given
+ }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2HTML.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2HTML.java
new file mode 100644
index 000000000..dad89a5b2
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2HTML.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.tools;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.MalformedURLException;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
+import javax.xml.transform.TransformerException;
+
+import nu.validator.htmlparser.sax.HtmlSerializer;
+import nu.validator.htmlparser.sax.XmlSerializer;
+import nu.validator.htmlparser.test.SystemErrErrorHandler;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+public class XML2HTML {
+
+ /** Parses XML with a namespace-aware, non-validating JAXP SAX reader and writes the events out through <code>HtmlSerializer</code>.
+ * @param args no arguments to use stdin/stdout; one to read the named file and write stdout; two to read the first file and write the second
+ */
+ public static void main(String[] args) throws SAXException,
+ ParserConfigurationException, MalformedURLException, IOException,
+ TransformerException {
+ InputStream in;
+ OutputStream out;
+
+ switch (args.length) {
+ case 0:
+ in = System.in;
+ out = System.out;
+ break;
+ case 1:
+ in = new FileInputStream(args[0]);
+ out = System.out;
+ break;
+ case 2:
+ in = new FileInputStream(args[0]);
+ out = new FileOutputStream(args[1]);
+ break;
+ default:
+ System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. Two arguments to read from first file and write to second.");
+ System.exit(1);
+ return; // not reached after exit; lets the compiler treat in/out as definitely assigned below
+ }
+
+ ContentHandler serializer = new HtmlSerializer(out);
+
+ SAXParserFactory factory = SAXParserFactory.newInstance();
+ factory.setNamespaceAware(true);
+ factory.setValidating(false);
+ XMLReader parser = factory.newSAXParser().getXMLReader();
+ parser.setErrorHandler(new SystemErrErrorHandler());
+ parser.setContentHandler(serializer);
+ parser.setProperty("http://xml.org/sax/properties/lexical-handler",
+ serializer); // the same serializer object is also registered as the lexical handler
+ parser.parse(new InputSource(in));
+ out.flush();
+ out.close(); // this also closes System.out when no output file was given
+ }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2XML.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2XML.java
new file mode 100644
index 000000000..2f6aa24d8
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2XML.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.tools;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.MalformedURLException;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
+import javax.xml.transform.TransformerException;
+
+import nu.validator.htmlparser.sax.NameCheckingXmlSerializer;
+import nu.validator.htmlparser.sax.XmlSerializer;
+import nu.validator.htmlparser.test.SystemErrErrorHandler;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+public class XML2XML {
+
+ /** Parses XML with a namespace-aware, non-validating JAXP SAX reader and writes the events out through <code>NameCheckingXmlSerializer</code>.
+ * @param args no arguments to use stdin/stdout; one to read the named file and write stdout; two to read the first file and write the second
+ */
+ public static void main(String[] args) throws SAXException,
+ ParserConfigurationException, MalformedURLException, IOException,
+ TransformerException {
+ InputStream in;
+ OutputStream out;
+
+ switch (args.length) {
+ case 0:
+ in = System.in;
+ out = System.out;
+ break;
+ case 1:
+ in = new FileInputStream(args[0]);
+ out = System.out;
+ break;
+ case 2:
+ in = new FileInputStream(args[0]);
+ out = new FileOutputStream(args[1]);
+ break;
+ default:
+ System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. Two arguments to read from first file and write to second.");
+ System.exit(1);
+ return; // not reached after exit; lets the compiler treat in/out as definitely assigned below
+ }
+
+ ContentHandler serializer = new NameCheckingXmlSerializer(out);
+
+ SAXParserFactory factory = SAXParserFactory.newInstance();
+ factory.setNamespaceAware(true);
+ factory.setValidating(false);
+ XMLReader parser = factory.newSAXParser().getXMLReader();
+ parser.setErrorHandler(new SystemErrErrorHandler());
+ parser.setContentHandler(serializer);
+ parser.setProperty("http://xml.org/sax/properties/lexical-handler",
+ serializer); // the same serializer object is also registered as the lexical handler
+ parser.parse(new InputSource(in));
+ out.flush();
+ out.close(); // this also closes System.out when no output file was given
+ }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java
new file mode 100644
index 000000000..05d8193c1
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.tools;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.net.MalformedURLException;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
+import javax.xml.transform.Templates;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.sax.SAXResult;
+import javax.xml.transform.sax.SAXTransformerFactory;
+import javax.xml.transform.sax.TemplatesHandler;
+import javax.xml.transform.sax.TransformerHandler;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.dom.HtmlDocumentBuilder;
+import nu.validator.htmlparser.sax.HtmlParser;
+import nu.validator.htmlparser.sax.HtmlSerializer;
+import nu.validator.htmlparser.sax.XmlSerializer;
+import nu.validator.htmlparser.test.SystemErrErrorHandler;
+
+import org.w3c.dom.Document;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.ext.LexicalHandler;
+
+public class XSLT4HTML5 { // Command-line XSLT runner: HTML or XML in, HTML or XML out, via SAX or DOM.
+
+ private enum Mode {
+ STREAMING_SAX, BUFFERED_SAX, DOM,
+ }
+
+ private static final String TEMPLATE = "--template=";
+
+ private static final String INPUT_HTML = "--input-html=";
+
+ private static final String INPUT_XML = "--input-xml=";
+
+ private static final String OUTPUT_HTML = "--output-html=";
+
+ private static final String OUTPUT_XML = "--output-xml=";
+
+ private static final String MODE = "--mode=";
+
+ /** Applies the XSLT template to an HTML or XML input file and serializes the result as HTML or XML.
+ * @param args <code>--template=</code>, one of <code>--input-html=</code>/<code>--input-xml=</code>, one of <code>--output-html=</code>/<code>--output-xml=</code>, and optionally <code>--mode=</code> (defaults to sax-buffered); unrecognized arguments are ignored
+ * @throws ParserConfigurationException if the JAXP SAX parser cannot be created
+ * @throws SAXException if parsing the template or the input fails
+ * @throws IOException if the template, input or output file cannot be read or written
+ * @throws MalformedURLException declared by the signature (a subclass of IOException)
+ * @throws TransformerException if the transformation fails
+ */
+ public static void main(String[] args) throws SAXException,
+ ParserConfigurationException, MalformedURLException, IOException, TransformerException {
+ if (args.length == 0) {
+ System.out.println("--template=file --input-[html|xml]=file --output-[html|xml]=file --mode=[sax-streaming|sax-buffered|dom]");
+ System.exit(0);
+ }
+ String template = null;
+ String input = null;
+ boolean inputHtml = false;
+ String output = null;
+ boolean outputHtml = false;
+ Mode mode = null;
+ for (int i = 0; i < args.length; i++) {
+ String arg = args[i];
+ if (arg.startsWith(TEMPLATE)) {
+ if (template == null) {
+ template = arg.substring(TEMPLATE.length());
+ } else {
+ System.err.println("Tried to set template twice.");
+ System.exit(1);
+ }
+ } else if (arg.startsWith(INPUT_HTML)) {
+ if (input == null) {
+ input = arg.substring(INPUT_HTML.length());
+ inputHtml = true;
+ } else {
+ System.err.println("Tried to set input twice.");
+ System.exit(2);
+ }
+ } else if (arg.startsWith(INPUT_XML)) {
+ if (input == null) {
+ input = arg.substring(INPUT_XML.length());
+ inputHtml = false;
+ } else {
+ System.err.println("Tried to set input twice.");
+ System.exit(2);
+ }
+ } else if (arg.startsWith(OUTPUT_HTML)) {
+ if (output == null) {
+ output = arg.substring(OUTPUT_HTML.length());
+ outputHtml = true;
+ } else {
+ System.err.println("Tried to set output twice.");
+ System.exit(3);
+ }
+ } else if (arg.startsWith(OUTPUT_XML)) {
+ if (output == null) {
+ output = arg.substring(OUTPUT_XML.length());
+ outputHtml = false;
+ } else {
+ System.err.println("Tried to set output twice.");
+ System.exit(3);
+ }
+ } else if (arg.startsWith(MODE)) {
+ if (mode == null) {
+ String modeStr = arg.substring(MODE.length());
+ if ("dom".equals(modeStr)) {
+ mode = Mode.DOM;
+ } else if ("sax-buffered".equals(modeStr)) {
+ mode = Mode.BUFFERED_SAX;
+ } else if ("sax-streaming".equals(modeStr)) {
+ mode = Mode.STREAMING_SAX;
+ } else {
+ System.err.println("Unrecognized mode.");
+ System.exit(5);
+ }
+ } else {
+ System.err.println("Tried to set mode twice.");
+ System.exit(4);
+ }
+ }
+ }
+
+ if (template == null) {
+ System.err.println("No template specified.");
+ System.exit(6);
+ }
+ if (input == null) {
+ System.err.println("No input specified.");
+ System.exit(7);
+ }
+ if (output == null) {
+ System.err.println("No output specified.");
+ System.exit(8);
+ }
+ if (mode == null) {
+ mode = Mode.BUFFERED_SAX;
+ }
+
+ SystemErrErrorHandler errorHandler = new SystemErrErrorHandler();
+
+ SAXParserFactory factory = SAXParserFactory.newInstance();
+ factory.setNamespaceAware(true);
+ factory.setValidating(false);
+ XMLReader reader = factory.newSAXParser().getXMLReader();
+ reader.setErrorHandler(errorHandler);
+
+ SAXTransformerFactory transformerFactory = (SAXTransformerFactory) TransformerFactory.newInstance();
+ transformerFactory.setErrorListener(errorHandler);
+ TemplatesHandler templatesHandler = transformerFactory.newTemplatesHandler();
+ reader.setContentHandler(templatesHandler); // the stylesheet is compiled from the SAX events of the template file
+ reader.parse(new File(template).toURI().toASCIIString());
+
+ Templates templates = templatesHandler.getTemplates();
+
+ FileOutputStream outputStream = new FileOutputStream(output);
+ ContentHandler serializer;
+ if (outputHtml) {
+ serializer = new HtmlSerializer(outputStream);
+ } else {
+ serializer = new XmlSerializer(outputStream);
+ }
+ SAXResult result = new SAXResult(new XmlnsDropper(serializer)); // xmlns attributes are stripped before serialization
+ result.setLexicalHandler((LexicalHandler) serializer);
+
+ if (mode == Mode.DOM) {
+ Document inputDoc;
+ DocumentBuilder builder;
+ if (inputHtml) {
+ builder = new HtmlDocumentBuilder(XmlViolationPolicy.ALTER_INFOSET);
+ } else {
+ DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
+ builderFactory.setNamespaceAware(true); // DOM factories are not namespace-aware by default; XSLT needs namespaces
+ try {
+ builder = builderFactory.newDocumentBuilder();
+ } catch (ParserConfigurationException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ inputDoc = builder.parse(new File(input));
+ DOMSource inputSource = new DOMSource(inputDoc,
+ new File(input).toURI().toASCIIString());
+ Transformer transformer = templates.newTransformer();
+ transformer.setErrorListener(errorHandler);
+ transformer.transform(inputSource, result);
+ } else {
+ if (inputHtml) {
+ reader = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); // replaces the XML reader used for the template
+ if (mode == Mode.STREAMING_SAX) {
+ reader.setProperty("http://validator.nu/properties/streamability-violation-policy", XmlViolationPolicy.FATAL);
+ }
+ }
+ TransformerHandler transformerHandler = transformerFactory.newTransformerHandler(templates);
+ transformerHandler.setResult(result);
+ reader.setErrorHandler(errorHandler);
+ reader.setContentHandler(transformerHandler);
+ reader.setProperty("http://xml.org/sax/properties/lexical-handler", transformerHandler);
+ reader.parse(new File(input).toURI().toASCIIString());
+ }
+ outputStream.flush();
+ outputStream.close();
+ }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java
new file mode 100644
index 000000000..b364cc521
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.tools;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.sax.HtmlSerializer;
+import nu.validator.htmlparser.xom.HtmlBuilder;
+import nu.xom.Builder;
+import nu.xom.Document;
+import nu.xom.Element;
+import nu.xom.Nodes;
+import nu.xom.ParsingException;
+import nu.xom.Serializer;
+import nu.xom.ValidityException;
+import nu.xom.converters.SAXConverter;
+import nu.xom.xslt.XSLException;
+import nu.xom.xslt.XSLTransform;
+
+import org.xml.sax.SAXException;
+
+public class XSLT4HTML5XOM { // Command-line XSLT runner built on XOM: HTML or XML in, HTML or XML out.
+
+ private static final String TEMPLATE = "--template=";
+
+ private static final String INPUT_HTML = "--input-html=";
+
+ private static final String INPUT_XML = "--input-xml=";
+
+ private static final String OUTPUT_HTML = "--output-html=";
+
+ private static final String OUTPUT_XML = "--output-xml=";
+
+ /** Applies the XSLT template to an HTML or XML input file using XOM and writes the first result node as HTML or XML.
+ * @param args <code>--template=</code>, one of <code>--input-html=</code>/<code>--input-xml=</code>, and one of <code>--output-html=</code>/<code>--output-xml=</code>; unrecognized arguments are ignored
+ * @throws IOException if the template, input or output file cannot be read or written
+ * @throws ParsingException if the template or the input cannot be parsed
+ * @throws ValidityException declared by the signature
+ * @throws XSLException if the stylesheet cannot be compiled or applied
+ * @throws SAXException if emitting the result as SAX events for HTML serialization fails
+ */
+ public static void main(String[] args) throws ValidityException,
+ ParsingException, IOException, XSLException, SAXException {
+ if (args.length == 0) {
+ System.out.println("--template=file --input-[html|xml]=file --output-[html|xml]=file");
+ System.exit(0);
+ }
+ String template = null;
+ String input = null;
+ boolean inputHtml = false;
+ String output = null;
+ boolean outputHtml = false;
+ for (int i = 0; i < args.length; i++) {
+ String arg = args[i];
+ if (arg.startsWith(TEMPLATE)) {
+ if (template == null) {
+ template = arg.substring(TEMPLATE.length());
+ } else {
+ System.err.println("Tried to set template twice.");
+ System.exit(1);
+ }
+ } else if (arg.startsWith(INPUT_HTML)) {
+ if (input == null) {
+ input = arg.substring(INPUT_HTML.length());
+ inputHtml = true;
+ } else {
+ System.err.println("Tried to set input twice.");
+ System.exit(2);
+ }
+ } else if (arg.startsWith(INPUT_XML)) {
+ if (input == null) {
+ input = arg.substring(INPUT_XML.length());
+ inputHtml = false;
+ } else {
+ System.err.println("Tried to set input twice.");
+ System.exit(2);
+ }
+ } else if (arg.startsWith(OUTPUT_HTML)) {
+ if (output == null) {
+ output = arg.substring(OUTPUT_HTML.length());
+ outputHtml = true;
+ } else {
+ System.err.println("Tried to set output twice.");
+ System.exit(3);
+ }
+ } else if (arg.startsWith(OUTPUT_XML)) {
+ if (output == null) {
+ output = arg.substring(OUTPUT_XML.length());
+ outputHtml = false;
+ } else {
+ System.err.println("Tried to set output twice.");
+ System.exit(3);
+ }
+ }
+ }
+
+ if (template == null) {
+ System.err.println("No template specified.");
+ System.exit(6);
+ }
+ if (input == null) {
+ System.err.println("No input specified.");
+ System.exit(7);
+ }
+ if (output == null) {
+ System.err.println("No output specified.");
+ System.exit(8);
+ }
+
+ Builder builder = new Builder();
+
+ Document transformationDoc = builder.build(new File(template));
+
+ XSLTransform transform = new XSLTransform(transformationDoc);
+
+ FileOutputStream outputStream = new FileOutputStream(output);
+
+ Document inputDoc;
+ if (inputHtml) {
+ builder = new HtmlBuilder(XmlViolationPolicy.ALTER_INFOSET); // swap the XML builder for the HTML one
+ }
+ inputDoc = builder.build(new File(input));
+ Nodes result = transform.transform(inputDoc);
+ Document outputDoc = new Document((Element) result.get(0)); // only the first result node is used, and it must be an element
+ if (outputHtml) {
+ HtmlSerializer htmlSerializer = new HtmlSerializer(outputStream);
+ SAXConverter converter = new SAXConverter(htmlSerializer);
+ converter.setLexicalHandler(htmlSerializer);
+ converter.convert(outputDoc);
+ } else {
+ Serializer serializer = new Serializer(outputStream);
+ serializer.write(outputDoc);
+ }
+ outputStream.flush();
+ outputStream.close();
+ }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XmlnsDropper.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XmlnsDropper.java
new file mode 100644
index 000000000..0e6d4b1c2
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XmlnsDropper.java
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.tools;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+
+/**
+ * Quick and dirty hack to work around Xalan xmlns weirdness.
+ * Forwards every event to the wrapped handler unchanged, except that <code>startElement</code> drops attributes whose qualified name is <code>xmlns</code> or starts with <code>xmlns:</code>.
+ * @version $Id$
+ * @author hsivonen
+ */
+class XmlnsDropper implements ContentHandler {
+
+ private final ContentHandler delegate;
+
+ /**
+ * @param delegate the handler that receives all forwarded events
+ */
+ public XmlnsDropper(final ContentHandler delegate) {
+ this.delegate = delegate;
+ }
+
+ /**
+ * @param ch buffer passed through to the delegate
+ * @param start offset passed through to the delegate
+ * @param length count passed through to the delegate
+ * @throws SAXException if the delegate throws it
+ * @see org.xml.sax.ContentHandler#characters(char[], int, int)
+ */
+ public void characters(char[] ch, int start, int length) throws SAXException {
+ delegate.characters(ch, start, length);
+ }
+
+ /**
+ * @throws SAXException if the delegate throws it
+ * @see org.xml.sax.ContentHandler#endDocument()
+ */
+ public void endDocument() throws SAXException {
+ delegate.endDocument();
+ }
+
+ /**
+ * @param uri namespace URI passed through to the delegate
+ * @param localName local name passed through to the delegate
+ * @param qName qualified name passed through to the delegate
+ * @throws SAXException if the delegate throws it
+ * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
+ */
+ public void endElement(String uri, String localName, String qName) throws SAXException {
+ delegate.endElement(uri, localName, qName);
+ }
+
+ /**
+ * @param prefix prefix passed through to the delegate (prefix mappings are not filtered)
+ * @throws SAXException if the delegate throws it
+ * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
+ */
+ public void endPrefixMapping(String prefix) throws SAXException {
+ delegate.endPrefixMapping(prefix);
+ }
+
+ /**
+ * @param ch buffer passed through to the delegate
+ * @param start offset passed through to the delegate
+ * @param length count passed through to the delegate
+ * @throws SAXException if the delegate throws it
+ * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
+ */
+ public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
+ delegate.ignorableWhitespace(ch, start, length);
+ }
+
+ /**
+ * @param target processing instruction target passed through to the delegate
+ * @param data processing instruction data passed through to the delegate
+ * @throws SAXException if the delegate throws it
+ * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String)
+ */
+ public void processingInstruction(String target, String data) throws SAXException {
+ delegate.processingInstruction(target, data);
+ }
+
+ /**
+ * @param locator locator passed through to the delegate
+ * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
+ */
+ public void setDocumentLocator(Locator locator) {
+ delegate.setDocumentLocator(locator);
+ }
+
+ /**
+ * @param name entity name passed through to the delegate
+ * @throws SAXException if the delegate throws it
+ * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
+ */
+ public void skippedEntity(String name) throws SAXException {
+ delegate.skippedEntity(name);
+ }
+
+ /**
+ * @throws SAXException if the delegate throws it
+ * @see org.xml.sax.ContentHandler#startDocument()
+ */
+ public void startDocument() throws SAXException {
+ delegate.startDocument();
+ }
+
+ /**
+ * @param uri namespace URI passed through to the delegate
+ * @param localName local name passed through to the delegate
+ * @param qName qualified name passed through to the delegate
+ * @param atts attributes; copied into a new list without the <code>xmlns</code> and <code>xmlns:*</code> entries
+ * @throws SAXException if the delegate throws it
+ * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
+ */
+ public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
+ AttributesImpl ai = new AttributesImpl();
+ for (int i = 0; i < atts.getLength(); i++) {
+ String u = atts.getURI(i);
+ String t = atts.getType(i);
+ String v = atts.getValue(i);
+ String n = atts.getLocalName(i);
+ String q = atts.getQName(i);
+ if (q != null) { // an attribute with a null qualified name is kept
+ if ("xmlns".equals(q) || q.startsWith("xmlns:")) {
+ continue; // skip namespace declaration attributes
+ }
+ }
+ ai.addAttribute(u, n, q, t, v);
+ }
+ delegate.startElement(uri, localName, qName, ai);
+ }
+
+ /**
+ * @param prefix prefix passed through to the delegate (prefix mappings are not filtered)
+ * @param uri namespace URI passed through to the delegate
+ * @throws SAXException if the delegate throws it
+ * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String)
+ */
+ public void startPrefixMapping(String prefix, String uri) throws SAXException {
+ delegate.startPrefixMapping(prefix, uri);
+ }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/package.html b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/package.html
new file mode 100644
index 000000000..a04bf3cd0
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/package.html
@@ -0,0 +1,29 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>Demo apps.</p>
+</body>
+</html> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/PassThruPrinter.java b/parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/PassThruPrinter.java
new file mode 100644
index 000000000..df391d4b4
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/PassThruPrinter.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree.test;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
+
+import nu.validator.htmlparser.sax.XmlSerializer;
+import nu.validator.saxtree.Node;
+import nu.validator.saxtree.TreeBuilder;
+import nu.validator.saxtree.TreeParser;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.ext.LexicalHandler;
+
+/** Command-line driver: SAX-parses the file named by args[0] into a TreeBuilder, then replays the resulting tree into an XmlSerializer constructed on System.out. */
+public class PassThruPrinter {
+ public static void main(String[] args) throws SAXException, IOException, ParserConfigurationException {
+ SAXParserFactory factory = SAXParserFactory.newInstance();
+ factory.setNamespaceAware(true);
+ factory.setValidating(false);
+ XMLReader reader = factory.newSAXParser().getXMLReader();
+ TreeBuilder treeBuilder = new TreeBuilder();
+ reader.setContentHandler(treeBuilder);
+ reader.setProperty("http://xml.org/sax/properties/lexical-handler", treeBuilder);
+
+ File file = new File(args[0]);
+ FileInputStream in = new FileInputStream(file);
+ try {
+ InputSource is = new InputSource(in);
+ is.setSystemId(file.toURI().toASCIIString());
+ reader.parse(is);
+ } finally {
+ in.close(); // previously never closed, so the handle leaked whenever parse() threw
+ }
+ ContentHandler xmlSerializer = new XmlSerializer(System.out);
+ TreeParser treeParser = new TreeParser(xmlSerializer, (LexicalHandler) xmlSerializer);
+ treeParser.parse(treeBuilder.getRoot());
+ }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/package.html b/parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/package.html
new file mode 100644
index 000000000..57809b84e
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/package.html
@@ -0,0 +1,29 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>Test drivers.</p>
+</body>
+</html> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/AnnotationHelperVisitor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/AnnotationHelperVisitor.java
new file mode 100644
index 000000000..337394a89
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/AnnotationHelperVisitor.java
@@ -0,0 +1,139 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser C++ Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2009
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package nu.validator.htmlparser.cpptranslate;
+
+import java.util.List;
+
+import japa.parser.ast.expr.AnnotationExpr;
+import japa.parser.ast.expr.MarkerAnnotationExpr;
+import japa.parser.ast.type.ReferenceType;
+import japa.parser.ast.visitor.VoidVisitorAdapter;
+
+/** Visitor base class offering yes/no predicates over {@code currentAnnotations} plus helpers that build translator {@code Type} objects from parsed Java types. */
+public class AnnotationHelperVisitor<T> extends VoidVisitorAdapter<T> {
+ protected List<AnnotationExpr> currentAnnotations; // may be null; every predicate below then answers false
+ /** True when the current annotations contain a marker named {@code NsUri}. */
+ protected boolean nsUri() {
+ return hasAnnotation("NsUri");
+ }
+ /** True when the current annotations contain a marker named {@code Prefix}. */
+ protected boolean prefix() {
+ return hasAnnotation("Prefix");
+ }
+ /** True when the current annotations contain a marker named {@code Local}. */
+ protected boolean local() {
+ return hasAnnotation("Local");
+ }
+ /** True when the current annotations contain a marker named {@code Literal}. */
+ protected boolean literal() {
+ return hasAnnotation("Literal");
+ }
+ /** True when the current annotations contain a marker named {@code Inline}. */
+ protected boolean inline() {
+ return hasAnnotation("Inline");
+ }
+ /** True when the current annotations contain a marker named {@code NoLength}. */
+ protected boolean noLength() {
+ return hasAnnotation("NoLength");
+ }
+ /** True when the current annotations contain a marker named {@code Auto}. */
+ protected boolean auto() {
+ return hasAnnotation("Auto");
+ }
+ /** True when the current annotations contain a marker named {@code Virtual}. */
+ protected boolean virtual() {
+ return hasAnnotation("Virtual");
+ }
+ /** True when the current annotations contain a marker named {@code Const}. */
+ protected boolean isConst() {
+ return hasAnnotation("Const");
+ }
+ /** True when the current annotations contain a marker named {@code CharacterName}. */
+ protected boolean characterName() {
+ return hasAnnotation("CharacterName");
+ }
+ /** Reports whether {@code currentAnnotations} is non-null and holds a MarkerAnnotationExpr whose name equals {@code anno}. */
+ private boolean hasAnnotation(String anno) {
+ // A null list means there is nothing to match against.
+ if (currentAnnotations != null) {
+ for (AnnotationExpr candidate : currentAnnotations) {
+ // Entries that are not MarkerAnnotationExpr instances never match.
+ if (candidate instanceof MarkerAnnotationExpr
+ && ((MarkerAnnotationExpr) candidate).getName().getName().equals(anno)) {
+ return true;
+ }
+ }
+ }
+ // Fell through: null list, or no marker with the requested name.
+ return false;
+ }
+ /** Builds a {@code Type}: reference types carry their array count and the {@code noLength()} flag, all other types get array count 0 and {@code false}. */
+ protected Type convertType(japa.parser.ast.type.Type type, int modifiers) {
+ if (!(type instanceof ReferenceType)) {
+ return new Type(convertTypeName(type.toString()), 0, false, modifiers);
+ }
+ ReferenceType ref = (ReferenceType) type;
+ String elementName = convertTypeName(ref.getType().toString());
+ return new Type(elementName, ref.getArrayCount(), noLength(), modifiers);
+ }
+ /** Maps the type name {@code "String"} to an annotation-derived pseudo type name; any other name is returned unchanged. */
+ private String convertTypeName(String name) {
+ // Only the exact type name "String" is ever remapped;
+ // everything else is handed back untouched.
+ if (!"String".equals(name)) {
+ return name;
+ }
+ // The first matching annotation wins, checked in this fixed order.
+ if (local()) {
+ return "@Local";
+ } else if (nsUri()) {
+ return "@NsUri";
+ } else if (prefix()) {
+ return "@Prefix";
+ } else if (literal()) {
+ return "@Literal";
+ } else if (auto()) {
+ return "@Auto";
+ } else if (characterName()) {
+ return "@CharacterName";
+ }
+ // A String carrying none of the annotations above keeps its name.
+ return name;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppOnlyInputStream.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppOnlyInputStream.java
new file mode 100644
index 000000000..587b81604
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppOnlyInputStream.java
@@ -0,0 +1,70 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser C++ Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2010
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package nu.validator.htmlparser.cpptranslate;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+/** InputStream filter whose read() drops the byte sequence "// CPPONLY:" when it encounters it and passes every other byte through. */
+public class CppOnlyInputStream extends InputStream {
+ private static final String DROP = "// CPPONLY:";
+ private final InputStream delegate;
+ /** @param delegate source stream; it is wrapped in a BufferedInputStream, on which read() uses mark()/reset(). */
+ public CppOnlyInputStream(InputStream delegate) {
+ this.delegate = new BufferedInputStream(delegate);
+ }
+ /** Returns the next byte, or -1 at end of stream; a complete DROP marker is consumed and the byte following it is returned instead. */
+ @Override public int read() throws IOException {
+ int c = delegate.read();
+ if (c != DROP.charAt(0)) {
+ return c;
+ }
+ delegate.mark(DROP.length()); // remember the position so a failed match can be un-read
+ for (int i = 1; i < DROP.length(); ++i) {
+ if (delegate.read() != DROP.charAt(i)) {
+ delegate.reset();
+ return c;
+ }
+ }
+ return delegate.read();
+ }
+ /** Closes the wrapped stream; without this override the inherited no-op close() left it open. */
+ @Override public void close() throws IOException {
+ delegate.close();
+ }
+}
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppTypes.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppTypes.java
new file mode 100644
index 000000000..35c3f6685
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppTypes.java
@@ -0,0 +1,445 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser C++ Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2008-2009
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package nu.validator.htmlparser.cpptranslate;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/** Supplies C++ type names, literals, macro names and include lists as plain strings, and optionally writes an HTML5_ATOM list to a file. */
+public class CppTypes {
+ /**
+ * The license for the atom list written by this program.
+ */
+ private static final String ATOM_LICENSE = "/*\n"
+ + " * Copyright (c) 2008-2010 Mozilla Foundation\n"
+ + " *\n"
+ + " * Permission is hereby granted, free of charge, to any person obtaining a \n"
+ + " * copy of this software and associated documentation files (the \"Software\"), \n"
+ + " * to deal in the Software without restriction, including without limitation \n"
+ + " * the rights to use, copy, modify, merge, publish, distribute, sublicense, \n"
+ + " * and/or sell copies of the Software, and to permit persons to whom the \n"
+ + " * Software is furnished to do so, subject to the following conditions:\n"
+ + " *\n"
+ + " * The above copyright notice and this permission notice shall be included in \n"
+ + " * all copies or substantial portions of the Software.\n"
+ + " *\n"
+ + " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR \n"
+ + " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, \n"
+ + " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL \n"
+ + " * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER \n"
+ + " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING \n"
+ + " * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER \n"
+ + " * DEALINGS IN THE SOFTWARE.\n" + " */\n\n";
+ // Names createAtomName() must never return as-is; it appends '_' until the candidate is not in this set.
+ private static Set<String> reservedWords = new HashSet<String>();
+
+ static {
+ reservedWords.add("small");
+ reservedWords.add("for");
+ reservedWords.add("false");
+ reservedWords.add("true");
+ reservedWords.add("default");
+ reservedWords.add("class");
+ reservedWords.add("switch");
+ reservedWords.add("union");
+ reservedWords.add("template");
+ reservedWords.add("int");
+ reservedWords.add("char");
+ reservedWords.add("operator");
+ reservedWords.add("or");
+ reservedWords.add("and");
+ reservedWords.add("not");
+ reservedWords.add("xor");
+ reservedWords.add("unicode");
+ }
+
+ private static final String[] TREE_BUILDER_INCLUDES = { "nsContentUtils",
+ "nsIAtom", "nsHtml5AtomTable", "nsITimer", "nsString",
+ "nsNameSpaceManager", "nsIContent", "nsTraceRefcnt", "jArray",
+ "nsHtml5DocumentMode", "nsHtml5ArrayCopy", "nsHtml5Parser",
+ "nsHtml5Atoms", "nsHtml5TreeOperation", "nsHtml5StateSnapshot",
+ "nsHtml5StackNode", "nsHtml5TreeOpExecutor", "nsHtml5StreamParser",
+ "nsAHtml5TreeBuilderState", "nsHtml5Highlighter",
+ "nsHtml5PlainTextUtils", "nsHtml5ViewSourceUtils",
+ "mozilla/Likely", "nsIContentHandle", "nsHtml5OplessBuilder" };
+
+ private static final String[] TOKENIZER_INCLUDES = { "nsIAtom",
+ "nsHtml5AtomTable", "nsString", "nsIContent", "nsTraceRefcnt",
+ "jArray", "nsHtml5DocumentMode", "nsHtml5ArrayCopy",
+ "nsHtml5NamedCharacters", "nsHtml5NamedCharactersAccel",
+ "nsHtml5Atoms", "nsAHtml5TreeBuilderState", "nsHtml5Macros",
+ "nsHtml5Highlighter", "nsHtml5TokenizerLoopPolicies" };
+
+ private static final String[] INCLUDES = { "nsIAtom", "nsHtml5AtomTable",
+ "nsString", "nsNameSpaceManager", "nsIContent", "nsTraceRefcnt",
+ "jArray", "nsHtml5ArrayCopy", "nsAHtml5TreeBuilderState",
+ "nsHtml5Atoms", "nsHtml5ByteReadable", "nsIUnicodeDecoder",
+ "nsHtml5Macros", "nsIContentHandle" };
+
+ private static final String[] OTHER_DECLATIONS = {};
+
+ private static final String[] TREE_BUILDER_OTHER_DECLATIONS = {};
+
+ private static final String[] NAMED_CHARACTERS_INCLUDES = { "jArray",
+ "nscore", "nsDebug", "prlog", "mozilla/ArrayUtils" };
+
+ private static final String[] FORWARD_DECLARATIONS = { "nsHtml5StreamParser" };
+
+ private static final String[] CLASSES_THAT_NEED_SUPPLEMENT = {
+ "MetaScanner", "Tokenizer", "TreeBuilder", "UTF16Buffer", };
+
+ private static final String[] STATE_LOOP_POLICIES = {
+ "nsHtml5ViewSourcePolicy", "nsHtml5SilentPolicy" };
+
+ private final Map<String, String> atomMap = new HashMap<String, String>();
+
+ private final Writer atomWriter;
+ /** @param atomList file that receives ATOM_LICENSE and later HTML5_ATOM lines (written as utf-8), or null to write no atom list; an IOException is rethrown wrapped in RuntimeException. */
+ public CppTypes(File atomList) {
+ Writer writer = null;
+ if (atomList != null) {
+ try {
+ writer = new OutputStreamWriter(new FileOutputStream(atomList), "utf-8");
+ writer.write(ATOM_LICENSE);
+ } catch (IOException e) {
+ if (writer != null) { try { writer.close(); } catch (IOException ignored) { } } // do not leak the open file; e is rethrown below
+ throw new RuntimeException(e);
+ }
+ }
+ atomWriter = writer;
+ }
+ /** Closes the atom list writer, if one was opened; an IOException is rethrown wrapped in RuntimeException. */
+ public void finished() {
+ if (atomWriter == null) {
+ return;
+ }
+ try {
+ atomWriter.close(); // close() flushes first; the old flush()-then-close() skipped close() when flush() threw
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public String classPrefix() {
+ return "nsHtml5";
+ }
+
+ public String booleanType() {
+ return "bool";
+ }
+
+ public String byteType() {
+ return "int8_t";
+ }
+
+ public String charType() {
+ return "char16_t";
+ }
+
+ /**
+ * Only used for named characters.
+ *
+ * @return the C++ type name {@code uint16_t}
+ */
+ public String unsignedShortType() {
+ return "uint16_t";
+ }
+
+ public String intType() {
+ return "int32_t";
+ }
+
+ public String stringType() {
+ return "nsString*";
+ }
+
+ public String localType() {
+ return "nsIAtom*";
+ }
+
+ public String prefixType() {
+ return "nsIAtom*";
+ }
+
+ public String nsUriType() {
+ return "int32_t";
+ }
+
+ public String falseLiteral() {
+ return "false";
+ }
+
+ public String trueLiteral() {
+ return "true";
+ }
+
+ public String nullLiteral() {
+ return "nullptr";
+ }
+
+ public String encodingDeclarationHandlerType() {
+ return "nsHtml5StreamParser*";
+ }
+
+ public String nodeType() {
+ return "nsIContentHandle*";
+ }
+
+ public String xhtmlNamespaceLiteral() {
+ return "kNameSpaceID_XHTML";
+ }
+
+ public String svgNamespaceLiteral() {
+ return "kNameSpaceID_SVG";
+ }
+
+ public String xmlnsNamespaceLiteral() {
+ return "kNameSpaceID_XMLNS";
+ }
+
+ public String xmlNamespaceLiteral() {
+ return "kNameSpaceID_XML";
+ }
+
+ public String noNamespaceLiteral() {
+ return "kNameSpaceID_None";
+ }
+
+ public String xlinkNamespaceLiteral() {
+ return "kNameSpaceID_XLink";
+ }
+
+ public String mathmlNamespaceLiteral() {
+ return "kNameSpaceID_MathML";
+ }
+
+ public String arrayTemplate() {
+ return "jArray";
+ }
+
+ public String autoArrayTemplate() {
+ return "autoJArray";
+ }
+ /** Returns "nsHtml5Atoms::" plus the atom name for {@code literal}; on first use the name is created, cached, and (when an atom file is open) written as an HTML5_ATOM line. */
+ public String localForLiteral(String literal) {
+ String atom = atomMap.get(literal);
+ if (atom == null) {
+ atom = createAtomName(literal);
+ atomMap.put(literal, atom);
+ if (atomWriter != null) {
+ try {
+ atomWriter.write("HTML5_ATOM(" + atom + ", \"" + literal
+ + "\")\n");
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+ return "nsHtml5Atoms::" + atom;
+ }
+ /** Derives an identifier from {@code literal}: characters outside [a-zA-Z0-9_] become '_', "" becomes "emptystring", and '_' is appended until the name is neither reserved nor already used. */
+ private String createAtomName(String literal) {
+ String candidate = literal.replaceAll("[^a-zA-Z0-9_]", "_");
+ if ("".equals(candidate)) {
+ candidate = "emptystring";
+ }
+ while (atomMap.containsValue(candidate)
+ || reservedWords.contains(candidate)) {
+ candidate = candidate + '_';
+ }
+ return candidate;
+ }
+
+ public String stringForLiteral(String literal) {
+ return '"' + literal + '"';
+ }
+
+ public String staticArrayTemplate() {
+ return "staticJArray";
+ }
+
+ public String newArrayCreator() {
+ return "newJArray";
+ }
+
+ public String[] boilerplateIncludes(String javaClass) {
+ if ("TreeBuilder".equals(javaClass)) {
+ return TREE_BUILDER_INCLUDES;
+ } else if ("Tokenizer".equals(javaClass)) {
+ return TOKENIZER_INCLUDES;
+ } else {
+ return INCLUDES;
+ }
+ }
+
+ public String[] boilerplateDeclarations(String javaClass) {
+ if ("TreeBuilder".equals(javaClass)) {
+ return TREE_BUILDER_OTHER_DECLATIONS;
+ } else {
+ return OTHER_DECLATIONS;
+ }
+ }
+
+ public String[] namedCharactersIncludes() {
+ return NAMED_CHARACTERS_INCLUDES;
+ }
+
+ public String[] boilerplateForwardDeclarations() {
+ return FORWARD_DECLARATIONS;
+ }
+
+ public String documentModeHandlerType() {
+ return "nsHtml5TreeBuilder*";
+ }
+
+ public String documentModeType() {
+ return "nsHtml5DocumentMode";
+ }
+
+ public String arrayCopy() {
+ return "nsHtml5ArrayCopy::arraycopy";
+ }
+
+ public String maxInteger() {
+ return "INT32_MAX";
+ }
+
+ public String constructorBoilerplate(String className) {
+ return "MOZ_COUNT_CTOR(" + className + ");";
+ }
+
+ public String destructorBoilderplate(String className) {
+ return "MOZ_COUNT_DTOR(" + className + ");";
+ }
+
+ public String literalType() {
+ return "const char*";
+ }
+ /** Binary-searches CLASSES_THAT_NEED_SUPPLEMENT, so that array must stay sorted in natural String order. */
+ public boolean hasSupplement(String javaClass) {
+ return Arrays.binarySearch(CLASSES_THAT_NEED_SUPPLEMENT, javaClass) > -1;
+ }
+
+ public String internerType() {
+ return "nsHtml5AtomTable*";
+ }
+
+ public String treeBuilderStateInterface() {
+ return "nsAHtml5TreeBuilderState";
+ }
+
+ public String treeBuilderStateType() {
+ return "nsAHtml5TreeBuilderState*";
+ }
+
+ public String arrayLengthMacro() {
+ return "MOZ_ARRAY_LENGTH";
+ }
+
+ public String staticAssert() {
+ return "PR_STATIC_ASSERT";
+ }
+
+ public String abortIfFalse() {
+ return "NS_ABORT_IF_FALSE";
+ }
+
+ public String continueMacro() {
+ return "NS_HTML5_CONTINUE";
+ }
+
+ public String breakMacro() {
+ return "NS_HTML5_BREAK";
+ }
+
+ public String characterNameType() {
+ return "nsHtml5CharacterName&";
+ }
+
+ public String characterNameTypeDeclaration() {
+ return "nsHtml5CharacterName";
+ }
+
+ public String transition() {
+ return "P::transition";
+ }
+
+ public String tokenizerErrorCondition() {
+ return "P::reportErrors";
+ }
+
+ public String firstTransitionArg() {
+ return "mViewSource";
+ }
+
+ public String errorHandler() {
+ return this.unlikely() + "(mViewSource)";
+ }
+
+ public String unlikely() {
+ return "MOZ_UNLIKELY";
+ }
+
+ public String completedCharacterReference() {
+ return "P::completedNamedCharacterReference(mViewSource)";
+ }
+
+ public String[] stateLoopPolicies() {
+ return STATE_LOOP_POLICIES;
+ }
+
+ public String assertionMacro() {
+ return "MOZ_ASSERT";
+ }
+
+ public String releaseAssertionMacro() {
+ return "MOZ_RELEASE_ASSERT";
+ }
+
+ public String crashMacro() {
+ return "MOZ_CRASH";
+ }
+}
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppVisitor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppVisitor.java
new file mode 100644
index 000000000..66f7678aa
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppVisitor.java
@@ -0,0 +1,2421 @@
+/*
+ * Copyright (C) 2007 Júlio Vilmar Gesser.
+ * Copyright (C) 2008 Mozilla Foundation
+ *
+ * This file is part of HTML Parser C++ Translator. It was derived from DumpVisitor
+ * which was part of Java 1.5 parser and Abstract Syntax Tree and came with the following notice:
+ *
+ * Java 1.5 parser and Abstract Syntax Tree is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Java 1.5 parser and Abstract Syntax Tree is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Java 1.5 parser and Abstract Syntax Tree. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Created on 05/10/2006
+ */
+package nu.validator.htmlparser.cpptranslate;
+
+import japa.parser.ast.BlockComment;
+import japa.parser.ast.CompilationUnit;
+import japa.parser.ast.ImportDeclaration;
+import japa.parser.ast.LineComment;
+import japa.parser.ast.Node;
+import japa.parser.ast.PackageDeclaration;
+import japa.parser.ast.TypeParameter;
+import japa.parser.ast.body.AnnotationDeclaration;
+import japa.parser.ast.body.AnnotationMemberDeclaration;
+import japa.parser.ast.body.BodyDeclaration;
+import japa.parser.ast.body.ClassOrInterfaceDeclaration;
+import japa.parser.ast.body.ConstructorDeclaration;
+import japa.parser.ast.body.EmptyMemberDeclaration;
+import japa.parser.ast.body.EmptyTypeDeclaration;
+import japa.parser.ast.body.EnumConstantDeclaration;
+import japa.parser.ast.body.EnumDeclaration;
+import japa.parser.ast.body.FieldDeclaration;
+import japa.parser.ast.body.InitializerDeclaration;
+import japa.parser.ast.body.JavadocComment;
+import japa.parser.ast.body.MethodDeclaration;
+import japa.parser.ast.body.ModifierSet;
+import japa.parser.ast.body.Parameter;
+import japa.parser.ast.body.TypeDeclaration;
+import japa.parser.ast.body.VariableDeclarator;
+import japa.parser.ast.body.VariableDeclaratorId;
+import japa.parser.ast.expr.ArrayAccessExpr;
+import japa.parser.ast.expr.ArrayCreationExpr;
+import japa.parser.ast.expr.ArrayInitializerExpr;
+import japa.parser.ast.expr.AssignExpr;
+import japa.parser.ast.expr.BinaryExpr;
+import japa.parser.ast.expr.BooleanLiteralExpr;
+import japa.parser.ast.expr.CastExpr;
+import japa.parser.ast.expr.CharLiteralExpr;
+import japa.parser.ast.expr.ClassExpr;
+import japa.parser.ast.expr.ConditionalExpr;
+import japa.parser.ast.expr.DoubleLiteralExpr;
+import japa.parser.ast.expr.EnclosedExpr;
+import japa.parser.ast.expr.Expression;
+import japa.parser.ast.expr.FieldAccessExpr;
+import japa.parser.ast.expr.InstanceOfExpr;
+import japa.parser.ast.expr.IntegerLiteralExpr;
+import japa.parser.ast.expr.IntegerLiteralMinValueExpr;
+import japa.parser.ast.expr.LongLiteralExpr;
+import japa.parser.ast.expr.LongLiteralMinValueExpr;
+import japa.parser.ast.expr.MarkerAnnotationExpr;
+import japa.parser.ast.expr.MemberValuePair;
+import japa.parser.ast.expr.MethodCallExpr;
+import japa.parser.ast.expr.NameExpr;
+import japa.parser.ast.expr.NormalAnnotationExpr;
+import japa.parser.ast.expr.NullLiteralExpr;
+import japa.parser.ast.expr.ObjectCreationExpr;
+import japa.parser.ast.expr.QualifiedNameExpr;
+import japa.parser.ast.expr.SingleMemberAnnotationExpr;
+import japa.parser.ast.expr.StringLiteralExpr;
+import japa.parser.ast.expr.SuperExpr;
+import japa.parser.ast.expr.ThisExpr;
+import japa.parser.ast.expr.UnaryExpr;
+import japa.parser.ast.expr.VariableDeclarationExpr;
+import japa.parser.ast.stmt.AssertStmt;
+import japa.parser.ast.stmt.BlockStmt;
+import japa.parser.ast.stmt.BreakStmt;
+import japa.parser.ast.stmt.CatchClause;
+import japa.parser.ast.stmt.ContinueStmt;
+import japa.parser.ast.stmt.DoStmt;
+import japa.parser.ast.stmt.EmptyStmt;
+import japa.parser.ast.stmt.ExplicitConstructorInvocationStmt;
+import japa.parser.ast.stmt.ExpressionStmt;
+import japa.parser.ast.stmt.ForStmt;
+import japa.parser.ast.stmt.ForeachStmt;
+import japa.parser.ast.stmt.IfStmt;
+import japa.parser.ast.stmt.LabeledStmt;
+import japa.parser.ast.stmt.ReturnStmt;
+import japa.parser.ast.stmt.Statement;
+import japa.parser.ast.stmt.SwitchEntryStmt;
+import japa.parser.ast.stmt.SwitchStmt;
+import japa.parser.ast.stmt.SynchronizedStmt;
+import japa.parser.ast.stmt.ThrowStmt;
+import japa.parser.ast.stmt.TryStmt;
+import japa.parser.ast.stmt.TypeDeclarationStmt;
+import japa.parser.ast.stmt.WhileStmt;
+import japa.parser.ast.type.ClassOrInterfaceType;
+import japa.parser.ast.type.PrimitiveType;
+import japa.parser.ast.type.ReferenceType;
+import japa.parser.ast.type.Type;
+import japa.parser.ast.type.VoidType;
+import japa.parser.ast.type.WildcardType;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * @author Julio Vilmar Gesser
+ * @author Henri Sivonen
+ */
+
+public class CppVisitor extends AnnotationHelperVisitor<LocalSymbolTable> {
+
+ // Java class names that classNameFromExpression accepts as class
+ // references. The array is searched with Arrays.binarySearch, so it must
+ // stay sorted.
+ private static final String[] CLASS_NAMES = { "AttributeName",
+ "ElementName", "HtmlAttributes", "LocatorImpl", "MetaScanner",
+ "NamedCharacters", "NamedCharactersAccel", "Portability",
+ "StackNode", "Tokenizer", "TreeBuilder", "UTF16Buffer" };
+
+ // NOTE(review): not referenced in the visible part of this class;
+ // presumably the methods whose conditions get the "unlikely" hint - confirm.
+ private static final String[] METHODS_WITH_UNLIKELY_CONDITIONS = {
+ "appendStrBuf" };
+
+ public class SourcePrinter {
+
+ private int level = 0;
+
+ private boolean indented = false;
+
+ private final StringBuilder buf = new StringBuilder();
+
+ public void indent() {
+ level++;
+ }
+
+ public void unindent() {
+ level--;
+ }
+
+ private void makeIndent() {
+ for (int i = 0; i < level; i++) {
+ buf.append(" ");
+ }
+ }
+
+ public void printWithoutIndent(String arg) {
+ indented = false;
+ buf.append(arg);
+ }
+
+ public void print(String arg) {
+ if (!indented) {
+ makeIndent();
+ indented = true;
+ }
+ buf.append(arg);
+ }
+
+ public void printLn(String arg) {
+ print(arg);
+ printLn();
+ }
+
+ public void printLn() {
+ buf.append("\n");
+ indented = false;
+ }
+
+ public String getSource() {
+ return buf.toString();
+ }
+
+ @Override public String toString() {
+ return getSource();
+ }
+ }
+
+ // NOTE(review): not read anywhere in the visible part of this class.
+ private boolean supportErrorReporting = true;
+
+ // Printer that currently receives generated output; fieldDeclaration
+ // temporarily redirects it to staticInitializerPrinter.
+ protected SourcePrinter printer = new SourcePrinter();
+
+ // Collects the statements that endClassDeclaration pastes into the body of
+ // initializeStatics().
+ private SourcePrinter staticInitializerPrinter = new SourcePrinter();
+
+ // Remembers the regular printer while fieldDeclaration has swapped it out.
+ private SourcePrinter tempPrinterHolder;
+
+ // Source of the C++ type names, macros and literals that get printed.
+ protected final CppTypes cppTypes;
+
+ // Prefixed C++ class name; set in visit(ClassOrInterfaceDeclaration).
+ protected String className = "";
+
+ // Array dimension count of the static final field currently being handled
+ // by fieldDeclaration; reset to 0 when it finishes.
+ protected int currentArrayCount;
+
+ protected Set<String> forLoopsWithCondition = new HashSet<String>();
+
+ // True while fieldDeclaration emits a no-length array of primitives; makes
+ // the declarator id print "[]" and suppresses "*" on the type.
+ protected boolean inPrimitiveNoLengthFieldDeclarator = false;
+
+ // Consulted to map Java names to C++ defines.
+ protected final SymbolTable symbolTable;
+
+ // Result of makeDefinePrefix(className); set in
+ // visit(ClassOrInterfaceDeclaration).
+ protected String definePrefix;
+
+ // Unprefixed Java class name; set in visit(ClassOrInterfaceDeclaration).
+ protected String javaClassName;
+
+ // When true, visit(ClassOrInterfaceType) omits the trailing "*" on class
+ // types it does not special-case.
+ protected boolean suppressPointer = false;
+
+ // Statements (without the trailing ";") that endClassDeclaration prints
+ // into releaseStatics().
+ private final List<String> staticReleases = new LinkedList<String>();
+
+ // While true, assignments are printed as target(value) and this.field is
+ // printed as the bare field name.
+ private boolean inConstructorBody = false;
+
+ private String currentMethod = null;
+
+ private Set<String> labels = null;
+
+ private boolean destructor;
+
+ // While true, array types use cppTypes.staticArrayTemplate().
+ protected boolean inStatic = false;
+
+ // Set by printMembers only while the Tokenizer's stateLoop method is being
+ // visited.
+ private boolean reportTransitions = false;
+
+ private int stateLoopCallCount = 0;
+
+ /**
+  * Creates a visitor that emits C++ using the given type mapping and
+  * symbol table.
+  *
+  * @param cppTypes source of the C++ type names, macros and literals to emit
+  * @param symbolTable table consulted when mapping Java names to C++ defines
+  */
+ public CppVisitor(CppTypes cppTypes, SymbolTable symbolTable) {
+     this.symbolTable = symbolTable;
+     this.cppTypes = cppTypes;
+     // The static initializer text is later printed inside a function body,
+     // so it starts one level in.
+     this.staticInitializerPrinter.indent();
+ }
+
+ /**
+  * Returns the source text accumulated by the current printer.
+  *
+  * @return the generated source so far
+  */
+ public String getSource() {
+     final String generated = printer.getSource();
+     return generated;
+ }
+
+ private String classNameFromExpression(Expression e) {
+ if (e instanceof NameExpr) {
+ NameExpr nameExpr = (NameExpr) e;
+ String name = nameExpr.getName();
+ if (Arrays.binarySearch(CLASS_NAMES, name) > -1) {
+ return name;
+ }
+ }
+ return null;
+ }
+
+ /**
+  * Hook for printing modifiers; this implementation prints nothing.
+  *
+  * @param modifiers the modifier bit set (ignored here)
+  */
+ protected void printModifiers(int modifiers) {
+     // No output is produced for modifiers in this visitor.
+ }
+
+ private void printMembers(List<BodyDeclaration> members,
+ LocalSymbolTable arg) {
+ for (BodyDeclaration member : members) {
+ if ("Tokenizer".equals(javaClassName)
+ && member instanceof MethodDeclaration
+ && "stateLoop".equals(((MethodDeclaration) member).getName())) {
+ reportTransitions = true;
+ }
+ member.accept(this, arg);
+ reportTransitions = false;
+ }
+ }
+
+ /**
+  * Placeholder for printing type arguments; nothing is printed.
+  *
+  * @param args the type arguments (ignored)
+  * @param arg the local symbol table (ignored)
+  */
+ private void printTypeArgs(List<Type> args, LocalSymbolTable arg) {
+     // Type arguments produce no output.
+ }
+
+ /**
+  * Placeholder for printing type parameters; nothing is printed.
+  *
+  * @param args the type parameters (ignored)
+  * @param arg the local symbol table (ignored)
+  */
+ private void printTypeParameters(List<TypeParameter> args,
+         LocalSymbolTable arg) {
+     // Type parameters produce no output.
+ }
+
+ /**
+  * Rejects generic nodes.
+  *
+  * @throws IllegalStateException always; the message is the node's class name
+  */
+ public void visit(Node n, LocalSymbolTable arg) {
+     final String nodeClass = n.getClass().getName();
+     throw new IllegalStateException(nodeClass);
+ }
+
+ /**
+  * Visits each type declared in the compilation unit. A line break follows
+  * every type, and one more line break separates it from the next type.
+  */
+ public void visit(CompilationUnit n, LocalSymbolTable arg) {
+     final List<TypeDeclaration> types = n.getTypes();
+     if (types == null) {
+         return;
+     }
+     final Iterator<TypeDeclaration> it = types.iterator();
+     while (it.hasNext()) {
+         final TypeDeclaration type = it.next();
+         type.accept(this, arg);
+         printer.printLn();
+         if (it.hasNext()) {
+             printer.printLn();
+         }
+     }
+ }
+
+ /**
+  * Rejects package declarations.
+  *
+  * @throws IllegalStateException always; the message is the node's class name
+  */
+ public void visit(PackageDeclaration n, LocalSymbolTable arg) {
+     final String nodeClass = n.getClass().getName();
+     throw new IllegalStateException(nodeClass);
+ }
+
+ /**
+  * Prints a simple name. A handful of names are replaced by fixed text or by
+  * values from {@code cppTypes}; any other name is replaced by the C++
+  * define registered for {@code javaClassName + "." + name} when there is
+  * one, and printed unchanged otherwise.
+  */
+ public void visit(NameExpr n, LocalSymbolTable arg) {
+     final String name = n.getName();
+     final String replacement;
+     if ("mappingLangToXmlLang".equals(name)) {
+         replacement = "0";
+     } else if ("LANG_NS".equals(name)) {
+         replacement = "ALL_NO_NS";
+     } else if ("LANG_PREFIX".equals(name)) {
+         replacement = "ALL_NO_PREFIX";
+     } else if ("HTML_LOCAL".equals(name)) {
+         replacement = cppTypes.localForLiteral("html");
+     } else if ("documentModeHandler".equals(name)) {
+         replacement = "this";
+     } else if ("errorHandler".equals(name)) {
+         replacement = cppTypes.errorHandler();
+     } else {
+         final String define = symbolTable.cppDefinesByJavaNames.get(javaClassName + "." + name);
+         replacement = (define != null) ? define : name;
+     }
+     printer.print(replacement);
+ }
+
+ /**
+  * Prints a qualified name as the visited qualifier, a dot, and the name.
+  */
+ public void visit(QualifiedNameExpr n, LocalSymbolTable arg) {
+     n.getQualifier().accept(this, arg);
+     final String simpleName = n.getName();
+     printer.print(".");
+     printer.print(simpleName);
+ }
+
+ /**
+  * Rejects import declarations.
+  *
+  * @throws IllegalStateException always; the message is the node's class name
+  */
+ public void visit(ImportDeclaration n, LocalSymbolTable arg) {
+     final String nodeClass = n.getClass().getName();
+     throw new IllegalStateException(nodeClass);
+ }
+
+ /**
+  * Translates a class declaration: records the Java and C++ class names and
+  * the define prefix, prints the opening boilerplate, visits the members
+  * (if any) and prints the closing boilerplate.
+  */
+ public void visit(ClassOrInterfaceDeclaration n, LocalSymbolTable arg) {
+     final String simpleName = n.getName();
+     javaClassName = simpleName;
+     className = cppTypes.classPrefix() + simpleName;
+     definePrefix = makeDefinePrefix(className);
+
+     startClassDeclaration();
+
+     final List<BodyDeclaration> members = n.getMembers();
+     if (members != null) {
+         printMembers(members, arg);
+     }
+
+     endClassDeclaration();
+ }
+
+ private String makeDefinePrefix(String name) {
+ StringBuilder sb = new StringBuilder();
+ boolean prevWasLowerCase = true;
+ for (int i = 0; i < name.length(); i++) {
+ char c = name.charAt(i);
+ if (c >= 'a' && c <= 'z') {
+ sb.append((char) (c - 0x20));
+ prevWasLowerCase = true;
+ } else if (c >= 'A' && c <= 'Z') {
+ if (prevWasLowerCase) {
+ sb.append('_');
+ }
+ sb.append(c);
+ prevWasLowerCase = false;
+ } else if (c >= '0' && c <= '9') {
+ sb.append(c);
+ prevWasLowerCase = false;
+ }
+ }
+ sb.append('_');
+ return sb.toString();
+ }
+
+ /**
+  * Prints the closing boilerplate of a class: the definition of
+  * {@code initializeStatics()} (whose body is the accumulated static
+  * initializer text), the definition of {@code releaseStatics()} (one
+  * statement per recorded static release) and, when
+  * {@code cppTypes.hasSupplement} says so, an include of the class's
+  * {@code CppSupplement.h}.
+  */
+ protected void endClassDeclaration() {
+     printer.printLn("void");
+     printer.printLn(className + "::initializeStatics()");
+     printer.printLn("{");
+     printer.print(staticInitializerPrinter.getSource());
+     printer.printLn("}");
+     printer.printLn();
+
+     printer.printLn("void");
+     printer.printLn(className + "::releaseStatics()");
+     printer.printLn("{");
+     printer.indent();
+     for (Iterator<String> it = staticReleases.iterator(); it.hasNext();) {
+         printer.printLn(it.next() + ";");
+     }
+     printer.unindent();
+     printer.printLn("}");
+     printer.printLn();
+
+     if (!cppTypes.hasSupplement(javaClassName)) {
+         return;
+     }
+     printer.printLn();
+     printer.printLn("#include \"" + className + "CppSupplement.h\"");
+ }
+
+ protected void startClassDeclaration() {
+ printer.print("#define ");
+ printer.print(className);
+ printer.printLn("_cpp__");
+ printer.printLn();
+
+ String[] incs = cppTypes.boilerplateIncludes(javaClassName);
+ for (int i = 0; i < incs.length; i++) {
+ String inc = incs[i];
+ printer.print("#include \"");
+ printer.print(inc);
+ printer.printLn(".h\"");
+ }
+
+ printer.printLn();
+
+ for (int i = 0; i < Main.H_LIST.length; i++) {
+ String klazz = Main.H_LIST[i];
+ if (!klazz.equals(javaClassName)) {
+ printer.print("#include \"");
+ printer.print(cppTypes.classPrefix());
+ printer.print(klazz);
+ printer.printLn(".h\"");
+ }
+ }
+
+ printer.printLn();
+ printer.print("#include \"");
+ printer.print(className);
+ printer.printLn(".h\"");
+ if ("AttributeName".equals(javaClassName)
+ || "ElementName".equals(javaClassName)) {
+ printer.print("#include \"");
+ printer.print(cppTypes.classPrefix());
+ printer.print("Releasable");
+ printer.print(javaClassName);
+ printer.printLn(".h\"");
+ }
+ printer.printLn();
+ }
+
+ /**
+  * Prints the declaration's Javadoc, if present, followed by a semicolon.
+  */
+ public void visit(EmptyTypeDeclaration n, LocalSymbolTable arg) {
+     final JavadocComment doc = n.getJavaDoc();
+     if (doc != null) {
+         doc.accept(this, arg);
+     }
+     printer.print(";");
+ }
+
+ /**
+  * Prints the comment content wrapped in Javadoc delimiters, followed by a
+  * line break.
+  */
+ public void visit(JavadocComment n, LocalSymbolTable arg) {
+     final String content = n.getContent();
+     printer.print("/**");
+     printer.print(content);
+     printer.printLn("*/");
+ }
+
+ /**
+  * Prints the C++ spelling of a Java class or interface type.
+  * {@code String} maps to one of several {@code cppTypes} types depending
+  * on which of the {@code local()}, {@code prefix()}, {@code nsUri()},
+  * {@code literal()} and {@code characterName()} checks holds first; a
+  * fixed set of other names map to dedicated {@code cppTypes} values; any
+  * remaining name gets the class prefix and, unless
+  * {@code suppressPointer} is set, a trailing {@code *}.
+  *
+  * @throws IllegalStateException if the type has a scope (nested classes
+  *         are not translatable)
+  */
+ public void visit(ClassOrInterfaceType n, LocalSymbolTable arg) {
+     if (n.getScope() != null) {
+         n.getScope().accept(this, arg);
+         printer.print(".");
+         throw new IllegalStateException("Can't translate nested classes.");
+     }
+     final String javaName = n.getName();
+     final String cppName;
+     if ("String".equals(javaName)) {
+         if (local()) {
+             cppName = cppTypes.localType();
+         } else if (prefix()) {
+             cppName = cppTypes.prefixType();
+         } else if (nsUri()) {
+             cppName = cppTypes.nsUriType();
+         } else if (literal()) {
+             cppName = cppTypes.literalType();
+         } else if (characterName()) {
+             cppName = cppTypes.characterNameType();
+         } else {
+             cppName = cppTypes.stringType();
+         }
+     } else if ("T".equals(javaName) || "Object".equals(javaName)) {
+         cppName = cppTypes.nodeType();
+     } else if ("TokenHandler".equals(javaName)) {
+         cppName = cppTypes.classPrefix() + "TreeBuilder*";
+     } else if ("EncodingDeclarationHandler".equals(javaName)) {
+         cppName = cppTypes.encodingDeclarationHandlerType();
+     } else if ("Interner".equals(javaName)) {
+         cppName = cppTypes.internerType();
+     } else if ("TreeBuilderState".equals(javaName)) {
+         cppName = cppTypes.treeBuilderStateType();
+     } else if ("DocumentModeHandler".equals(javaName)) {
+         cppName = cppTypes.documentModeHandlerType();
+     } else if ("DocumentMode".equals(javaName)) {
+         cppName = cppTypes.documentModeType();
+     } else {
+         final String pointerSuffix = suppressPointer ? "" : "*";
+         cppName = cppTypes.classPrefix() + javaName + pointerSuffix;
+     }
+     printer.print(cppName);
+     printTypeArgs(n.getTypeArgs(), arg);
+ }
+
+ protected boolean inHeader() {
+ return false;
+ }
+
+ public void visit(TypeParameter n, LocalSymbolTable arg) {
+ printer.print(n.getName());
+ if (n.getTypeBound() != null) {
+ printer.print(" extends ");
+ for (Iterator<ClassOrInterfaceType> i = n.getTypeBound().iterator(); i.hasNext();) {
+ ClassOrInterfaceType c = i.next();
+ c.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(" & ");
+ }
+ }
+ }
+ }
+
+ /**
+  * Prints the C++ type for a Java primitive. Only {@code boolean},
+  * {@code byte}, {@code char} and {@code int} are supported.
+  *
+  * @throws IllegalStateException for {@code double}, {@code float},
+  *         {@code long} and {@code short}
+  */
+ public void visit(PrimitiveType n, LocalSymbolTable arg) {
+     switch (n.getType()) {
+         case Boolean:
+             printer.print(cppTypes.booleanType());
+             break;
+         case Byte:
+             printer.print(cppTypes.byteType());
+             break;
+         case Char:
+             printer.print(cppTypes.charType());
+             break;
+         case Int:
+             printer.print(cppTypes.intType());
+             break;
+         case Double:
+         case Float:
+         case Long:
+         case Short:
+             throw new IllegalStateException("Unsupported primitive.");
+     }
+ }
+
+ /**
+  * Prints a reference (possibly array) type. {@code const} is prepended
+  * when {@code isConst()} holds. With {@code noLength()}, the element type
+  * is followed by one {@code *} per array dimension (omitted inside a
+  * primitive no-length field declarator). Otherwise each dimension wraps
+  * the element type in an array template whose second argument is the int
+  * type; the template is the static one when {@code inStatic}, else the
+  * auto one when {@code auto()} holds, else the regular one.
+  */
+ public void visit(ReferenceType n, LocalSymbolTable arg) {
+     if (isConst()) {
+         printer.print("const ");
+     }
+     if (noLength()) {
+         n.getType().accept(this, arg);
+         final int stars = n.getArrayCount();
+         if (!inPrimitiveNoLengthFieldDeclarator) {
+             for (int i = 0; i < stars; i++) {
+                 printer.print("*");
+             }
+         }
+         return;
+     }
+     // Open one template per dimension ...
+     for (int i = 0; i < n.getArrayCount(); i++) {
+         final String template;
+         if (inStatic) {
+             template = cppTypes.staticArrayTemplate();
+         } else if (auto()) {
+             template = cppTypes.autoArrayTemplate();
+         } else {
+             template = cppTypes.arrayTemplate();
+         }
+         printer.print(template);
+         printer.print("<");
+     }
+     n.getType().accept(this, arg);
+     // ... and close each one with the length type.
+     for (int i = 0; i < n.getArrayCount(); i++) {
+         printer.print(",");
+         printer.print(cppTypes.intType());
+         printer.print(">");
+     }
+ }
+
+ /**
+  * Prints {@code ?} followed by the optional {@code extends} and
+  * {@code super} bounds.
+  */
+ public void visit(WildcardType n, LocalSymbolTable arg) {
+     printer.print("?");
+     final boolean hasUpperBound = n.getExtends() != null;
+     if (hasUpperBound) {
+         printer.print(" extends ");
+         n.getExtends().accept(this, arg);
+     }
+     final boolean hasLowerBound = n.getSuper() != null;
+     if (hasLowerBound) {
+         printer.print(" super ");
+         n.getSuper().accept(this, arg);
+     }
+ }
+
+ /**
+  * Translates a field declaration with the field's annotations installed
+  * as the current annotations for the duration of the call.
+  */
+ public void visit(FieldDeclaration n, LocalSymbolTable arg) {
+     // Make the field's annotations current while it is being translated.
+     this.currentAnnotations = n.getAnnotations();
+     this.fieldDeclaration(n, arg);
+     this.currentAnnotations = null;
+ }
+
+ protected boolean isNonToCharArrayMethodCall(Expression exp) {
+ if (exp instanceof MethodCallExpr) {
+ MethodCallExpr mce = (MethodCallExpr) exp;
+ return !"toCharArray".equals(mce.getName());
+ } else {
+ return false;
+ }
+ }
+
+ /**
+ * Translates a field declaration. Only static final, non-primitive fields
+ * with an initializer produce output; everything else is ignored here.
+ * Output is split between two printers: definitions go to the regular
+ * printer and run-time initialization goes to staticInitializerPrinter.
+ * Matching clean-up statements are recorded in staticReleases. Only the
+ * first declarator of the field is considered.
+ *
+ * @throws IllegalStateException for multidimensional arrays and for field
+ * types that are neither primitive nor reference types
+ */
+ protected void fieldDeclaration(FieldDeclaration n, LocalSymbolTable arg) {
+ // Default to the static initializer printer; branches below switch back
+ // to the regular printer (saved in tempPrinterHolder) when they emit
+ // definitions.
+ tempPrinterHolder = printer;
+ printer = staticInitializerPrinter;
+ int modifiers = n.getModifiers();
+ List<VariableDeclarator> variables = n.getVariables();
+ VariableDeclarator declarator = variables.get(0);
+ if (ModifierSet.isStatic(modifiers) && ModifierSet.isFinal(modifiers)
+ && !(n.getType() instanceof PrimitiveType)
+ && declarator.getInit() != null) {
+ if (n.getType() instanceof ReferenceType) {
+ ReferenceType rt = (ReferenceType) n.getType();
+ currentArrayCount = rt.getArrayCount();
+ if (currentArrayCount > 0) {
+ if (currentArrayCount != 1) {
+ throw new IllegalStateException(
+ "Multidimensional arrays not supported. " + n);
+ }
+ if (noLength()) {
+ if (rt.getType() instanceof PrimitiveType) {
+ // No-length array of primitives: emit a plain array
+ // definition with its initializer via the regular printer.
+ inPrimitiveNoLengthFieldDeclarator = true;
+ printer = tempPrinterHolder;
+ n.getType().accept(this, arg);
+ printer.print(" ");
+ printer.print(className);
+ printer.print("::");
+ declarator.getId().accept(this, arg);
+
+ printer.print(" = ");
+
+ declarator.getInit().accept(this, arg);
+
+ printer.printLn(";");
+ printer = staticInitializerPrinter;
+ } else {
+ // No-length array of non-primitives: define the member as 0,
+ // then allocate and fill it in the static initializer and
+ // schedule a delete[] for it.
+ printer = tempPrinterHolder;
+ n.getType().accept(this, arg);
+ printer.print(" ");
+ printer.print(className);
+ printer.print("::");
+ declarator.getId().accept(this, arg);
+
+ printer.printLn(" = 0;");
+ printer = staticInitializerPrinter;
+
+ staticReleases.add("delete[] "
+ + declarator.getId().getName());
+
+ ArrayInitializerExpr aie = (ArrayInitializerExpr) declarator.getInit();
+
+ declarator.getId().accept(this, arg);
+ printer.print(" = new ");
+ // suppressPointer = true;
+ rt.getType().accept(this, arg);
+ // suppressPointer = false;
+ printer.print("[");
+ printer.print("" + aie.getValues().size());
+ printer.printLn("];");
+
+ printArrayInit(declarator.getId(), aie.getValues(),
+ arg);
+ }
+ } else if ((rt.getType() instanceof PrimitiveType) || "String".equals(rt.getType().toString())) {
+ // Array of primitives or Strings with a length: emit a static
+ // <name>_DATA array plus a static array template instance that
+ // refers to it together with its length.
+ printer = tempPrinterHolder;
+ printer.print("static ");
+ rt.getType().accept(this, arg);
+ printer.print(" const ");
+ declarator.getId().accept(this, arg);
+ printer.print("_DATA[] = ");
+ declarator.getInit().accept(this, arg);
+ printer.printLn(";");
+ printer.print(cppTypes.staticArrayTemplate());
+ printer.print("<");
+ suppressPointer = true;
+ rt.getType().accept(this, arg);
+ suppressPointer = false;
+ printer.print(",");
+ printer.print(cppTypes.intType());
+ printer.print("> ");
+ printer.print(className);
+ printer.print("::");
+ declarator.getId().accept(this, arg);
+ printer.print(" = { ");
+ declarator.getId().accept(this, arg);
+ printer.print("_DATA, ");
+ printer.print(cppTypes.arrayLengthMacro());
+ printer.print("(");
+ declarator.getId().accept(this, arg);
+ printer.printLn("_DATA) };");
+ printer = staticInitializerPrinter;
+ } else if (isNonToCharArrayMethodCall(declarator.getInit())) {
+ // Array initialized by a method call other than toCharArray:
+ // assign it in the static initializer and schedule a release().
+ staticReleases.add(declarator.getId().getName()
+ + ".release()");
+ declarator.getId().accept(this, arg);
+ printer.print(" = ");
+ // NOTE(review): isNonToCharArrayMethodCall only returns true for a
+ // MethodCallExpr, so this ArrayInitializerExpr branch looks
+ // unreachable - confirm.
+ if (declarator.getInit() instanceof ArrayInitializerExpr) {
+
+ ArrayInitializerExpr aie = (ArrayInitializerExpr) declarator.getInit();
+ printer.print(cppTypes.arrayTemplate());
+ printer.print("<");
+ suppressPointer = true;
+ rt.getType().accept(this, arg);
+ suppressPointer = false;
+ printer.print(",");
+ printer.print(cppTypes.intType());
+ printer.print(">::");
+ printer.print(cppTypes.newArrayCreator());
+ printer.print("(");
+ printer.print("" + aie.getValues().size());
+ printer.printLn(");");
+ printArrayInit(declarator.getId(), aie.getValues(),
+ arg);
+ } else {
+ declarator.getInit().accept(this, arg);
+ printer.printLn(";");
+ }
+ }
+ } else {
+ // Non-array reference: define the static member as the null
+ // literal, then assign it in the static initializer and schedule a
+ // delete. AttributeName / ElementName fields get an ATTR_ / ELT_
+ // name prefix.
+ if (ModifierSet.isStatic(modifiers)) {
+ printer = tempPrinterHolder;
+ n.getType().accept(this, arg);
+ printer.print(" ");
+ printer.print(className);
+ printer.print("::");
+ if ("AttributeName".equals(n.getType().toString())) {
+ printer.print("ATTR_");
+ } else if ("ElementName".equals(n.getType().toString())) {
+ printer.print("ELT_");
+ }
+ declarator.getId().accept(this, arg);
+ printer.print(" = ");
+ printer.print(cppTypes.nullLiteral());
+ printer.printLn(";");
+ printer = staticInitializerPrinter;
+ }
+
+ if ("AttributeName".equals(n.getType().toString())) {
+ printer.print("ATTR_");
+ staticReleases.add("delete ATTR_"
+ + declarator.getId().getName());
+ } else if ("ElementName".equals(n.getType().toString())) {
+ printer.print("ELT_");
+ staticReleases.add("delete ELT_"
+ + declarator.getId().getName());
+ } else {
+ staticReleases.add("delete "
+ + declarator.getId().getName());
+ }
+ declarator.accept(this, arg);
+ printer.printLn(";");
+ }
+ } else {
+ throw new IllegalStateException(
+ "Non-reference, non-primitive fields not supported.");
+ }
+ }
+ // Restore the per-field state and the regular printer.
+ currentArrayCount = 0;
+ printer = tempPrinterHolder;
+ inPrimitiveNoLengthFieldDeclarator = false;
+ }
+
+ private void printArrayInit(VariableDeclaratorId variableDeclaratorId,
+ List<Expression> values, LocalSymbolTable arg) {
+ for (int i = 0; i < values.size(); i++) {
+ Expression exp = values.get(i);
+ variableDeclaratorId.accept(this, arg);
+ printer.print("[");
+ printer.print("" + i);
+ printer.print("] = ");
+ if (exp instanceof NameExpr) {
+ if ("AttributeName".equals(javaClassName)) {
+ printer.print("ATTR_");
+ } else if ("ElementName".equals(javaClassName)) {
+ printer.print("ELT_");
+ }
+ }
+ exp.accept(this, arg);
+ printer.printLn(";");
+ }
+ }
+
+ /**
+  * Prints the declarator id and, when there is an initializer,
+  * {@code " = "} followed by the initializer.
+  */
+ public void visit(VariableDeclarator n, LocalSymbolTable arg) {
+     n.getId().accept(this, arg);
+
+     final Expression initializer = n.getInit();
+     if (initializer == null) {
+         return;
+     }
+     printer.print(" = ");
+     initializer.accept(this, arg);
+ }
+
+ public void visit(VariableDeclaratorId n, LocalSymbolTable arg) {
+ printer.print(n.getName());
+ if (noLength()) {
+ for (int i = 0; i < currentArrayCount; i++) {
+ if (inPrimitiveNoLengthFieldDeclarator) {
+ printer.print("[]");
+ }
+ }
+ }
+ for (int i = 0; i < n.getArrayCount(); i++) {
+ printer.print("[]");
+ }
+ }
+
+ public void visit(ArrayInitializerExpr n, LocalSymbolTable arg) {
+ printer.print("{");
+ if (n.getValues() != null) {
+ printer.print(" ");
+ for (Iterator<Expression> i = n.getValues().iterator(); i.hasNext();) {
+ Expression expr = i.next();
+ expr.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ printer.print(" ");
+ }
+ printer.print("}");
+ }
+
+ /**
+  * Prints {@code void}.
+  */
+ public void visit(VoidType n, LocalSymbolTable arg) {
+     final String keyword = "void";
+     printer.print(keyword);
+ }
+
+ /**
+  * Prints an array access as the array expression followed by the index in
+  * square brackets.
+  */
+ public void visit(ArrayAccessExpr n, LocalSymbolTable arg) {
+     final Expression array = n.getName();
+     array.accept(this, arg);
+     printer.print("[");
+     final Expression index = n.getIndex();
+     index.accept(this, arg);
+     printer.print("]");
+ }
+
+ /**
+  * Prints an array allocation. With {@code noLength()} each dimension
+  * becomes {@code new T[dim]}; otherwise each dimension becomes a call to
+  * the array template's creator function with the dimension as argument.
+  *
+  * @throws IllegalStateException if the expression carries an array
+  *         initializer instead of dimensions, or (after the dimensions
+  *         have been printed) if it has additional empty array levels
+  */
+ public void visit(ArrayCreationExpr n, LocalSymbolTable arg) {
+     if (n.getDimensions() == null) {
+         throw new IllegalStateException(
+                 "Array initializer as part of array creation not supported. "
+                         + n.toString());
+     }
+     final boolean rawArray = noLength();
+     for (Expression dim : n.getDimensions()) {
+         if (rawArray) {
+             printer.print("new ");
+             n.getType().accept(this, arg);
+             printer.print("[");
+             dim.accept(this, arg);
+             printer.print("]");
+         } else {
+             printer.print(cppTypes.arrayTemplate());
+             printer.print("<");
+             n.getType().accept(this, arg);
+             printer.print(",");
+             printer.print(cppTypes.intType());
+             printer.print(">::");
+             printer.print(cppTypes.newArrayCreator());
+             printer.print("(");
+             dim.accept(this, arg);
+             printer.print(")");
+         }
+     }
+     if (n.getArrayCount() > 0) {
+         throw new IllegalStateException(
+                 "Nested array allocation not supported. "
+                         + n.toString());
+     }
+ }
+
+ public void visit(AssignExpr n, LocalSymbolTable arg) {
+ if (inConstructorBody) {
+ n.getTarget().accept(this, arg);
+ printer.print("(");
+ n.getValue().accept(this, arg);
+ printer.print(")");
+ } else {
+ n.getTarget().accept(this, arg);
+ printer.print(" ");
+ switch (n.getOperator()) {
+ case assign:
+ printer.print("=");
+ break;
+ case and:
+ printer.print("&=");
+ break;
+ case or:
+ printer.print("|=");
+ break;
+ case xor:
+ printer.print("^=");
+ break;
+ case plus:
+ printer.print("+=");
+ break;
+ case minus:
+ printer.print("-=");
+ break;
+ case rem:
+ printer.print("%=");
+ break;
+ case slash:
+ printer.print("/=");
+ break;
+ case star:
+ printer.print("*=");
+ break;
+ case lShift:
+ printer.print("<<=");
+ break;
+ case rSignedShift:
+ printer.print(">>=");
+ break;
+ case rUnsignedShift:
+ printer.print(">>>=");
+ break;
+ }
+ printer.print(" ");
+ n.getValue().accept(this, arg);
+ }
+ }
+
+ /**
+  * Prints a binary expression. Comparisons against {@code null} or the
+  * integer literal {@code 0} are shortened: {@code x != null} becomes
+  * {@code !!x}, {@code x != 0} becomes {@code x}, and {@code x == null} /
+  * {@code x == 0} become {@code !x}. Everything else is printed as
+  * {@code left op right} with the Java operator spelling.
+  * NOTE(review): {@code >>>} is passed through unchanged although C++ has
+  * no such operator.
+  */
+ public void visit(BinaryExpr n, LocalSymbolTable arg) {
+     final Expression rhs = n.getRight();
+     final boolean againstNull = rhs instanceof NullLiteralExpr;
+     final boolean againstZero = (rhs instanceof IntegerLiteralExpr)
+             && "0".equals(((IntegerLiteralExpr) rhs).getValue());
+     switch (n.getOperator()) {
+         case notEquals:
+             if (againstNull) {
+                 printer.print("!!");
+                 n.getLeft().accept(this, arg);
+                 return;
+             }
+             if (againstZero) {
+                 n.getLeft().accept(this, arg);
+                 return;
+             }
+             break;
+         case equals:
+             if (againstNull || againstZero) {
+                 printer.print("!");
+                 n.getLeft().accept(this, arg);
+                 return;
+             }
+             break;
+         default:
+             break;
+     }
+
+     n.getLeft().accept(this, arg);
+     printer.print(" ");
+     String operator = null;
+     switch (n.getOperator()) {
+         case or:
+             operator = "||";
+             break;
+         case and:
+             operator = "&&";
+             break;
+         case binOr:
+             operator = "|";
+             break;
+         case binAnd:
+             operator = "&";
+             break;
+         case xor:
+             operator = "^";
+             break;
+         case equals:
+             operator = "==";
+             break;
+         case notEquals:
+             operator = "!=";
+             break;
+         case less:
+             operator = "<";
+             break;
+         case greater:
+             operator = ">";
+             break;
+         case lessEquals:
+             operator = "<=";
+             break;
+         case greaterEquals:
+             operator = ">=";
+             break;
+         case lShift:
+             operator = "<<";
+             break;
+         case rSignedShift:
+             operator = ">>";
+             break;
+         case rUnsignedShift:
+             operator = ">>>";
+             break;
+         case plus:
+             operator = "+";
+             break;
+         case minus:
+             operator = "-";
+             break;
+         case times:
+             operator = "*";
+             break;
+         case divide:
+             operator = "/";
+             break;
+         case remainder:
+             operator = "%";
+             break;
+     }
+     if (operator != null) {
+         printer.print(operator);
+     }
+     printer.print(" ");
+     n.getRight().accept(this, arg);
+ }
+
+ /**
+  * Prints a cast as the parenthesized target type, a space, and the
+  * expression being cast.
+  */
+ public void visit(CastExpr n, LocalSymbolTable arg) {
+     printer.print("(");
+     final Type targetType = n.getType();
+     targetType.accept(this, arg);
+     printer.print(") ");
+     final Expression operand = n.getExpr();
+     operand.accept(this, arg);
+ }
+
+ /**
+  * Prints the type followed by {@code .class}.
+  */
+ public void visit(ClassExpr n, LocalSymbolTable arg) {
+     final Type type = n.getType();
+     type.accept(this, arg);
+     printer.print(".class");
+ }
+
+ /**
+  * Prints a conditional expression as {@code condition ? then : else}.
+  */
+ public void visit(ConditionalExpr n, LocalSymbolTable arg) {
+     final Expression condition = n.getCondition();
+     condition.accept(this, arg);
+     printer.print(" ? ");
+     final Expression whenTrue = n.getThenExpr();
+     whenTrue.accept(this, arg);
+     printer.print(" : ");
+     final Expression whenFalse = n.getElseExpr();
+     whenFalse.accept(this, arg);
+ }
+
+ /**
+  * Prints the inner expression wrapped in parentheses.
+  */
+ public void visit(EnclosedExpr n, LocalSymbolTable arg) {
+     printer.print("(");
+     final Expression inner = n.getInner();
+     inner.accept(this, arg);
+     printer.print(")");
+ }
+
+ /**
+  * Prints a field access. Special cases, in order: {@code this.field}
+  * inside a constructor body prints the bare field name; {@code x.length}
+  * (scope other than {@code this}) prints the scope followed by
+  * {@code .length}; {@code Integer.MAX_VALUE} prints the max integer value
+  * from {@code cppTypes}. Otherwise, if the scope is a known class name,
+  * the access prints as a registered define if one exists, else as
+  * {@code Prefix + Class::field} (with an {@code ATTR_}/{@code ELT_} name
+  * prefix where the symbol table check asks for it). For any other scope
+  * it prints {@code scope->field}, except that a {@code DocumentMode}
+  * scope prints the field alone.
+  */
+ public void visit(FieldAccessExpr n, LocalSymbolTable arg) {
+     final Expression scope = n.getScope();
+     final String field = n.getField();
+     if (inConstructorBody && (scope instanceof ThisExpr)) {
+         printer.print(field);
+         return;
+     }
+     if ("length".equals(field) && !(scope instanceof ThisExpr)) {
+         scope.accept(this, arg);
+         printer.print(".length");
+         return;
+     }
+     if ("MAX_VALUE".equals(field) && "Integer".equals(scope.toString())) {
+         printer.print(cppTypes.maxInteger());
+         return;
+     }
+
+     final String clazzName = classNameFromExpression(scope);
+     if (clazzName != null) {
+         final String define = symbolTable.cppDefinesByJavaNames.get(clazzName + "." + field);
+         if (define != null) {
+             printer.print(define);
+             return;
+         }
+         printer.print(cppTypes.classPrefix());
+         printer.print(clazzName);
+         printer.print("::");
+         if (symbolTable.isNotAnAttributeOrElementName(field)) {
+             if ("AttributeName".equals(clazzName)) {
+                 printer.print("ATTR_");
+             } else if ("ElementName".equals(clazzName)) {
+                 printer.print("ELT_");
+             }
+         }
+     } else if (!"DocumentMode".equals(scope.toString())) {
+         scope.accept(this, arg);
+         printer.print("->");
+     }
+     printer.print(field);
+ }
+
+ /**
+  * Prints the expression, {@code " instanceof "}, and the type.
+  */
+ public void visit(InstanceOfExpr n, LocalSymbolTable arg) {
+     final Expression subject = n.getExpr();
+     subject.accept(this, arg);
+     printer.print(" instanceof ");
+     final Type type = n.getType();
+     type.accept(this, arg);
+ }
+
+ /**
+  * Prints a character literal via {@code printCharLiteral}.
+  */
+ public void visit(CharLiteralExpr n, LocalSymbolTable arg) {
+     final String literalValue = n.getValue();
+     printCharLiteral(literalValue);
+ }
+
+ /**
+  * Prints a C++ character literal for the given literal value.
+  * <p>
+  * A value that is not exactly one character long is printed verbatim
+  * between single quotes. A single character is printed as a named escape
+  * where one exists, as itself in quotes when it is printable ASCII
+  * (0x20 to 0x7E), and as a hexadecimal integer otherwise. DEL (0x7F) is a
+  * control character, so it is printed as hex, not embedded raw in the
+  * generated source.
+  *
+  * @param val the literal's value as delivered by the parser
+  */
+ private void printCharLiteral(String val) {
+     if (val.length() != 1) {
+         printer.print("'");
+         printer.print(val);
+         printer.print("'");
+         return;
+     }
+     char c = val.charAt(0);
+     switch (c) {
+         case 0:
+             printer.print("'\\0'");
+             break;
+         case '\n':
+             printer.print("'\\n'");
+             break;
+         case '\t':
+             printer.print("'\\t'");
+             break;
+         case 0xB:
+             printer.print("'\\v'");
+             break;
+         case '\b':
+             printer.print("'\\b'");
+             break;
+         case '\r':
+             printer.print("'\\r'");
+             break;
+         case 0xC:
+             printer.print("'\\f'");
+             break;
+         case 0x7:
+             printer.print("'\\a'");
+             break;
+         case '\\':
+             printer.print("'\\\\'");
+             break;
+         case '?':
+             printer.print("'\\?'");
+             break;
+         case '\'':
+             printer.print("'\\''");
+             break;
+         case '"':
+             printer.print("'\\\"'");
+             break;
+         default:
+             // 0x7F (DEL) is excluded: it is not printable.
+             if (c >= 0x20 && c < 0x7F) {
+                 printer.print("'" + c);
+                 printer.print("'");
+             } else {
+                 printer.print("0x");
+                 printer.print(Integer.toHexString(c));
+             }
+             break;
+     }
+ }
+
+ /**
+  * Prints the literal's value text unchanged.
+  */
+ public void visit(DoubleLiteralExpr n, LocalSymbolTable arg) {
+     final String text = n.getValue();
+     printer.print(text);
+ }
+
+ /**
+  * Prints the literal's value text unchanged.
+  */
+ public void visit(IntegerLiteralExpr n, LocalSymbolTable arg) {
+     final String text = n.getValue();
+     printer.print(text);
+ }
+
+ /**
+  * Prints the literal's value text unchanged.
+  */
+ public void visit(LongLiteralExpr n, LocalSymbolTable arg) {
+     final String text = n.getValue();
+     printer.print(text);
+ }
+
+ /**
+  * Prints the literal's value text unchanged.
+  */
+ public void visit(IntegerLiteralMinValueExpr n, LocalSymbolTable arg) {
+     final String text = n.getValue();
+     printer.print(text);
+ }
+
+ /**
+  * Prints the literal's value text unchanged.
+  */
+ public void visit(LongLiteralMinValueExpr n, LocalSymbolTable arg) {
+     final String text = n.getValue();
+     printer.print(text);
+ }
+
+ /**
+  * Prints the C++ replacement for a string literal. Well-known namespace
+  * URIs map to dedicated namespace literals; the empty string inside
+  * {@code AttributeName} maps to the no-namespace literal; a set of
+  * literals selected by prefix, by value within {@code TreeBuilder}, or by
+  * the {@code isQuirky} method map through
+  * {@code cppTypes.stringForLiteral}; everything else maps through
+  * {@code cppTypes.localForLiteral}.
+  */
+ public void visit(StringLiteralExpr n, LocalSymbolTable arg) {
+     final String val = n.getValue();
+     final String output;
+     if ("http://www.w3.org/1999/xhtml".equals(val)) {
+         output = cppTypes.xhtmlNamespaceLiteral();
+     } else if ("http://www.w3.org/2000/svg".equals(val)) {
+         output = cppTypes.svgNamespaceLiteral();
+     } else if ("http://www.w3.org/2000/xmlns/".equals(val)) {
+         output = cppTypes.xmlnsNamespaceLiteral();
+     } else if ("http://www.w3.org/XML/1998/namespace".equals(val)) {
+         output = cppTypes.xmlNamespaceLiteral();
+     } else if ("http://www.w3.org/1999/xlink".equals(val)) {
+         output = cppTypes.xlinkNamespaceLiteral();
+     } else if ("http://www.w3.org/1998/Math/MathML".equals(val)) {
+         output = cppTypes.mathmlNamespaceLiteral();
+     } else if ("".equals(val) && "AttributeName".equals(javaClassName)) {
+         output = cppTypes.noNamespaceLiteral();
+     } else if (val.startsWith("-/") || val.startsWith("+//")
+             || val.startsWith("http://") || val.startsWith("XSLT")) {
+         output = cppTypes.stringForLiteral(val);
+     } else if (("hidden".equals(val) || "isindex".equals(val)
+             || "text/html".equals(val)
+             || "application/xhtml+xml".equals(val) || "content-type".equals(val))
+             && "TreeBuilder".equals(javaClassName)) {
+         output = cppTypes.stringForLiteral(val);
+     } else if ("isQuirky".equals(currentMethod) && "html".equals(val)) {
+         output = cppTypes.stringForLiteral(val);
+     } else {
+         output = cppTypes.localForLiteral(val);
+     }
+     printer.print(output);
+ }
+
+ /**
+  * Prints the target-language spelling of {@code true} or {@code false} as
+  * supplied by {@code cppTypes}.
+  */
+ public void visit(BooleanLiteralExpr n, LocalSymbolTable arg) {
+     if (!n.getValue()) {
+         printer.print(cppTypes.falseLiteral());
+         return;
+     }
+     printer.print(cppTypes.trueLiteral());
+ }
+
+ /** Prints the null literal supplied by {@code cppTypes}. */
+ public void visit(NullLiteralExpr n, LocalSymbolTable arg) {
+     printer.print(cppTypes.nullLiteral());
+ }
+
+ public void visit(ThisExpr n, LocalSymbolTable arg) {
+ if (n.getClassExpr() != null) {
+ n.getClassExpr().accept(this, arg);
+ printer.print(".");
+ }
+ printer.print("this");
+ }
+
+ public void visit(SuperExpr n, LocalSymbolTable arg) {
+ if (n.getClassExpr() != null) {
+ n.getClassExpr().accept(this, arg);
+ printer.print(".");
+ }
+ printer.print("super");
+ }
+
+ public void visit(MethodCallExpr n, LocalSymbolTable arg) {
+ if ("releaseArray".equals(n.getName())
+ && "Portability".equals(n.getScope().toString())) {
+ n.getArgs().get(0).accept(this, arg);
+ printer.print(".release()");
+ } else if ("deleteArray".equals(n.getName())
+ && "Portability".equals(n.getScope().toString())) {
+ printer.print("delete[] ");
+ n.getArgs().get(0).accept(this, arg);
+ } else if ("delete".equals(n.getName())
+ && "Portability".equals(n.getScope().toString())) {
+ printer.print("delete ");
+ n.getArgs().get(0).accept(this, arg);
+ } else if (("retainElement".equals(n.getName()) || "releaseElement".equals(n.getName()))
+ && "Portability".equals(n.getScope().toString())) {
+ // ignore for now
+ } else if ("transition".equals(n.getName())
+ && n.getScope() == null) {
+ visitTransition(n, arg);
+ } else if ("arraycopy".equals(n.getName())
+ && "System".equals(n.getScope().toString())) {
+ printer.print(cppTypes.arrayCopy());
+ printer.print("(");
+ if (n.getArgs().get(0).toString().equals(
+ n.getArgs().get(2).toString())) {
+ n.getArgs().get(0).accept(this, arg);
+ printer.print(", ");
+ n.getArgs().get(1).accept(this, arg);
+ printer.print(", ");
+ n.getArgs().get(3).accept(this, arg);
+ printer.print(", ");
+ n.getArgs().get(4).accept(this, arg);
+ } else if (n.getArgs().get(1).toString().equals("0")
+ && n.getArgs().get(3).toString().equals("0")) {
+ n.getArgs().get(0).accept(this, arg);
+ printer.print(", ");
+ n.getArgs().get(2).accept(this, arg);
+ printer.print(", ");
+ n.getArgs().get(4).accept(this, arg);
+ } else {
+ for (Iterator<Expression> i = n.getArgs().iterator(); i.hasNext();) {
+ Expression e = i.next();
+ e.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ printer.print(")");
+ } else if ("binarySearch".equals(n.getName())
+ && "Arrays".equals(n.getScope().toString())) {
+ n.getArgs().get(0).accept(this, arg);
+ printer.print(".binarySearch(");
+ n.getArgs().get(1).accept(this, arg);
+ printer.print(")");
+ } else {
+ Expression scope = n.getScope();
+ if (scope != null) {
+ if (scope instanceof StringLiteralExpr) {
+ StringLiteralExpr strLit = (StringLiteralExpr) scope;
+ String str = strLit.getValue();
+ if (!"toCharArray".equals(n.getName())) {
+ throw new IllegalStateException(
+ "Unsupported method call on string literal: "
+ + n.getName());
+ }
+ printer.print("{ ");
+ for (int i = 0; i < str.length(); i++) {
+ char c = str.charAt(i);
+ if (i != 0) {
+ printer.print(", ");
+ }
+ printCharLiteral("" + c);
+ }
+ printer.print(" }");
+ return;
+ } else {
+ String clazzName = classNameFromExpression(scope);
+ if (clazzName == null) {
+ scope.accept(this, arg);
+ if ("length".equals(n.getName())
+ || "charAt".equals(n.getName())) {
+ printer.print(".");
+ } else {
+ printer.print("->");
+ }
+ } else {
+ printer.print(cppTypes.classPrefix());
+ printer.print(clazzName);
+ printer.print("::");
+ }
+ }
+ }
+ printTypeArgs(n.getTypeArgs(), arg);
+ printer.print(n.getName());
+ if ("stateLoop".equals(n.getName())
+ && "Tokenizer".equals(javaClassName)
+ && cppTypes.stateLoopPolicies().length > 0) {
+ printer.print("<");
+ printer.print(cppTypes.stateLoopPolicies()[stateLoopCallCount]);
+ printer.print(">");
+ stateLoopCallCount++;
+ }
+ printer.print("(");
+ if (n.getArgs() != null) {
+ for (Iterator<Expression> i = n.getArgs().iterator(); i.hasNext();) {
+ Expression e = i.next();
+ e.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ printer.print(")");
+ }
+ }
+
+ public void visit(ObjectCreationExpr n, LocalSymbolTable arg) {
+ if (n.getScope() != null) {
+ n.getScope().accept(this, arg);
+ printer.print(".");
+ }
+
+ printer.print("new ");
+
+ suppressPointer = true;
+ printTypeArgs(n.getTypeArgs(), arg);
+ if ("createAttributeName".equals(currentMethod)
+ || "elementNameByBuffer".equals(currentMethod)) {
+ printer.print(cppTypes.classPrefix());
+ printer.print("Releasable");
+ printer.print(n.getType().getName());
+ } else {
+ n.getType().accept(this, arg);
+ }
+ suppressPointer = false;
+
+ if ("AttributeName".equals(n.getType().getName())) {
+ List<Expression> args = n.getArgs();
+ while (args.size() > 3) {
+ args.remove(3);
+ }
+ }
+
+ printer.print("(");
+ if (n.getArgs() != null) {
+ for (Iterator<Expression> i = n.getArgs().iterator(); i.hasNext();) {
+ Expression e = i.next();
+ e.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ printer.print(")");
+
+ if (n.getAnonymousClassBody() != null) {
+ printer.printLn(" {");
+ printer.indent();
+ printMembers(n.getAnonymousClassBody(), arg);
+ printer.unindent();
+ printer.print("}");
+ }
+ }
+
+ public void visit(UnaryExpr n, LocalSymbolTable arg) {
+ switch (n.getOperator()) {
+ case positive:
+ printer.print("+");
+ break;
+ case negative:
+ printer.print("-");
+ break;
+ case inverse:
+ printer.print("~");
+ break;
+ case not:
+ printer.print("!");
+ break;
+ case preIncrement:
+ printer.print("++");
+ break;
+ case preDecrement:
+ printer.print("--");
+ break;
+ }
+
+ n.getExpr().accept(this, arg);
+
+ switch (n.getOperator()) {
+ case posIncrement:
+ printer.print("++");
+ break;
+ case posDecrement:
+ printer.print("--");
+ break;
+ }
+ }
+
+ public void visit(ConstructorDeclaration n, LocalSymbolTable arg) {
+ if ("TreeBuilder".equals(javaClassName)) {
+ return;
+ }
+
+ arg = new LocalSymbolTable(javaClassName, symbolTable);
+
+ // if (n.getJavaDoc() != null) {
+ // n.getJavaDoc().accept(this, arg);
+ // }
+ currentAnnotations = n.getAnnotations();
+
+ printModifiers(n.getModifiers());
+
+ printMethodNamespace();
+ printConstructorExplicit(n.getParameters());
+ printer.print(className);
+ currentAnnotations = null;
+
+ printer.print("(");
+ if (n.getParameters() != null) {
+ for (Iterator<Parameter> i = n.getParameters().iterator(); i.hasNext();) {
+ Parameter p = i.next();
+ p.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ printer.print(")");
+
+ printConstructorBody(n.getBlock(), arg);
+ }
+
+ /**
+  * Called by {@code visit(ConstructorDeclaration)} between the namespace
+  * prefix and the class name. This implementation prints nothing; being
+  * protected, it can be overridden by a subclass.
+  *
+  * @param params the constructor's parameters, as returned by
+  *        {@code getParameters()}
+  */
+ protected void printConstructorExplicit(List<Parameter> params) {
+ }
+
+ /**
+  * Prints a constructor body in C++ form.
+  * <p>
+  * Top-level assignment statements are emitted first as a member
+  * initializer list ({@code : a = b, c = d} as produced by the assignment
+  * visitor while {@code inConstructorBody} is set). The remaining
+  * statements follow inside braces, after the constructor boilerplate from
+  * {@code cppTypes}, if any.
+  *
+  * @param block the constructor's block; its statement list may be
+  *        {@code null}, which is treated as an empty body
+  * @param arg the local symbol table of the constructor
+  */
+ protected void printConstructorBody(BlockStmt block, LocalSymbolTable arg) {
+     inConstructorBody = true;
+     List<Statement> statements = block.getStmts();
+     List<Statement> nonAssigns = new LinkedList<Statement>();
+     int i = 0;
+     boolean needOutdent = false;
+     // Other visitors in this class guard getStmts() against null; do the
+     // same here so a constructor without statements does not fail.
+     if (statements != null) {
+         for (Statement statement : statements) {
+             if (statement instanceof ExpressionStmt
+                     && ((ExpressionStmt) statement).getExpression() instanceof AssignExpr) {
+                 if (i == 0) {
+                     printer.printLn();
+                     printer.indent();
+                     printer.print(": ");
+                     needOutdent = true;
+                 } else {
+                     printer.print(",");
+                     printer.printLn();
+                     printer.print(" ");
+                 }
+                 statement.accept(this, arg);
+                 i++;
+             } else {
+                 nonAssigns.add(statement);
+             }
+         }
+     }
+     if (needOutdent) {
+         printer.unindent();
+     }
+     inConstructorBody = false;
+     printer.printLn();
+     printer.printLn("{");
+     printer.indent();
+     String boilerplate = cppTypes.constructorBoilerplate(className);
+     if (boilerplate != null) {
+         printer.printLn(boilerplate);
+     }
+     for (Statement statement : nonAssigns) {
+         statement.accept(this, arg);
+         printer.printLn();
+     }
+     printer.unindent();
+     printer.printLn("}");
+     printer.printLn();
+ }
+
+ public void visit(MethodDeclaration n, LocalSymbolTable arg) {
+ arg = new LocalSymbolTable(javaClassName, symbolTable);
+ if (isPrintableMethod(n.getModifiers())
+ && !(n.getName().equals("endCoalescing") || n.getName().equals(
+ "startCoalescing"))) {
+ printMethodDeclaration(n, arg);
+ }
+ }
+
+ /**
+  * Decides from the modifiers whether a method is translated. Abstract
+  * methods never are; protected methods only when they are final or belong
+  * to {@code Tokenizer}; all others are.
+  */
+ private boolean isPrintableMethod(int modifiers) {
+     if (ModifierSet.isAbstract(modifiers)) {
+         return false;
+     }
+     if (!ModifierSet.isProtected(modifiers)) {
+         return true;
+     }
+     return ModifierSet.isFinal(modifiers)
+             || "Tokenizer".equals(javaClassName);
+ }
+
+ /**
+  * Prints a method declaration or definition.
+  * <p>
+  * Methods whose names start with {@code fatal}, {@code err}, {@code warn},
+  * {@code maybeErr}, {@code maybeWarn} or {@code note}, and the methods
+  * {@code releaseArray}, {@code deleteArray} and {@code delete}, are
+  * skipped. A method named {@code destructor} is printed as
+  * {@code ~ClassName} with public modifiers and no return type. The body
+  * is printed when {@code inHeader()} equals {@code inline()}; otherwise
+  * the declaration ends with a semicolon.
+  *
+  * @param n the method to print
+  * @param arg the local symbol table of the method
+  */
+ protected void printMethodDeclaration(MethodDeclaration n,
+ LocalSymbolTable arg) {
+ if (n.getName().startsWith("fatal") || n.getName().startsWith("err")
+ || n.getName().startsWith("warn")
+ || n.getName().startsWith("maybeErr")
+ || n.getName().startsWith("maybeWarn")
+ || n.getName().startsWith("note")
+ || "releaseArray".equals(n.getName())
+ || "deleteArray".equals(n.getName())
+ || "delete".equals(n.getName())) {
+ return;
+ }
+
+ // State read by other visitors while this method is being printed.
+ currentMethod = n.getName();
+
+ destructor = "destructor".equals(currentMethod);
+
+ // if (n.getJavaDoc() != null) {
+ // n.getJavaDoc().accept(this, arg);
+ // }
+ currentAnnotations = n.getAnnotations();
+ boolean isInline = inline();
+ // Inline methods are printed only when inHeader() is true.
+ // NOTE(review): this early return leaves currentAnnotations set.
+ if (isInline && !inHeader()) {
+ return;
+ }
+
+ if (destructor) {
+ printModifiers(ModifierSet.PUBLIC);
+ } else {
+ printModifiers(n.getModifiers());
+ }
+
+ // Tokenizer.stateLoop becomes a template when policies are configured.
+ if ("stateLoop".equals(currentMethod)
+ && "Tokenizer".equals(javaClassName)
+ && cppTypes.stateLoopPolicies().length > 0) {
+ printer.print("template<class P>");
+ if (inHeader()) {
+ printer.print(" ");
+ } else {
+ printer.printLn();
+ }
+ }
+
+ printTypeParameters(n.getTypeParameters(), arg);
+ if (n.getTypeParameters() != null) {
+ printer.print(" ");
+ }
+ // A destructor has no return type.
+ if (!destructor) {
+ n.getType().accept(this, arg);
+ printer.print(" ");
+ }
+ printMethodNamespace();
+ if (destructor) {
+ printer.print("~");
+ printer.print(className);
+ } else {
+ printer.print(n.getName());
+ }
+
+ currentAnnotations = null;
+ printer.print("(");
+ if (n.getParameters() != null) {
+ for (Iterator<Parameter> i = n.getParameters().iterator(); i.hasNext();) {
+ Parameter p = i.next();
+ p.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ printer.print(")");
+
+ for (int i = 0; i < n.getArrayCount(); i++) {
+ printer.print("[]");
+ }
+
+ // Body is printed in the header for inline methods and outside the
+ // header for non-inline ones; otherwise only the declaration is printed.
+ if (inHeader() == isInline) {
+ printMethodBody(n.getBody(), arg);
+ } else {
+ printer.printLn(";");
+ }
+ }
+
+ /**
+  * Prints a method body in braces followed by two line breaks, or a lone
+  * semicolon when there is no body. A destructor body starts with the
+  * destructor boilerplate from {@code cppTypes}, if any.
+  */
+ private void printMethodBody(BlockStmt n, LocalSymbolTable arg) {
+     if (n != null) {
+         printer.printLn();
+         printer.printLn("{");
+         printer.indent();
+         if (destructor) {
+             String destructorPrologue = cppTypes.destructorBoilderplate(className);
+             if (destructorPrologue != null) {
+                 printer.printLn(destructorPrologue);
+             }
+         }
+         List<Statement> body = n.getStmts();
+         if (body != null) {
+             for (Statement statement : body) {
+                 statement.accept(this, arg);
+                 printer.printLn();
+             }
+         }
+         printer.unindent();
+         printer.print("}");
+     } else {
+         printer.print(";");
+     }
+     printer.printLn();
+     printer.printLn();
+ }
+
+ /**
+  * Starts a new line and prints the class-name qualifier
+  * ({@code ClassName::}) that precedes a member name in a definition.
+  */
+ protected void printMethodNamespace() {
+ printer.printLn();
+ printer.print(className);
+ printer.print("::");
+ }
+
+ public void visit(Parameter n, LocalSymbolTable arg) {
+ currentAnnotations = n.getAnnotations();
+
+ arg.putLocalType(n.getId().getName(), convertType(n.getType(),
+ n.getModifiers()));
+
+ n.getType().accept(this, arg);
+ if (n.isVarArgs()) {
+ printer.print("...");
+ }
+ printer.print(" ");
+ n.getId().accept(this, arg);
+ currentAnnotations = null;
+ }
+
+ public void visit(ExplicitConstructorInvocationStmt n, LocalSymbolTable arg) {
+ if (n.isThis()) {
+ printTypeArgs(n.getTypeArgs(), arg);
+ printer.print("this");
+ } else {
+ if (n.getExpr() != null) {
+ n.getExpr().accept(this, arg);
+ printer.print(".");
+ }
+ printTypeArgs(n.getTypeArgs(), arg);
+ printer.print("super");
+ }
+ printer.print("(");
+ if (n.getArgs() != null) {
+ for (Iterator<Expression> i = n.getArgs().iterator(); i.hasNext();) {
+ Expression e = i.next();
+ e.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ printer.print(");");
+ }
+
+ /**
+  * Prints a local variable declaration (type followed by the
+  * comma-separated declarators) and records the converted type of the
+  * first declared variable in the local symbol table.
+  */
+ public void visit(VariableDeclarationExpr n, LocalSymbolTable arg) {
+     currentAnnotations = n.getAnnotations();
+
+     // Key the entry by the bare identifier, as visit(Parameter) does.
+     // The declarator's toString() would also contain any initializer
+     // ("i = 0"), which can never match a lookup by variable name.
+     arg.putLocalType(n.getVars().get(0).getId().getName(), convertType(
+             n.getType(), n.getModifiers()));
+
+     n.getType().accept(this, arg);
+     printer.print(" ");
+
+     for (Iterator<VariableDeclarator> i = n.getVars().iterator(); i.hasNext();) {
+         VariableDeclarator v = i.next();
+         v.accept(this, arg);
+         if (i.hasNext()) {
+             printer.print(", ");
+         }
+     }
+     currentAnnotations = null;
+ }
+
+ /** Visits the type declaration wrapped by a declaration statement. */
+ public void visit(TypeDeclarationStmt n, LocalSymbolTable arg) {
+     n.getTypeDeclaration().accept(this, arg);
+ }
+
+ /**
+  * Prints a Java {@code assert} as a C++ assertion macro call.
+  * <p>
+  * The macro comes from {@code cppTypes.assertionMacro()}. A message that
+  * starts with {@code "RELEASE: "} has that prefix removed and selects
+  * {@code releaseAssertionMacro()} instead; if in addition the check is
+  * the literal {@code false}, {@code crashMacro()} is used and the check
+  * is not printed. Nothing is printed when the chosen macro is
+  * {@code null}.
+  *
+  * @throws RuntimeException if the message is not a string literal or
+  *         contains a character outside the range {@code ' '}..{@code '~'}
+  */
+ public void visit(AssertStmt n, LocalSymbolTable arg) {
+ String message = null;
+ Expression msg = n.getMessage();
+ boolean hasCheck = true;
+ if (msg != null) {
+ if (msg instanceof StringLiteralExpr) {
+ StringLiteralExpr sle = (StringLiteralExpr) msg;
+ message = sle.getValue();
+ } else {
+ throw new RuntimeException("Bad assertion message.");
+ }
+ }
+ String macro = cppTypes.assertionMacro();
+ if (message != null && message.startsWith("RELEASE: ")) {
+ message = message.substring("RELEASE: ".length());
+ macro = cppTypes.releaseAssertionMacro();
+ Expression check = n.getCheck();
+ if (check instanceof BooleanLiteralExpr) {
+ BooleanLiteralExpr expr = (BooleanLiteralExpr) check;
+ if (!expr.getValue()) {
+ // assert false with a RELEASE message: unconditional crash macro.
+ hasCheck = false;
+ macro = cppTypes.crashMacro();
+ }
+ }
+ }
+ if (macro != null) {
+ printer.print(macro);
+ printer.print("(");
+ if (hasCheck) {
+ n.getCheck().accept(this, arg);
+ }
+ if (message != null) {
+ if (hasCheck) {
+ printer.print(", ");
+ }
+ printer.print("\"");
+ // The message is copied character by character. The '"' branch prints
+ // the same single character the printable-ASCII branch would.
+ // NOTE(review): this is only correct if getValue() returns the literal
+ // text with its source escapes intact - confirm against the parser.
+ for (int i = 0; i < message.length(); i++) {
+ char c = message.charAt(i);
+ if (c == '"') {
+ printer.print("\"");
+ } else if (c >= ' ' && c <= '~') {
+ printer.print("" + c);
+ } else {
+ throw new RuntimeException("Bad assertion message string.");
+ }
+ }
+ printer.print("\"");
+ }
+ printer.print(");");
+ }
+ }
+
+ /**
+  * Prints a brace-delimited block with each statement on its own indented
+  * line. No line break is printed after the closing brace.
+  */
+ public void visit(BlockStmt n, LocalSymbolTable arg) {
+     printer.printLn("{");
+     List<Statement> body = n.getStmts();
+     if (body != null) {
+         printer.indent();
+         for (Statement statement : body) {
+             statement.accept(this, arg);
+             printer.printLn();
+         }
+         printer.unindent();
+     }
+     printer.print("}");
+ }
+
+ public void visit(LabeledStmt n, LocalSymbolTable arg) {
+ // Only conditionless for loops are needed and supported
+ // Not implementing general Java continue semantics in order
+ // to keep the generated C++ more readable.
+ Statement stmt = n.getStmt();
+ if (stmt instanceof ForStmt) {
+ ForStmt forLoop = (ForStmt) stmt;
+ if (!(forLoop.getInit() == null && forLoop.getCompare() == null && forLoop.getUpdate() == null)) {
+ forLoopsWithCondition.add(n.getLabel());
+ }
+ } else {
+ throw new IllegalStateException(
+ "Only for loop supported as labeled statement. Line: "
+ + n.getBeginLine());
+ }
+ String label = n.getLabel();
+ if (labels.contains(label)) {
+ printer.print(label);
+ printer.print(": ");
+ }
+ stmt.accept(this, arg);
+ printer.printLn();
+ label += "_end";
+ if (labels.contains(label)) {
+ printer.print(label);
+ printer.print(": ;");
+ }
+ }
+
+ /** Prints an empty statement as a lone semicolon. */
+ public void visit(EmptyStmt n, LocalSymbolTable arg) {
+ printer.print(";");
+ }
+
+ /**
+  * Prints an expression statement.
+  * <p>
+  * A completed-character-reference call is replaced by the snippet from
+  * {@code cppTypes}. Dropped expressions print nothing. Tokenizer error
+  * reporting calls are wrapped in an {@code if} on the tokenizer error
+  * condition. The trailing semicolon is omitted while a constructor's
+  * initializer list is being printed.
+  */
+ public void visit(ExpressionStmt n, LocalSymbolTable arg) {
+     Expression expression = n.getExpression();
+     if (isCompletedCharacterReference(expression)) {
+         printer.print(cppTypes.completedCharacterReference());
+         printer.print(";");
+         return;
+     }
+
+     boolean guarded = isTokenizerErrorReportingExpression(expression);
+     if (!guarded) {
+         if (isDroppedExpression(expression)) {
+             return;
+         }
+     } else {
+         printer.print("if (");
+         printer.print(cppTypes.tokenizerErrorCondition());
+         printer.printLn(") {");
+         printer.indent();
+     }
+
+     expression.accept(this, arg);
+     if (!inConstructorBody) {
+         printer.print(";");
+     }
+
+     if (guarded) {
+         printer.printLn();
+         printer.unindent();
+         printer.print("}");
+     }
+ }
+
+ /**
+  * Prints a {@code transition(...)} call. When transitions are not
+  * reported only the second argument is printed; otherwise the transition
+  * macro from {@code cppTypes} is called with its configured first argument
+  * followed by the call's second, third and fourth arguments.
+  */
+ private void visitTransition(MethodCallExpr call, LocalSymbolTable arg) {
+     List<Expression> callArgs = call.getArgs();
+     if (!reportTransitions) {
+         callArgs.get(1).accept(this, arg);
+         return;
+     }
+     printer.print(cppTypes.transition());
+     printer.print("(");
+     printer.print(cppTypes.firstTransitionArg());
+     for (int index = 1; index <= 3; index++) {
+         printer.print(", ");
+         callArgs.get(index).accept(this, arg);
+     }
+     printer.print(")");
+ }
+
+ /**
+  * Tells whether the expression is an {@code err*}/{@code maybeErr*} call
+  * (other than {@code errHtml4*}) inside {@code stateLoop} while both
+  * transition reporting and error reporting are enabled.
+  */
+ private boolean isTokenizerErrorReportingExpression(Expression e) {
+     if (!reportTransitions || !(e instanceof MethodCallExpr)) {
+         return false;
+     }
+     String callee = ((MethodCallExpr) e).getName();
+     return supportErrorReporting
+             && !callee.startsWith("errHtml4")
+             && "stateLoop".equals(currentMethod)
+             && (callee.startsWith("err") || callee.startsWith("maybeErr"));
+ }
+
+ /**
+  * Tells whether the expression is a call to
+  * {@code completedNamedCharacterReference} while transition reporting is
+  * enabled.
+  */
+ private boolean isCompletedCharacterReference(Expression e) {
+     if (!reportTransitions || !(e instanceof MethodCallExpr)) {
+         return false;
+     }
+     String callee = ((MethodCallExpr) e).getName();
+     return callee.equals("completedNamedCharacterReference");
+ }
+
+ /**
+  * Tells whether a method call expression is omitted from the output.
+  * <p>
+  * Calls starting with {@code fatal}, {@code note}, {@code errHtml4},
+  * {@code warn} or {@code maybeWarn} are always dropped. With error
+  * reporting supported but transitions not reported, {@code err*} and
+  * {@code maybeErr*} calls inside {@code stateLoop} are dropped too, as is
+  * {@code completedNamedCharacterReference} whenever transitions are not
+  * reported.
+  */
+ private boolean isDroppedExpression(Expression e) {
+     if (!(e instanceof MethodCallExpr)) {
+         return false;
+     }
+     String callee = ((MethodCallExpr) e).getName();
+
+     boolean alwaysDropped = callee.startsWith("fatal")
+             || callee.startsWith("note")
+             || callee.startsWith("errHtml4")
+             || callee.startsWith("warn")
+             || callee.startsWith("maybeWarn");
+     if (alwaysDropped) {
+         return true;
+     }
+
+     boolean errorCall = callee.startsWith("err")
+             || callee.startsWith("maybeErr");
+     if (supportErrorReporting && "stateLoop".equals(currentMethod)
+             && !reportTransitions && errorCall) {
+         return true;
+     }
+
+     return callee.equals("completedNamedCharacterReference")
+             && !reportTransitions;
+ }
+
+ /**
+  * Prints a {@code switch} statement with its entries indented one level.
+  * No line break is printed after the closing brace.
+  */
+ public void visit(SwitchStmt n, LocalSymbolTable arg) {
+     printer.print("switch(");
+     n.getSelector().accept(this, arg);
+     printer.printLn(") {");
+     List<SwitchEntryStmt> entries = n.getEntries();
+     if (entries != null) {
+         printer.indent();
+         for (SwitchEntryStmt entry : entries) {
+             entry.accept(this, arg);
+         }
+         printer.unindent();
+     }
+     printer.print("}");
+ }
+
+ /**
+  * Prints one {@code case} or {@code default} entry of a switch.
+  * <p>
+  * The {@code MENUITEM} case label is wrapped in
+  * {@code #ifdef ENABLE_VOID_MENUITEM} / {@code #endif}. An entry whose
+  * statements are absent or all droppable prints no body (a
+  * {@code default} entry gets an empty statement instead); otherwise the
+  * statements are printed inside braces.
+  */
+ public void visit(SwitchEntryStmt n, LocalSymbolTable arg) {
+ if (n.getLabel() != null) {
+ boolean isMenuitem = n.getLabel().toString().equals("MENUITEM");
+ if (isMenuitem) {
+ // Preprocessor lines are printed without indentation.
+ printer.printWithoutIndent("#ifdef ENABLE_VOID_MENUITEM\n");
+ }
+ printer.print("case ");
+ n.getLabel().accept(this, arg);
+ printer.print(":");
+ if (isMenuitem) {
+ printer.printWithoutIndent("\n#endif");
+ }
+ } else {
+ printer.print("default:");
+ }
+ if (isNoStatement(n.getStmts())) {
+ printer.printLn();
+ printer.indent();
+ // Only the default entry gets an explicit empty statement.
+ if (n.getLabel() == null) {
+ printer.printLn("; // fall through");
+ }
+ printer.unindent();
+ } else {
+ printer.printLn(" {");
+ printer.indent();
+ for (Statement s : n.getStmts()) {
+ s.accept(this, arg);
+ printer.printLn();
+ }
+ printer.unindent();
+ printer.printLn("}");
+ }
+ }
+
+ /**
+  * Tells whether a statement list produces no output: it is {@code null}
+  * or every statement in it is droppable.
+  */
+ private boolean isNoStatement(List<Statement> stmts) {
+     if (stmts != null) {
+         for (Statement candidate : stmts) {
+             if (!isDroppableStatement(candidate)) {
+                 return false;
+             }
+         }
+     }
+     return true;
+ }
+
+ /**
+  * Tells whether a statement is an assertion or an expression statement
+  * whose expression is dropped.
+  */
+ private boolean isDroppableStatement(Statement statement) {
+     if (statement instanceof AssertStmt) {
+         return true;
+     }
+     if (!(statement instanceof ExpressionStmt)) {
+         return false;
+     }
+     Expression inner = ((ExpressionStmt) statement).getExpression();
+     return isDroppedExpression(inner);
+ }
+
+ public void visit(BreakStmt n, LocalSymbolTable arg) {
+ if (n.getId() != null) {
+ printer.print(cppTypes.breakMacro());
+ printer.print("(");
+ printer.print(n.getId());
+ printer.print(")");
+ } else {
+ printer.print("break");
+ }
+ printer.print(";");
+ }
+
+ public void visit(ReturnStmt n, LocalSymbolTable arg) {
+ printer.print("return");
+ if (n.getExpr() != null) {
+ printer.print(" ");
+ n.getExpr().accept(this, arg);
+ }
+ printer.print(";");
+ }
+
+ /**
+  * Prints an enum declaration: Javadoc, modifiers, name, optional
+  * {@code implements} list, the comma-separated constants and the members.
+  * The output keeps the Java keywords ({@code enum}, {@code implements}).
+  */
+ public void visit(EnumDeclaration n, LocalSymbolTable arg) {
+ if (n.getJavaDoc() != null) {
+ n.getJavaDoc().accept(this, arg);
+ }
+ currentAnnotations = n.getAnnotations();
+ // if (annotations != null) {
+ // for (AnnotationExpr a : annotations) {
+ // a.accept(this, arg);
+ // printer.printLn();
+ // }
+ // }
+ printModifiers(n.getModifiers());
+
+ printer.print("enum ");
+ printer.print(n.getName());
+
+ currentAnnotations = null;
+
+ if (n.getImplements() != null) {
+ printer.print(" implements ");
+ for (Iterator<ClassOrInterfaceType> i = n.getImplements().iterator(); i.hasNext();) {
+ ClassOrInterfaceType c = i.next();
+ c.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+
+ printer.printLn(" {");
+ printer.indent();
+ if (n.getEntries() != null) {
+ printer.printLn();
+ for (Iterator<EnumConstantDeclaration> i = n.getEntries().iterator(); i.hasNext();) {
+ EnumConstantDeclaration e = i.next();
+ e.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ // A semicolon ends the constant list only when members follow it.
+ if (n.getMembers() != null) {
+ printer.printLn(";");
+ printMembers(n.getMembers(), arg);
+ } else {
+ if (n.getEntries() != null) {
+ printer.printLn();
+ }
+ }
+ printer.unindent();
+ printer.print("}");
+ }
+
+ public void visit(EnumConstantDeclaration n, LocalSymbolTable arg) {
+ if (n.getJavaDoc() != null) {
+ n.getJavaDoc().accept(this, arg);
+ }
+ currentAnnotations = n.getAnnotations();
+ // if (annotations != null) {
+ // for (AnnotationExpr a : annotations) {
+ // a.accept(this, arg);
+ // printer.printLn();
+ // }
+ // }
+ printer.print(n.getName());
+
+ currentAnnotations = null;
+
+ if (n.getArgs() != null) {
+ printer.print("(");
+ for (Iterator<Expression> i = n.getArgs().iterator(); i.hasNext();) {
+ Expression e = i.next();
+ e.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ printer.print(")");
+ }
+
+ if (n.getClassBody() != null) {
+ printer.printLn(" {");
+ printer.indent();
+ printMembers(n.getClassBody(), arg);
+ printer.unindent();
+ printer.printLn("}");
+ }
+ }
+
+ public void visit(EmptyMemberDeclaration n, LocalSymbolTable arg) {
+ if (n.getJavaDoc() != null) {
+ n.getJavaDoc().accept(this, arg);
+ }
+ printer.print(";");
+ }
+
+ public void visit(InitializerDeclaration n, LocalSymbolTable arg) {
+ if (n.getJavaDoc() != null) {
+ n.getJavaDoc().accept(this, arg);
+ }
+ if (n.isStatic()) {
+ printer.print("static ");
+ }
+ n.getBlock().accept(this, arg);
+ }
+
+ /**
+  * Prints an {@code if} statement, removing branches that exist only for
+  * error reporting.
+  * <ul>
+  * <li>A document-mode-handler null check is replaced by its then-branch
+  * (unwrapped when that branch is a block with exactly one statement).</li>
+  * <li>An {@code if} whose condition is an error-handler test prints
+  * nothing.</li>
+  * <li>When the then-branch is error-only, the statement is printed as an
+  * {@code if} on the negated condition around the else-branch, provided
+  * the else-branch exists and is not itself error-only.</li>
+  * <li>Otherwise the statement is printed normally, the condition being
+  * wrapped in the {@code unlikely} macro inside the methods listed in
+  * {@code METHODS_WITH_UNLIKELY_CONDITIONS}; an error-only else-branch is
+  * left out.</li>
+  * </ul>
+  */
+ public void visit(IfStmt n, LocalSymbolTable arg) {
+ if (TranslatorUtils.isDocumentModeHandlerNullCheck(n.getCondition())) {
+ Statement then = n.getThenStmt();
+ if (then instanceof BlockStmt) {
+ BlockStmt block = (BlockStmt) then;
+ List<Statement> statements = block.getStmts();
+ if (statements != null && statements.size() == 1) {
+ statements.get(0).accept(this, arg);
+ } else {
+ then.accept(this, arg);
+ }
+ } else {
+ then.accept(this, arg);
+ }
+ } else if (!TranslatorUtils.isErrorHandlerIf(n.getCondition(), supportErrorReporting)) {
+ if (TranslatorUtils.isErrorOnlyBlock(n.getThenStmt(), supportErrorReporting)) {
+ if (n.getElseStmt() != null
+ && !TranslatorUtils.isErrorOnlyBlock(n.getElseStmt(), supportErrorReporting)) {
+ printer.print("if (");
+ // Negate the condition: == and != are flipped in place, anything
+ // else is wrapped in !( ... ).
+ if (n.getCondition() instanceof BinaryExpr) {
+ BinaryExpr binExpr = (BinaryExpr) n.getCondition();
+ switch (binExpr.getOperator()) {
+ case equals:
+ binExpr.getLeft().accept(this, arg);
+ printer.print(" != ");
+ binExpr.getRight().accept(this, arg);
+ break;
+ case notEquals:
+ binExpr.getLeft().accept(this, arg);
+ printer.print(" == ");
+ binExpr.getRight().accept(this, arg);
+ break;
+ default:
+ printer.print("!(");
+ formatCondition(n.getCondition(), arg);
+ printer.print(")");
+ break;
+ }
+ } else {
+ printer.print("!(");
+ formatCondition(n.getCondition(), arg);
+ printer.print(")");
+ }
+ printer.print(") ");
+ n.getElseStmt().accept(this, arg);
+ }
+ } else {
+ // binarySearch is used for the lookup, which requires
+ // METHODS_WITH_UNLIKELY_CONDITIONS to be sorted.
+ boolean unlikely = (currentMethod != null)
+ && (Arrays.binarySearch(
+ METHODS_WITH_UNLIKELY_CONDITIONS,
+ currentMethod) >= 0);
+ printer.print("if (");
+ if (unlikely) {
+ printer.print(cppTypes.unlikely());
+ printer.print("(");
+ }
+ formatCondition(n.getCondition(), arg);
+ if (unlikely) {
+ printer.print(")");
+ }
+ printer.print(") ");
+ n.getThenStmt().accept(this, arg);
+ if (n.getElseStmt() != null
+ && !TranslatorUtils.isErrorOnlyBlock(n.getElseStmt(), supportErrorReporting)) {
+ printer.print(" else ");
+ n.getElseStmt().accept(this, arg);
+ }
+ }
+ }
+ }
+
+ /**
+  * Prints a condition, shortening {@code x != null} to just {@code x}.
+  * Every other expression is printed unchanged.
+  */
+ private void formatCondition(Expression expr, LocalSymbolTable arg) {
+     if (expr instanceof BinaryExpr) {
+         BinaryExpr comparison = (BinaryExpr) expr;
+         switch (comparison.getOperator()) {
+             case notEquals:
+                 if (comparison.getRight() instanceof NullLiteralExpr) {
+                     comparison.getLeft().accept(this, arg);
+                     return;
+                 }
+                 break;
+             default:
+                 break;
+         }
+     }
+     expr.accept(this, arg);
+ }
+
+
+ public void visit(WhileStmt n, LocalSymbolTable arg) {
+ printer.print("while (");
+ n.getCondition().accept(this, arg);
+ printer.print(") ");
+ n.getBody().accept(this, arg);
+ }
+
+ public void visit(ContinueStmt n, LocalSymbolTable arg) {
+ // Not supporting the general Java continue semantics.
+ // Instead, making the generated code more readable for the
+ // case at hand.
+ if (n.getId() != null) {
+ printer.print(cppTypes.continueMacro());
+ printer.print("(");
+ printer.print(n.getId());
+ printer.print(")");
+ if (forLoopsWithCondition.contains(n.getId())) {
+ throw new IllegalStateException(
+ "Continue attempted with a loop that has a condition. "
+ + className + " " + n.getId());
+ }
+ } else {
+ printer.print("continue");
+ }
+ printer.print(";");
+ }
+
+ public void visit(DoStmt n, LocalSymbolTable arg) {
+ printer.print("do ");
+ n.getBody().accept(this, arg);
+ printer.print(" while (");
+ n.getCondition().accept(this, arg);
+ printer.print(");");
+ }
+
+ public void visit(ForeachStmt n, LocalSymbolTable arg) {
+ printer.print("for (");
+ n.getVariable().accept(this, arg);
+ printer.print(" : ");
+ n.getIterable().accept(this, arg);
+ printer.print(") ");
+ n.getBody().accept(this, arg);
+ }
+
+ public void visit(ForStmt n, LocalSymbolTable arg) {
+ printer.print("for (");
+ if (n.getInit() != null) {
+ for (Iterator<Expression> i = n.getInit().iterator(); i.hasNext();) {
+ Expression e = i.next();
+ e.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ printer.print("; ");
+ if (n.getCompare() != null) {
+ n.getCompare().accept(this, arg);
+ }
+ printer.print("; ");
+ if (n.getUpdate() != null) {
+ for (Iterator<Expression> i = n.getUpdate().iterator(); i.hasNext();) {
+ Expression e = i.next();
+ e.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ printer.print(") ");
+ n.getBody().accept(this, arg);
+ }
+
+ public void visit(ThrowStmt n, LocalSymbolTable arg) {
+ printer.print("throw ");
+ n.getExpr().accept(this, arg);
+ printer.print(";");
+ }
+
+ public void visit(SynchronizedStmt n, LocalSymbolTable arg) {
+ printer.print("synchronized (");
+ n.getExpr().accept(this, arg);
+ printer.print(") ");
+ n.getBlock().accept(this, arg);
+ }
+
+ public void visit(TryStmt n, LocalSymbolTable arg) {
+ printer.print("try ");
+ n.getTryBlock().accept(this, arg);
+ if (n.getCatchs() != null) {
+ for (CatchClause c : n.getCatchs()) {
+ c.accept(this, arg);
+ }
+ }
+ if (n.getFinallyBlock() != null) {
+ printer.print(" finally ");
+ n.getFinallyBlock().accept(this, arg);
+ }
+ }
+
+ public void visit(CatchClause n, LocalSymbolTable arg) {
+ printer.print(" catch (");
+ n.getExcept().accept(this, arg);
+ printer.print(") ");
+ n.getCatchBlock().accept(this, arg);
+
+ }
+
+ public void visit(AnnotationDeclaration n, LocalSymbolTable arg) {
+ if (n.getJavaDoc() != null) {
+ n.getJavaDoc().accept(this, arg);
+ }
+ currentAnnotations = n.getAnnotations();
+ // if (annotations != null) {
+ // for (AnnotationExpr a : annotations) {
+ // a.accept(this, arg);
+ // printer.printLn();
+ // }
+ // }
+ printModifiers(n.getModifiers());
+
+ printer.print("@interface ");
+ printer.print(n.getName());
+ currentAnnotations = null;
+ printer.printLn(" {");
+ printer.indent();
+ if (n.getMembers() != null) {
+ printMembers(n.getMembers(), arg);
+ }
+ printer.unindent();
+ printer.print("}");
+ }
+
+ public void visit(AnnotationMemberDeclaration n, LocalSymbolTable arg) {
+ if (n.getJavaDoc() != null) {
+ n.getJavaDoc().accept(this, arg);
+ }
+ currentAnnotations = n.getAnnotations();
+ // if (annotations != null) {
+ // for (AnnotationExpr a : annotations) {
+ // a.accept(this, arg);
+ // printer.printLn();
+ // }
+ // }
+ printModifiers(n.getModifiers());
+
+ n.getType().accept(this, arg);
+ printer.print(" ");
+ printer.print(n.getName());
+ currentAnnotations = null;
+ printer.print("()");
+ if (n.getDefaultValue() != null) {
+ printer.print(" default ");
+ n.getDefaultValue().accept(this, arg);
+ }
+ printer.print(";");
+ }
+
+ /** Prints a marker annotation as {@code @Name}. */
+ public void visit(MarkerAnnotationExpr n, LocalSymbolTable arg) {
+     printer.print("@");
+     n.getName().accept(this, arg);
+ }
+
+ public void visit(SingleMemberAnnotationExpr n, LocalSymbolTable arg) {
+ printer.print("@");
+ n.getName().accept(this, arg);
+ printer.print("(");
+ n.getMemberValue().accept(this, arg);
+ printer.print(")");
+ }
+
+ public void visit(NormalAnnotationExpr n, LocalSymbolTable arg) {
+ printer.print("@");
+ n.getName().accept(this, arg);
+ printer.print("(");
+ if (n.getPairs() != null) {
+ for (Iterator<MemberValuePair> i = n.getPairs().iterator(); i.hasNext();) {
+ MemberValuePair m = i.next();
+ m.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ printer.print(")");
+ }
+
+ public void visit(MemberValuePair n, LocalSymbolTable arg) {
+ printer.print(n.getName());
+ printer.print(" = ");
+ n.getValue().accept(this, arg);
+ }
+
+ public void visit(LineComment n, LocalSymbolTable arg) {
+ printer.print("//");
+ printer.printLn(n.getContent());
+ }
+
+ public void visit(BlockComment n, LocalSymbolTable arg) {
+ printer.print("/*");
+ printer.print(n.getContent());
+ printer.printLn("*/");
+ }
+
+ /**
+  * Sets the label names that are actually printed; labeled statements
+  * consult this set before printing a label or its {@code _end} twin.
+  *
+  * @param labels the set of labels to print; stored by reference
+  */
+ public void setLabels(Set<String> labels) {
+     this.labels = labels;
+ }
+
+}
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/GkAtomParser.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/GkAtomParser.java
new file mode 100644
index 000000000..3d642c0e0
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/GkAtomParser.java
@@ -0,0 +1,70 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser C++ Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2008
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package nu.validator.htmlparser.cpptranslate;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+ /**
+  * Reads <code>GK_ATOM(identifier, "value")</code> lines from a character
+  * stream and collects them into a map keyed by the quoted value.
+  */
+ public class GkAtomParser {
+
+     /**
+      * Matches a line that starts with <code>GK_ATOM(</code>; group 1 is the
+      * identifier before the comma and group 2 is the quoted string value.
+      * Anything after the closing parenthesis is ignored.
+      */
+     private static final Pattern ATOM = Pattern.compile("^GK_ATOM\\(([^,]+),\\s*\"([^\"]*)\"\\).*$");
+
+     private final BufferedReader reader;
+
+     /**
+      * @param reader the source of atom list lines; it is wrapped in a
+      *        <code>BufferedReader</code> and is not closed by this class
+      */
+     public GkAtomParser(Reader reader) {
+         this.reader = new BufferedReader(reader);
+     }
+
+     /**
+      * Consumes the reader to its end and returns every atom found.
+      *
+      * @return a map from the quoted atom value to the atom identifier; lines
+      *         that do not match the atom pattern are skipped
+      * @throws IOException if reading a line fails
+      */
+     public Map<String, String> parse() throws IOException {
+         Map<String, String> identifiersByValue = new HashMap<String, String>();
+         for (String current = reader.readLine(); current != null; current = reader.readLine()) {
+             Matcher matcher = ATOM.matcher(current);
+             if (!matcher.matches()) {
+                 continue;
+             }
+             String identifier = matcher.group(1);
+             String value = matcher.group(2);
+             identifiersByValue.put(value, identifier);
+         }
+         return identifiersByValue;
+     }
+
+ }
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/HVisitor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/HVisitor.java
new file mode 100644
index 000000000..25cf7aef1
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/HVisitor.java
@@ -0,0 +1,306 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser C++ Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2008
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package nu.validator.htmlparser.cpptranslate;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import japa.parser.ast.body.FieldDeclaration;
+import japa.parser.ast.body.MethodDeclaration;
+import japa.parser.ast.body.ModifierSet;
+import japa.parser.ast.body.Parameter;
+import japa.parser.ast.body.VariableDeclarator;
+import japa.parser.ast.expr.IntegerLiteralExpr;
+import japa.parser.ast.expr.MethodCallExpr;
+import japa.parser.ast.stmt.BlockStmt;
+import japa.parser.ast.type.PrimitiveType;
+import japa.parser.ast.type.ReferenceType;
+import japa.parser.ast.type.Type;
+
+ /**
+ * Variant of {@link CppVisitor} that reports {@link #inHeader()} as
+ * <code>true</code> and prints a class declaration instead of definitions:
+ * an include guard, includes and forward declarations, the class body with
+ * C++ access labels, constructors and fields without bodies, and finally
+ * the <code>#define</code> lines collected from int constants.
+ */
+ public class HVisitor extends CppVisitor {
+
+ // The access label most recently printed by printModifiers; NONE until
+ // the first label has been printed.
+ private enum Visibility {
+ NONE, PRIVATE, PUBLIC, PROTECTED,
+ }
+
+ // Lets printModifiers print an access label only when it changes.
+ private Visibility previousVisibility = Visibility.NONE;
+
+ // Complete "#define NAME value" lines gathered by fieldDeclaration and
+ // printed after the class body by endClassDeclaration.
+ private List<String> defines = new LinkedList<String>();
+
+ /**
+ * Prints nothing.
+ * NOTE(review): presumably the base implementation prints a class
+ * qualifier in front of method names, which a declaration inside the
+ * class body does not need -- confirm against CppVisitor.
+ *
+ * @see nu.validator.htmlparser.cpptranslate.CppVisitor#printMethodNamespace()
+ */
+ @Override protected void printMethodNamespace() {
+ }
+
+ /**
+ * @param cppTypes passed to the superclass constructor
+ * @param symbolTable passed to the superclass constructor
+ */
+ public HVisitor(CppTypes cppTypes, SymbolTable symbolTable) {
+ super(cppTypes, symbolTable);
+ }
+
+ /**
+ * Prints everything that precedes the class members: the include guard,
+ * the boilerplate includes and forward declarations, forward declarations
+ * of the other translated classes, further boilerplate declarations and
+ * the opening of the class itself.
+ *
+ * @see nu.validator.htmlparser.cpptranslate.CppVisitor#startClassDeclaration()
+ */
+ @Override protected void startClassDeclaration() {
+ // Include guard named after the class with an "_h" suffix.
+ printer.print("#ifndef ");
+ printer.print(className);
+ printer.printLn("_h");
+ printer.print("#define ");
+ printer.print(className);
+ printer.printLn("_h");
+
+ printer.printLn();
+
+ String[] incs = cppTypes.boilerplateIncludes(javaClassName);
+ for (int i = 0; i < incs.length; i++) {
+ String inc = incs[i];
+ // A header must not include itself.
+ if (className.equals(inc)) {
+ continue;
+ }
+ printer.print("#include \"");
+ printer.print(inc);
+ printer.printLn(".h\"");
+ }
+
+ printer.printLn();
+
+ String[] forwDecls = cppTypes.boilerplateForwardDeclarations();
+ for (int i = 0; i < forwDecls.length; i++) {
+ String decl = forwDecls[i];
+ printer.print("class ");
+ printer.print(decl);
+ printer.printLn(";");
+ }
+
+ printer.printLn();
+
+ // Forward-declare every class in Main.H_LIST except the class being
+ // printed and StackNode.
+ for (int i = 0; i < Main.H_LIST.length; i++) {
+ String klazz = Main.H_LIST[i];
+ if (!(klazz.equals(javaClassName) || klazz.equals("StackNode"))) {
+ printer.print("class ");
+ printer.print(cppTypes.classPrefix());
+ printer.print(klazz);
+ printer.printLn(";");
+ }
+ }
+
+ printer.printLn();
+
+ String[] otherDecls = cppTypes.boilerplateDeclarations(javaClassName);
+ for (int i = 0; i < otherDecls.length; i++) {
+ String decl = otherDecls[i];
+ printer.printLn(decl);
+ }
+
+ printer.printLn();
+
+ printer.print("class ");
+ printer.print(className);
+ // Only StateSnapshot and TreeBuilder get a public base class.
+ if ("StateSnapshot".equals(javaClassName) || "TreeBuilder".equals(javaClassName)) {
+ printer.print(" : public ");
+ printer.print(cppTypes.treeBuilderStateInterface());
+ }
+ printer.printLn();
+ printer.printLn("{");
+ // Two indent levels: printModifiers steps one level back out to print
+ // the access labels.
+ printer.indent();
+ printer.indent();
+ }
+
+ /**
+ * Prints the end of the header: the declarations of the public static
+ * <code>initializeStatics()</code> and <code>releaseStatics()</code>
+ * methods, the optional supplement include, the closing of the class, the
+ * collected <code>#define</code> lines and the closing
+ * <code>#endif</code> of the include guard.
+ *
+ * @see nu.validator.htmlparser.cpptranslate.CppVisitor#endClassDeclaration()
+ */
+ @Override protected void endClassDeclaration() {
+ printModifiers(ModifierSet.PUBLIC | ModifierSet.STATIC);
+ printer.printLn("void initializeStatics();");
+ printModifiers(ModifierSet.PUBLIC | ModifierSet.STATIC);
+ printer.printLn("void releaseStatics();");
+
+ printer.unindent();
+ printer.unindent();
+
+ // The supplement include is printed before the closing "};", so its
+ // content ends up inside the class body.
+ if (cppTypes.hasSupplement(javaClassName)) {
+ printer.printLn();
+ printer.print("#include \"");
+ printer.print(className);
+ printer.printLn("HSupplement.h\"");
+ }
+
+ printer.printLn("};");
+ printer.printLn();
+
+ for (String define : defines) {
+ printer.printLn(define);
+ }
+
+ printer.printLn();
+ printer.printLn();
+ printer.printLn("#endif");
+ }
+
+ /**
+ * Prints a C++ access label (<code>private:</code>,
+ * <code>protected:</code> or <code>public:</code>) when the visibility
+ * differs from that of the previously printed member, then the
+ * <code>inline</code>, <code>virtual</code> and <code>static</code>
+ * keywords as applicable. Anything that is neither private nor protected
+ * is treated as public.
+ *
+ * @param modifiers the Java modifier bit set of the member
+ * @see nu.validator.htmlparser.cpptranslate.CppVisitor#printModifiers(int)
+ */
+ @Override protected void printModifiers(int modifiers) {
+ if (ModifierSet.isPrivate(modifiers)) {
+ if (previousVisibility != Visibility.PRIVATE) {
+ // Print the label one indent level out from the members.
+ printer.unindent();
+ printer.printLn("private:");
+ printer.indent();
+ previousVisibility = Visibility.PRIVATE;
+ }
+ } else if (ModifierSet.isProtected(modifiers)) {
+ if (previousVisibility != Visibility.PROTECTED) {
+ printer.unindent();
+ printer.printLn("protected:");
+ printer.indent();
+ previousVisibility = Visibility.PROTECTED;
+ }
+ } else {
+ if (previousVisibility != Visibility.PUBLIC) {
+ printer.unindent();
+ printer.printLn("public:");
+ printer.indent();
+ previousVisibility = Visibility.PUBLIC;
+ }
+ }
+ // inline() and virtual() are not defined in this class.
+ if (inline()) {
+ printer.print("inline ");
+ }
+ if (virtual()) {
+ printer.print("virtual ");
+ }
+ if (ModifierSet.isStatic(modifiers)) {
+ printer.print("static ");
+ }
+ }
+
+ /**
+ * Handles one field declaration. A static final field of primitive type
+ * must be a single <code>int</code> constant; it is not printed as a
+ * member but recorded as a <code>#define</code> and registered in the
+ * symbol table. Every other field is printed as a member declaration
+ * without an initializer. Only the first declarator of the field is used.
+ *
+ * @param n the field declaration node
+ * @param arg the local symbol table handed on to the child nodes
+ * @throws IllegalStateException for a non-int primitive constant, for a
+ * constant declaration with more than one variable, for a constant
+ * name that is already registered, or for a non-static primitive
+ * array field with an initializer
+ * @see nu.validator.htmlparser.cpptranslate.CppVisitor#fieldDeclaration(japa.parser.ast.body.FieldDeclaration, nu.validator.htmlparser.cpptranslate.LocalSymbolTable)
+ */
+ @Override protected void fieldDeclaration(FieldDeclaration n, LocalSymbolTable arg) {
+ int modifiers = n.getModifiers();
+ List<VariableDeclarator> variables = n.getVariables();
+ VariableDeclarator declarator = variables.get(0);
+ if (ModifierSet.isStatic(modifiers) && ModifierSet.isFinal(modifiers)
+ && n.getType() instanceof PrimitiveType) {
+ PrimitiveType type = (PrimitiveType) n.getType();
+ if (type.getType() != PrimitiveType.Primitive.Int) {
+ throw new IllegalStateException(
+ "Only int constant #defines supported.");
+ }
+ if (variables.size() != 1) {
+ throw new IllegalStateException(
+ "More than one variable declared by one declarator.");
+ }
+ // Key under which the constant is registered: JavaClass.FIELD.
+ String name = javaClassName + "." + declarator.getId().getName();
+ String value = declarator.getInit().toString();
+ if ("Integer.MAX_VALUE".equals(value)) {
+ value = cppTypes.maxInteger();
+ }
+ // definePrefix is not defined in this class.
+ String longName = definePrefix + declarator.getId().getName();
+ if (symbolTable.cppDefinesByJavaNames.containsKey(name)) {
+ throw new IllegalStateException(
+ "Duplicate #define constant local name: " + name);
+ }
+ symbolTable.cppDefinesByJavaNames.put(name, longName);
+ defines.add("#define " + longName + " " + value);
+ } else {
+ if (n.getType() instanceof ReferenceType) {
+ ReferenceType rt = (ReferenceType) n.getType();
+ currentArrayCount = rt.getArrayCount();
+ // An initialized array of primitives is accepted only when static.
+ if (currentArrayCount > 0
+ && (rt.getType() instanceof PrimitiveType) && declarator.getInit() != null) {
+ if (!ModifierSet.isStatic(modifiers)) {
+ throw new IllegalStateException(
+ "Non-static array case not supported here." + declarator);
+ }
+ if (noLength()) {
+ inPrimitiveNoLengthFieldDeclarator = true;
+ }
+ }
+ }
+ printModifiers(modifiers);
+ inStatic = ModifierSet.isStatic(modifiers);
+ n.getType().accept(this, arg);
+ printer.print(" ");
+ // Static AttributeName and ElementName fields get a name prefix.
+ if (ModifierSet.isStatic(modifiers)) {
+ if ("AttributeName".equals(n.getType().toString())) {
+ printer.print("ATTR_");
+ } else if ("ElementName".equals(n.getType().toString())) {
+ printer.print("ELT_");
+ }
+ }
+ declarator.getId().accept(this, arg);
+ printer.printLn(";");
+ // Reset the per-field state set above.
+ currentArrayCount = 0;
+ inStatic = false;
+ inPrimitiveNoLengthFieldDeclarator = false;
+ }
+ }
+
+ /**
+ * Prints the <code>explicit</code> keyword for a constructor that takes
+ * exactly one parameter, and nothing otherwise.
+ *
+ * @param params the constructor parameters; may be <code>null</code>
+ * @see nu.validator.htmlparser.cpptranslate.CppVisitor#printConstructorExplicit(java.util.List<japa.parser.ast.body.Parameter>)
+ */
+ @Override protected void printConstructorExplicit(List<Parameter> params) {
+ if (params != null && params.size() == 1) {
+ printer.print("explicit ");
+ }
+ }
+
+ /**
+ * Prints only a terminating semicolon in place of the constructor body.
+ *
+ * @param block the constructor body; not used
+ * @param arg not used
+ * @see nu.validator.htmlparser.cpptranslate.CppVisitor#printConstructorBody(japa.parser.ast.stmt.BlockStmt, nu.validator.htmlparser.cpptranslate.LocalSymbolTable)
+ */
+ @Override protected void printConstructorBody(BlockStmt block, LocalSymbolTable arg) {
+ printer.printLn(";");
+ }
+
+ /**
+ * Prints the declaration of a method through
+ * <code>printMethodDeclaration</code>, using a fresh local symbol table
+ * for this class; the table passed in is ignored.
+ *
+ * @param n the method declaration node
+ * @param arg ignored and replaced by a new table
+ * @see nu.validator.htmlparser.cpptranslate.CppVisitor#visit(japa.parser.ast.body.MethodDeclaration, nu.validator.htmlparser.cpptranslate.LocalSymbolTable)
+ */
+ @Override public void visit(MethodDeclaration n, LocalSymbolTable arg) {
+ arg = new LocalSymbolTable(javaClassName, symbolTable);
+ printMethodDeclaration(n, arg);
+ }
+
+ /**
+ * @return always <code>true</code>
+ * @see nu.validator.htmlparser.cpptranslate.CppVisitor#inHeader()
+ */
+ @Override protected boolean inHeader() {
+ return true;
+ }
+
+ }
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LabelVisitor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LabelVisitor.java
new file mode 100644
index 000000000..f27d465a3
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LabelVisitor.java
@@ -0,0 +1,84 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser C++ Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2008
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package nu.validator.htmlparser.cpptranslate;
+
+import japa.parser.ast.stmt.BreakStmt;
+import japa.parser.ast.stmt.ContinueStmt;
+import japa.parser.ast.visitor.VoidVisitorAdapter;
+
+import java.util.HashSet;
+import java.util.Set;
+
+ /**
+  * Collects the names of the labels that labeled <code>break</code> and
+  * <code>continue</code> statements refer to. A <code>break</code> target is
+  * recorded with an <code>_end</code> suffix, a <code>continue</code> target
+  * under the label's own name.
+  */
+ public class LabelVisitor extends VoidVisitorAdapter<Object> {
+
+     /** Label names gathered so far. */
+     private final Set<String> labels = new HashSet<String>();
+
+     public LabelVisitor() {
+     }
+
+     /**
+      * Records <code>label_end</code> for a labeled break; an unlabeled
+      * break is ignored.
+      *
+      * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.stmt.BreakStmt, java.lang.Object)
+      */
+     @Override
+     public void visit(BreakStmt n, Object arg) {
+         String target = n.getId();
+         if (target == null) {
+             return;
+         }
+         labels.add(target + "_end");
+     }
+
+     /**
+      * Records the label of a labeled continue; an unlabeled continue is
+      * ignored.
+      *
+      * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.stmt.ContinueStmt, java.lang.Object)
+      */
+     @Override
+     public void visit(ContinueStmt n, Object arg) {
+         String target = n.getId();
+         if (target == null) {
+             return;
+         }
+         labels.add(target);
+     }
+
+     /**
+      * Returns the live set of collected label names (not a copy).
+      *
+      * @return the labels
+      */
+     public Set<String> getLabels() {
+         return labels;
+     }
+ }
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LicenseExtractor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LicenseExtractor.java
new file mode 100644
index 000000000..e4030f438
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LicenseExtractor.java
@@ -0,0 +1,75 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser C++ Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2008
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package nu.validator.htmlparser.cpptranslate;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+
+ /**
+  * Extracts the leading license text of a source file: everything from the
+  * start of the file up to and including the first block-comment terminator
+  * (an asterisk immediately followed by a slash).
+  */
+ public class LicenseExtractor {
+
+     private final Reader reader;
+
+     /**
+      * Opens the given file for reading as UTF-8. The file stays open until
+      * {@link #extract()} has been called.
+      *
+      * @param file the source file to read the license from
+      * @throws IOException if the file cannot be opened
+      */
+     public LicenseExtractor(File file) throws IOException {
+         this.reader = new InputStreamReader(new FileInputStream(file), "utf-8");
+     }
+
+     /**
+      * Reads the file up to the end of the first block comment and closes it.
+      * Because the underlying file is closed when this method returns,
+      * normally or exceptionally, it is meant to be called once per instance.
+      *
+      * @return the text from the start of the file through the first
+      *         comment terminator, or the empty string if the file contains
+      *         no such terminator
+      * @throws IOException if reading fails, or if the file has already been
+      *         closed by an earlier call
+      */
+     public String extract() throws IOException {
+         try {
+             boolean prevWasAsterisk = false;
+             StringBuilder sb = new StringBuilder();
+             int c;
+             while ((c = reader.read()) != -1) {
+                 sb.append((char) c);
+                 if (c == '/' && prevWasAsterisk) {
+                     return sb.toString();
+                 }
+                 // Only an asterisk directly before the slash counts.
+                 prevWasAsterisk = (c == '*');
+             }
+             return "";
+         } finally {
+             // Previously the file handle opened by the constructor leaked.
+             reader.close();
+         }
+     }
+ }
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LocalSymbolTable.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LocalSymbolTable.java
new file mode 100644
index 000000000..a9375e88a
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LocalSymbolTable.java
@@ -0,0 +1,89 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser C++ Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2009
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package nu.validator.htmlparser.cpptranslate;
+
+import java.util.HashMap;
+import java.util.Map;
+
+ /**
+  * Symbol lookup for one class: local variable types are kept here, while
+  * field types and method return types are looked up in a shared
+  * {@link SymbolTable}. A missing class name or the name
+  * <code>"this"</code> is taken to mean the class this table was created
+  * for.
+  */
+ public class LocalSymbolTable {
+
+     /** Types of local variables, keyed by variable name. */
+     private final Map<String, Type> locals = new HashMap<String, Type>();
+
+     private final String javaClassName;
+
+     private final SymbolTable delegate;
+
+     /**
+      * @param javaClassName the class used when a lookup names no class or
+      *        names <code>"this"</code>
+      * @param delegate the table consulted for fields and methods
+      */
+     public LocalSymbolTable(String javaClassName, SymbolTable delegate) {
+         this.javaClassName = javaClassName;
+         this.delegate = delegate;
+     }
+
+     /**
+      * Records the type of a local variable, replacing any earlier entry of
+      * the same name.
+      *
+      * @param name the local variable name
+      * @param type the type of the variable
+      */
+     public void putLocalType(String name, Type type) {
+         locals.put(name, type);
+     }
+
+     /**
+      * Looks up the type of a variable. Without a class name the locals are
+      * consulted first; otherwise, or when no local of that name is known,
+      * the lookup goes to the delegate as a field lookup.
+      *
+      * @param klazz may be <code>null</code> or "this"
+      * @param variable the variable or field name
+      * @return the local's type if one was found, else whatever the
+      *         delegate's field lookup returns
+      * @see nu.validator.htmlparser.cpptranslate.SymbolTable#getFieldType(java.lang.String, java.lang.String)
+      */
+     public Type getVariableType(String klazz, String variable) {
+         if (klazz == null) {
+             Type local = locals.get(variable);
+             if (local != null) {
+                 return local;
+             }
+         }
+         return delegate.getFieldType(owningClass(klazz), variable);
+     }
+
+     /**
+      * Looks up the return type of a method through the delegate.
+      *
+      * @param klazz may be <code>null</code> or "this"
+      * @param method the method name
+      * @return whatever the delegate's method lookup returns
+      * @see nu.validator.htmlparser.cpptranslate.SymbolTable#getMethodReturnType(java.lang.String, java.lang.String)
+      */
+     public Type getMethodReturnType(String klazz, String method) {
+         return delegate.getMethodReturnType(owningClass(klazz), method);
+     }
+
+     /**
+      * Maps <code>null</code> and "this" to this table's own class name and
+      * returns every other class name unchanged.
+      */
+     private String owningClass(String klazz) {
+         if (klazz == null || "this".equals(klazz)) {
+             return javaClassName;
+         }
+         return klazz;
+     }
+ }
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/Main.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/Main.java
new file mode 100644
index 000000000..53347bd42
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/Main.java
@@ -0,0 +1,148 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser C++ Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2008
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package nu.validator.htmlparser.cpptranslate;
+
+import japa.parser.JavaParser;
+import japa.parser.ParseException;
+import japa.parser.ast.CompilationUnit;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+
+ /**
+  * Command-line entry point of the translator. For every class in
+  * {@link #H_LIST} it writes a <code>.h</code> file and copies the Java
+  * source into a <code>javasrc</code> directory under the target directory;
+  * for every class in <code>CPP_LIST</code> it writes a <code>.cpp</code>
+  * file.
+  */
+ public class Main {
+
+     /** Java class names for which a header file is generated. */
+     static final String[] H_LIST = {
+         "Tokenizer",
+         "TreeBuilder",
+         "MetaScanner",
+         "AttributeName",
+         "ElementName",
+         "HtmlAttributes",
+         "StackNode",
+         "UTF16Buffer",
+         "StateSnapshot",
+         "Portability",
+     };
+
+     /** Java class names for which an implementation file is generated. */
+     private static final String[] CPP_LIST = {
+         "Tokenizer",
+         "TreeBuilder",
+         "MetaScanner",
+         "AttributeName",
+         "ElementName",
+         "HtmlAttributes",
+         "StackNode",
+         "UTF16Buffer",
+         "StateSnapshot",
+     };
+
+     /**
+      * Runs the translation.
+      *
+      * @param args <code>args[0]</code> is the directory holding the Java
+      *        sources, <code>args[1]</code> the target directory for the
+      *        generated files, <code>args[2]</code> the file handed to
+      *        {@link CppTypes}
+      * @throws IllegalArgumentException if fewer than three arguments are given
+      * @throws ParseException declared for compatibility; parse failures of
+      *         individual files are reported on standard error by
+      *         <code>parseFile</code>
+      * @throws IOException if reading a source or writing an output fails
+      */
+     public static void main(String[] args) throws ParseException, IOException {
+         if (args.length < 3) {
+             // Fail with a usable message instead of an index error.
+             throw new IllegalArgumentException(
+                     "Expected 3 arguments: <javaDirectory> <targetDirectory> <cppTypesFile>");
+         }
+         CppTypes cppTypes = new CppTypes(new File(args[2]));
+         SymbolTable symbolTable = new SymbolTable();
+
+         File javaDirectory = new File(args[0]);
+         File targetDirectory = new File(args[1]);
+         File cppDirectory = targetDirectory;
+         File javaCopyDirectory = new File(targetDirectory, "javasrc");
+
+         // Headers go first: HVisitor fills the shared symbol table.
+         for (int i = 0; i < H_LIST.length; i++) {
+             parseFile(cppTypes, javaDirectory, cppDirectory, H_LIST[i], ".h", new HVisitor(cppTypes, symbolTable));
+             copyFile(new File(javaDirectory, H_LIST[i] + ".java"), new File(javaCopyDirectory, H_LIST[i] + ".java"));
+         }
+         for (int i = 0; i < CPP_LIST.length; i++) {
+             parseFile(cppTypes, javaDirectory, cppDirectory, CPP_LIST[i], ".cpp", new CppVisitor(cppTypes, symbolTable));
+         }
+         cppTypes.finished();
+     }
+
+     /**
+      * Copies <code>input</code> to <code>output</code>, overwriting it.
+      * Does nothing when both paths denote the same file. Both streams are
+      * closed even when the copy fails.
+      */
+     private static void copyFile(File input, File output) throws IOException {
+         if (input.getCanonicalFile().equals(output.getCanonicalFile())) {
+             return; // files are the same!
+         }
+         FileInputStream in = new FileInputStream(input);
+         try {
+             FileOutputStream out = new FileOutputStream(output);
+             try {
+                 // Copy in chunks rather than one byte per call.
+                 byte[] buffer = new byte[8192];
+                 int count;
+                 while ((count = in.read(buffer)) != -1) {
+                     out.write(buffer, 0, count);
+                 }
+                 out.flush();
+             } finally {
+                 out.close();
+             }
+         } finally {
+             in.close();
+         }
+     }
+
+     /**
+      * Translates one Java class: extracts its license comment, parses the
+      * source through the preprocessing streams, runs the label visitor and
+      * then the given visitor over it, and writes license, generated-file
+      * notice and visitor output to
+      * <code>classPrefix + className + fne</code> in the target directory.
+      * A parse failure is reported on standard error and the file is
+      * skipped; no output is written for it.
+      *
+      * @param cppTypes supplies the class name prefix of the output file
+      * @param javaDirectory directory containing <code>className.java</code>
+      * @param cppDirectory directory the output file is written to
+      * @param className simple name of the Java class
+      * @param fne file name extension of the output, including the dot
+      * @param visitor the visitor producing the output text
+      */
+     private static void parseFile(CppTypes cppTypes, File javaDirectory,
+             File cppDirectory, String className, String fne, CppVisitor visitor)
+             throws FileNotFoundException, UnsupportedEncodingException,
+             IOException {
+         File file = new File(javaDirectory, className + ".java");
+         try {
+             String license = new LicenseExtractor(file).extract();
+             CompilationUnit cu;
+             FileInputStream in = new FileInputStream(file);
+             try {
+                 cu = JavaParser.parse(new NoCppInputStream(
+                         new CppOnlyInputStream(in)), "utf-8");
+             } finally {
+                 // Release the source file whether or not parsing succeeded.
+                 in.close();
+             }
+             LabelVisitor labelVisitor = new LabelVisitor();
+             cu.accept(labelVisitor, null);
+             visitor.setLabels(labelVisitor.getLabels());
+             cu.accept(visitor, null);
+             FileOutputStream out = new FileOutputStream(new File(cppDirectory,
+                     cppTypes.classPrefix() + className + fne));
+             try {
+                 OutputStreamWriter w = new OutputStreamWriter(out, "utf-8");
+                 w.write(license);
+                 w.write("\n\n/*\n * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.\n * Please edit "
+                         + className + ".java instead and regenerate.\n */\n\n");
+                 w.write(visitor.getSource());
+                 // Push the encoder's buffer to the file before closing it.
+                 w.flush();
+             } finally {
+                 out.close();
+             }
+         } catch (ParseException e) {
+             System.err.println(file);
+             e.printStackTrace();
+         }
+     }
+
+ }
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/NoCppInputStream.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/NoCppInputStream.java
new file mode 100644
index 000000000..86f9ae7ff
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/NoCppInputStream.java
@@ -0,0 +1,86 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser C++ Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2008
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package nu.validator.htmlparser.cpptranslate;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+ /**
+  * Input stream filter that drops everything between a
+  * <code>[NOCPP[</code> marker and the next <code>]NOCPP]</code> marker.
+  * The start marker itself is passed through; the skipped content and the
+  * end marker are not. Matching is done on single bytes, so the markers are
+  * recognized in any ASCII-compatible encoding.
+  */
+ public class NoCppInputStream extends InputStream {
+
+     private final static char[] START = "[NOCPP[".toCharArray();
+
+     private final static char[] END = "]NOCPP]".toCharArray();
+
+     /** Number of leading bytes of START matched by the most recent reads. */
+     private int state;
+
+     private final InputStream delegate;
+
+     /**
+      * @param delegate the stream to filter
+      */
+     public NoCppInputStream(InputStream delegate) {
+         this.delegate = delegate;
+         this.state = 0;
+     }
+
+     /**
+      * Returns the next byte that is not inside a skipped region.
+      *
+      * @return the next byte, or <code>-1</code> at the end of the stream,
+      *         including when the stream ends inside a region whose end
+      *         marker is missing
+      * @throws IOException if the underlying stream fails
+      */
+     @Override public int read() throws IOException {
+         int c;
+         if (state == START.length) {
+             // The previous call returned the last byte of the start marker:
+             // swallow input up to and including the end marker.
+             int endState = 0;
+             while (endState != END.length) {
+                 c = delegate.read();
+                 if (c == -1) {
+                     // Unterminated region; without this check the loop
+                     // would spin forever at the end of the stream.
+                     state = 0;
+                     return -1;
+                 }
+                 endState = advance(END, endState, c);
+             }
+             state = 0;
+         }
+         c = delegate.read();
+         state = advance(START, state, c);
+         return c;
+     }
+
+     /**
+      * Closes the underlying stream.
+      *
+      * @throws IOException if closing the underlying stream fails
+      */
+     @Override public void close() throws IOException {
+         delegate.close();
+     }
+
+     /**
+      * Computes the new match length after seeing byte <code>c</code> with
+      * <code>matched</code> bytes of <code>marker</code> already matched.
+      * On a mismatch the byte may still begin a new match; for both markers
+      * the first character is the only possible restart point, so checking
+      * it is sufficient.
+      */
+     private static int advance(char[] marker, int matched, int c) {
+         if (marker[matched] == c) {
+             return matched + 1;
+         }
+         return (marker[0] == c) ? 1 : 0;
+     }
+
+ }
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/StringLiteralParser.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/StringLiteralParser.java
new file mode 100644
index 000000000..305f516a7
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/StringLiteralParser.java
@@ -0,0 +1,70 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser C++ Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2008
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package nu.validator.htmlparser.cpptranslate;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Scans generated C++ source text for string literal declarations of the
+ * form <code>(NAME = new nsString())->Assign(NS_LITERAL_STRING("TEXT"));</code>.
+ */
+public class StringLiteralParser {
+
+    /** Group 1 is the assigned identifier, group 2 is the literal text. */
+    private static final Pattern STRING_DECL = Pattern.compile("^.*\\(([^ ]+) = new nsString\\(\\)\\)->Assign\\(NS_LITERAL_STRING\\(\"([^\"]*)\"\\)\\);.*$");
+
+    private final BufferedReader reader;
+
+    /**
+     * @param reader
+     *            the source text to scan; it is wrapped in a buffered reader
+     *            and is not closed by this class
+     */
+    public StringLiteralParser(Reader reader) {
+        this.reader = new BufferedReader(reader);
+    }
+
+    /**
+     * Reads the input line by line until it is exhausted and collects every
+     * matching declaration.
+     *
+     * @return a map from literal text to the identifier it was assigned to;
+     *         a later declaration with the same literal replaces an earlier
+     *         one
+     * @throws IOException
+     *             if reading fails
+     */
+    public Map<String, String> parse() throws IOException {
+        Map<String, String> identifiersByLiteral = new HashMap<String, String>();
+        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
+            Matcher decl = STRING_DECL.matcher(line);
+            if (!decl.matches()) {
+                continue;
+            }
+            identifiersByLiteral.put(decl.group(2), decl.group(1));
+        }
+        return identifiersByLiteral;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/StringPair.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/StringPair.java
new file mode 100644
index 000000000..e24247f7e
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/StringPair.java
@@ -0,0 +1,73 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser C++ Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2009
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package nu.validator.htmlparser.cpptranslate;
+
+/**
+ * An immutable ordered pair of strings with value-based equality.
+ *
+ * Both components must be non-null: <code>equals</code> and
+ * <code>hashCode</code> dereference them directly.
+ */
+public class StringPair {
+
+    /**
+     * @param first
+     *            the first component
+     * @param second
+     *            the second component
+     */
+    public StringPair(String first, String second) {
+        this.first = first;
+        this.second = second;
+    }
+
+    private final String first;
+
+    private final String second;
+
+    /**
+     * Two pairs are equal when both components are equal position by
+     * position.
+     *
+     * @see java.lang.Object#equals(java.lang.Object)
+     */
+    @Override public boolean equals(Object o) {
+        if (o instanceof StringPair) {
+            StringPair other = (StringPair) o;
+            return first.equals(other.first) && second.equals(other.second);
+        }
+        return false;
+    }
+
+    /**
+     * Order-sensitive hash. A plain XOR of the component hashes would give
+     * <code>(a, b)</code> and <code>(b, a)</code> the same value and would
+     * collapse to zero whenever both components are equal.
+     *
+     * @see java.lang.Object#hashCode()
+     */
+    @Override public int hashCode() {
+        return 31 * first.hashCode() + second.hashCode();
+    }
+
+}
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTable.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTable.java
new file mode 100644
index 000000000..970a2b64b
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTable.java
@@ -0,0 +1,80 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser C++ Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2008
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package nu.validator.htmlparser.cpptranslate;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Holds the type information gathered for the translator: field types and
+ * method return types keyed by (class name, member name), plus a public map
+ * of C++ defines keyed by Java name.
+ */
+public class SymbolTable {
+
+    public final Map<String, String> cppDefinesByJavaNames = new HashMap<String, String>();
+
+    private final Map<StringPair, Type> fields = new HashMap<StringPair, Type>();
+
+    private final Map<StringPair, Type> methodReturns = new HashMap<StringPair, Type>();
+
+    /**
+     * This is a sad hack to work around the fact that there's no real symbol
+     * table yet.
+     *
+     * @param name
+     *            the identifier to test
+     * @return <code>false</code> when <code>name</code> is one of
+     *         <code>ATTRIBUTE_HASHES</code>, <code>ATTRIBUTE_NAMES</code>,
+     *         <code>ELEMENT_HASHES</code>, <code>ELEMENT_NAMES</code> or
+     *         <code>ALL_NO_NS</code>; <code>true</code> otherwise
+     */
+    public boolean isNotAnAttributeOrElementName(String name) {
+        return !"ATTRIBUTE_HASHES".equals(name)
+                && !"ATTRIBUTE_NAMES".equals(name)
+                && !"ELEMENT_HASHES".equals(name)
+                && !"ELEMENT_NAMES".equals(name)
+                && !"ALL_NO_NS".equals(name);
+    }
+
+    /**
+     * Records the type of a field, replacing any earlier entry for the same
+     * class and field name.
+     */
+    public void putFieldType(String klazz, String field, Type type) {
+        StringPair key = new StringPair(klazz, field);
+        fields.put(key, type);
+    }
+
+    /**
+     * Records the return type of a method, replacing any earlier entry for
+     * the same class and method name.
+     */
+    public void putMethodReturnType(String klazz, String method, Type type) {
+        StringPair key = new StringPair(klazz, method);
+        methodReturns.put(key, type);
+    }
+
+    /**
+     * @return the recorded type of the field, or <code>null</code> if none
+     *         was recorded
+     */
+    public Type getFieldType(String klazz, String field) {
+        StringPair key = new StringPair(klazz, field);
+        return fields.get(key);
+    }
+
+    /**
+     * @return the recorded return type of the method, or <code>null</code>
+     *         if none was recorded
+     */
+    public Type getMethodReturnType(String klazz, String method) {
+        StringPair key = new StringPair(klazz, method);
+        return methodReturns.get(key);
+    }
+}
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTableVisitor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTableVisitor.java
new file mode 100644
index 000000000..00f7c5741
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTableVisitor.java
@@ -0,0 +1,71 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser C++ Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2009
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package nu.validator.htmlparser.cpptranslate;
+
+import japa.parser.ast.body.ClassOrInterfaceDeclaration;
+import japa.parser.ast.body.FieldDeclaration;
+import japa.parser.ast.body.MethodDeclaration;
+
+/**
+ * AST visitor that fills a {@link SymbolTable} with the types of the fields
+ * and the return types of the methods it is shown.
+ */
+public class SymbolTableVisitor extends AnnotationHelperVisitor<SymbolTable> {
+
+    /** Name of the most recently visited class or interface declaration. */
+    private String javaClassName;
+
+    /**
+     * Records the type of a field declaration under the current class name.
+     * Only the first declared variable of the declaration is registered.
+     *
+     * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.body.FieldDeclaration, java.lang.Object)
+     */
+    @Override public void visit(FieldDeclaration n, SymbolTable arg) {
+        // Set before convertType() is called, as in every visit method here;
+        // presumably convertType() consults the annotations (confirm in
+        // AnnotationHelperVisitor).
+        currentAnnotations = n.getAnnotations();
+        String fieldName = n.getVariables().get(0).getId().getName();
+        Type fieldType = convertType(n.getType(), n.getModifiers());
+        arg.putFieldType(javaClassName, fieldName, fieldType);
+    }
+
+    /**
+     * Records the return type of a method declaration under the current
+     * class name.
+     *
+     * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.body.MethodDeclaration, java.lang.Object)
+     */
+    @Override public void visit(MethodDeclaration n, SymbolTable arg) {
+        currentAnnotations = n.getAnnotations();
+        String methodName = n.getName();
+        Type returnType = convertType(n.getType(), n.getModifiers());
+        arg.putMethodReturnType(javaClassName, methodName, returnType);
+    }
+
+    /**
+     * Remembers the name of the declaration for subsequent field and method
+     * visits. This override does not itself descend into the class body.
+     *
+     * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.body.ClassOrInterfaceDeclaration, java.lang.Object)
+     */
+    @Override public void visit(ClassOrInterfaceDeclaration n, SymbolTable arg) {
+        this.javaClassName = n.getName();
+    }
+
+}
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/TranslatorUtils.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/TranslatorUtils.java
new file mode 100644
index 000000000..866db093d
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/TranslatorUtils.java
@@ -0,0 +1,81 @@
+package nu.validator.htmlparser.cpptranslate;
+
+import japa.parser.ast.expr.BinaryExpr;
+import japa.parser.ast.expr.BinaryExpr.Operator;
+import japa.parser.ast.expr.Expression;
+import japa.parser.ast.expr.MethodCallExpr;
+import japa.parser.ast.expr.NameExpr;
+import japa.parser.ast.expr.NullLiteralExpr;
+import japa.parser.ast.stmt.BlockStmt;
+import japa.parser.ast.stmt.ExpressionStmt;
+import japa.parser.ast.stmt.Statement;
+
+import java.util.List;
+
+/**
+ * Static predicates over parsed Java AST nodes that recognize the
+ * error-reporting and document-mode-handler idioms in the parser source.
+ */
+public class TranslatorUtils {
+
+    /**
+     * Tells whether a statement is a block whose single statement is a call
+     * to a method whose name starts with <code>err</code>.
+     *
+     * @param elseStmt
+     *            the statement to inspect
+     * @param supportErrorReporting
+     *            when <code>true</code> the answer is always
+     *            <code>false</code>
+     * @return <code>true</code> if the statement is such an error-only block
+     */
+    public static boolean isErrorOnlyBlock(Statement elseStmt, boolean supportErrorReporting) {
+        if (supportErrorReporting || !(elseStmt instanceof BlockStmt)) {
+            return false;
+        }
+        List<Statement> body = ((BlockStmt) elseStmt).getStmts();
+        if (body == null || body.size() != 1) {
+            return false;
+        }
+        Statement only = body.get(0);
+        if (!(only instanceof ExpressionStmt)) {
+            return false;
+        }
+        Expression expr = ((ExpressionStmt) only).getExpression();
+        return expr instanceof MethodCallExpr
+                && ((MethodCallExpr) expr).getName().startsWith("err");
+    }
+
+    /**
+     * Tells whether a condition tests <code>errorHandler</code>: walks down
+     * the left operands of nested binary expressions and reports whether any
+     * of them is the bare name <code>errorHandler</code>.
+     *
+     * @param condition
+     *            the condition expression to inspect
+     * @param supportErrorReporting
+     *            when <code>true</code> the answer is always
+     *            <code>false</code>
+     * @return <code>true</code> if the name was found on the left spine
+     */
+    public static boolean isErrorHandlerIf(Expression condition, boolean supportErrorReporting) {
+        if (supportErrorReporting) {
+            return false;
+        }
+        Expression current = condition;
+        while (current instanceof BinaryExpr) {
+            current = ((BinaryExpr) current).getLeft();
+            if (current instanceof NameExpr
+                    && "errorHandler".equals(((NameExpr) current).getName())) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Tells whether a condition is exactly
+     * <code>documentModeHandler != null</code>.
+     *
+     * @param condition
+     *            the condition expression to inspect
+     * @return <code>true</code> if the condition has that shape
+     */
+    public static boolean isDocumentModeHandlerNullCheck(Expression condition) {
+        if (!(condition instanceof BinaryExpr)) {
+            return false;
+        }
+        BinaryExpr comparison = (BinaryExpr) condition;
+        if (comparison.getOperator() != Operator.notEquals
+                || !(comparison.getRight() instanceof NullLiteralExpr)) {
+            return false;
+        }
+        Expression left = comparison.getLeft();
+        return left instanceof NameExpr
+                && "documentModeHandler".equals(((NameExpr) left).getName());
+    }
+
+}
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/Type.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/Type.java
new file mode 100644
index 000000000..783a3bbd0
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/Type.java
@@ -0,0 +1,99 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser C++ Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2009
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package nu.validator.htmlparser.cpptranslate;
+
+/**
+ * Immutable description of a translated type: a type name, an array count,
+ * a no-length flag and a modifier bit set.
+ */
+public class Type {
+
+    private final String type;
+
+    private final int arrayCount;
+
+    private final boolean noLength;
+
+    private final int modifiers;
+
+    /**
+     * Creates a type description from its four components, which are stored
+     * as given.
+     *
+     * @param type
+     *            the type name
+     * @param arrayCount
+     *            the array count
+     * @param noLength
+     *            the no-length flag
+     * @param modifiers
+     *            the modifier bits
+     */
+    public Type(String type, int arrayCount, boolean noLength, int modifiers) {
+        this.modifiers = modifiers;
+        this.noLength = noLength;
+        this.arrayCount = arrayCount;
+        this.type = type;
+    }
+
+    /**
+     * @return the type name passed to the constructor
+     */
+    public String getType() {
+        return this.type;
+    }
+
+    /**
+     * @return the array count passed to the constructor
+     */
+    public int getArrayCount() {
+        return this.arrayCount;
+    }
+
+    /**
+     * @return the no-length flag passed to the constructor
+     */
+    public boolean isNoLength() {
+        return this.noLength;
+    }
+
+    /**
+     * @return the modifier bits passed to the constructor
+     */
+    public int getModifiers() {
+        return this.modifiers;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java
new file mode 100644
index 000000000..eb580e70c
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2010-2011 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.generator;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.Writer;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Applies a workaround that splits the <code>stateLoop</code> method in the
+ * tokenizer into two methods. This way, each method stays under 8000 bytes in
+ * size. By default, HotSpot doesn't compile methods that are over 8000 bytes in
+ * size, which is a performance problem.
+ *
+ * This program should have been written in Perl, but to avoid introducing new
+ * dependencies, it's written in Java. No attempt at efficiency has been made.
+ *
+ * Warning! This modifies Tokenizer.java in place!
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class ApplyHotSpotWorkaround {
+
+    private static final String BEGIN_WORKAROUND = "// BEGIN HOTSPOT WORKAROUND";
+
+    private static final String END_WORKAROUND = "// END HOTSPOT WORKAROUND";
+
+    private static final String INSERTION_POINT = "// HOTSPOT WORKAROUND INSERTION POINT";
+
+    /**
+     * Swaps the marked regions of the tokenizer file and the workaround
+     * file, inserts the resulting workaround text at the insertion point of
+     * the tokenizer, simplifies <code>state = transition(state, X, reconsume,
+     * pos)</code> to <code>state = X</code> and overwrites the tokenizer file
+     * with the result. Nothing is written if validation fails.
+     *
+     * @param args
+     *            <code>args[0]</code> is the tokenizer file (rewritten in
+     *            place), <code>args[1]</code> is the workaround file
+     * @throws IllegalArgumentException
+     *             if fewer than two arguments are given or a required marker
+     *             is missing or out of order
+     * @throws Throwable
+     *             on I/O failure
+     */
+    public static void main(String[] args) throws Throwable {
+        if (args.length < 2) {
+            throw new IllegalArgumentException(
+                    "Expected two arguments: <tokenizer file> <workaround file>");
+        }
+        String tokenizer = readFileIntoString(args[0]);
+        String workaround = readFileIntoString(args[1]);
+
+        int[] region = locateRegion(tokenizer, args[0]);
+        String tokenizerHead = tokenizer.substring(0, region[0]);
+        String tokenizerMiddle = tokenizer.substring(region[0], region[1]);
+        String tokenizerTail = tokenizer.substring(region[1]);
+
+        region = locateRegion(workaround, args[1]);
+        String workaroundHead = workaround.substring(0, region[0]);
+        String workaroundMiddle = workaround.substring(region[0], region[1]);
+        String workaroundTail = workaround.substring(region[1]);
+
+        String newTokenizer = tokenizerHead + workaroundMiddle + tokenizerTail;
+        String newWorkaround = workaroundHead + tokenizerMiddle
+                + workaroundTail;
+
+        int insertionPoint = requireMarker(newTokenizer, INSERTION_POINT,
+                args[0]);
+
+        tokenizerHead = newTokenizer.substring(0, insertionPoint);
+        tokenizerTail = newTokenizer.substring(insertionPoint);
+
+        newTokenizer = tokenizerHead + newWorkaround + tokenizerTail;
+
+        Pattern pat = Pattern.compile("state = transition\\(state, ([^,]*), reconsume, pos\\)");
+        Matcher m = pat.matcher(newTokenizer);
+        newTokenizer = m.replaceAll("state = $1");
+
+        Writer out = new OutputStreamWriter(new FileOutputStream(args[0]),
+                "utf-8");
+        try {
+            out.write(newTokenizer);
+            out.flush();
+        } finally {
+            // Release the file handle even if the write fails.
+            out.close();
+        }
+    }
+
+    /**
+     * Finds the first occurrence of the begin and end markers.
+     *
+     * @return a two-element array: index of the begin marker, index of the
+     *         end marker
+     * @throws IllegalArgumentException
+     *             if either marker is missing or the end marker precedes the
+     *             begin marker
+     */
+    private static int[] locateRegion(String content, String fileName) {
+        int begin = requireMarker(content, BEGIN_WORKAROUND, fileName);
+        int end = requireMarker(content, END_WORKAROUND, fileName);
+        if (end < begin) {
+            throw new IllegalArgumentException("Marker \"" + END_WORKAROUND
+                    + "\" precedes \"" + BEGIN_WORKAROUND + "\" in "
+                    + fileName);
+        }
+        return new int[] { begin, end };
+    }
+
+    /**
+     * Returns the index of the first occurrence of <code>marker</code>, or
+     * fails with a message naming the marker and the file instead of letting
+     * a later <code>substring</code> call fail on a negative index.
+     */
+    private static int requireMarker(String content, String marker,
+            String fileName) {
+        int index = content.indexOf(marker);
+        if (index < 0) {
+            throw new IllegalArgumentException("Marker \"" + marker
+                    + "\" not found in " + fileName);
+        }
+        return index;
+    }
+
+    /**
+     * Reads a whole file as UTF-8 text.
+     *
+     * @param name
+     *            the path of the file to read
+     * @return the decoded file content
+     * @throws IOException
+     *             if the file cannot be opened or read
+     */
+    private static String readFileIntoString(String name) throws IOException {
+        Reader in = new InputStreamReader(new FileInputStream(name), "UTF-8");
+        try {
+            StringBuilder builder = new StringBuilder();
+            char[] chunk = new char[8192];
+            int count;
+            while ((count = in.read(chunk)) != -1) {
+                builder.append(chunk, 0, count);
+            }
+            return builder.toString();
+        } finally {
+            // Close the reader on both the normal and the exceptional path.
+            in.close();
+        }
+    }
+
+}
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharacters.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharacters.java
new file mode 100644
index 000000000..69ddb318e
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharacters.java
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2008-2009 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.generator;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Reads the named character reference table as HTML on standard input and
+ * prints Java source for three arrays on standard output:
+ * <code>NAMES</code>, <code>VALUES</code> and <code>HILO_ACCEL</code>.
+ */
+public class GenerateNamedCharacters {
+
+    /** Added to <code>codePoint >> 10</code> to obtain the high surrogate. */
+    private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
+
+    /**
+     * Group 1 is the character name, group 2 the first code point in hex and
+     * the optional group 3 a second code point in hex.
+     */
+    private static final Pattern LINE_PATTERN = Pattern.compile("<td> <code title=\"\">([^<]*)</code> </td> <td> U\\+(\\S*) (?:U\\+(\\S*) )?</td>");
+
+    /**
+     * Formats a UTF-16 code unit as a backslash-u escape with four lower
+     * case hex digits.
+     *
+     * @throws RuntimeException
+     *             if the value needs more than four hex digits
+     */
+    private static String toUString(int c) {
+        String hex = Integer.toHexString(c);
+        int digits = hex.length();
+        if (digits < 1 || digits > 4) {
+            throw new RuntimeException("Unreachable.");
+        }
+        StringBuilder escaped = new StringBuilder("\\u");
+        for (int pad = digits; pad < 4; pad++) {
+            escaped.append('0');
+        }
+        return escaped.append(hex).toString();
+    }
+
+    /**
+     * Maps <code>A</code>-<code>Z</code> to 0-25 and <code>a</code>-<code>z</code>
+     * to 26-51.
+     *
+     * @throws IllegalArgumentException
+     *             for any other character
+     */
+    private static int charToIndex(char c) {
+        if (c >= 'A' && c <= 'Z') {
+            return c - 'A';
+        }
+        if (c >= 'a' && c <= 'z') {
+            return c - 'a' + 26;
+        }
+        throw new IllegalArgumentException("Bad char in named character name: "
+                + c);
+    }
+
+    /**
+     * @return <code>true</code> if every element of the array is zero
+     */
+    private static boolean allZero(int[] arr) {
+        for (int element : arr) {
+            if (element != 0) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Turns the hex code point(s) of one table row into the UTF-16 value:
+     * two code units taken as-is when a second code point is present,
+     * otherwise one code unit or a surrogate pair for the single code point.
+     */
+    private static String decodeValue(String firstHex, String secondHex) {
+        int first = Integer.parseInt(firstHex, 16);
+        StringBuilder value = new StringBuilder(2);
+        if (secondHex != null) {
+            // two BMP chars
+            int second = Integer.parseInt(secondHex, 16);
+            value.append((char) first).append((char) second);
+        } else if (first <= 0xFFFF) {
+            value.append((char) first);
+        } else {
+            value.append((char) (LEAD_OFFSET + (first >> 10)));
+            value.append((char) (0xDC00 + (first & 0x3FF)));
+        }
+        return value.toString();
+    }
+
+    /**
+     * Generates the three tables. Names are emitted in sorted order without
+     * their first two characters; those two characters instead select a cell
+     * of <code>HILO_ACCEL</code> holding <code>(lastRow << 16) | firstRow</code>
+     * for the run of names sharing them.
+     *
+     * @param args
+     *            ignored
+     * @throws IOException
+     *             if reading standard input fails
+     */
+    public static void main(String[] args) throws IOException {
+        TreeMap<String, String> entities = new TreeMap<String, String>();
+        BufferedReader reader = new BufferedReader(new InputStreamReader(
+                System.in, "utf-8"));
+        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
+            Matcher m = LINE_PATTERN.matcher(line);
+            while (m.find()) {
+                entities.put(m.group(1), decodeValue(m.group(2), m.group(3)));
+            }
+        }
+
+        // Java initializes arrays to zero. Zero is our magic value for no hilo
+        // value.
+        int[][] hiLoTable = new int['z' + 1]['Z' - 'A' + 1 + 'z' - 'a' + 1];
+
+        String firstName = entities.firstKey();
+        int firstKey = charToIndex(firstName.charAt(0));
+        int secondKey = firstName.charAt(1);
+        int row = 0;
+        int lo = 0;
+
+        System.out.print("static final @NoLength @CharacterName String[] NAMES = {\n");
+        for (String name : entities.keySet()) {
+            int newFirst = charToIndex(name.charAt(0));
+            int newSecond = name.charAt(1);
+            assert !(newFirst == 0 && newSecond == 0) : "Not prepared for name starting with AA";
+            boolean sameRun = firstKey == newFirst && secondKey == newSecond;
+            if (!sameRun) {
+                // Close the run that just ended and start a new one here.
+                hiLoTable[secondKey][firstKey] = ((row - 1) << 16) | lo;
+                lo = row;
+                firstKey = newFirst;
+                secondKey = newSecond;
+            }
+            System.out.print("\"" + name.substring(2) + "\",\n");
+            row++;
+        }
+        System.out.print("};\n");
+
+        // Close the final run.
+        hiLoTable[secondKey][firstKey] = ((entities.size() - 1) << 16) | lo;
+
+        System.out.print("static final @NoLength char[][] VALUES = {\n");
+        for (String value : entities.values()) {
+            StringBuilder rowText = new StringBuilder("{");
+            if (value.length() == 1) {
+                char c = value.charAt(0);
+                switch (c) {
+                    case '\'':
+                        rowText.append("\'\\\'\'");
+                        break;
+                    case '\n':
+                        rowText.append("\'\\n\'");
+                        break;
+                    case '\\':
+                        rowText.append("\'\\\\\'");
+                        break;
+                    default:
+                        rowText.append("\'").append(toUString(c)).append("\'");
+                        break;
+                }
+            } else {
+                rowText.append("\'").append(toUString(value.charAt(0)));
+                rowText.append("\', \'");
+                rowText.append(toUString(value.charAt(1))).append("\'");
+            }
+            rowText.append("},\n");
+            System.out.print(rowText.toString());
+        }
+        System.out.print("};\n");
+
+        System.out.print("static final @NoLength int[][] HILO_ACCEL = {\n");
+        for (int[] cells : hiLoTable) {
+            if (allZero(cells)) {
+                System.out.print("null,\n");
+                continue;
+            }
+            StringBuilder rowText = new StringBuilder("{");
+            for (int cell : cells) {
+                rowText.append(cell).append(", ");
+            }
+            rowText.append("},\n");
+            System.out.print(rowText.toString());
+        }
+        System.out.print("};\n");
+    }
+
+}
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharactersCpp.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharactersCpp.java
new file mode 100644
index 000000000..2cfe7b112
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharactersCpp.java
@@ -0,0 +1,580 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser C++ Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2008
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package nu.validator.htmlparser.generator;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import nu.validator.htmlparser.cpptranslate.CppTypes;
+
+public class GenerateNamedCharactersCpp {
+
+ /**
+ * The license for the output of this program except for data files.
+ */
+ private static final String OUTPUT_LICENSE = "/*\n"
+ + " * Copyright (c) 2008-2010 Mozilla Foundation\n"
+ + " *\n"
+ + " * Permission is hereby granted, free of charge, to any person obtaining a \n"
+ + " * copy of this software and associated documentation files (the \"Software\"), \n"
+ + " * to deal in the Software without restriction, including without limitation \n"
+ + " * the rights to use, copy, modify, merge, publish, distribute, sublicense, \n"
+ + " * and/or sell copies of the Software, and to permit persons to whom the \n"
+ + " * Software is furnished to do so, subject to the following conditions:\n"
+ + " *\n"
+ + " * The above copyright notice and this permission notice shall be included in \n"
+ + " * all copies or substantial portions of the Software.\n"
+ + " *\n"
+ + " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR \n"
+ + " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, \n"
+ + " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL \n"
+ + " * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER \n"
+ + " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING \n"
+ + " * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER \n"
+ + " * DEALINGS IN THE SOFTWARE.\n" + " */\n\n";
+
+ /**
+ * The license for the generated data files.
+ */
+ private static final String DATA_LICENSE = "/*\n"
+ + " * Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera \n"
+ + " * Software ASA.\n"
+ + " * \n"
+ + " * You are granted a license to use, reproduce and create derivative works of \n"
+ + " * this document.\n" + " */\n\n";
+
+ /**
+  * Constant that, added to <code>codePoint >> 10</code>, yields the first
+  * (lead) UTF-16 code unit written for a value above 0xFFFF in main().
+  */
+ private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
+
+ /**
+  * Matches one row of the named character reference table. Group 1 is the
+  * text inside the code element (the name), group 2 is the token after the
+  * first "U+", and group 3 is the optional token after a second "U+".
+  */
+ private static final Pattern LINE_PATTERN = Pattern.compile("<td> <code title=\"\">([^<]*)</code> </td> <td> U\\+(\\S*) (?:U\\+(\\S*) )?</td>");
+
+ /**
+  * Formats a value as a C-style hexadecimal literal with exactly four
+  * lowercase hex digits, e.g. <code>0x00a0</code>.
+  *
+  * @param c the value to format
+  * @return "0x" followed by the zero-padded, four-digit hex form of c
+  * @throws RuntimeException if the hex form of c needs more than four digits
+  */
+ private static String toHexString(int c) {
+     String digits = Integer.toHexString(c);
+     int padding = 4 - digits.length();
+     if (padding < 0 || padding > 3) {
+         throw new RuntimeException("Unreachable.");
+     }
+     StringBuilder literal = new StringBuilder("0x");
+     while (padding-- > 0) {
+         literal.append('0');
+     }
+     return literal.append(digits).toString();
+ }
+
+ /**
+  * Parses the named character reference table and writes the generated C++
+  * files into the target directory.
+  *
+  * @param args <code>args[0]</code> is the path of the HTML file (read as
+  *        UTF-8) that contains the table; <code>args[1]</code> is the
+  *        directory that receives the generated files
+  * @throws IOException if the table cannot be read or an output file cannot
+  *         be written
+  * @throws IllegalArgumentException if fewer than two arguments are given
+  * @throws IllegalStateException if no table row could be recognized in the
+  *         input
+  */
+ public static void main(String[] args) throws IOException {
+     if (args.length < 2) {
+         throw new IllegalArgumentException(
+                 "Expected arguments: <named character references HTML file> <target directory>");
+     }
+     TreeMap<String, String> entities = new TreeMap<String, String>();
+     BufferedReader reader = new BufferedReader(new InputStreamReader(
+             new FileInputStream(args[0]), "utf-8"));
+     try {
+         String line;
+         while ((line = reader.readLine()) != null) {
+             Matcher m = LINE_PATTERN.matcher(line);
+             while (m.find()) {
+                 String value;
+                 if (m.group(3) != null) {
+                     // two BMP chars
+                     int firstIntVal = Integer.parseInt(m.group(2), 16);
+                     int secondIntVal = Integer.parseInt(m.group(3), 16);
+                     value = ("" + (char)firstIntVal) + (char)secondIntVal;
+                 } else {
+                     // one code point
+                     int intVal = Integer.parseInt(m.group(2), 16);
+                     if (intVal <= 0xFFFF) {
+                         value = "" + (char)intVal;
+                     } else {
+                         // Split a supplementary code point into a surrogate pair.
+                         int high = (LEAD_OFFSET + (intVal >> 10));
+                         int low = (0xDC00 + (intVal & 0x3FF));
+                         value = ("" + (char)high) + (char)low;
+                     }
+                 }
+                 entities.put(m.group(1), value);
+             }
+         }
+     } finally {
+         // Release the input file even when parsing fails part-way through.
+         reader.close();
+     }
+
+     // Fail before writing anything rather than leaving partial output behind.
+     if (entities.isEmpty()) {
+         throw new IllegalStateException(
+                 "No named character references found in " + args[0]);
+     }
+
+     CppTypes cppTypes = new CppTypes(null);
+     File targetDirectory = new File(args[1]);
+
+     generateH(targetDirectory, cppTypes, entities);
+     generateInclude(targetDirectory, cppTypes, entities);
+     generateCpp(targetDirectory, cppTypes, entities);
+     generateAccelH(targetDirectory, cppTypes, entities);
+     generateAccelCpp(targetDirectory, cppTypes, entities);
+ }
+
+ /**
+  * Writes <code>&lt;prefix&gt;NamedCharactersAccel.cpp</code>, which defines
+  * the HILO_ACCEL lookup table.
+  *
+  * The table is indexed first by the raw value of a name's second character
+  * and then by charToIndex() of its first character. For every two-character
+  * prefix that occurs, the cell holds the index of the last row with that
+  * prefix in the upper 16 bits and the index of the first such row in the
+  * lower 16 bits; rows are numbered in the iteration order of entities.
+  *
+  * @param targetDirectory directory the file is created in
+  * @param cppTypes supplies the class prefix and the C++ integer type name
+  * @param entities the named characters, sorted by name; must not be empty
+  * @throws IOException if the file cannot be written
+  */
+ private static void generateAccelCpp(File targetDirectory,
+         CppTypes cppTypes, TreeMap<String, String> entities) throws IOException {
+     // Java initializes arrays to zero. Zero is our magic value for no hilo
+     // value.
+     int[][] hiLoTable = new int['z' + 1]['Z' - 'A' + 1 + 'z' - 'a' + 1];
+
+     String firstName = entities.firstKey();
+     int firstKey = charToIndex(firstName.charAt(0));
+     int secondKey = firstName.charAt(1);
+     int row = 0;
+     int lo = 0;
+
+     for (String name : entities.keySet()) {
+         int newFirst = charToIndex(name.charAt(0));
+         int newSecond = name.charAt(1);
+         assert !(newFirst == 0 && newSecond == 0) : "Not prepared for name starting with AA";
+         if (firstKey != newFirst || secondKey != newSecond) {
+             // The prefix changed: close the range of the previous prefix.
+             hiLoTable[secondKey][firstKey] = ((row - 1) << 16) | lo;
+             lo = row;
+             firstKey = newFirst;
+             secondKey = newSecond;
+         }
+         row++;
+     }
+
+     // Close the range of the final prefix.
+     hiLoTable[secondKey][firstKey] = ((entities.size() - 1) << 16) | lo;
+
+     File cppFile = new File(targetDirectory, cppTypes.classPrefix()
+             + "NamedCharactersAccel.cpp");
+     Writer out = new OutputStreamWriter(new FileOutputStream(cppFile),
+             "utf-8");
+     try {
+         out.write(DATA_LICENSE);
+         out.write('\n');
+         out.write("#include \"" + cppTypes.classPrefix()
+                 + "NamedCharactersAccel.h\"\n");
+         out.write("\n");
+
+         // One array per second character that actually occurs.
+         for (int i = 0; i < hiLoTable.length; i++) {
+             if (!allZero(hiLoTable[i])) {
+                 out.write("static " + cppTypes.intType() + " const HILO_ACCEL_"
+                         + i + "[] = {\n");
+                 for (int j = 0; j < hiLoTable[i].length; j++) {
+                     if (j != 0) {
+                         out.write(", ");
+                     }
+                     out.write("" + hiLoTable[i][j]);
+                 }
+                 out.write("\n};\n\n");
+             }
+         }
+
+         // Use the same integer type as the row arrays above and as the
+         // declaration emitted by generateAccelH().
+         out.write("const " + cppTypes.intType() + "* const " + cppTypes.classPrefix()
+                 + "NamedCharactersAccel::HILO_ACCEL[] = {\n");
+         for (int i = 0; i < hiLoTable.length; i++) {
+             if (i != 0) {
+                 out.write(",\n");
+             }
+             if (allZero(hiLoTable[i])) {
+                 out.write(" 0");
+             } else {
+                 out.write(" HILO_ACCEL_" + i);
+             }
+         }
+         out.write("\n};\n\n");
+     } finally {
+         // close() flushes; doing it here also releases the file on failure.
+         out.close();
+     }
+ }
+
+ /**
+  * Writes <code>&lt;prefix&gt;NamedCharactersAccel.h</code>, the header that
+  * declares the HILO_ACCEL table.
+  *
+  * @param targetDirectory directory the file is created in
+  * @param cppTypes supplies the class prefix, include names and integer type
+  * @param entities not used by this method
+  * @throws IOException if the file cannot be written
+  */
+ private static void generateAccelH(File targetDirectory, CppTypes cppTypes,
+         TreeMap<String, String> entities) throws IOException {
+     File hFile = new File(targetDirectory, cppTypes.classPrefix()
+             + "NamedCharactersAccel.h");
+     Writer out = new OutputStreamWriter(new FileOutputStream(hFile),
+             "utf-8");
+     try {
+         out.write(DATA_LICENSE);
+         out.write("#ifndef " + cppTypes.classPrefix() + "NamedCharactersAccel_h\n");
+         out.write("#define " + cppTypes.classPrefix() + "NamedCharactersAccel_h\n");
+         out.write('\n');
+
+         for (String include : cppTypes.namedCharactersIncludes()) {
+             out.write("#include \"" + include + ".h\"\n");
+         }
+
+         out.write('\n');
+
+         out.write("class " + cppTypes.classPrefix() + "NamedCharactersAccel\n");
+         out.write("{\n");
+         out.write(" public:\n");
+         out.write(" static const " + cppTypes.intType()
+                 + "* const HILO_ACCEL[];\n");
+         out.write("};\n");
+
+         out.write("\n#endif // " + cppTypes.classPrefix()
+                 + "NamedCharactersAccel_h\n");
+     } finally {
+         // close() flushes; doing it here also releases the file on failure.
+         out.close();
+     }
+ }
+
+ /**
+  * Writes <code>&lt;prefix&gt;NamedCharacters.h</code>, the header declaring
+  * the character-name struct and the NamedCharacters class with its NAMES,
+  * VALUES and WINDOWS_1252 members.
+  *
+  * @param targetDirectory directory the file is created in
+  * @param cppTypes supplies the class prefix, include names and C++ type names
+  * @param entities not used by this method
+  * @throws IOException if the file cannot be written
+  */
+ private static void generateH(File targetDirectory, CppTypes cppTypes,
+         Map<String, String> entities) throws IOException {
+     File hFile = new File(targetDirectory, cppTypes.classPrefix()
+             + "NamedCharacters.h");
+     Writer out = new OutputStreamWriter(new FileOutputStream(hFile),
+             "utf-8");
+     try {
+         out.write(OUTPUT_LICENSE);
+         out.write("#ifndef " + cppTypes.classPrefix() + "NamedCharacters_h\n");
+         out.write("#define " + cppTypes.classPrefix() + "NamedCharacters_h\n");
+         out.write('\n');
+
+         for (String include : cppTypes.namedCharactersIncludes()) {
+             out.write("#include \"" + include + ".h\"\n");
+         }
+
+         // The struct describing one name: start offset, length and, in
+         // DEBUG builds only, the entry number.
+         out.write("\nstruct ");
+         out.write(cppTypes.characterNameTypeDeclaration());
+         out.write(" {\n ");
+         out.write(cppTypes.unsignedShortType());
+         out.write(" nameStart;\n ");
+         out.write(cppTypes.unsignedShortType());
+         out.write(" nameLen;\n #ifdef DEBUG\n ");
+         out.write(cppTypes.intType());
+         out.write(" n;\n #endif\n ");
+         out.write(cppTypes.intType());
+         out.write(" length() const;\n ");
+         out.write(cppTypes.charType());
+         out.write(" charAt(");
+         out.write(cppTypes.intType());
+         out.write(" index) const;\n};\n\n");
+
+         out.write("class " + cppTypes.classPrefix() + "NamedCharacters\n");
+         out.write("{\n");
+         out.write(" public:\n");
+         out.write(" static const " + cppTypes.characterNameTypeDeclaration() + " NAMES[];\n");
+         out.write(" static const " + cppTypes.charType() + " VALUES[][2];\n");
+         out.write(" static " + cppTypes.charType() + "** WINDOWS_1252;\n");
+         out.write(" static void initializeStatics();\n");
+         out.write(" static void releaseStatics();\n");
+         out.write("};\n");
+
+         out.write("\n#endif // " + cppTypes.classPrefix()
+                 + "NamedCharacters_h\n");
+     } finally {
+         // close() flushes; doing it here also releases the file on failure.
+         out.close();
+     }
+ }
+
+ /**
+  * Writes <code>&lt;prefix&gt;NamedCharactersInclude.h</code>, which contains
+  * one NAMED_CHARACTER_REFERENCE(...) macro invocation per named character,
+  * in the iteration order of entities.
+  *
+  * @param targetDirectory directory the file is created in
+  * @param cppTypes supplies the class prefix
+  * @param entities maps each character name to its one- or two-unit value
+  * @throws IOException if the file cannot be written
+  */
+ private static void generateInclude(File targetDirectory,
+         CppTypes cppTypes, Map<String, String> entities) throws IOException {
+     File includeFile = new File(targetDirectory, cppTypes.classPrefix()
+             + "NamedCharactersInclude.h");
+     Writer out = new OutputStreamWriter(new FileOutputStream(includeFile),
+             "utf-8");
+     try {
+         out.write(DATA_LICENSE);
+         out.write("/* Data generated from the table of named character references found at\n");
+         out.write(" *\n");
+         out.write(" * http://www.whatwg.org/specs/web-apps/current-work/multipage/named-character-references.html#named-character-references\n");
+         out.write(" *\n");
+         out.write(" * Files that #include this file must #define NAMED_CHARACTER_REFERENCE as a\n");
+         // The macro takes five arguments (N, CHARS, LEN, FLAG, VALUE); the
+         // emitted comment used to say "four".
+         out.write(" * macro of five parameters:\n");
+         out.write(" *\n");
+         out.write(" * 1. a unique integer N identifying the Nth [0,1,..] macro expansion in this file,\n");
+         out.write(" * 2. a comma-separated sequence of characters comprising the character name,\n");
+         out.write(" * without the first two letters or 0 if the sequence would be empty. \n");
+         out.write(" * See Tokenizer.java.\n");
+         out.write(" * 3. the length of this sequence of characters,\n");
+         out.write(" * 4. placeholder flag (0 if argument #2 is not a placeholder and 1 if it is),\n");
+         out.write(" * 5. a comma-separated sequence of char16_t literals corresponding\n");
+         out.write(" * to the code-point(s) of the named character.\n");
+         out.write(" *\n");
+         out.write(" * The macro expansion doesn't have to refer to all or any of these parameters,\n");
+         out.write(" * but common sense dictates that it should involve at least one of them.\n");
+         out.write(" */\n");
+         out.write("\n");
+         out.write("// This #define allows the NAMED_CHARACTER_REFERENCE macro to accept comma-\n");
+         out.write("// separated sequences as single macro arguments. Using commas directly would\n");
+         out.write("// split the sequence into multiple macro arguments.\n");
+         out.write("#define _ ,\n");
+         out.write("\n");
+
+         int i = 0;
+         for (Map.Entry<String, String> entity : entities.entrySet()) {
+             out.write("NAMED_CHARACTER_REFERENCE(" + i++ + ", ");
+             String name = entity.getKey();
+             writeNameInitializer(out, name, " _ ");
+             out.write(", " + (name.length() - 2) + ", ");
+             // The flag is 1 when the name has no characters after the first
+             // two, i.e. when the name initializer is the "0" placeholder.
+             out.write((name.length() == 2 ? "1" : "0") + ", ");
+             writeValueInitializer(out, entity.getValue(), " _ ");
+             out.write(")\n");
+         }
+
+         out.write("\n");
+         out.write("#undef _\n");
+     } finally {
+         // close() flushes; doing it here also releases the file on failure.
+         out.close();
+     }
+ }
+
+ /**
+  * Writes the initializer for a character name: a comment showing the first
+  * two characters, followed by either "0" (when nothing follows those two
+  * characters) or the remaining characters as quoted char literals joined
+  * by the separator.
+  *
+  * @param out destination of the generated text
+  * @param name the character name; must have at least two characters
+  * @param separator text written between consecutive char literals
+  * @throws IOException if writing fails
+  */
+ private static void writeNameInitializer(Writer out,
+         String name, String separator)
+         throws IOException {
+     out.write("/* " + name.charAt(0) + " " + name.charAt(1) + " */ ");
+     int length = name.length();
+     if (length == 2) {
+         out.write("0");
+         return;
+     }
+     for (int pos = 2; pos < length; pos++) {
+         if (pos > 2) {
+             out.write(separator);
+         }
+         out.write("'" + name.charAt(pos) + "'");
+     }
+ }
+
+ /**
+  * Writes the two-element initializer for a character value: the first
+  * UTF-16 code unit as a hex literal, the separator, and then either the
+  * second code unit as a hex literal or "0" when the value has length one.
+  *
+  * @param out destination of the generated text
+  * @param value the character value; must not be empty
+  * @param separator text written between the two elements
+  * @throws IOException if writing fails
+  */
+ private static void writeValueInitializer(Writer out,
+         String value, String separator)
+         throws IOException {
+     boolean singleUnit = value.length() == 1;
+     out.write(toHexString(value.charAt(0)));
+     out.write(separator);
+     if (singleUnit) {
+         out.write("0");
+         return;
+     }
+     out.write(toHexString(value.charAt(1)));
+ }
+
+ /**
+  * Writes a #define of NAMED_CHARACTER_REFERENCE with the given expansion,
+  * an #include of the given file, and the matching #undef.
+  *
+  * @param out destination of the generated text
+  * @param expansion the macro body, placed on the line after the #define
+  * @param includeFile name of the file to #include
+  * @throws IOException if writing fails
+  */
+ private static void defineMacroAndInclude(Writer out, String expansion,
+         String includeFile) throws IOException {
+     StringBuilder define = new StringBuilder(
+             "#define NAMED_CHARACTER_REFERENCE(N, CHARS, LEN, FLAG, VALUE) \\\n");
+     define.append(expansion).append("\n");
+     out.write(define.toString());
+
+     StringBuilder include = new StringBuilder("#include \"");
+     include.append(includeFile).append("\"\n");
+     out.write(include.toString());
+
+     out.write("#undef NAMED_CHARACTER_REFERENCE\n");
+ }
+
+ private static void defineMacroAndInclude(Writer out, String expansion,
+ String debugExpansion, String includeFile) throws IOException {
+ out.write("#ifdef DEBUG\n");
+ out.write(" #define NAMED_CHARACTER_REFERENCE(N, CHARS, LEN, FLAG, VALUE) \\\n"
+ + debugExpansion + "\n");
+ out.write("#else\n");
+ out.write(" #define NAMED_CHARACTER_REFERENCE(N, CHARS, LEN, FLAG, VALUE) \\\n"
+ + expansion + "\n");
+ out.write("#endif\n");
+ out.write("#include \"" + includeFile + "\"\n");
+ out.write("#undef NAMED_CHARACTER_REFERENCE\n");
+ }
+
+ /**
+  * Writes the out-of-class definition line of a static member of the
+  * generated NamedCharacters class: the type, the qualified name and ";".
+  *
+  * @param out destination of the generated text
+  * @param cppTypes supplies the class prefix
+  * @param type the C++ type of the member
+  * @param name the member name
+  * @throws IOException if writing fails
+  */
+ private static void writeStaticMemberDeclaration(Writer out,
+         CppTypes cppTypes, String type, String name) throws IOException {
+     String qualifiedName = cppTypes.classPrefix() + "NamedCharacters::" + name;
+     out.write(type + " " + qualifiedName + ";\n");
+ }
+
+ /**
+  * Maps an ASCII letter to a table index: 'A'..'Z' become 0..25 and
+  * 'a'..'z' become 26..51.
+  *
+  * @param c the character to map
+  * @return the index of c
+  * @throws IllegalArgumentException if c is not an ASCII letter
+  */
+ private static int charToIndex(char c) {
+     if ('A' <= c && c <= 'Z') {
+         return c - 'A';
+     }
+     if ('a' <= c && c <= 'z') {
+         return 26 + (c - 'a');
+     }
+     throw new IllegalArgumentException("Bad char in named character name: "
+             + c);
+ }
+
+ /**
+  * Tells whether every element of the array is zero.
+  *
+  * @param arr the array to inspect
+  * @return true if no element differs from zero (also for an empty array)
+  */
+ private static boolean allZero(int[] arr) {
+     for (int element : arr) {
+         if (element != 0) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ private static void generateCpp(File targetDirectory, CppTypes cppTypes,
+ Map<String, String> entities) throws IOException {
+ String includeFile = cppTypes.classPrefix()
+ + "NamedCharactersInclude.h";
+ File cppFile = new File(targetDirectory, cppTypes.classPrefix()
+ + "NamedCharacters.cpp");
+ Writer out = new OutputStreamWriter(new FileOutputStream(cppFile),
+ "utf-8");
+
+ out.write(OUTPUT_LICENSE);
+ out.write("#define " + cppTypes.classPrefix()
+ + "NamedCharacters_cpp_\n");
+
+ String[] includes = cppTypes.namedCharactersIncludes();
+ for (int i = 0; i < includes.length; i++) {
+ String include = includes[i];
+ out.write("#include \"" + include + ".h\"\n");
+ }
+
+ out.write('\n');
+ out.write("#include \"" + cppTypes.classPrefix()
+ + "NamedCharacters.h\"\n");
+ out.write("\n");
+
+ out.write("const " + cppTypes.charType() + " " + cppTypes.classPrefix()
+ + "NamedCharacters::VALUES[][2] = {\n");
+ defineMacroAndInclude(out, "{ VALUE },", includeFile);
+ // The useless terminator entry makes the above macro simpler with
+ // compilers that whine about a comma after the last item
+ out.write("{0, 0} };\n\n");
+
+ String staticMemberType = cppTypes.charType() + "**";
+ writeStaticMemberDeclaration(out, cppTypes, staticMemberType,
+ "WINDOWS_1252");
+
+ out.write("static " + cppTypes.charType()
+ + " const WINDOWS_1252_DATA[] = {\n");
+ out.write(" 0x20AC,\n");
+ out.write(" 0x0081,\n");
+ out.write(" 0x201A,\n");
+ out.write(" 0x0192,\n");
+ out.write(" 0x201E,\n");
+ out.write(" 0x2026,\n");
+ out.write(" 0x2020,\n");
+ out.write(" 0x2021,\n");
+ out.write(" 0x02C6,\n");
+ out.write(" 0x2030,\n");
+ out.write(" 0x0160,\n");
+ out.write(" 0x2039,\n");
+ out.write(" 0x0152,\n");
+ out.write(" 0x008D,\n");
+ out.write(" 0x017D,\n");
+ out.write(" 0x008F,\n");
+ out.write(" 0x0090,\n");
+ out.write(" 0x2018,\n");
+ out.write(" 0x2019,\n");
+ out.write(" 0x201C,\n");
+ out.write(" 0x201D,\n");
+ out.write(" 0x2022,\n");
+ out.write(" 0x2013,\n");
+ out.write(" 0x2014,\n");
+ out.write(" 0x02DC,\n");
+ out.write(" 0x2122,\n");
+ out.write(" 0x0161,\n");
+ out.write(" 0x203A,\n");
+ out.write(" 0x0153,\n");
+ out.write(" 0x009D,\n");
+ out.write(" 0x017E,\n");
+ out.write(" 0x0178\n");
+ out.write("};\n\n");
+
+ out.write("/**\n");
+ out.write(" * To avoid having lots of pointers in the |charData| array, below,\n");
+ out.write(" * which would cause us to have to do lots of relocations at library\n");
+ out.write(" * load time, store all the string data for the names in one big array.\n");
+ out.write(" * Then use tricks with enums to help us build an array that contains\n");
+ out.write(" * the positions of each within the big arrays.\n");
+ out.write(" */\n\n");
+
+ out.write("static const " + cppTypes.byteType() + " ALL_NAMES[] = {\n");
+
+ defineMacroAndInclude(out, "CHARS ,", includeFile);
+
+ out.write("};\n\n");
+
+ out.write("enum NamePositions {\n");
+ out.write(" DUMMY_INITIAL_NAME_POSITION = 0,\n");
+
+ out.write("/* enums don't take up space, so generate _START and _END */\n");
+ defineMacroAndInclude(out,
+ "NAME_##N##_DUMMY, /* automatically one higher than previous */ \\\n"
+ + "NAME_##N##_START = NAME_##N##_DUMMY - 1, \\\n"
+ + "NAME_##N##_END = NAME_##N##_START + LEN + FLAG,",
+ includeFile);
+
+ out.write(" DUMMY_FINAL_NAME_VALUE\n");
+ out.write("};\n\n");
+
+ String arrayLengthMacro = cppTypes.arrayLengthMacro();
+ String staticAssert = cppTypes.staticAssert();
+ if (staticAssert != null && arrayLengthMacro != null) {
+ out.write("/* check that the start positions will fit in 16 bits */\n");
+ out.write(staticAssert + "(" + arrayLengthMacro
+ + "(ALL_NAMES) < 0x10000);\n\n");
+ }
+
+ out.write("const " + cppTypes.characterNameTypeDeclaration() + " " + cppTypes.classPrefix()
+ + "NamedCharacters::NAMES[] = {\n");
+ defineMacroAndInclude(out, "{ NAME_##N##_START, LEN, },", "{ NAME_##N##_START, LEN, N },", includeFile);
+ out.write("};\n\n");
+
+ out.write(cppTypes.intType());
+ out.write("\n");
+ out.write(cppTypes.characterNameTypeDeclaration());
+ out.write("::length() const\n{\n return nameLen;\n}\n\n");
+ out.write(cppTypes.charType());
+ out.write("\n");
+ out.write(cppTypes.characterNameTypeDeclaration());
+ out.write("::charAt(");
+ out.write("int32_t");
+ out.write(" index) const\n{\n return static_cast<");
+ out.write(cppTypes.charType());
+ out.write("> (ALL_NAMES[nameStart + index]);\n}\n\n");
+
+ out.write("void\n");
+ out.write(cppTypes.classPrefix()
+ + "NamedCharacters::initializeStatics()\n");
+ out.write("{\n");
+ out.write(" WINDOWS_1252 = new " + cppTypes.charType() + "*[32];\n");
+ out.write(" for (" + cppTypes.intType() + " i = 0; i < 32; ++i) {\n");
+ out.write(" WINDOWS_1252[i] = (" + cppTypes.charType()
+ + "*)&(WINDOWS_1252_DATA[i]);\n");
+ out.write(" }\n");
+ out.write("}\n");
+ out.write("\n");
+
+ out.write("void\n");
+ out.write(cppTypes.classPrefix()
+ + "NamedCharacters::releaseStatics()\n");
+ out.write("{\n");
+ out.write(" delete[] WINDOWS_1252;\n");
+ out.write("}\n");
+ out.flush();
+ out.close();
+ }
+}
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/DuplicatingFallThroughRemover.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/DuplicatingFallThroughRemover.java
new file mode 100644
index 000000000..b88107361
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/DuplicatingFallThroughRemover.java
@@ -0,0 +1,79 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser Rust Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2012
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package nu.validator.htmlparser.rusttranslate;
+
+import japa.parser.ast.stmt.BreakStmt;
+import japa.parser.ast.stmt.Statement;
+import japa.parser.ast.stmt.SwitchEntryStmt;
+import japa.parser.ast.stmt.SwitchStmt;
+import japa.parser.ast.visitor.VoidVisitorAdapter;
+
+import java.util.LinkedList;
+import java.util.List;
+
+public class DuplicatingFallThroughRemover extends VoidVisitorAdapter<Object> {
+
+ private static final SwitchBreakAnalyzerVisitor ANALYZER_VISITOR = new SwitchBreakAnalyzerVisitor();
+
+ @Override public void visit(SwitchStmt sw, Object arg) {
+ if ("state".equals(sw.getSelector().toString())) {
+ super.visit(sw, arg);
+ return;
+ }
+
+ List<Statement> tail = new LinkedList<Statement>();
+ tail.add(new BreakStmt());
+
+ List<SwitchEntryStmt> entries = sw.getEntries();
+ for (int i = entries.size() - 1; i >= 0; i--) {
+ SwitchEntryStmt stmt = entries.get(i);
+ List<Statement> list = stmt.getStmts();
+ if (list != null) {
+ if (!(list.size() > 0
+ && list.get(list.size() - 1).accept(ANALYZER_VISITOR, true))) {
+ list.addAll(tail);
+ }
+ tail = list;
+ for (Statement statement : list) {
+ statement.accept(this, arg);
+ }
+ }
+ }
+ }
+
+}
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/JavaVisitor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/JavaVisitor.java
new file mode 100644
index 000000000..97ded525f
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/JavaVisitor.java
@@ -0,0 +1,1349 @@
+/*
+ * Copyright (C) 2007 Júlio Vilmar Gesser.
+ * Copyright (C) 2012 Mozilla Foundation
+ *
+ * This file is part of Java 1.5 parser and Abstract Syntax Tree.
+ *
+ * Java 1.5 parser and Abstract Syntax Tree is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Java 1.5 parser and Abstract Syntax Tree is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Java 1.5 parser and Abstract Syntax Tree. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Created on 05/10/2006
+ */
+package nu.validator.htmlparser.rusttranslate;
+
+import japa.parser.ast.BlockComment;
+import japa.parser.ast.CompilationUnit;
+import japa.parser.ast.ImportDeclaration;
+import japa.parser.ast.LineComment;
+import japa.parser.ast.PackageDeclaration;
+import japa.parser.ast.TypeParameter;
+import japa.parser.ast.body.AnnotationDeclaration;
+import japa.parser.ast.body.AnnotationMemberDeclaration;
+import japa.parser.ast.body.BodyDeclaration;
+import japa.parser.ast.body.ClassOrInterfaceDeclaration;
+import japa.parser.ast.body.ConstructorDeclaration;
+import japa.parser.ast.body.EmptyMemberDeclaration;
+import japa.parser.ast.body.EmptyTypeDeclaration;
+import japa.parser.ast.body.EnumConstantDeclaration;
+import japa.parser.ast.body.EnumDeclaration;
+import japa.parser.ast.body.FieldDeclaration;
+import japa.parser.ast.body.InitializerDeclaration;
+import japa.parser.ast.body.JavadocComment;
+import japa.parser.ast.body.MethodDeclaration;
+import japa.parser.ast.body.ModifierSet;
+import japa.parser.ast.body.Parameter;
+import japa.parser.ast.body.TypeDeclaration;
+import japa.parser.ast.body.VariableDeclarator;
+import japa.parser.ast.body.VariableDeclaratorId;
+import japa.parser.ast.expr.AnnotationExpr;
+import japa.parser.ast.expr.ArrayAccessExpr;
+import japa.parser.ast.expr.ArrayCreationExpr;
+import japa.parser.ast.expr.ArrayInitializerExpr;
+import japa.parser.ast.expr.AssignExpr;
+import japa.parser.ast.expr.BinaryExpr;
+import japa.parser.ast.expr.BooleanLiteralExpr;
+import japa.parser.ast.expr.CastExpr;
+import japa.parser.ast.expr.CharLiteralExpr;
+import japa.parser.ast.expr.ClassExpr;
+import japa.parser.ast.expr.ConditionalExpr;
+import japa.parser.ast.expr.DoubleLiteralExpr;
+import japa.parser.ast.expr.EnclosedExpr;
+import japa.parser.ast.expr.Expression;
+import japa.parser.ast.expr.FieldAccessExpr;
+import japa.parser.ast.expr.InstanceOfExpr;
+import japa.parser.ast.expr.IntegerLiteralExpr;
+import japa.parser.ast.expr.IntegerLiteralMinValueExpr;
+import japa.parser.ast.expr.LongLiteralExpr;
+import japa.parser.ast.expr.LongLiteralMinValueExpr;
+import japa.parser.ast.expr.MarkerAnnotationExpr;
+import japa.parser.ast.expr.MemberValuePair;
+import japa.parser.ast.expr.MethodCallExpr;
+import japa.parser.ast.expr.NameExpr;
+import japa.parser.ast.expr.NormalAnnotationExpr;
+import japa.parser.ast.expr.NullLiteralExpr;
+import japa.parser.ast.expr.ObjectCreationExpr;
+import japa.parser.ast.expr.QualifiedNameExpr;
+import japa.parser.ast.expr.SingleMemberAnnotationExpr;
+import japa.parser.ast.expr.StringLiteralExpr;
+import japa.parser.ast.expr.SuperExpr;
+import japa.parser.ast.expr.ThisExpr;
+import japa.parser.ast.expr.UnaryExpr;
+import japa.parser.ast.expr.VariableDeclarationExpr;
+import japa.parser.ast.stmt.AssertStmt;
+import japa.parser.ast.stmt.BlockStmt;
+import japa.parser.ast.stmt.BreakStmt;
+import japa.parser.ast.stmt.CatchClause;
+import japa.parser.ast.stmt.ContinueStmt;
+import japa.parser.ast.stmt.DoStmt;
+import japa.parser.ast.stmt.EmptyStmt;
+import japa.parser.ast.stmt.ExplicitConstructorInvocationStmt;
+import japa.parser.ast.stmt.ExpressionStmt;
+import japa.parser.ast.stmt.ForStmt;
+import japa.parser.ast.stmt.ForeachStmt;
+import japa.parser.ast.stmt.IfStmt;
+import japa.parser.ast.stmt.LabeledStmt;
+import japa.parser.ast.stmt.ReturnStmt;
+import japa.parser.ast.stmt.Statement;
+import japa.parser.ast.stmt.SwitchEntryStmt;
+import japa.parser.ast.stmt.SwitchStmt;
+import japa.parser.ast.stmt.SynchronizedStmt;
+import japa.parser.ast.stmt.ThrowStmt;
+import japa.parser.ast.stmt.TryStmt;
+import japa.parser.ast.stmt.TypeDeclarationStmt;
+import japa.parser.ast.stmt.WhileStmt;
+import japa.parser.ast.type.ClassOrInterfaceType;
+import japa.parser.ast.type.PrimitiveType;
+import japa.parser.ast.type.ReferenceType;
+import japa.parser.ast.type.Type;
+import japa.parser.ast.type.VoidType;
+import japa.parser.ast.type.WildcardType;
+import japa.parser.ast.visitor.VoidVisitor;
+
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * @author Julio Vilmar Gesser
+ * @author Henri Sivonen
+ */
+
+public final class JavaVisitor implements VoidVisitor<Object> {
+
+ private static class SourcePrinter {
+
+ private int level = 0;
+
+ private boolean indented = false;
+
+ private final StringBuilder buf = new StringBuilder();
+
+ public void indent() {
+ level++;
+ }
+
+ public void unindent() {
+ level--;
+ }
+
+ private void makeIndent() {
+ for (int i = 0; i < level; i++) {
+ buf.append(" ");
+ }
+ }
+
+ public void print(String arg) {
+ if (!indented) {
+ makeIndent();
+ indented = true;
+ }
+ buf.append(arg);
+ }
+
+ public void printLn(String arg) {
+ print(arg);
+ printLn();
+ }
+
+ public void printLn() {
+ buf.append("\n");
+ indented = false;
+ }
+
+ public String getSource() {
+ return buf.toString();
+ }
+
+ @Override
+ public String toString() {
+ return getSource();
+ }
+ }
+
+ private final SourcePrinter printer = new SourcePrinter();
+
+ /**
+  * Returns the source text printed by this visitor so far.
+  */
+ public String getSource() {
+ return printer.getSource();
+ }
+
+ /**
+  * Prints the keyword of every modifier set in the given bit set, each
+  * followed by a space, in this fixed order: private, protected, public,
+  * abstract, static, final, native, strictfp, synchronized, transient,
+  * volatile. Prints nothing when no modifier is set.
+  */
+ private void printModifiers(int modifiers) {
+     StringBuilder keywords = new StringBuilder();
+     if (ModifierSet.isPrivate(modifiers)) {
+         keywords.append("private ");
+     }
+     if (ModifierSet.isProtected(modifiers)) {
+         keywords.append("protected ");
+     }
+     if (ModifierSet.isPublic(modifiers)) {
+         keywords.append("public ");
+     }
+     if (ModifierSet.isAbstract(modifiers)) {
+         keywords.append("abstract ");
+     }
+     if (ModifierSet.isStatic(modifiers)) {
+         keywords.append("static ");
+     }
+     if (ModifierSet.isFinal(modifiers)) {
+         keywords.append("final ");
+     }
+     if (ModifierSet.isNative(modifiers)) {
+         keywords.append("native ");
+     }
+     if (ModifierSet.isStrictfp(modifiers)) {
+         keywords.append("strictfp ");
+     }
+     if (ModifierSet.isSynchronized(modifiers)) {
+         keywords.append("synchronized ");
+     }
+     if (ModifierSet.isTransient(modifiers)) {
+         keywords.append("transient ");
+     }
+     if (ModifierSet.isVolatile(modifiers)) {
+         keywords.append("volatile ");
+     }
+     // Only touch the printer when there is something to print, so that an
+     // empty modifier set does not start (and indent) a line.
+     if (keywords.length() > 0) {
+         printer.print(keywords.toString());
+     }
+ }
+
+ /**
+  * Prints every member declaration, with a line break before and after
+  * each one.
+  */
+ private void printMembers(List<BodyDeclaration> members, Object arg) {
+     Iterator<BodyDeclaration> remaining = members.iterator();
+     while (remaining.hasNext()) {
+         printer.printLn();
+         remaining.next().accept(this, arg);
+         printer.printLn();
+     }
+ }
+
+ /**
+  * Prints each annotation followed by a line break. Does nothing when the
+  * list is null.
+  */
+ private void printMemberAnnotations(List<AnnotationExpr> annotations, Object arg) {
+     if (annotations == null) {
+         return;
+     }
+     for (AnnotationExpr annotation : annotations) {
+         annotation.accept(this, arg);
+         printer.printLn();
+     }
+ }
+
+ /**
+  * Prints each annotation followed by a single space. Does nothing when the
+  * list is null.
+  */
+ private void printAnnotations(List<AnnotationExpr> annotations, Object arg) {
+     if (annotations == null) {
+         return;
+     }
+     for (AnnotationExpr annotation : annotations) {
+         annotation.accept(this, arg);
+         printer.print(" ");
+     }
+ }
+
+ /**
+  * Prints a comma-separated type argument list wrapped in angle brackets.
+  * A {@code null} list prints nothing.
+  */
+ private void printTypeArgs(List<Type> args, Object arg) {
+     if (args == null) {
+         return;
+     }
+     printer.print("<");
+     boolean first = true;
+     for (Type typeArg : args) {
+         if (!first) {
+             printer.print(", ");
+         }
+         first = false;
+         typeArg.accept(this, arg);
+     }
+     printer.print(">");
+ }
+
+ /**
+  * Prints a comma-separated type parameter list wrapped in angle brackets.
+  * A {@code null} list prints nothing.
+  */
+ private void printTypeParameters(List<TypeParameter> args, Object arg) {
+     if (args == null) {
+         return;
+     }
+     printer.print("<");
+     boolean first = true;
+     for (TypeParameter param : args) {
+         if (!first) {
+             printer.print(", ");
+         }
+         first = false;
+         param.accept(this, arg);
+     }
+     printer.print(">");
+ }
+
+ /**
+  * Prints a parenthesized, comma-separated argument list. The parentheses
+  * are printed even when {@code args} is {@code null}.
+  */
+ private void printArguments(List<Expression> args, Object arg) {
+     printer.print("(");
+     if (args != null) {
+         boolean first = true;
+         for (Expression argument : args) {
+             if (!first) {
+                 printer.print(", ");
+             }
+             first = false;
+             argument.accept(this, arg);
+         }
+     }
+     printer.print(")");
+ }
+
+ /** Visits the Javadoc comment if there is one; otherwise does nothing. */
+ private void printJavadoc(JavadocComment javadoc, Object arg) {
+     if (javadoc == null) {
+         return;
+     }
+     javadoc.accept(this, arg);
+ }
+
+ public void visit(CompilationUnit n, Object arg) {
+ if (n.getPackage() != null) {
+ n.getPackage().accept(this, arg);
+ }
+ if (n.getImports() != null) {
+ for (ImportDeclaration i : n.getImports()) {
+ i.accept(this, arg);
+ }
+ printer.printLn();
+ }
+ if (n.getTypes() != null) {
+ for (Iterator<TypeDeclaration> i = n.getTypes().iterator(); i.hasNext();) {
+ i.next().accept(this, arg);
+ printer.printLn();
+ if (i.hasNext()) {
+ printer.printLn();
+ }
+ }
+ }
+ }
+
+ /**
+  * Prints the package annotations, then {@code package <name>;} followed
+  * by an empty line.
+  */
+ public void visit(PackageDeclaration n, Object arg) {
+     printAnnotations(n.getAnnotations(), arg);
+     printer.print("package ");
+     n.getName().accept(this, arg);
+     printer.print(";");
+     printer.printLn();
+     printer.printLn();
+ }
+
+ /** Prints the simple name held by the expression. */
+ public void visit(NameExpr n, Object arg) {
+     final String name = n.getName();
+     printer.print(name);
+ }
+
+ /** Prints the qualifier, a dot, and then the name. */
+ public void visit(QualifiedNameExpr n, Object arg) {
+     n.getQualifier().accept(this, arg);
+     printer.print(".");
+     final String name = n.getName();
+     printer.print(name);
+ }
+
+ /**
+  * Prints one import line, including the {@code static} keyword and the
+  * trailing {@code .*} when the declaration has them.
+  */
+ public void visit(ImportDeclaration n, Object arg) {
+     printer.print(n.isStatic() ? "import static " : "import ");
+     n.getName().accept(this, arg);
+     printer.printLn(n.isAsterisk() ? ".*;" : ";");
+ }
+
+ public void visit(ClassOrInterfaceDeclaration n, Object arg) {
+ printJavadoc(n.getJavaDoc(), arg);
+ printMemberAnnotations(n.getAnnotations(), arg);
+ printModifiers(n.getModifiers());
+
+ if (n.isInterface()) {
+ printer.print("interface ");
+ } else {
+ printer.print("class ");
+ }
+
+ printer.print(n.getName());
+
+ printTypeParameters(n.getTypeParameters(), arg);
+
+ if (n.getExtends() != null) {
+ printer.print(" extends ");
+ for (Iterator<ClassOrInterfaceType> i = n.getExtends().iterator(); i.hasNext();) {
+ ClassOrInterfaceType c = i.next();
+ c.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+
+ if (n.getImplements() != null) {
+ printer.print(" implements ");
+ for (Iterator<ClassOrInterfaceType> i = n.getImplements().iterator(); i.hasNext();) {
+ ClassOrInterfaceType c = i.next();
+ c.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+
+ printer.printLn(" {");
+ printer.indent();
+ if (n.getMembers() != null) {
+ printMembers(n.getMembers(), arg);
+ }
+ printer.unindent();
+ printer.print("}");
+ }
+
+ /** Prints the optional Javadoc followed by a lone semicolon. */
+ public void visit(EmptyTypeDeclaration n, Object arg) {
+     this.printJavadoc(n.getJavaDoc(), arg);
+     printer.print(";");
+ }
+
+ /**
+  * Prints the comment content between the Javadoc opening and closing
+  * delimiters and ends the line.
+  */
+ public void visit(JavadocComment n, Object arg) {
+     printer.print("/**");
+     printer.print(n.getContent());
+     printer.print("*/");
+     printer.printLn();
+ }
+
+ /**
+  * Prints the type name, prefixed by its scope and a dot when a scope is
+  * present, and followed by its type arguments.
+  */
+ public void visit(ClassOrInterfaceType n, Object arg) {
+     final boolean scoped = n.getScope() != null;
+     if (scoped) {
+         n.getScope().accept(this, arg);
+         printer.print(".");
+     }
+     printer.print(n.getName());
+     printTypeArgs(n.getTypeArgs(), arg);
+ }
+
+ public void visit(TypeParameter n, Object arg) {
+ printer.print(n.getName());
+ if (n.getTypeBound() != null) {
+ printer.print(" extends ");
+ for (Iterator<ClassOrInterfaceType> i = n.getTypeBound().iterator(); i.hasNext();) {
+ ClassOrInterfaceType c = i.next();
+ c.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(" & ");
+ }
+ }
+ }
+ }
+
+ public void visit(PrimitiveType n, Object arg) {
+ switch (n.getType()) {
+ case Boolean:
+ printer.print("boolean");
+ break;
+ case Byte:
+ printer.print("byte");
+ break;
+ case Char:
+ printer.print("char");
+ break;
+ case Double:
+ printer.print("double");
+ break;
+ case Float:
+ printer.print("float");
+ break;
+ case Int:
+ printer.print("int");
+ break;
+ case Long:
+ printer.print("long");
+ break;
+ case Short:
+ printer.print("short");
+ break;
+ }
+ }
+
+ public void visit(ReferenceType n, Object arg) {
+ n.getType().accept(this, arg);
+ for (int i = 0; i < n.getArrayCount(); i++) {
+ printer.print("[]");
+ }
+ }
+
+ /**
+  * Prints {@code ?} followed by the {@code extends} bound and/or the
+  * {@code super} bound, whichever are present.
+  */
+ public void visit(WildcardType n, Object arg) {
+     printer.print("?");
+     final boolean hasUpperBound = n.getExtends() != null;
+     if (hasUpperBound) {
+         printer.print(" extends ");
+         n.getExtends().accept(this, arg);
+     }
+     final boolean hasLowerBound = n.getSuper() != null;
+     if (hasLowerBound) {
+         printer.print(" super ");
+         n.getSuper().accept(this, arg);
+     }
+ }
+
+ public void visit(FieldDeclaration n, Object arg) {
+ printJavadoc(n.getJavaDoc(), arg);
+ printMemberAnnotations(n.getAnnotations(), arg);
+ printModifiers(n.getModifiers());
+ n.getType().accept(this, arg);
+
+ printer.print(" ");
+ for (Iterator<VariableDeclarator> i = n.getVariables().iterator(); i.hasNext();) {
+ VariableDeclarator var = i.next();
+ var.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+
+ printer.print(";");
+ }
+
+ /** Prints the declarator id and, if present, {@code " = "} plus the initializer. */
+ public void visit(VariableDeclarator n, Object arg) {
+     n.getId().accept(this, arg);
+     if (n.getInit() == null) {
+         return;
+     }
+     printer.print(" = ");
+     n.getInit().accept(this, arg);
+ }
+
+ public void visit(VariableDeclaratorId n, Object arg) {
+ printer.print(n.getName());
+ for (int i = 0; i < n.getArrayCount(); i++) {
+ printer.print("[]");
+ }
+ }
+
+ public void visit(ArrayInitializerExpr n, Object arg) {
+ printer.print("{");
+ if (n.getValues() != null) {
+ printer.print(" ");
+ for (Iterator<Expression> i = n.getValues().iterator(); i.hasNext();) {
+ Expression expr = i.next();
+ expr.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ printer.print(" ");
+ }
+ printer.print("}");
+ }
+
+ /** Prints the {@code void} keyword. */
+ public void visit(VoidType n, Object arg) {
+     final String keyword = "void";
+     printer.print(keyword);
+ }
+
+ /** Prints an array access as {@code name[index]}. */
+ public void visit(ArrayAccessExpr n, Object arg) {
+     // array expression first, then the bracketed index
+     n.getName().accept(this, arg);
+     printer.print("[");
+     n.getIndex().accept(this, arg);
+     printer.print("]");
+ }
+
+ /**
+  * Prints an array creation expression. When dimension expressions are
+  * present they are printed in brackets, followed by the empty
+  * {@code []} pairs; otherwise the empty pairs are followed by a space and
+  * the initializer.
+  */
+ public void visit(ArrayCreationExpr n, Object arg) {
+     printer.print("new ");
+     n.getType().accept(this, arg);
+
+     final boolean sized = n.getDimensions() != null;
+     if (sized) {
+         for (Expression dimension : n.getDimensions()) {
+             printer.print("[");
+             dimension.accept(this, arg);
+             printer.print("]");
+         }
+     }
+     // Both forms print the unsized dimensions at this point.
+     for (int k = 0; k < n.getArrayCount(); k++) {
+         printer.print("[]");
+     }
+     if (!sized) {
+         printer.print(" ");
+         n.getInitializer().accept(this, arg);
+     }
+ }
+
+ public void visit(AssignExpr n, Object arg) {
+ n.getTarget().accept(this, arg);
+ printer.print(" ");
+ switch (n.getOperator()) {
+ case assign:
+ printer.print("=");
+ break;
+ case and:
+ printer.print("&=");
+ break;
+ case or:
+ printer.print("|=");
+ break;
+ case xor:
+ printer.print("^=");
+ break;
+ case plus:
+ printer.print("+=");
+ break;
+ case minus:
+ printer.print("-=");
+ break;
+ case rem:
+ printer.print("%=");
+ break;
+ case slash:
+ printer.print("/=");
+ break;
+ case star:
+ printer.print("*=");
+ break;
+ case lShift:
+ printer.print("<<=");
+ break;
+ case rSignedShift:
+ printer.print(">>=");
+ break;
+ case rUnsignedShift:
+ printer.print(">>>=");
+ break;
+ }
+ printer.print(" ");
+ n.getValue().accept(this, arg);
+ }
+
+ public void visit(BinaryExpr n, Object arg) {
+ n.getLeft().accept(this, arg);
+ printer.print(" ");
+ switch (n.getOperator()) {
+ case or:
+ printer.print("||");
+ break;
+ case and:
+ printer.print("&&");
+ break;
+ case binOr:
+ printer.print("|");
+ break;
+ case binAnd:
+ printer.print("&");
+ break;
+ case xor:
+ printer.print("^");
+ break;
+ case equals:
+ printer.print("==");
+ break;
+ case notEquals:
+ printer.print("!=");
+ break;
+ case less:
+ printer.print("<");
+ break;
+ case greater:
+ printer.print(">");
+ break;
+ case lessEquals:
+ printer.print("<=");
+ break;
+ case greaterEquals:
+ printer.print(">=");
+ break;
+ case lShift:
+ printer.print("<<");
+ break;
+ case rSignedShift:
+ printer.print(">>");
+ break;
+ case rUnsignedShift:
+ printer.print(">>>");
+ break;
+ case plus:
+ printer.print("+");
+ break;
+ case minus:
+ printer.print("-");
+ break;
+ case times:
+ printer.print("*");
+ break;
+ case divide:
+ printer.print("/");
+ break;
+ case remainder:
+ printer.print("%");
+ break;
+ }
+ printer.print(" ");
+ n.getRight().accept(this, arg);
+ }
+
+ /** Prints a cast as {@code (Type) expr}. */
+ public void visit(CastExpr n, Object arg) {
+     // parenthesized target type, one space, then the operand
+     printer.print("(");
+     n.getType().accept(this, arg);
+     printer.print(") ");
+     n.getExpr().accept(this, arg);
+ }
+
+ /** Prints a class literal: the type followed by {@code .class}. */
+ public void visit(ClassExpr n, Object arg) {
+     n.getType().accept(this, arg);
+     final String suffix = ".class";
+     printer.print(suffix);
+ }
+
+ /** Prints a conditional expression as {@code cond ? then : else}. */
+ public void visit(ConditionalExpr n, Object arg) {
+     // condition
+     n.getCondition().accept(this, arg);
+     printer.print(" ? ");
+     // value when true
+     n.getThenExpr().accept(this, arg);
+     printer.print(" : ");
+     // value when false
+     n.getElseExpr().accept(this, arg);
+ }
+
+ /** Prints the inner expression wrapped in parentheses. */
+ public void visit(EnclosedExpr n, Object arg) {
+     final String open = "(";
+     final String close = ")";
+     printer.print(open);
+     n.getInner().accept(this, arg);
+     printer.print(close);
+ }
+
+ /** Prints a field access as {@code scope.field}. */
+ public void visit(FieldAccessExpr n, Object arg) {
+     n.getScope().accept(this, arg);
+     printer.print(".");
+     final String field = n.getField();
+     printer.print(field);
+ }
+
+ /** Prints a type test as {@code expr instanceof Type}. */
+ public void visit(InstanceOfExpr n, Object arg) {
+     // tested expression, keyword, then the type
+     n.getExpr().accept(this, arg);
+     final String keyword = " instanceof ";
+     printer.print(keyword);
+     n.getType().accept(this, arg);
+ }
+
+ /**
+  * Prints a character literal in single quotes, based on the first
+  * character of the stored value:
+  * <ul>
+  * <li>backspace, tab, newline, form feed, carriage return and single
+  * quote are printed as their two-character escapes;</li>
+  * <li>when the value starts with a backslash, the whole stored value is
+  * printed unchanged;</li>
+  * <li>characters outside the range {@code ' '}..{@code '~'} are printed
+  * as a four-digit {@code \}{@code uXXXX} escape with lowercase hex;</li>
+  * <li>anything else is printed as is.</li>
+  * </ul>
+  */
+ public void visit(CharLiteralExpr n, Object arg) {
+     printer.print("'");
+     final char c = n.getValue().charAt(0);
+     final String text;
+     if (c == '\b') {
+         text = "\\b";
+     } else if (c == '\t') {
+         text = "\\t";
+     } else if (c == '\n') {
+         text = "\\n";
+     } else if (c == '\f') {
+         text = "\\f";
+     } else if (c == '\r') {
+         text = "\\r";
+     } else if (c == '\'') {
+         text = "\\'";
+     } else if (c == '\\') {
+         text = n.getValue();
+     } else if (c < ' ' || c > '~') {
+         // A char yields one to four hex digits; left-pad with zeros to four.
+         final String hex = Integer.toHexString(c);
+         text = "\\u" + "0000".substring(hex.length()) + hex;
+     } else {
+         text = "" + c;
+     }
+     printer.print(text);
+     printer.print("'");
+ }
+
+ /** Prints the stored text of the floating-point literal unchanged. */
+ public void visit(DoubleLiteralExpr n, Object arg) {
+     final String literal = n.getValue();
+     printer.print(literal);
+ }
+
+ /** Prints the stored text of the integer literal unchanged. */
+ public void visit(IntegerLiteralExpr n, Object arg) {
+     final String literal = n.getValue();
+     printer.print(literal);
+ }
+
+ /** Prints the stored text of the long literal unchanged. */
+ public void visit(LongLiteralExpr n, Object arg) {
+     final String literal = n.getValue();
+     printer.print(literal);
+ }
+
+ /** Prints the stored text of the minimum-value integer literal unchanged. */
+ public void visit(IntegerLiteralMinValueExpr n, Object arg) {
+     final String literal = n.getValue();
+     printer.print(literal);
+ }
+
+ /** Prints the stored text of the minimum-value long literal unchanged. */
+ public void visit(LongLiteralMinValueExpr n, Object arg) {
+     final String literal = n.getValue();
+     printer.print(literal);
+ }
+
+ /**
+  * Prints the stored string value between double quotes. The value is
+  * printed as stored; no escaping is applied here.
+  */
+ public void visit(StringLiteralExpr n, Object arg) {
+     final String quote = "\"";
+     printer.print(quote);
+     printer.print(n.getValue());
+     printer.print(quote);
+ }
+
+ /** Prints the string form of the boolean value. */
+ public void visit(BooleanLiteralExpr n, Object arg) {
+     final String text = String.valueOf(n.getValue());
+     printer.print(text);
+ }
+
+ /** Prints the {@code null} literal. */
+ public void visit(NullLiteralExpr n, Object arg) {
+     final String literal = "null";
+     printer.print(literal);
+ }
+
+ /** Prints {@code this}, qualified by the class expression when one is present. */
+ public void visit(ThisExpr n, Object arg) {
+     final boolean qualified = n.getClassExpr() != null;
+     if (qualified) {
+         n.getClassExpr().accept(this, arg);
+         printer.print(".");
+     }
+     printer.print("this");
+ }
+
+ /** Prints {@code super}, qualified by the class expression when one is present. */
+ public void visit(SuperExpr n, Object arg) {
+     final boolean qualified = n.getClassExpr() != null;
+     if (qualified) {
+         n.getClassExpr().accept(this, arg);
+         printer.print(".");
+     }
+     printer.print("super");
+ }
+
+ /**
+  * Prints a method call: optional scope and dot, explicit type arguments,
+  * the method name, and the parenthesized argument list.
+  */
+ public void visit(MethodCallExpr n, Object arg) {
+     final boolean scoped = n.getScope() != null;
+     if (scoped) {
+         n.getScope().accept(this, arg);
+         printer.print(".");
+     }
+     this.printTypeArgs(n.getTypeArgs(), arg);
+     printer.print(n.getName());
+     this.printArguments(n.getArgs(), arg);
+ }
+
+ /**
+  * Prints an object creation expression: optional scope and dot,
+  * {@code new}, type arguments, the type, the argument list, and the
+  * indented anonymous class body in braces when one is present.
+  */
+ public void visit(ObjectCreationExpr n, Object arg) {
+     final boolean scoped = n.getScope() != null;
+     if (scoped) {
+         n.getScope().accept(this, arg);
+         printer.print(".");
+     }
+     printer.print("new ");
+     printTypeArgs(n.getTypeArgs(), arg);
+     n.getType().accept(this, arg);
+     printArguments(n.getArgs(), arg);
+
+     if (n.getAnonymousClassBody() == null) {
+         return;
+     }
+     printer.print(" {");
+     printer.printLn();
+     printer.indent();
+     printMembers(n.getAnonymousClassBody(), arg);
+     printer.unindent();
+     printer.print("}");
+ }
+
+ public void visit(UnaryExpr n, Object arg) {
+ switch (n.getOperator()) {
+ case positive:
+ printer.print("+");
+ break;
+ case negative:
+ printer.print("-");
+ break;
+ case inverse:
+ printer.print("~");
+ break;
+ case not:
+ printer.print("!");
+ break;
+ case preIncrement:
+ printer.print("++");
+ break;
+ case preDecrement:
+ printer.print("--");
+ break;
+ }
+
+ n.getExpr().accept(this, arg);
+
+ switch (n.getOperator()) {
+ case posIncrement:
+ printer.print("++");
+ break;
+ case posDecrement:
+ printer.print("--");
+ break;
+ }
+ }
+
+ public void visit(ConstructorDeclaration n, Object arg) {
+ printJavadoc(n.getJavaDoc(), arg);
+ printMemberAnnotations(n.getAnnotations(), arg);
+ printModifiers(n.getModifiers());
+
+ printTypeParameters(n.getTypeParameters(), arg);
+ if (n.getTypeParameters() != null) {
+ printer.print(" ");
+ }
+ printer.print(n.getName());
+
+ printer.print("(");
+ if (n.getParameters() != null) {
+ for (Iterator<Parameter> i = n.getParameters().iterator(); i.hasNext();) {
+ Parameter p = i.next();
+ p.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ printer.print(")");
+
+ if (n.getThrows() != null) {
+ printer.print(" throws ");
+ for (Iterator<NameExpr> i = n.getThrows().iterator(); i.hasNext();) {
+ NameExpr name = i.next();
+ name.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ printer.print(" ");
+ n.getBlock().accept(this, arg);
+ }
+
+ public void visit(MethodDeclaration n, Object arg) {
+ printJavadoc(n.getJavaDoc(), arg);
+ printMemberAnnotations(n.getAnnotations(), arg);
+ printModifiers(n.getModifiers());
+
+ printTypeParameters(n.getTypeParameters(), arg);
+ if (n.getTypeParameters() != null) {
+ printer.print(" ");
+ }
+
+ n.getType().accept(this, arg);
+ printer.print(" ");
+ printer.print(n.getName());
+
+ printer.print("(");
+ if (n.getParameters() != null) {
+ for (Iterator<Parameter> i = n.getParameters().iterator(); i.hasNext();) {
+ Parameter p = i.next();
+ p.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ printer.print(")");
+
+ for (int i = 0; i < n.getArrayCount(); i++) {
+ printer.print("[]");
+ }
+
+ if (n.getThrows() != null) {
+ printer.print(" throws ");
+ for (Iterator<NameExpr> i = n.getThrows().iterator(); i.hasNext();) {
+ NameExpr name = i.next();
+ name.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ if (n.getBody() == null) {
+ printer.print(";");
+ } else {
+ printer.print(" ");
+ n.getBody().accept(this, arg);
+ }
+ }
+
+ /**
+  * Prints a formal parameter: inline annotations, modifiers, type,
+  * {@code ...} for a varargs parameter, a space, and the parameter id.
+  */
+ public void visit(Parameter n, Object arg) {
+     printAnnotations(n.getAnnotations(), arg);
+     printModifiers(n.getModifiers());
+     n.getType().accept(this, arg);
+     final boolean variadic = n.isVarArgs();
+     if (variadic) {
+         printer.print("...");
+     }
+     printer.print(" ");
+     n.getId().accept(this, arg);
+ }
+
+ /**
+  * Prints an explicit {@code this(...)} or {@code super(...)} constructor
+  * call, including type arguments. For {@code super} calls only, a
+  * qualifying expression and dot are printed first when present.
+  */
+ public void visit(ExplicitConstructorInvocationStmt n, Object arg) {
+     final boolean callsThis = n.isThis();
+     if (!callsThis && n.getExpr() != null) {
+         n.getExpr().accept(this, arg);
+         printer.print(".");
+     }
+     printTypeArgs(n.getTypeArgs(), arg);
+     printer.print(callsThis ? "this" : "super");
+     printArguments(n.getArgs(), arg);
+     printer.print(";");
+ }
+
+ public void visit(VariableDeclarationExpr n, Object arg) {
+ printAnnotations(n.getAnnotations(), arg);
+ printModifiers(n.getModifiers());
+
+ n.getType().accept(this, arg);
+ printer.print(" ");
+
+ for (Iterator<VariableDeclarator> i = n.getVars().iterator(); i.hasNext();) {
+ VariableDeclarator v = i.next();
+ v.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+
+ /** Delegates to the wrapped type declaration. */
+ public void visit(TypeDeclarationStmt n, Object arg) {
+     // The statement adds no text of its own.
+     n.getTypeDeclaration().accept(this, arg);
+ }
+
+ /**
+  * Prints an assert statement, including {@code " : message"} when a
+  * message expression is present.
+  */
+ public void visit(AssertStmt n, Object arg) {
+     printer.print("assert ");
+     n.getCheck().accept(this, arg);
+     final boolean hasMessage = n.getMessage() != null;
+     if (hasMessage) {
+         printer.print(" : ");
+         n.getMessage().accept(this, arg);
+     }
+     printer.print(";");
+ }
+
+ /**
+  * Prints a block: an opening brace and line break, each statement on its
+  * own indented line, and the closing brace. A {@code null} statement
+  * list produces an empty block.
+  */
+ public void visit(BlockStmt n, Object arg) {
+     printer.print("{");
+     printer.printLn();
+     if (n.getStmts() != null) {
+         printer.indent();
+         for (Statement statement : n.getStmts()) {
+             statement.accept(this, arg);
+             printer.printLn();
+         }
+         printer.unindent();
+     }
+     printer.print("}");
+ }
+
+ /** Prints the label, {@code ": "}, and then the labeled statement. */
+ public void visit(LabeledStmt n, Object arg) {
+     final String label = n.getLabel();
+     printer.print(label);
+     printer.print(": ");
+     n.getStmt().accept(this, arg);
+ }
+
+ /** Prints an empty statement, i.e. a lone semicolon. */
+ public void visit(EmptyStmt n, Object arg) {
+     final String terminator = ";";
+     printer.print(terminator);
+ }
+
+ /** Prints the expression followed by a semicolon. */
+ public void visit(ExpressionStmt n, Object arg) {
+     n.getExpression().accept(this, arg);
+     final String terminator = ";";
+     printer.print(terminator);
+ }
+
+ /**
+  * Prints a switch statement: the selector in parentheses, then the
+  * entries indented between braces.
+  */
+ public void visit(SwitchStmt n, Object arg) {
+     printer.print("switch(");
+     n.getSelector().accept(this, arg);
+     printer.print(") {");
+     printer.printLn();
+     if (n.getEntries() != null) {
+         printer.indent();
+         for (SwitchEntryStmt entry : n.getEntries()) {
+             entry.accept(this, arg);
+         }
+         printer.unindent();
+     }
+     printer.print("}");
+ }
+
+ /**
+  * Prints one switch entry: {@code case <label>:} or {@code default:} on
+  * its own line, followed by the entry's statements, one per indented
+  * line.
+  */
+ public void visit(SwitchEntryStmt n, Object arg) {
+     if (n.getLabel() == null) {
+         printer.printLn("default:");
+     } else {
+         printer.print("case ");
+         n.getLabel().accept(this, arg);
+         printer.printLn(":");
+     }
+     printer.indent();
+     if (n.getStmts() != null) {
+         for (Statement statement : n.getStmts()) {
+             statement.accept(this, arg);
+             printer.printLn();
+         }
+     }
+     printer.unindent();
+ }
+
+ /** Prints {@code break;}, with the target label when one is set. */
+ public void visit(BreakStmt n, Object arg) {
+     printer.print("break");
+     final boolean labeled = n.getId() != null;
+     if (labeled) {
+         printer.print(" ");
+         printer.print(n.getId());
+     }
+     printer.print(";");
+ }
+
+ /** Prints {@code return;}, with the returned expression when one is present. */
+ public void visit(ReturnStmt n, Object arg) {
+     printer.print("return");
+     final boolean hasValue = n.getExpr() != null;
+     if (hasValue) {
+         printer.print(" ");
+         n.getExpr().accept(this, arg);
+     }
+     printer.print(";");
+ }
+
+ public void visit(EnumDeclaration n, Object arg) {
+ printJavadoc(n.getJavaDoc(), arg);
+ printMemberAnnotations(n.getAnnotations(), arg);
+ printModifiers(n.getModifiers());
+
+ printer.print("enum ");
+ printer.print(n.getName());
+
+ if (n.getImplements() != null) {
+ printer.print(" implements ");
+ for (Iterator<ClassOrInterfaceType> i = n.getImplements().iterator(); i.hasNext();) {
+ ClassOrInterfaceType c = i.next();
+ c.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+
+ printer.printLn(" {");
+ printer.indent();
+ if (n.getEntries() != null) {
+ printer.printLn();
+ for (Iterator<EnumConstantDeclaration> i = n.getEntries().iterator(); i.hasNext();) {
+ EnumConstantDeclaration e = i.next();
+ e.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ if (n.getMembers() != null) {
+ printer.printLn(";");
+ printMembers(n.getMembers(), arg);
+ } else {
+ if (n.getEntries() != null) {
+ printer.printLn();
+ }
+ }
+ printer.unindent();
+ printer.print("}");
+ }
+
+ /**
+  * Prints one enum constant: Javadoc, annotations, name, the argument list
+  * when present, and the indented class body in braces when present.
+  */
+ public void visit(EnumConstantDeclaration n, Object arg) {
+     printJavadoc(n.getJavaDoc(), arg);
+     printMemberAnnotations(n.getAnnotations(), arg);
+     printer.print(n.getName());
+
+     final boolean hasArgs = n.getArgs() != null;
+     if (hasArgs) {
+         printArguments(n.getArgs(), arg);
+     }
+
+     final boolean hasBody = n.getClassBody() != null;
+     if (hasBody) {
+         printer.print(" {");
+         printer.printLn();
+         printer.indent();
+         printMembers(n.getClassBody(), arg);
+         printer.unindent();
+         printer.print("}");
+         printer.printLn();
+     }
+ }
+
+ /** Prints the optional Javadoc followed by a lone semicolon. */
+ public void visit(EmptyMemberDeclaration n, Object arg) {
+     this.printJavadoc(n.getJavaDoc(), arg);
+     printer.print(";");
+ }
+
+ /**
+  * Prints an initializer block, preceded by its Javadoc and by
+  * {@code static} when it is a static initializer.
+  */
+ public void visit(InitializerDeclaration n, Object arg) {
+     printJavadoc(n.getJavaDoc(), arg);
+     final boolean isStatic = n.isStatic();
+     if (isStatic) {
+         printer.print("static ");
+     }
+     n.getBlock().accept(this, arg);
+ }
+
+ /**
+  * Prints an if statement with its then-branch and, when present, the
+  * else-branch introduced by {@code " else "}.
+  */
+ public void visit(IfStmt n, Object arg) {
+     printer.print("if (");
+     n.getCondition().accept(this, arg);
+     printer.print(") ");
+     n.getThenStmt().accept(this, arg);
+     if (n.getElseStmt() == null) {
+         return;
+     }
+     printer.print(" else ");
+     n.getElseStmt().accept(this, arg);
+ }
+
+ /** Prints a while loop as {@code while (cond) body}. */
+ public void visit(WhileStmt n, Object arg) {
+     // loop header
+     printer.print("while (");
+     n.getCondition().accept(this, arg);
+     printer.print(") ");
+     // loop body
+     n.getBody().accept(this, arg);
+ }
+
+ /** Prints {@code continue;}, with the target label when one is set. */
+ public void visit(ContinueStmt n, Object arg) {
+     printer.print("continue");
+     final boolean labeled = n.getId() != null;
+     if (labeled) {
+         printer.print(" ");
+         printer.print(n.getId());
+     }
+     printer.print(";");
+ }
+
+ /** Prints a do-while loop as {@code do body while (cond);}. */
+ public void visit(DoStmt n, Object arg) {
+     printer.print("do ");
+     // loop body
+     n.getBody().accept(this, arg);
+     printer.print(" while (");
+     // continuation condition
+     n.getCondition().accept(this, arg);
+     printer.print(");");
+ }
+
+ /** Prints an enhanced for loop as {@code for (var : iterable) body}. */
+ public void visit(ForeachStmt n, Object arg) {
+     printer.print("for (");
+     // loop variable declaration
+     n.getVariable().accept(this, arg);
+     printer.print(" : ");
+     // expression being iterated
+     n.getIterable().accept(this, arg);
+     printer.print(") ");
+     n.getBody().accept(this, arg);
+ }
+
+ public void visit(ForStmt n, Object arg) {
+ printer.print("for (");
+ if (n.getInit() != null) {
+ for (Iterator<Expression> i = n.getInit().iterator(); i.hasNext();) {
+ Expression e = i.next();
+ e.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ printer.print("; ");
+ if (n.getCompare() != null) {
+ n.getCompare().accept(this, arg);
+ }
+ printer.print("; ");
+ if (n.getUpdate() != null) {
+ for (Iterator<Expression> i = n.getUpdate().iterator(); i.hasNext();) {
+ Expression e = i.next();
+ e.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ printer.print(") ");
+ n.getBody().accept(this, arg);
+ }
+
+ /** Prints a throw statement as {@code throw expr;}. */
+ public void visit(ThrowStmt n, Object arg) {
+     final String keyword = "throw ";
+     printer.print(keyword);
+     n.getExpr().accept(this, arg);
+     printer.print(";");
+ }
+
+ /** Prints a synchronized statement as {@code synchronized (expr) block}. */
+ public void visit(SynchronizedStmt n, Object arg) {
+     printer.print("synchronized (");
+     // lock expression
+     n.getExpr().accept(this, arg);
+     printer.print(") ");
+     // guarded block
+     n.getBlock().accept(this, arg);
+ }
+
+ /**
+  * Prints a try statement: the try block, every catch clause, and the
+  * finally block when present.
+  */
+ public void visit(TryStmt n, Object arg) {
+     printer.print("try ");
+     n.getTryBlock().accept(this, arg);
+
+     final boolean hasCatches = n.getCatchs() != null;
+     if (hasCatches) {
+         for (CatchClause handler : n.getCatchs()) {
+             handler.accept(this, arg);
+         }
+     }
+
+     final boolean hasFinally = n.getFinallyBlock() != null;
+     if (hasFinally) {
+         printer.print(" finally ");
+         n.getFinallyBlock().accept(this, arg);
+     }
+ }
+
+ /** Prints a catch clause as {@code " catch (param) block"}. */
+ public void visit(CatchClause n, Object arg) {
+     printer.print(" catch (");
+     // caught exception parameter
+     n.getExcept().accept(this, arg);
+     printer.print(") ");
+     // handler body
+     n.getCatchBlock().accept(this, arg);
+ }
+
+ /**
+  * Prints an annotation type declaration: Javadoc, annotations, modifiers,
+  * {@code @interface}, the name, and the indented members between braces.
+  */
+ public void visit(AnnotationDeclaration n, Object arg) {
+     printJavadoc(n.getJavaDoc(), arg);
+     printMemberAnnotations(n.getAnnotations(), arg);
+     printModifiers(n.getModifiers());
+
+     printer.print("@interface ");
+     printer.print(n.getName());
+     printer.print(" {");
+     printer.printLn();
+     printer.indent();
+     final boolean hasMembers = n.getMembers() != null;
+     if (hasMembers) {
+         printMembers(n.getMembers(), arg);
+     }
+     printer.unindent();
+     printer.print("}");
+ }
+
+ /**
+  * Prints an annotation member: Javadoc, annotations, modifiers, type,
+  * name with empty parentheses, the {@code default} value when present,
+  * and a terminating semicolon.
+  */
+ public void visit(AnnotationMemberDeclaration n, Object arg) {
+     printJavadoc(n.getJavaDoc(), arg);
+     printMemberAnnotations(n.getAnnotations(), arg);
+     printModifiers(n.getModifiers());
+
+     n.getType().accept(this, arg);
+     printer.print(" ");
+     printer.print(n.getName());
+     printer.print("()");
+     final boolean hasDefault = n.getDefaultValue() != null;
+     if (hasDefault) {
+         printer.print(" default ");
+         n.getDefaultValue().accept(this, arg);
+     }
+     printer.print(";");
+ }
+
+ /** Prints a marker annotation as {@code @Name}. */
+ public void visit(MarkerAnnotationExpr n, Object arg) {
+     final String sigil = "@";
+     printer.print(sigil);
+     n.getName().accept(this, arg);
+ }
+
+ /** Prints a single-member annotation as {@code @Name(value)}. */
+ public void visit(SingleMemberAnnotationExpr n, Object arg) {
+     printer.print("@");
+     n.getName().accept(this, arg);
+     // the single value goes between parentheses
+     printer.print("(");
+     n.getMemberValue().accept(this, arg);
+     printer.print(")");
+ }
+
+ public void visit(NormalAnnotationExpr n, Object arg) {
+ printer.print("@");
+ n.getName().accept(this, arg);
+ printer.print("(");
+ if (n.getPairs() != null) {
+ for (Iterator<MemberValuePair> i = n.getPairs().iterator(); i.hasNext();) {
+ MemberValuePair m = i.next();
+ m.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ printer.print(")");
+ }
+
+ /** Prints an annotation member pair as {@code name = value}. */
+ public void visit(MemberValuePair n, Object arg) {
+     final String name = n.getName();
+     printer.print(name);
+     printer.print(" = ");
+     n.getValue().accept(this, arg);
+ }
+
+ /** Prints a line comment: {@code //}, the content, and a line break. */
+ public void visit(LineComment n, Object arg) {
+     printer.print("//");
+     printer.print(n.getContent());
+     printer.printLn();
+ }
+
+ /**
+  * Prints a block comment: the opening delimiter, the content, the closing
+  * delimiter, and a line break.
+  */
+ public void visit(BlockComment n, Object arg) {
+     printer.print("/*");
+     printer.print(n.getContent());
+     printer.print("*/");
+     printer.printLn();
+ }
+
+} \ No newline at end of file
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/LoopBreakAnalyzerVisitor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/LoopBreakAnalyzerVisitor.java
new file mode 100644
index 000000000..384716e0b
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/LoopBreakAnalyzerVisitor.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2008 Júlio Vilmar Gesser.
+ * Copyright (C) 2012 Mozilla Foundation
+ *
+ * This file is part of Java 1.5 parser and Abstract Syntax Tree.
+ *
+ * Java 1.5 parser and Abstract Syntax Tree is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Java 1.5 parser and Abstract Syntax Tree is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Java 1.5 parser and Abstract Syntax Tree. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Created on 09/06/2008
+ */
+package nu.validator.htmlparser.rusttranslate;
+
+import japa.parser.ast.stmt.AssertStmt;
+import japa.parser.ast.stmt.BlockStmt;
+import japa.parser.ast.stmt.BreakStmt;
+import japa.parser.ast.stmt.CatchClause;
+import japa.parser.ast.stmt.ContinueStmt;
+import japa.parser.ast.stmt.DoStmt;
+import japa.parser.ast.stmt.EmptyStmt;
+import japa.parser.ast.stmt.ExplicitConstructorInvocationStmt;
+import japa.parser.ast.stmt.ExpressionStmt;
+import japa.parser.ast.stmt.ForStmt;
+import japa.parser.ast.stmt.ForeachStmt;
+import japa.parser.ast.stmt.IfStmt;
+import japa.parser.ast.stmt.LabeledStmt;
+import japa.parser.ast.stmt.ReturnStmt;
+import japa.parser.ast.stmt.Statement;
+import japa.parser.ast.stmt.SwitchEntryStmt;
+import japa.parser.ast.stmt.SwitchStmt;
+import japa.parser.ast.stmt.SynchronizedStmt;
+import japa.parser.ast.stmt.ThrowStmt;
+import japa.parser.ast.stmt.TryStmt;
+import japa.parser.ast.stmt.TypeDeclarationStmt;
+import japa.parser.ast.stmt.WhileStmt;
+import japa.parser.ast.type.WildcardType;
+import japa.parser.ast.visitor.GenericVisitorAdapter;
+
+import java.util.List;
+
+/**
+ * Statement visitor that computes a boolean answer for each statement.
+ * <p>
+ * As implemented: {@code return}, {@code throw}, {@code switch} and labeled
+ * {@code break} statements answer {@code true}; simple statements (and,
+ * for now, classic {@code for} loops) answer {@code false}; compound
+ * statements derive their answer from the statements nested inside them.
+ * <p>
+ * NOTE(review): judging by the class name the answer appears to mean
+ * "control may leave the enclosing loop other than by a plain break" -
+ * confirm against the caller before relying on that reading.
+ * <p>
+ * The {@code arg} parameter is not inspected; it is only passed through to
+ * nested visits.
+ *
+ * @author Julio Vilmar Gesser
+ * @author Henri Sivonen
+ */
+public class LoopBreakAnalyzerVisitor extends GenericVisitorAdapter<Boolean, Boolean> {
+
+    /** Always {@code false}. */
+    @Override
+    public Boolean visit(AssertStmt n, Boolean arg) {
+        return false;
+    }
+
+    /**
+     * {@code true} if any statement directly inside the block answers
+     * {@code true}. An empty block, whose statement list is {@code null},
+     * answers {@code false}.
+     */
+    @Override
+    public Boolean visit(BlockStmt n, Boolean arg) {
+        List<Statement> stmts = n.getStmts();
+        if (stmts == null) {
+            // The parser leaves the list null for "{}"; iterating it would throw.
+            return false;
+        }
+        for (Statement stmt : stmts) {
+            if (stmt.accept(this, arg)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /** {@code true} only for a labeled break. */
+    @Override
+    public Boolean visit(BreakStmt n, Boolean arg) {
+        return n.getId() != null;
+    }
+
+    /** Answer of the catch block. */
+    @Override
+    public Boolean visit(CatchClause n, Boolean arg) {
+        return n.getCatchBlock().accept(this, arg);
+    }
+
+    /** Always {@code false}. */
+    @Override
+    public Boolean visit(ContinueStmt n, Boolean arg) {
+        return false;
+    }
+
+    /** Answer of the loop body. */
+    @Override
+    public Boolean visit(DoStmt n, Boolean arg) {
+        return n.getBody().accept(this, arg);
+    }
+
+    /** Always {@code false}. */
+    @Override
+    public Boolean visit(EmptyStmt n, Boolean arg) {
+        return false;
+    }
+
+    /** Always {@code false}. */
+    @Override
+    public Boolean visit(ExplicitConstructorInvocationStmt n, Boolean arg) {
+        return false;
+    }
+
+    /** Always {@code false}. */
+    @Override
+    public Boolean visit(ExpressionStmt n, Boolean arg) {
+        return false;
+    }
+
+    /** Answer of the loop body. */
+    @Override
+    public Boolean visit(ForeachStmt n, Boolean arg) {
+        return n.getBody().accept(this, arg);
+    }
+
+    /**
+     * Always {@code false}; the loop body is not inspected. The original
+     * author marked this implementation as bogus, i.e. a placeholder.
+     */
+    @Override
+    public Boolean visit(ForStmt n, Boolean arg) {
+        // bogus
+        return false;
+    }
+
+    /** {@code true} if either the else-branch or the then-branch answers {@code true}. */
+    @Override
+    public Boolean visit(IfStmt n, Boolean arg) {
+        if (n.getElseStmt() != null) {
+            if (n.getElseStmt().accept(this, arg)) {
+                return true;
+            }
+        }
+        if (n.getThenStmt().accept(this, arg)) {
+            return true;
+        }
+        return false;
+    }
+
+    /** Answer of the labeled statement. */
+    @Override
+    public Boolean visit(LabeledStmt n, Boolean arg) {
+        return n.getStmt().accept(this, arg);
+    }
+
+    /** Always {@code true}. */
+    @Override
+    public Boolean visit(ReturnStmt n, Boolean arg) {
+        return true;
+    }
+
+    /** Always {@code false}; the entry's statements are not inspected. */
+    @Override
+    public Boolean visit(SwitchEntryStmt n, Boolean arg) {
+        return false;
+    }
+
+    /**
+     * Always {@code true}; the switch entries are not inspected.
+     * TODO: per-entry analysis was sketched by the original author but never
+     * implemented.
+     */
+    @Override
+    public Boolean visit(SwitchStmt n, Boolean arg) {
+        return true;
+    }
+
+    /** Answer of the synchronized block. */
+    @Override
+    public Boolean visit(SynchronizedStmt n, Boolean arg) {
+        return n.getBlock().accept(this, arg);
+    }
+
+    /** Always {@code true}. */
+    @Override
+    public Boolean visit(ThrowStmt n, Boolean arg) {
+        return true;
+    }
+
+    /**
+     * If a finally block exists, its answer alone is returned. Otherwise
+     * the answer is {@code false} as soon as any catch clause answers
+     * {@code false}, and the answer of the try block if none does.
+     */
+    @Override
+    public Boolean visit(TryStmt n, Boolean arg) {
+        if (n.getFinallyBlock() != null) {
+            return n.getFinallyBlock().accept(this, arg);
+        }
+        if (n.getCatchs() != null) {
+            for (CatchClause c : n.getCatchs()) {
+                boolean brk = c.accept(this, arg);
+                if (!brk) {
+                    return false;
+                }
+            }
+        }
+        return n.getTryBlock().accept(this, arg);
+    }
+
+    /** Always {@code false}. */
+    @Override
+    public Boolean visit(TypeDeclarationStmt n, Boolean arg) {
+        return false;
+    }
+
+    /** Answer of the loop body. */
+    @Override
+    public Boolean visit(WhileStmt n, Boolean arg) {
+        return n.getBody().accept(this, arg);
+    }
+
+    /**
+     * Visits the bounds that are present and returns {@code null}; a
+     * wildcard type is not a statement and contributes no answer.
+     */
+    @Override
+    public Boolean visit(WildcardType n, Boolean arg) {
+        if (n.getExtends() != null) {
+            n.getExtends().accept(this, arg);
+        }
+        if (n.getSuper() != null) {
+            n.getSuper().accept(this, arg);
+        }
+        return null;
+    }
+} \ No newline at end of file
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/Main.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/Main.java
new file mode 100644
index 000000000..4e1b0a7dd
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/Main.java
@@ -0,0 +1,144 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser Rust Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2012
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+package nu.validator.htmlparser.rusttranslate;
+
+import japa.parser.JavaParser;
+import japa.parser.ParseException;
+import japa.parser.ast.CompilationUnit;
+import japa.parser.ast.visitor.DumpVisitor;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+
+import nu.validator.htmlparser.cpptranslate.CppOnlyInputStream;
+import nu.validator.htmlparser.cpptranslate.LicenseExtractor;
+import nu.validator.htmlparser.cpptranslate.NoCppInputStream;
+
+/**
+ * Command-line driver of the Java-to-Rust translator. For each class named in
+ * {@link #CLASSLIST} it reads {@code <class>.java} from the source directory,
+ * applies {@link ModeFallThroughRemover}, {@link DuplicatingFallThroughRemover}
+ * and {@link RustVisitor} to the parsed compilation unit, and writes the
+ * result to {@code <class>.rs} in the target directory.
+ */
+public class Main {
+
+    private static final String[] CLASSLIST = {
+        "Tokenizer",
+        "TreeBuilder",
+        "MetaScanner",
+        "AttributeName",
+        "ElementName",
+        "HtmlAttributes",
+        "StackNode",
+        "UTF16Buffer",
+        "StateSnapshot",
+    };
+
+    /**
+     * Translates every class in {@link #CLASSLIST}.
+     *
+     * @param args {@code args[0]} is the directory containing the Java
+     *        sources; {@code args[1]} is the directory that receives the
+     *        generated {@code .rs} files
+     * @throws IllegalArgumentException if fewer than two arguments are given
+     * @throws ParseException declared for callers; parse failures of
+     *         individual files are reported on standard error by
+     *         {@code parseFile} and do not abort the run
+     * @throws IOException if a source cannot be read or an output cannot be
+     *         written
+     */
+    public static void main(String[] args) throws ParseException, IOException {
+        if (args == null || args.length < 2) {
+            throw new IllegalArgumentException(
+                    "Usage: Main <java-source-directory> <target-directory>");
+        }
+        File javaDirectory = new File(args[0]);
+        File targetDirectory = new File(args[1]);
+
+        for (int i = 0; i < CLASSLIST.length; i++) {
+            parseFile(javaDirectory, targetDirectory, CLASSLIST[i], ".java");
+        }
+    }
+
+    /**
+     * Translates one class from {@code <className>.java} to
+     * {@code <className>.rs}.
+     *
+     * @param javaDirectory directory containing the Java source
+     * @param targetDirectory directory receiving the generated file
+     * @param className simple class name, without extension
+     * @param fne currently unused; the output extension is always
+     *        {@code .rs}. Kept so existing call sites keep compiling.
+     */
+    private static void parseFile(File javaDirectory,
+            File targetDirectory, String className, String fne)
+            throws FileNotFoundException, UnsupportedEncodingException,
+            IOException {
+        File file = new File(javaDirectory, className + ".java");
+        try {
+            String license = new LicenseExtractor(file).extract();
+
+            // Close the source stream whether or not parsing succeeds.
+            CompilationUnit cu;
+            FileInputStream in = new FileInputStream(file);
+            try {
+                cu = JavaParser.parse(new NoCppInputStream(
+                        new CppOnlyInputStream(in)), "utf-8");
+            } finally {
+                in.close();
+            }
+
+            ModeFallThroughRemover mftr = new ModeFallThroughRemover();
+            cu.accept(mftr, null);
+
+            DuplicatingFallThroughRemover dftr = new DuplicatingFallThroughRemover();
+            cu.accept(dftr, null);
+
+            RustVisitor visitor = new RustVisitor();
+            cu.accept(visitor, null);
+
+            FileOutputStream out = new FileOutputStream(new File(targetDirectory,
+                    className + ".rs"));
+            try {
+                OutputStreamWriter w = new OutputStreamWriter(out, "utf-8");
+                w.write(license);
+                w.write("\n\n/*\n * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.\n * Please edit "
+                        + className + ".java instead and regenerate.\n */\n\n");
+                w.write(visitor.getSource());
+                // Flushes the encoder and closes the underlying stream.
+                w.close();
+            } finally {
+                // Releases the file handle if anything above threw; closing
+                // an already closed FileOutputStream has no effect.
+                out.close();
+            }
+        } catch (ParseException e) {
+            // Report the offending file and carry on with the next class.
+            System.err.println(file);
+            e.printStackTrace();
+        }
+    }
+
+}
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/ModeFallThroughRemover.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/ModeFallThroughRemover.java
new file mode 100644
index 000000000..a89926748
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/ModeFallThroughRemover.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2008 Júlio Vilmar Gesser.
+ * Copyright (C) 2012 Mozilla Foundation
+ *
+ * This file is part of Java 1.5 parser and Abstract Syntax Tree.
+ *
+ * Java 1.5 parser and Abstract Syntax Tree is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Java 1.5 parser and Abstract Syntax Tree is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Java 1.5 parser and Abstract Syntax Tree. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Created on 09/06/2008
+ */
+package nu.validator.htmlparser.rusttranslate;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import japa.parser.ast.body.MethodDeclaration;
+import japa.parser.ast.expr.BinaryExpr;
+import japa.parser.ast.expr.BinaryExpr.Operator;
+import japa.parser.ast.expr.Expression;
+import japa.parser.ast.stmt.BlockStmt;
+import japa.parser.ast.stmt.BreakStmt;
+import japa.parser.ast.stmt.IfStmt;
+import japa.parser.ast.stmt.Statement;
+import japa.parser.ast.stmt.SwitchEntryStmt;
+import japa.parser.ast.stmt.SwitchStmt;
+import japa.parser.ast.visitor.VoidVisitorAdapter;
+
+/**
+ * @author Julio Vilmar Gesser
+ * @author Henri Sivonen
+ */
+public class ModeFallThroughRemover extends VoidVisitorAdapter<Object> {
+
+ private String method;
+
+ /**
+  * Inside methods named {@code startTag} or {@code endTag}, replaces each
+  * {@code switch (mode)} that is a direct child of this block with one
+  * statement per switch entry that has statements: a labeled entry becomes
+  * {@code if (mode <= LABEL) { ... }} and the default entry becomes a bare
+  * block. A trailing unlabeled {@code break} of an entry is dropped. All
+  * other statements, and blocks of other methods, are traversed normally.
+  *
+  * @param n the block being visited; its statement list is edited in place
+  * @param arg passed through to nested visits
+  */
+ @Override public void visit(BlockStmt n, Object arg) {
+     if (!("startTag".equals(method) || "endTag".equals(method))) {
+         super.visit(n, arg);
+         return;
+     }
+     List<Statement> list = n.getStmts();
+     if (list == null) {
+         return;
+     }
+     for (int i = 0; i < list.size(); i++) {
+         Statement s = list.get(i);
+         if (!(s instanceof SwitchStmt)
+                 || !"mode".equals(((SwitchStmt) s).getSelector().toString())) {
+             s.accept(this, arg);
+             continue;
+         }
+         SwitchStmt sw = (SwitchStmt) s;
+         list.remove(i);
+         int inserted = 0;
+         List<SwitchEntryStmt> entries = sw.getEntries();
+         if (entries != null) {
+             for (SwitchEntryStmt entry : entries) {
+                 List<Statement> statements = entry.getStmts();
+                 if (statements == null || statements.isEmpty()) {
+                     // Label without a body; the following entry covers it.
+                     continue;
+                 }
+                 int lastIndex = statements.size() - 1;
+                 Statement last = statements.get(lastIndex);
+                 if (last instanceof BreakStmt
+                         && ((BreakStmt) last).getId() == null) {
+                     // Remove by position so that an equal statement
+                     // earlier in the list can never be removed instead.
+                     statements.remove(lastIndex);
+                 }
+                 Statement stm;
+                 Expression label = entry.getLabel();
+                 if (label == null) {
+                     stm = new BlockStmt(statements);
+                 } else {
+                     Expression lte = new BinaryExpr(sw.getSelector(), label,
+                             Operator.lessEquals);
+                     stm = new IfStmt(lte, new BlockStmt(statements), null);
+                 }
+                 list.add(i + inserted, stm);
+                 inserted++;
+             }
+         }
+         // Index i now holds either the first replacement or, if nothing
+         // was inserted, the statement that followed the switch. Step back
+         // so the loop increment lands on it instead of skipping it.
+         i--;
+     }
+ }
+
+ /**
+  * Remembers the name of the method being entered, which
+  * {@code visit(BlockStmt, Object)} consults, and then continues with the
+  * default traversal of the declaration.
+  *
+  * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.body.MethodDeclaration, java.lang.Object)
+  */
+ @Override public void visit(MethodDeclaration md, Object arg) {
+     this.method = md.getName();
+     super.visit(md, arg);
+ }
+
+} \ No newline at end of file
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/RustVisitor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/RustVisitor.java
new file mode 100644
index 000000000..36feced04
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/RustVisitor.java
@@ -0,0 +1,1586 @@
+/*
+ * Copyright (C) 2007 Júlio Vilmar Gesser.
+ * Copyright (C) 2012 Mozilla Foundation
+ *
+ * This file is part of Java 1.5 parser and Abstract Syntax Tree.
+ *
+ * Java 1.5 parser and Abstract Syntax Tree is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Java 1.5 parser and Abstract Syntax Tree is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Java 1.5 parser and Abstract Syntax Tree. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Created on 05/10/2006
+ */
+package nu.validator.htmlparser.rusttranslate;
+
+import japa.parser.ast.BlockComment;
+import japa.parser.ast.CompilationUnit;
+import japa.parser.ast.LineComment;
+import japa.parser.ast.TypeParameter;
+import japa.parser.ast.body.BodyDeclaration;
+import japa.parser.ast.body.ClassOrInterfaceDeclaration;
+import japa.parser.ast.body.ConstructorDeclaration;
+import japa.parser.ast.body.EmptyMemberDeclaration;
+import japa.parser.ast.body.EmptyTypeDeclaration;
+import japa.parser.ast.body.EnumConstantDeclaration;
+import japa.parser.ast.body.EnumDeclaration;
+import japa.parser.ast.body.FieldDeclaration;
+import japa.parser.ast.body.InitializerDeclaration;
+import japa.parser.ast.body.JavadocComment;
+import japa.parser.ast.body.MethodDeclaration;
+import japa.parser.ast.body.ModifierSet;
+import japa.parser.ast.body.Parameter;
+import japa.parser.ast.body.TypeDeclaration;
+import japa.parser.ast.body.VariableDeclarator;
+import japa.parser.ast.body.VariableDeclaratorId;
+import japa.parser.ast.expr.AnnotationExpr;
+import japa.parser.ast.expr.ArrayAccessExpr;
+import japa.parser.ast.expr.ArrayCreationExpr;
+import japa.parser.ast.expr.ArrayInitializerExpr;
+import japa.parser.ast.expr.AssignExpr;
+import japa.parser.ast.expr.BinaryExpr;
+import japa.parser.ast.expr.BooleanLiteralExpr;
+import japa.parser.ast.expr.CastExpr;
+import japa.parser.ast.expr.CharLiteralExpr;
+import japa.parser.ast.expr.ClassExpr;
+import japa.parser.ast.expr.ConditionalExpr;
+import japa.parser.ast.expr.DoubleLiteralExpr;
+import japa.parser.ast.expr.EnclosedExpr;
+import japa.parser.ast.expr.Expression;
+import japa.parser.ast.expr.FieldAccessExpr;
+import japa.parser.ast.expr.InstanceOfExpr;
+import japa.parser.ast.expr.IntegerLiteralExpr;
+import japa.parser.ast.expr.IntegerLiteralMinValueExpr;
+import japa.parser.ast.expr.LongLiteralExpr;
+import japa.parser.ast.expr.LongLiteralMinValueExpr;
+import japa.parser.ast.expr.MemberValuePair;
+import japa.parser.ast.expr.MethodCallExpr;
+import japa.parser.ast.expr.NameExpr;
+import japa.parser.ast.expr.NullLiteralExpr;
+import japa.parser.ast.expr.ObjectCreationExpr;
+import japa.parser.ast.expr.QualifiedNameExpr;
+import japa.parser.ast.expr.StringLiteralExpr;
+import japa.parser.ast.expr.SuperExpr;
+import japa.parser.ast.expr.ThisExpr;
+import japa.parser.ast.expr.UnaryExpr;
+import japa.parser.ast.expr.UnaryExpr.Operator;
+import japa.parser.ast.expr.VariableDeclarationExpr;
+import japa.parser.ast.stmt.AssertStmt;
+import japa.parser.ast.stmt.BlockStmt;
+import japa.parser.ast.stmt.BreakStmt;
+import japa.parser.ast.stmt.CatchClause;
+import japa.parser.ast.stmt.ContinueStmt;
+import japa.parser.ast.stmt.DoStmt;
+import japa.parser.ast.stmt.EmptyStmt;
+import japa.parser.ast.stmt.ExplicitConstructorInvocationStmt;
+import japa.parser.ast.stmt.ExpressionStmt;
+import japa.parser.ast.stmt.ForStmt;
+import japa.parser.ast.stmt.ForeachStmt;
+import japa.parser.ast.stmt.IfStmt;
+import japa.parser.ast.stmt.LabeledStmt;
+import japa.parser.ast.stmt.ReturnStmt;
+import japa.parser.ast.stmt.Statement;
+import japa.parser.ast.stmt.SwitchEntryStmt;
+import japa.parser.ast.stmt.SwitchStmt;
+import japa.parser.ast.stmt.SynchronizedStmt;
+import japa.parser.ast.stmt.ThrowStmt;
+import japa.parser.ast.stmt.TryStmt;
+import japa.parser.ast.stmt.TypeDeclarationStmt;
+import japa.parser.ast.stmt.WhileStmt;
+import japa.parser.ast.type.ClassOrInterfaceType;
+import japa.parser.ast.type.PrimitiveType;
+import japa.parser.ast.type.ReferenceType;
+import japa.parser.ast.type.Type;
+import japa.parser.ast.type.VoidType;
+import japa.parser.ast.type.WildcardType;
+import japa.parser.ast.visitor.VoidVisitorAdapter;
+
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+import nu.validator.htmlparser.cpptranslate.TranslatorUtils;
+
+/**
+ * @author Julio Vilmar Gesser
+ * @author Henri Sivonen
+ */
+
+public final class RustVisitor extends VoidVisitorAdapter<Object> {
+
+ private static final String[] MODS = {
+ "Tokenizer",
+ "TreeBuilder",
+ "MetaScanner",
+ "AttributeName",
+ "ElementName",
+ "HtmlAttributes",
+ "StackNode",
+ "UTF16Buffer",
+ "StateSnapshot",
+ };
+
+ private boolean inMethodSignature = false;
+
+ private Set<String> fields = new HashSet<String>();
+
+ private Set<String> constants = new HashSet<String>();
+
+ private Expression loopUpdate = null;
+
+ /**
+  * Accumulates generated source text in a buffer. The first text printed
+  * on each line is preceded by one indentation unit per current nesting
+  * level.
+  */
+ private static class SourcePrinter {
+
+     private final StringBuilder buf = new StringBuilder();
+
+     /** Current nesting depth. */
+     private int level = 0;
+
+     /** Whether the indentation of the current line was already emitted. */
+     private boolean indented = false;
+
+     /** Increases the nesting depth by one. */
+     public void indent() {
+         level += 1;
+     }
+
+     /** Decreases the nesting depth by one. */
+     public void unindent() {
+         level -= 1;
+     }
+
+     /** Appends one indentation unit per nesting level. */
+     private void makeIndent() {
+         for (int remaining = level; remaining > 0; remaining--) {
+             buf.append(" ");
+         }
+     }
+
+     /** Appends {@code arg}, emitting the line's indentation first if needed. */
+     public void print(String arg) {
+         if (!indented) {
+             makeIndent();
+             indented = true;
+         }
+         buf.append(arg);
+     }
+
+     /** Appends {@code arg} and then ends the line. */
+     public void printLn(String arg) {
+         print(arg);
+         printLn();
+     }
+
+     /** Ends the current line; the next print starts with indentation. */
+     public void printLn() {
+         buf.append('\n');
+         indented = false;
+     }
+
+     /** Returns everything printed so far. */
+     public String getSource() {
+         return buf.toString();
+     }
+
+     @Override
+     public String toString() {
+         return buf.toString();
+     }
+ }
+
+ private final SourcePrinter printer = new SourcePrinter();
+
+ public String getSource() {
+ return printer.getSource();
+ }
+
+ /**
+  * Prints the Java keyword, followed by a space, for every modifier set in
+  * {@code modifiers}, in a fixed order. Prints nothing when no modifier is
+  * set.
+  */
+ private void printModifiers(int modifiers) {
+     StringBuilder keywords = new StringBuilder();
+     if (ModifierSet.isPrivate(modifiers)) {
+         keywords.append("private ");
+     }
+     if (ModifierSet.isProtected(modifiers)) {
+         keywords.append("protected ");
+     }
+     if (ModifierSet.isPublic(modifiers)) {
+         keywords.append("public ");
+     }
+     if (ModifierSet.isAbstract(modifiers)) {
+         keywords.append("abstract ");
+     }
+     if (ModifierSet.isStatic(modifiers)) {
+         keywords.append("static ");
+     }
+     if (ModifierSet.isFinal(modifiers)) {
+         keywords.append("final ");
+     }
+     if (ModifierSet.isNative(modifiers)) {
+         keywords.append("native ");
+     }
+     if (ModifierSet.isStrictfp(modifiers)) {
+         keywords.append("strictfp ");
+     }
+     if (ModifierSet.isSynchronized(modifiers)) {
+         keywords.append("synchronized ");
+     }
+     if (ModifierSet.isTransient(modifiers)) {
+         keywords.append("transient ");
+     }
+     if (ModifierSet.isVolatile(modifiers)) {
+         keywords.append("volatile ");
+     }
+     // Avoid touching the printer (and its indentation state) when there
+     // is nothing to print.
+     if (keywords.length() > 0) {
+         printer.print(keywords.toString());
+     }
+ }
+
+ /**
+  * Emits every method declaration among {@code members}, each surrounded
+  * by blank-line breaks. Methods whose name starts with {@code fatal},
+  * {@code err}, {@code warn}, {@code maybeErr}, {@code maybeWarn} or
+  * {@code note}, and methods named {@code releaseArray},
+  * {@code deleteArray} or {@code delete}, are left out.
+  */
+ private void printMethods(List<BodyDeclaration> members, Object arg) {
+     for (BodyDeclaration member : members) {
+         if (!(member instanceof MethodDeclaration)) {
+             continue;
+         }
+         String name = ((MethodDeclaration) member).getName();
+         boolean skippedByPrefix = name.startsWith("fatal")
+                 || name.startsWith("err")
+                 || name.startsWith("warn")
+                 || name.startsWith("maybeErr")
+                 || name.startsWith("maybeWarn")
+                 || name.startsWith("note");
+         boolean skippedByName = "releaseArray".equals(name)
+                 || "deleteArray".equals(name)
+                 || "delete".equals(name);
+         if (skippedByPrefix || skippedByName) {
+             continue;
+         }
+         printer.printLn();
+         member.accept(this, arg);
+         printer.printLn();
+     }
+ }
+
+ /**
+  * Emits every field declaration among {@code members} that is not both
+  * {@code static} and {@code final}, and records the name of its first
+  * declared variable in {@code fields}.
+  */
+ private void printFields(List<BodyDeclaration> members, Object arg) {
+     for (BodyDeclaration member : members) {
+         if (!(member instanceof FieldDeclaration)) {
+             continue;
+         }
+         FieldDeclaration declaration = (FieldDeclaration) member;
+         int mods = declaration.getModifiers();
+         boolean constant = ModifierSet.isStatic(mods)
+                 && ModifierSet.isFinal(mods);
+         if (constant) {
+             continue;
+         }
+         fields.add(declaration.getVariables().get(0).getId().getName());
+         printer.printLn();
+         declaration.accept(this, arg);
+         printer.printLn();
+     }
+ }
+
+ /**
+  * Emits every field declaration among {@code members} that is both
+  * {@code static} and {@code final}, and records the name of its first
+  * declared variable in {@code constants}.
+  */
+ private void printConstants(List<BodyDeclaration> members, Object arg) {
+     for (BodyDeclaration member : members) {
+         if (!(member instanceof FieldDeclaration)) {
+             continue;
+         }
+         FieldDeclaration declaration = (FieldDeclaration) member;
+         int mods = declaration.getModifiers();
+         boolean constant = ModifierSet.isStatic(mods)
+                 && ModifierSet.isFinal(mods);
+         if (!constant) {
+             continue;
+         }
+         constants.add(declaration.getVariables().get(0).getId().getName());
+         printer.printLn();
+         declaration.accept(this, arg);
+         printer.printLn();
+     }
+ }
+
+ /** Emits each annotation on a line of its own; a {@code null} list prints nothing. */
+ private void printMemberAnnotations(List<AnnotationExpr> annotations, Object arg) {
+     if (annotations == null) {
+         return;
+     }
+     for (AnnotationExpr annotation : annotations) {
+         annotation.accept(this, arg);
+         printer.printLn();
+     }
+ }
+
+ /**
+  * Emits a parenthesized, comma-separated argument list. A {@code null}
+  * list prints as {@code ()}.
+  */
+ private void printArguments(List<Expression> args, Object arg) {
+     printer.print("(");
+     if (args != null) {
+         boolean first = true;
+         for (Expression argument : args) {
+             if (!first) {
+                 printer.print(", ");
+             }
+             first = false;
+             argument.accept(this, arg);
+         }
+     }
+     printer.print(")");
+ }
+
+ /** Emits {@code javadoc} through this visitor; does nothing for {@code null}. */
+ private void printJavadoc(JavadocComment javadoc, Object arg) {
+     if (javadoc == null) {
+         return;
+     }
+     javadoc.accept(this, arg);
+ }
+
+ public void visit(CompilationUnit n, Object arg) {
+ if (n.getTypes() != null) {
+ for (Iterator<TypeDeclaration> i = n.getTypes().iterator(); i.hasNext();) {
+ i.next().accept(this, arg);
+ printer.printLn();
+ if (i.hasNext()) {
+ printer.printLn();
+ }
+ }
+ }
+ }
+
+ public void visit(NameExpr n, Object arg) {
+ if (fields.contains(n.getName())) {
+ printer.print("self.");
+ }
+ printer.print(n.getName());
+ }
+
+ /** Emits the qualifier, a dot, and the simple name. */
+ public void visit(QualifiedNameExpr n, Object arg) {
+     n.getQualifier().accept(this, arg);
+     printer.print("." + n.getName());
+ }
+
+ /**
+  * Emits a class as: a {@code mod} line for every entry of {@code MODS}
+  * other than the class itself, the class Javadoc, its constants, a
+  * {@code struct} holding its fields, and an {@code impl} holding its
+  * methods.
+  */
+ public void visit(ClassOrInterfaceDeclaration n, Object arg) {
+     String name = n.getName();
+     List<BodyDeclaration> members = n.getMembers();
+
+     for (String mod : MODS) {
+         if (mod.equals(name)) {
+             continue;
+         }
+         printer.printLn("mod " + mod + ";");
+     }
+
+     printJavadoc(n.getJavaDoc(), arg);
+
+     if (members != null) {
+         printConstants(members, arg);
+     }
+     printer.printLn();
+     printer.printLn();
+
+     printer.printLn("struct " + name + " {");
+     printer.indent();
+     if (members != null) {
+         printFields(members, arg);
+     }
+     printer.unindent();
+     printer.printLn("}");
+     printer.printLn();
+
+     printer.printLn("impl " + name + " {");
+     printer.indent();
+     if (members != null) {
+         printMethods(members, arg);
+     }
+     printer.unindent();
+     printer.print("}");
+ }
+
+ public void visit(EmptyTypeDeclaration n, Object arg) {
+ printJavadoc(n.getJavaDoc(), arg);
+ printer.print(";");
+ }
+
+ /** Emits the comment content between Javadoc delimiters and ends the line. */
+ public void visit(JavadocComment n, Object arg) {
+     printer.printLn("/**" + n.getContent() + "*/");
+ }
+
+ public void visit(ClassOrInterfaceType n, Object arg) {
+ if (n.getScope() != null) {
+ n.getScope().accept(this, arg);
+ printer.print(".");
+ }
+ printer.print(n.getName());
+ }
+
+ public void visit(TypeParameter n, Object arg) {
+ printer.print(n.getName());
+ if (n.getTypeBound() != null) {
+ printer.print(" extends ");
+ for (Iterator<ClassOrInterfaceType> i = n.getTypeBound().iterator(); i.hasNext();) {
+ ClassOrInterfaceType c = i.next();
+ c.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(" & ");
+ }
+ }
+ }
+ }
+
+ public void visit(PrimitiveType n, Object arg) {
+ switch (n.getType()) {
+ case Boolean:
+ printer.print("bool");
+ break;
+ case Byte:
+ printer.print("i8");
+ break;
+ case Char:
+ printer.print("u16");
+ break;
+ case Double:
+ printer.print("f64");
+ break;
+ case Float:
+ printer.print("f32");
+ break;
+ case Int:
+ printer.print("i32");
+ break;
+ case Long:
+ printer.print("i64");
+ break;
+ case Short:
+ printer.print("i16");
+ break;
+ }
+ }
+
+ /**
+  * Emits a reference type as {@code @} followed by the element type
+  * wrapped in one pair of square brackets per array dimension. (An earlier
+  * draft, since disabled, chose {@code &} or {@code ~} depending on
+  * {@code inMethodSignature}; the sigil is now always {@code @}.)
+  */
+ public void visit(ReferenceType n, Object arg) {
+     printer.print("@");
+     for (int open = 0; open < n.getArrayCount(); open++) {
+         printer.print("[");
+     }
+     n.getType().accept(this, arg);
+     for (int close = 0; close < n.getArrayCount(); close++) {
+         printer.print("]");
+     }
+ }
+
+ public void visit(WildcardType n, Object arg) {
+ printer.print("?");
+ if (n.getExtends() != null) {
+ printer.print(" extends ");
+ n.getExtends().accept(this, arg);
+ }
+ if (n.getSuper() != null) {
+ printer.print(" super ");
+ n.getSuper().accept(this, arg);
+ }
+ }
+
+ /**
+  * Emits a field declaration. A {@code static final} field becomes a
+  * {@code const} item ({@code pub} unless private), printed with its
+  * initializer and terminated by {@code ;}. Any other field becomes a
+  * struct member terminated by {@code ,} and is prefixed with {@code mut}
+  * unless it is final.
+  */
+ public void visit(FieldDeclaration n, Object arg) {
+     printJavadoc(n.getJavaDoc(), arg);
+
+     int mods = n.getModifiers();
+     boolean isFinal = ModifierSet.isFinal(mods);
+     boolean constant = ModifierSet.isStatic(mods) && isFinal;
+     if (constant) {
+         printer.print(ModifierSet.isPrivate(mods) ? "const " : "pub const ");
+     } else if (!isFinal) {
+         printer.print("mut ");
+     }
+
+     printVariableDeclarator(n.getType(), n.getVariables(), arg, !constant);
+
+     printer.print(constant ? ";" : ",");
+ }
+
+ /**
+  * Emits a single variable as {@code name: type}, wrapping the type in one
+  * pair of square brackets per array dimension declared on the variable
+  * id. The initializer is emitted only when {@code field} is false.
+  *
+  * @param type declared type of the variable
+  * @param vars declarators of the declaration; exactly one is supported
+  * @param arg passed through to nested visits
+  * @param field whether the variable is printed as a struct member, in
+  *        which case the initializer is omitted
+  * @throws RuntimeException if {@code vars} does not hold exactly one
+  *         declarator
+  */
+ private void printVariableDeclarator(Type type, List<VariableDeclarator> vars,
+         Object arg, boolean field) {
+     if (vars.size() != 1) {
+         throw new RuntimeException(
+                 "Expected exactly one variable per declaration but found "
+                 + vars.size());
+     }
+
+     VariableDeclarator decl = vars.get(0);
+     VariableDeclaratorId id = decl.getId();
+
+     printer.print(id.getName());
+     printer.print(": ");
+
+     for (int i = 0; i < id.getArrayCount(); i++) {
+         printer.print("[");
+     }
+     type.accept(this, arg);
+     for (int i = 0; i < id.getArrayCount(); i++) {
+         printer.print("]");
+     }
+
+     Expression init = decl.getInit();
+     if (init != null && !field) {
+         printer.print(" = ");
+         init.accept(this, arg);
+     }
+ }
+
+ public void visit(ArrayInitializerExpr n, Object arg) {
+ printer.print("[");
+ if (n.getValues() != null) {
+ printer.print(" ");
+ for (Iterator<Expression> i = n.getValues().iterator(); i.hasNext();) {
+ Expression expr = i.next();
+ expr.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ printer.print(" ");
+ }
+ printer.print("]");
+ }
+
+ /** Emits the keyword {@code void}. */
+ public void visit(VoidType n, Object arg) {
+     final String keyword = "void";
+     printer.print(keyword);
+ }
+
+ public void visit(ArrayAccessExpr n, Object arg) {
+ n.getName().accept(this, arg);
+ printer.print("[");
+ n.getIndex().accept(this, arg);
+ printer.print("]");
+ }
+
+ public void visit(ArrayCreationExpr n, Object arg) {
+ printer.print("new ");
+ n.getType().accept(this, arg);
+
+ if (n.getDimensions() != null) {
+ for (Expression dim : n.getDimensions()) {
+ printer.print("[");
+ dim.accept(this, arg);
+ printer.print("]");
+ }
+ for (int i = 0; i < n.getArrayCount(); i++) {
+ printer.print("[]");
+ }
+ } else {
+ for (int i = 0; i < n.getArrayCount(); i++) {
+ printer.print("[]");
+ }
+ printer.print(" ");
+ n.getInitializer().accept(this, arg);
+ }
+ }
+
+ public void visit(AssignExpr n, Object arg) {
+ n.getTarget().accept(this, arg);
+ printer.print(" ");
+ switch (n.getOperator()) {
+ case assign:
+ printer.print("=");
+ break;
+ case and:
+ printer.print("&=");
+ break;
+ case or:
+ printer.print("|=");
+ break;
+ case xor:
+ printer.print("^=");
+ break;
+ case plus:
+ printer.print("+=");
+ break;
+ case minus:
+ printer.print("-=");
+ break;
+ case rem:
+ printer.print("%=");
+ break;
+ case slash:
+ printer.print("/=");
+ break;
+ case star:
+ printer.print("*=");
+ break;
+ case lShift:
+ printer.print("<<=");
+ break;
+ case rSignedShift:
+ printer.print(">>=");
+ break;
+ case rUnsignedShift:
+ printer.print(">>>=");
+ break;
+ }
+ printer.print(" ");
+ n.getValue().accept(this, arg);
+ }
+
+ public void visit(BinaryExpr n, Object arg) {
+ n.getLeft().accept(this, arg);
+ printer.print(" ");
+ switch (n.getOperator()) {
+ case or:
+ printer.print("||");
+ break;
+ case and:
+ printer.print("&&");
+ break;
+ case binOr:
+ printer.print("|");
+ break;
+ case binAnd:
+ printer.print("&");
+ break;
+ case xor:
+ printer.print("^");
+ break;
+ case equals:
+ printer.print("==");
+ break;
+ case notEquals:
+ printer.print("!=");
+ break;
+ case less:
+ printer.print("<");
+ break;
+ case greater:
+ printer.print(">");
+ break;
+ case lessEquals:
+ printer.print("<=");
+ break;
+ case greaterEquals:
+ printer.print(">=");
+ break;
+ case lShift:
+ printer.print("<<");
+ break;
+ case rSignedShift:
+ printer.print(">>");
+ break;
+ case rUnsignedShift:
+ printer.print(">>>");
+ break;
+ case plus:
+ printer.print("+");
+ break;
+ case minus:
+ printer.print("-");
+ break;
+ case times:
+ printer.print("*");
+ break;
+ case divide:
+ printer.print("/");
+ break;
+ case remainder:
+ printer.print("%");
+ break;
+ }
+ printer.print(" ");
+ n.getRight().accept(this, arg);
+ }
+
+ public void visit(CastExpr n, Object arg) {
+ printer.print("(");
+ n.getType().accept(this, arg);
+ printer.print(") ");
+ n.getExpr().accept(this, arg);
+ }
+
+ public void visit(ClassExpr n, Object arg) {
+ n.getType().accept(this, arg);
+ printer.print(".class");
+ }
+
+ public void visit(ConditionalExpr n, Object arg) {
+ n.getCondition().accept(this, arg);
+ printer.print(" ? ");
+ n.getThenExpr().accept(this, arg);
+ printer.print(" : ");
+ n.getElseExpr().accept(this, arg);
+ }
+
+ public void visit(EnclosedExpr n, Object arg) {
+ printer.print("(");
+ n.getInner().accept(this, arg);
+ printer.print(")");
+ }
+
+ /**
+  * Emits {@code scope.field}, using {@code ::} instead of {@code .} when
+  * the textual form of the scope equals an entry of {@code MODS}. A field
+  * named {@code length} is emitted as {@code len() as i32}. The scope is
+  * printed via its {@code toString()} rather than through this visitor.
+  */
+ public void visit(FieldAccessExpr n, Object arg) {
+     String scope = n.getScope().toString();
+     String separator = ".";
+     for (String mod : MODS) {
+         if (mod.equals(scope)) {
+             separator = "::";
+             break;
+         }
+     }
+     String field = n.getField();
+     String member = "length".equals(field) ? "len() as i32" : field;
+     printer.print(scope + separator + member);
+ }
+
+ public void visit(InstanceOfExpr n, Object arg) {
+ n.getExpr().accept(this, arg);
+ printer.print(" instanceof ");
+ n.getType().accept(this, arg);
+ }
+
+ /**
+  * Emits a character literal as a hexadecimal integer literal. A literal
+  * whose value is a single character is printed as {@code 0x} plus its
+  * code unit in hex, zero-padded to an even number of digits. The escape
+  * sequences {@code \n}, {@code \r}, {@code \t}, {@code \b}, {@code \f},
+  * {@code \0}, {@code \"}, {@code \'} and {@code \\} are mapped to their
+  * code units.
+  *
+  * @throws RuntimeException if the literal is any other escape sequence
+  */
+ public void visit(CharLiteralExpr n, Object arg) {
+     String str = n.getValue();
+     if (str.length() == 1) {
+         String hex = Integer.toHexString(str.charAt(0));
+         // One or three hex digits get a leading zero; two or four do not.
+         printer.print((hex.length() % 2 == 1 ? "0x0" : "0x") + hex);
+     } else if ("\\n".equals(str)) {
+         printer.print("0x0A");
+     } else if ("\\r".equals(str)) {
+         printer.print("0x0D");
+     } else if ("\\t".equals(str)) {
+         printer.print("0x09");
+     } else if ("\\b".equals(str)) {
+         printer.print("0x08");
+     } else if ("\\f".equals(str)) {
+         printer.print("0x0C");
+     } else if ("\\0".equals(str)) {
+         printer.print("0x00");
+     } else if ("\\\"".equals(str)) {
+         printer.print("0x22");
+     } else if ("\\'".equals(str)) {
+         printer.print("0x27");
+     } else if ("\\\\".equals(str)) {
+         printer.print("0x5C");
+     } else {
+         throw new RuntimeException("Unsupported character literal: " + str);
+     }
+ }
+
+ public void visit(DoubleLiteralExpr n, Object arg) {
+ printer.print(n.getValue());
+ }
+
+ public void visit(IntegerLiteralExpr n, Object arg) {
+ printer.print(n.getValue());
+ }
+
+ public void visit(LongLiteralExpr n, Object arg) {
+ printer.print(n.getValue());
+ }
+
+ public void visit(IntegerLiteralMinValueExpr n, Object arg) {
+ printer.print(n.getValue());
+ }
+
+ public void visit(LongLiteralMinValueExpr n, Object arg) {
+ printer.print(n.getValue());
+ }
+
+ /** Emits the literal value between double quotes, without re-escaping it. */
+ public void visit(StringLiteralExpr n, Object arg) {
+     printer.print("\"" + n.getValue() + "\"");
+ }
+
+ public void visit(BooleanLiteralExpr n, Object arg) {
+ printer.print(String.valueOf(n.getValue()));
+ }
+
+ /** Emits the keyword {@code null}. */
+ public void visit(NullLiteralExpr n, Object arg) {
+     final String keyword = "null";
+     printer.print(keyword);
+ }
+
+ public void visit(ThisExpr n, Object arg) {
+ if (n.getClassExpr() != null) {
+ n.getClassExpr().accept(this, arg);
+ printer.print(".");
+ }
+ printer.print("self");
+ }
+
+ public void visit(SuperExpr n, Object arg) {
+ if (n.getClassExpr() != null) {
+ n.getClassExpr().accept(this, arg);
+ printer.print(".");
+ }
+ printer.print("super");
+ }
+
+ public void visit(MethodCallExpr n, Object arg) {
+ if (n.getScope() != null) {
+ n.getScope().accept(this, arg);
+ printer.print(".");
+ }
+ printer.print(n.getName());
+ printArguments(n.getArgs(), arg);
+ }
+
+ public void visit(ObjectCreationExpr n, Object arg) {
+ if (n.getScope() != null) {
+ n.getScope().accept(this, arg);
+ printer.print(".");
+ }
+
+ printer.print("new ");
+
+ n.getType().accept(this, arg);
+
+ printArguments(n.getArgs(), arg);
+
+ if (n.getAnonymousClassBody() != null) {
+ printer.printLn(" {");
+ printer.indent();
+ printMethods(n.getAnonymousClassBody(), arg);
+ printer.unindent();
+ printer.print("}");
+ }
+ }
+
+ /**
+  * Emits a unary expression. {@code +}, {@code -} and {@code !} are
+  * printed as prefixes; bitwise inversion becomes {@code i32::compl(x)};
+  * increments and decrements (prefix or postfix alike) become
+  * {@code x = x + 1} and {@code x = x - 1}. With no operator, only the
+  * operand is printed.
+  */
+ public void visit(UnaryExpr n, Object arg) {
+     Operator op = n.getOperator();
+     if (op == null) {
+         n.getExpr().accept(this, arg);
+         return;
+     }
+     String prefix = null;
+     String suffix = null;
+     String step = null;
+     switch (op) {
+         case positive:
+             prefix = "+";
+             break;
+         case negative:
+             prefix = "-";
+             break;
+         case not:
+             prefix = "!";
+             break;
+         case inverse:
+             prefix = "i32::compl(";
+             suffix = ")";
+             break;
+         case preIncrement:
+         case posIncrement:
+             step = " + 1";
+             break;
+         case preDecrement:
+         case posDecrement:
+             step = " - 1";
+             break;
+     }
+     if (step != null) {
+         // The operand is printed twice: as assignment target and as operand.
+         n.getExpr().accept(this, arg);
+         printer.print(" = ");
+         n.getExpr().accept(this, arg);
+         printer.print(step);
+     } else if (prefix != null) {
+         printer.print(prefix);
+         n.getExpr().accept(this, arg);
+         if (suffix != null) {
+             printer.print(suffix);
+         }
+     }
+ }
+
+ public void visit(ConstructorDeclaration n, Object arg) {
+ printJavadoc(n.getJavaDoc(), arg);
+ printMemberAnnotations(n.getAnnotations(), arg);
+ printModifiers(n.getModifiers());
+
+ if (n.getTypeParameters() != null) {
+ printer.print(" ");
+ }
+ printer.print(n.getName());
+
+ printer.print("(");
+ if (n.getParameters() != null) {
+ for (Iterator<Parameter> i = n.getParameters().iterator(); i.hasNext();) {
+ Parameter p = i.next();
+ p.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ printer.print(")");
+
+ if (n.getThrows() != null) {
+ printer.print(" throws ");
+ for (Iterator<NameExpr> i = n.getThrows().iterator(); i.hasNext();) {
+ NameExpr name = i.next();
+ name.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ printer.print(" ");
+ n.getBlock().accept(this, arg);
+ }
+
+ public void visit(MethodDeclaration n, Object arg) {
+
+ printJavadoc(n.getJavaDoc(), arg);
+// printMemberAnnotations(n.getAnnotations(), arg);
+// printModifiers(n.getModifiers());
+
+// printTypeParameters(n.getTypeParameters(), arg);
+// if (n.getTypeParameters() != null) {
+// printer.print(" ");
+// }
+
+ printer.print("fn ");
+ printer.print(n.getName());
+
+ printer.print("(");
+ inMethodSignature = true;
+ if (n.getParameters() != null) {
+ for (Iterator<Parameter> i = n.getParameters().iterator(); i.hasNext();) {
+ Parameter p = i.next();
+ p.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ inMethodSignature = false;
+ printer.print(")");
+
+ Type type = n.getType();
+
+ if (!(type instanceof VoidType)) {
+ printer.print(" -> ");
+ type.accept(this, arg);
+ }
+
+// for (int i = 0; i < n.getArrayCount(); i++) {
+// printer.print("[]");
+// }
+
+// if (n.getThrows() != null) {
+// printer.print(" throws ");
+// for (Iterator<NameExpr> i = n.getThrows().iterator(); i.hasNext();) {
+// NameExpr name = i.next();
+// name.accept(this, arg);
+// if (i.hasNext()) {
+// printer.print(", ");
+// }
+// }
+// }
+ if (n.getBody() == null) {
+ printer.print(";");
+ } else {
+ printer.print(" ");
+ n.getBody().accept(this, arg);
+ }
+ }
+
+ /**
+  * Prints a parameter as {@code name: type}. Annotations, modifiers and
+  * the varargs marker are not printed.
+  */
+ public void visit(Parameter n, Object arg) {
+     String paramName = n.getId().getName();
+     printer.print(paramName);
+     printer.print(": ");
+     n.getType().accept(this, arg);
+ }
+
+ public void visit(ExplicitConstructorInvocationStmt n, Object arg) {
+ if (n.isThis()) {
+ printer.print("this");
+ } else {
+ if (n.getExpr() != null) {
+ n.getExpr().accept(this, arg);
+ printer.print(".");
+ }
+ printer.print("super");
+ }
+ printArguments(n.getArgs(), arg);
+ printer.print(";");
+ }
+
+ public void visit(VariableDeclarationExpr n, Object arg) {
+// printAnnotations(n.getAnnotations(), arg);
+
+ printer.print("let ");
+
+ if (!ModifierSet.isFinal(n.getModifiers())) {
+ printer.print("mut ");
+ }
+
+// printModifiers(n.getModifiers());
+
+ List<VariableDeclarator> vars = n.getVars();
+
+ printVariableDeclarator(n.getType(), vars, arg, false);
+ }
+
+ /**
+  * Prints a local type declaration statement by visiting the wrapped type
+  * declaration with this visitor and the same argument.
+  */
+ public void visit(TypeDeclarationStmt n, Object arg) {
+ n.getTypeDeclaration().accept(this, arg);
+ }
+
+ public void visit(AssertStmt n, Object arg) {
+ Expression check = n.getCheck();
+ if (check instanceof BooleanLiteralExpr) {
+ BooleanLiteralExpr bool = (BooleanLiteralExpr) check;
+ if (!bool.getValue()) {
+ printer.print("fail;");
+ return;
+ }
+ }
+ printer.print("assert ");
+ check.accept(this, arg);
+ printer.print(";");
+ }
+
+ /**
+  * Prints a block: an opening brace on its own line, each statement
+  * indented and followed by a line break, and a closing brace with no
+  * trailing line break.
+  */
+ public void visit(BlockStmt n, Object arg) {
+     printer.printLn("{");
+
+     List<Statement> body = n.getStmts();
+     if (body != null) {
+         printer.indent();
+         for (Iterator<Statement> it = body.iterator(); it.hasNext();) {
+             it.next().accept(this, arg);
+             printer.printLn();
+         }
+         printer.unindent();
+     }
+
+     printer.print("}");
+ }
+
+ /**
+  * Visits the labeled statement, passing the label as the visitor
+  * argument instead of printing it here. Asserts that no argument was
+  * passed in.
+  */
+ public void visit(LabeledStmt n, Object arg) {
+     assert arg == null;
+     Statement labeled = n.getStmt();
+     String label = n.getLabel();
+     labeled.accept(this, label);
+ }
+
+ /**
+  * Prints an empty statement as a lone semicolon.
+  */
+ public void visit(EmptyStmt n, Object arg) {
+ printer.print(";");
+ }
+
+ public void visit(ExpressionStmt n, Object arg) {
+ Expression plusplus = null;
+ Expression ex = n.getExpression();
+
+ if (ex instanceof MethodCallExpr) {
+ MethodCallExpr meth = (MethodCallExpr) ex;
+ if (meth.getName().startsWith("fatal") || meth.getName().startsWith("err")
+ || meth.getName().startsWith("warn")
+ || meth.getName().startsWith("maybeErr")
+ || meth.getName().startsWith("maybeWarn")
+ || meth.getName().startsWith("note")
+ || "releaseArray".equals(meth.getName())
+ || "deleteArray".equals(meth.getName())
+ || "delete".equals(meth.getName())) {
+ return;
+ }
+ }
+
+ if (ex instanceof AssignExpr) {
+ AssignExpr ax = (AssignExpr) ex;
+ Expression left = ax.getTarget();
+ if (left instanceof ArrayAccessExpr) {
+ ArrayAccessExpr aae = (ArrayAccessExpr) left;
+ Expression index = aae.getIndex();
+ if (index instanceof UnaryExpr) {
+ UnaryExpr unex = (UnaryExpr) index;
+ if (unex.getOperator() == Operator.posIncrement) {
+ plusplus = unex.getExpr();
+ unex.setOperator(null);
+ }
+ }
+ }
+ }
+ n.getExpression().accept(this, arg);
+ printer.print(";");
+ if (plusplus != null) {
+ printer.printLn();
+ plusplus.accept(this, arg);
+ printer.print(" = ");
+ plusplus.accept(this, arg);
+ printer.print(" + 1;");
+ }
+ }
+
+ /**
+  * Prints a {@code switch} as a {@code match} expression.
+  * <p>
+  * Labels of entries whose statement list is {@code null} are collected
+  * and joined with {@code " | "} in front of the next entry that has a
+  * statement list. A {@code null} label (the default entry) is printed as
+  * {@code _}. A trailing unlabeled {@code break} is removed from the
+  * entry's statement list (this mutates the AST); an entry left with no
+  * statements prints nothing and its collected labels are discarded.
+  * <p>
+  * The trailing-{@code break} check is skipped for an empty statement
+  * list. Such a list is what this method leaves behind after removing a
+  * lone {@code break}, so visiting the same node again no longer throws.
+  */
+ public void visit(SwitchStmt n, Object arg) {
+     printer.print("match ");
+     n.getSelector().accept(this, arg);
+     printer.printLn(" {");
+     if (n.getEntries() != null) {
+         printer.indent();
+         List<Expression> labels = new LinkedList<Expression>();
+         for (SwitchEntryStmt e : n.getEntries()) {
+             labels.add(e.getLabel());
+             List<Statement> stmts = e.getStmts();
+             if (stmts == null) {
+                 // No body: keep the label for the next entry with one.
+                 continue;
+             }
+             // Guard against an empty list before looking at the last
+             // statement; get(-1) would throw.
+             if (!stmts.isEmpty()
+                     && stmts.get(stmts.size() - 1) instanceof BreakStmt) {
+                 BreakStmt brk = (BreakStmt) stmts.get(stmts.size() - 1);
+                 if (brk.getId() == null) {
+                     stmts.remove(stmts.size() - 1);
+                 }
+             }
+             if (!stmts.isEmpty()) {
+                 boolean first = true;
+                 for (Expression label : labels) {
+                     if (!first) {
+                         printer.print(" | ");
+                     }
+                     first = false;
+                     if (label == null) {
+                         printer.print("_");
+                     } else {
+                         label.accept(this, arg);
+                     }
+                 }
+                 printer.printLn(" => {");
+                 printer.indent();
+                 for (Statement statement : stmts) {
+                     statement.accept(this, arg);
+                     printer.printLn();
+                 }
+                 printer.unindent();
+                 printer.printLn("}");
+             }
+             labels.clear();
+         }
+         printer.unindent();
+     }
+     printer.print("}");
+ }
+
+ /**
+  * Always throws. The {@code SwitchStmt} visitor in this class reads each
+  * entry's label and statements directly and never calls {@code accept}
+  * on an entry.
+  *
+  * @throws RuntimeException always
+  */
+ public void visit(SwitchEntryStmt n, Object arg) {
+ throw new RuntimeException("Not supposed to come here.");
+ }
+
+ public void visit(BreakStmt n, Object arg) {
+ printer.print("break");
+ if (n.getId() != null && !"charsetloop".equals(n.getId()) && !"charactersloop".equals(n.getId())) {
+ printer.print(" ");
+ printer.print(n.getId());
+ }
+ printer.print(";");
+ }
+
+ public void visit(ReturnStmt n, Object arg) {
+ printer.print("return");
+ if (n.getExpr() != null) {
+ printer.print(" ");
+ n.getExpr().accept(this, arg);
+ }
+ printer.print(";");
+ }
+
+ public void visit(EnumDeclaration n, Object arg) {
+ printJavadoc(n.getJavaDoc(), arg);
+ printMemberAnnotations(n.getAnnotations(), arg);
+ printModifiers(n.getModifiers());
+
+ printer.print("enum ");
+ printer.print(n.getName());
+
+ if (n.getImplements() != null) {
+ printer.print(" implements ");
+ for (Iterator<ClassOrInterfaceType> i = n.getImplements().iterator(); i.hasNext();) {
+ ClassOrInterfaceType c = i.next();
+ c.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+
+ printer.printLn(" {");
+ printer.indent();
+ if (n.getEntries() != null) {
+ printer.printLn();
+ for (Iterator<EnumConstantDeclaration> i = n.getEntries().iterator(); i.hasNext();) {
+ EnumConstantDeclaration e = i.next();
+ e.accept(this, arg);
+ if (i.hasNext()) {
+ printer.print(", ");
+ }
+ }
+ }
+ if (n.getMembers() != null) {
+ printer.printLn(";");
+ printMethods(n.getMembers(), arg);
+ } else {
+ if (n.getEntries() != null) {
+ printer.printLn();
+ }
+ }
+ printer.unindent();
+ printer.print("}");
+ throw new RuntimeException("Unsupported syntax.");
+ }
+
+ public void visit(EnumConstantDeclaration n, Object arg) {
+ printJavadoc(n.getJavaDoc(), arg);
+ printMemberAnnotations(n.getAnnotations(), arg);
+ printer.print(n.getName());
+
+ if (n.getArgs() != null) {
+ printArguments(n.getArgs(), arg);
+ }
+
+ if (n.getClassBody() != null) {
+ printer.printLn(" {");
+ printer.indent();
+ printMethods(n.getClassBody(), arg);
+ printer.unindent();
+ printer.printLn("}");
+ }
+ throw new RuntimeException("Unsupported syntax.");
+ }
+
+ /**
+  * Prints an empty member declaration: its Javadoc (via
+  * {@code printJavadoc}) followed by a lone semicolon.
+  */
+ public void visit(EmptyMemberDeclaration n, Object arg) {
+ printJavadoc(n.getJavaDoc(), arg);
+ printer.print(";");
+ }
+
+ public void visit(InitializerDeclaration n, Object arg) {
+ printJavadoc(n.getJavaDoc(), arg);
+ if (n.isStatic()) {
+ printer.print("static ");
+ }
+ n.getBlock().accept(this, arg);
+ }
+
+ /**
+  * Prints an {@code if} statement, dropping branches that
+  * {@code TranslatorUtils} classifies as error handling.
+  * <ol>
+  * <li>If the condition is a binary expression whose left operand is a
+  * pre-increment, {@code x = x + 1;} is printed on its own line first and
+  * the operator is cleared on the AST node (it is not restored).</li>
+  * <li>Nothing more is printed when
+  * {@code TranslatorUtils.isErrorHandlerIf} accepts the condition.</li>
+  * <li>When the then-branch is not an error-only block, the statement is
+  * printed normally; the else-branch is printed only if it exists and is
+  * not an error-only block.</li>
+  * <li>When the then-branch is an error-only block and a non-error-only
+  * else-branch exists, the else-branch is printed under the negated
+  * condition: {@code ==} and {@code !=} are swapped, anything else is
+  * wrapped in {@code !( )}.</li>
+  * </ol>
+  */
+ public void visit(IfStmt n, Object arg) {
+     Expression cond = n.getCondition();
+     if (cond instanceof BinaryExpr
+             && ((BinaryExpr) cond).getLeft() instanceof UnaryExpr) {
+         UnaryExpr unex = (UnaryExpr) ((BinaryExpr) cond).getLeft();
+         if (unex.getOperator() == Operator.preIncrement) {
+             // Hoist the increment in front of the if statement.
+             unex.getExpr().accept(this, arg);
+             printer.print(" = ");
+             unex.getExpr().accept(this, arg);
+             printer.printLn(" + 1;");
+             unex.setOperator(null);
+         }
+     }
+
+     if (TranslatorUtils.isErrorHandlerIf(n.getCondition(), false)) {
+         return;
+     }
+
+     if (!TranslatorUtils.isErrorOnlyBlock(n.getThenStmt(), false)) {
+         // Ordinary case: keep the then-branch and possibly the else.
+         printer.print("if ");
+         n.getCondition().accept(this, arg);
+         printer.print(" ");
+         n.getThenStmt().accept(this, arg);
+         if (n.getElseStmt() != null
+                 && !TranslatorUtils.isErrorOnlyBlock(n.getElseStmt(), false)) {
+             printer.print(" else ");
+             n.getElseStmt().accept(this, arg);
+         }
+         return;
+     }
+
+     // The then-branch is dropped; only a meaningful else-branch survives.
+     if (n.getElseStmt() == null
+             || TranslatorUtils.isErrorOnlyBlock(n.getElseStmt(), false)) {
+         return;
+     }
+
+     printer.print("if ");
+     BinaryExpr binExpr = null;
+     String flipped = null;
+     if (n.getCondition() instanceof BinaryExpr) {
+         binExpr = (BinaryExpr) n.getCondition();
+         switch (binExpr.getOperator()) {
+             case equals:
+                 flipped = " != ";
+                 break;
+             case notEquals:
+                 flipped = " == ";
+                 break;
+             default:
+                 break;
+         }
+     }
+     if (flipped != null) {
+         binExpr.getLeft().accept(this, arg);
+         printer.print(flipped);
+         binExpr.getRight().accept(this, arg);
+     } else {
+         printer.print("!(");
+         n.getCondition().accept(this, arg);
+         printer.print(")");
+     }
+     printer.print(" ");
+     n.getElseStmt().accept(this, arg);
+ }
+
+ public void visit(WhileStmt n, Object arg) {
+ printer.print("while ");
+ n.getCondition().accept(this, arg);
+ printer.print(" ");
+ n.getBody().accept(this, arg);
+ }
+
+ public void visit(ContinueStmt n, Object arg) {
+ if (loopUpdate != null) {
+ loopUpdate.accept(this, arg);
+ printer.printLn(";");
+ }
+ printer.print("loop");
+ if (n.getId() != null) {
+ printer.print(" ");
+ printer.print(n.getId());
+ }
+ printer.print(";");
+ }
+
+ public void visit(DoStmt n, Object arg) {
+ printer.print("do ");
+ n.getBody().accept(this, arg);
+ printer.print(" while (");
+ n.getCondition().accept(this, arg);
+ printer.print(");");
+ throw new RuntimeException("Unsupported syntax.");
+ }
+
+ public void visit(ForeachStmt n, Object arg) {
+ printer.print("for (");
+ n.getVariable().accept(this, arg);
+ printer.print(" : ");
+ n.getIterable().accept(this, arg);
+ printer.print(") ");
+ n.getBody().accept(this, arg);
+ throw new RuntimeException("Unsupported syntax.");
+ }
+
+ /**
+  * Prints a {@code for} statement.
+  * <p>
+  * A {@code String} passed as {@code arg} is taken as the loop label and
+  * is not passed on to child nodes.
+  * <ul>
+  * <li>{@code for (;;)} is printed as {@code loop}, followed by
+  * {@code label: } when labeled, and the body.</li>
+  * <li>Any other loop prints its first init expression as a statement,
+  * then {@code while <compare>} (or {@code loop} when there is no
+  * condition), then the body with the first update expression appended
+  * as its last statement. Only the first init and the first update
+  * expression are printed.</li>
+  * </ul>
+  * While the body is visited, {@code loopUpdate} holds the update
+  * expression so that the {@code ContinueStmt} visitor can print it
+  * before continuing; the previous value is restored afterwards, also
+  * when an exception is thrown.
+  * <p>
+  * A loop with an init or a condition but no update expression is
+  * handled: {@code loopUpdate} is simply {@code null} for its body.
+  *
+  * @throws RuntimeException if the body of a non-trivial loop is not a
+  *         block
+  */
+ public void visit(ForStmt n, Object arg) {
+     String label = null;
+     if (arg instanceof String) {
+         label = (String) arg;
+         arg = null;
+     }
+     if (n.getInit() == null && n.getCompare() == null && n.getUpdate() == null) {
+         printer.print("loop ");
+         if (label != null) {
+             printer.print(label);
+             printer.print(": ");
+         }
+         n.getBody().accept(this, arg);
+         return;
+     }
+
+     assert label == null || "charsetloop".equals(label) || "charactersloop".equals(label);
+
+     Expression oldLoopUpdate = loopUpdate;
+     // The update list is absent for loops such as "for (init; cond; )".
+     List<? extends Expression> updates = n.getUpdate();
+     loopUpdate = (updates == null || updates.isEmpty()) ? null : updates.get(0);
+
+     try {
+         if (n.getInit() != null && !n.getInit().isEmpty()) {
+             n.getInit().get(0).accept(this, arg);
+             printer.printLn(";");
+         }
+
+         if (n.getCompare() == null) {
+             printer.print("loop ");
+         } else {
+             printer.print("while ");
+             n.getCompare().accept(this, arg);
+             printer.print(" ");
+         }
+
+         Statement body = n.getBody();
+         if (!(body instanceof BlockStmt)) {
+             throw new RuntimeException("Unsupported syntax: for loop body is not a block.");
+         }
+
+         // The block is printed by hand so that the update expression
+         // can be appended inside the braces.
+         BlockStmt blockStmt = (BlockStmt) body;
+         printer.printLn("{");
+         printer.indent();
+         if (blockStmt.getStmts() != null) {
+             for (Statement s : blockStmt.getStmts()) {
+                 s.accept(this, arg);
+                 printer.printLn();
+             }
+         }
+         if (loopUpdate != null) {
+             loopUpdate.accept(this, arg);
+             printer.printLn(";");
+         }
+         printer.unindent();
+         printer.print("}");
+     } finally {
+         // Restore the enclosing loop's update even on failure.
+         loopUpdate = oldLoopUpdate;
+     }
+ }
+
+ public void visit(ThrowStmt n, Object arg) {
+ printer.print("throw ");
+ n.getExpr().accept(this, arg);
+ printer.print(";");
+ }
+
+ public void visit(SynchronizedStmt n, Object arg) {
+ printer.print("synchronized (");
+ n.getExpr().accept(this, arg);
+ printer.print(") ");
+ n.getBlock().accept(this, arg);
+ }
+
+ public void visit(TryStmt n, Object arg) {
+ printer.print("try ");
+ n.getTryBlock().accept(this, arg);
+ if (n.getCatchs() != null) {
+ for (CatchClause c : n.getCatchs()) {
+ c.accept(this, arg);
+ }
+ }
+ if (n.getFinallyBlock() != null) {
+ printer.print(" finally ");
+ n.getFinallyBlock().accept(this, arg);
+ }
+ }
+
+ public void visit(CatchClause n, Object arg) {
+ printer.print(" catch (");
+ n.getExcept().accept(this, arg);
+ printer.print(") ");
+ n.getCatchBlock().accept(this, arg);
+
+ }
+
+// public void visit(AnnotationDeclaration n, Object arg) {
+// printJavadoc(n.getJavaDoc(), arg);
+// printMemberAnnotations(n.getAnnotations(), arg);
+// printModifiers(n.getModifiers());
+//
+// printer.print("@interface ");
+// printer.print(n.getName());
+// printer.printLn(" {");
+// printer.indent();
+// if (n.getMembers() != null) {
+// printMembers(n.getMembers(), arg);
+// }
+// printer.unindent();
+// printer.print("}");
+// }
+//
+// public void visit(AnnotationMemberDeclaration n, Object arg) {
+// printJavadoc(n.getJavaDoc(), arg);
+// printMemberAnnotations(n.getAnnotations(), arg);
+// printModifiers(n.getModifiers());
+//
+// n.getType().accept(this, arg);
+// printer.print(" ");
+// printer.print(n.getName());
+// printer.print("()");
+// if (n.getDefaultValue() != null) {
+// printer.print(" default ");
+// n.getDefaultValue().accept(this, arg);
+// }
+// printer.print(";");
+// }
+//
+// public void visit(MarkerAnnotationExpr n, Object arg) {
+// printer.print("@");
+// n.getName().accept(this, arg);
+// }
+//
+// public void visit(SingleMemberAnnotationExpr n, Object arg) {
+// printer.print("@");
+// n.getName().accept(this, arg);
+// printer.print("(");
+// n.getMemberValue().accept(this, arg);
+// printer.print(")");
+// }
+//
+// public void visit(NormalAnnotationExpr n, Object arg) {
+// printer.print("@");
+// n.getName().accept(this, arg);
+// printer.print("(");
+// if (n.getPairs() != null) {
+// for (Iterator<MemberValuePair> i = n.getPairs().iterator(); i.hasNext();) {
+// MemberValuePair m = i.next();
+// m.accept(this, arg);
+// if (i.hasNext()) {
+// printer.print(", ");
+// }
+// }
+// }
+// printer.print(")");
+// }
+
+ public void visit(MemberValuePair n, Object arg) {
+ printer.print(n.getName());
+ printer.print(" = ");
+ n.getValue().accept(this, arg);
+ }
+
+ public void visit(LineComment n, Object arg) {
+ printer.print("//");
+ printer.printLn(n.getContent());
+ }
+
+ public void visit(BlockComment n, Object arg) {
+ printer.print("/*");
+ printer.print(n.getContent());
+ printer.printLn("*/");
+ }
+
+} \ No newline at end of file
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/SwitchBreakAnalyzerVisitor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/SwitchBreakAnalyzerVisitor.java
new file mode 100644
index 000000000..766b349ce
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/SwitchBreakAnalyzerVisitor.java
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2008 Júlio Vilmar Gesser.
+ * Copyright (C) 2012 Mozilla Foundation
+ *
+ * This file is part of Java 1.5 parser and Abstract Syntax Tree.
+ *
+ * Java 1.5 parser and Abstract Syntax Tree is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Java 1.5 parser and Abstract Syntax Tree is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Java 1.5 parser and Abstract Syntax Tree. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Created on 09/06/2008
+ */
+package nu.validator.htmlparser.rusttranslate;
+
+import japa.parser.ast.stmt.AssertStmt;
+import japa.parser.ast.stmt.BlockStmt;
+import japa.parser.ast.stmt.BreakStmt;
+import japa.parser.ast.stmt.CatchClause;
+import japa.parser.ast.stmt.ContinueStmt;
+import japa.parser.ast.stmt.DoStmt;
+import japa.parser.ast.stmt.EmptyStmt;
+import japa.parser.ast.stmt.ExplicitConstructorInvocationStmt;
+import japa.parser.ast.stmt.ExpressionStmt;
+import japa.parser.ast.stmt.ForStmt;
+import japa.parser.ast.stmt.ForeachStmt;
+import japa.parser.ast.stmt.IfStmt;
+import japa.parser.ast.stmt.LabeledStmt;
+import japa.parser.ast.stmt.ReturnStmt;
+import japa.parser.ast.stmt.Statement;
+import japa.parser.ast.stmt.SwitchEntryStmt;
+import japa.parser.ast.stmt.SwitchStmt;
+import japa.parser.ast.stmt.SynchronizedStmt;
+import japa.parser.ast.stmt.ThrowStmt;
+import japa.parser.ast.stmt.TryStmt;
+import japa.parser.ast.stmt.TypeDeclarationStmt;
+import japa.parser.ast.stmt.WhileStmt;
+import japa.parser.ast.type.WildcardType;
+import japa.parser.ast.visitor.GenericVisitorAdapter;
+
+import java.util.List;
+
+/**
+ * @author Julio Vilmar Gesser
+ * @author Henri Sivonen
+ */
+public class SwitchBreakAnalyzerVisitor extends GenericVisitorAdapter<Boolean, Boolean> {
+
+ private static final LoopBreakAnalyzerVisitor ANALYZER_VISITOR = new LoopBreakAnalyzerVisitor();
+
+ public Boolean visit(AssertStmt n, Boolean arg) {
+ return false;
+ }
+
+ public Boolean visit(BlockStmt n, Boolean arg) {
+ // Bogus in the loop case
+ if (n.getStmts() != null) {
+ List<Statement> stms = n.getStmts();
+ return stms.get(stms.size() - 1).accept(this, arg);
+ }
+ return false;
+ }
+
+ public Boolean visit(BreakStmt n, Boolean arg) {
+ // Bogus in the general case
+ if (arg) {
+ return true;
+ }
+ return n.getId() != null;
+ }
+
+ public Boolean visit(CatchClause n, Boolean arg) {
+ return n.getCatchBlock().accept(this, arg);
+ }
+
+ public Boolean visit(ContinueStmt n, Boolean arg) {
+ // Bogus in the general case
+ if (arg) {
+ return true;
+ }
+ return n.getId() != null;
+ }
+
+ public Boolean visit(DoStmt n, Boolean arg) {
+ return n.getBody().accept(this, arg);
+ }
+
+ public Boolean visit(EmptyStmt n, Boolean arg) {
+ return false;
+ }
+
+ public Boolean visit(ExplicitConstructorInvocationStmt n, Boolean arg) {
+ return false;
+ }
+
+ public Boolean visit(ExpressionStmt n, Boolean arg) {
+ return false;
+ }
+
+ public Boolean visit(ForeachStmt n, Boolean arg) {
+ return n.getBody().accept(this, arg);
+ }
+
+ public Boolean visit(ForStmt n, Boolean arg) {
+ return n.getBody().accept(ANALYZER_VISITOR, arg);
+ }
+
+ public Boolean visit(IfStmt n, Boolean arg) {
+ if (n.getElseStmt() != null) {
+ return n.getThenStmt().accept(this, arg) && n.getElseStmt().accept(this, arg);
+ }
+ return false;
+ }
+
+ public Boolean visit(LabeledStmt n, Boolean arg) {
+ return n.getStmt().accept(this, arg);
+ }
+
+ public Boolean visit(ReturnStmt n, Boolean arg) {
+ return true;
+ }
+
+ public Boolean visit(SwitchEntryStmt n, Boolean arg) {
+ if (n.getStmts() != null) {
+ List<Statement> stms = n.getStmts();
+ return stms.get(stms.size() - 1).accept(this, arg);
+ }
+ return false;
+ }
+
+ public Boolean visit(SwitchStmt n, Boolean arg) {
+ /*
+ List<SwitchEntryStmt> entries = n.getEntries();
+ for (int i = 0; i < array.length; i++) {
+ array_type array_element = array[i];
+
+ }
+ */
+ return true;
+ }
+
+ public Boolean visit(SynchronizedStmt n, Boolean arg) {
+ return n.getBlock().accept(this, arg);
+ }
+
+ public Boolean visit(ThrowStmt n, Boolean arg) {
+ return true;
+ }
+
+ public Boolean visit(TryStmt n, Boolean arg) {
+ if (n.getFinallyBlock() != null) {
+ return n.getFinallyBlock().accept(this, arg);
+ }
+ if (n.getCatchs() != null) {
+ for (CatchClause c : n.getCatchs()) {
+ boolean brk = c.accept(this, arg);
+ if (!brk) {
+ return false;
+ }
+ }
+ }
+ return n.getTryBlock().accept(this, arg);
+ }
+
+ public Boolean visit(TypeDeclarationStmt n, Boolean arg) {
+ return false;
+ }
+
+ public Boolean visit(WhileStmt n, Boolean arg) {
+ return n.getBody().accept(this, arg);
+ }
+
+ public Boolean visit(WildcardType n, Boolean arg) {
+ if (n.getExtends() != null) {
+ n.getExtends().accept(this, arg);
+ }
+ if (n.getSuper() != null) {
+ n.getSuper().accept(this, arg);
+ }
+ return null;
+ }
+} \ No newline at end of file