From 9dbbc6c08f2c603d29c37b3ac33f880c2507d705 Mon Sep 17 00:00:00 2001
From: "Matt A. Tobin" <email@mattatobin.com>
Date: Thu, 16 Jan 2020 07:17:55 -0500
Subject: Remove obsolete javasrc snapshot

---
 parser/html/javasrc/Tokenizer.java | 7089 ------------------------------------
 1 file changed, 7089 deletions(-)
 delete mode 100644 parser/html/javasrc/Tokenizer.java

(limited to 'parser/html/javasrc/Tokenizer.java')
diff --git a/parser/html/javasrc/Tokenizer.java b/parser/html/javasrc/Tokenizer.java
deleted file mode 100644
index f141d94d7..000000000
--- a/parser/html/javasrc/Tokenizer.java
+++ /dev/null
@@ -1,7089 +0,0 @@
-/*
- * Copyright (c) 2005-2007 Henri Sivonen
- * Copyright (c) 2007-2015 Mozilla Foundation
- * Copyright (c) 2019 Moonchild Productions
- * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
- * Foundation, and Opera Software ASA.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * The comments following this one that use the same comment syntax as this
- * comment are quotes from the WHATWG HTML 5 spec as of 2 June 2007
- * amended as of June 18 2008 and May 31 2010.
- * That document came with this statement:
- * "© Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and
- * Opera Software ASA. You are granted a license to use, reproduce and
- * create derivative works of this document."
- */
-
-package nu.validator.htmlparser.impl;
-
-import org.xml.sax.ErrorHandler;
-import org.xml.sax.Locator;
-import org.xml.sax.SAXException;
-import org.xml.sax.SAXParseException;
-
-import nu.validator.htmlparser.annotation.Auto;
-import nu.validator.htmlparser.annotation.CharacterName;
-import nu.validator.htmlparser.annotation.Const;
-import nu.validator.htmlparser.annotation.Inline;
-import nu.validator.htmlparser.annotation.Local;
-import nu.validator.htmlparser.annotation.NoLength;
-import nu.validator.htmlparser.common.EncodingDeclarationHandler;
-import nu.validator.htmlparser.common.Interner;
-import nu.validator.htmlparser.common.TokenHandler;
-import nu.validator.htmlparser.common.XmlViolationPolicy;
-
-/**
- * An implementation of
- * https://html.spec.whatwg.org/multipage/syntax.html#tokenization
- *
- * This class implements the <code>Locator</code> interface. This is not an
- * incidental implementation detail: Users of this class are encouraged to make
- * use of the <code>Locator</code> nature.
- *
- * By default, the tokenizer may report data that XML 1.0 bans. The tokenizer
- * can be configured to treat these conditions as fatal or to coerce the infoset
- * to something that XML 1.0 allows.
- *
- * @version $Id$
- * @author hsivonen
- */
-public class Tokenizer implements Locator {
-
-    private static final int DATA_AND_RCDATA_MASK = ~1;
-
-    public static final int DATA = 0;
-
-    public static final int RCDATA = 1;
-
-    public static final int SCRIPT_DATA = 2;
-
-    public static final int RAWTEXT = 3;
-
-    public static final int SCRIPT_DATA_ESCAPED = 4;
-
-    public static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED = 5;
-
-    public static final int ATTRIBUTE_VALUE_SINGLE_QUOTED = 6;
-
-    public static final int ATTRIBUTE_VALUE_UNQUOTED = 7;
-
-    public static final int PLAINTEXT = 8;
-
-    public static final int TAG_OPEN = 9;
-
-    public static final int CLOSE_TAG_OPEN = 10;
-
-    public static final int TAG_NAME = 11;
-
-    public static final int BEFORE_ATTRIBUTE_NAME = 12;
-
-    public static final int ATTRIBUTE_NAME = 13;
-
-    public static final int AFTER_ATTRIBUTE_NAME = 14;
-
-    public static final int BEFORE_ATTRIBUTE_VALUE = 15;
-
-    public static final int AFTER_ATTRIBUTE_VALUE_QUOTED = 16;
-
-    public static final int BOGUS_COMMENT = 17;
-
-    public static final int MARKUP_DECLARATION_OPEN = 18;
-
-    public static final int DOCTYPE = 19;
-
-    public static final int BEFORE_DOCTYPE_NAME = 20;
-
-    public static final int DOCTYPE_NAME = 21;
-
-    public static final int AFTER_DOCTYPE_NAME = 22;
-
-    public static final int BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 23;
-
-    public static final int DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 24;
-
-    public static final int DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 25;
-
-    public static final int AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 26;
-
-    public static final int BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 27;
-
-    public static final int DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 28;
-
-    public static final int DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 29;
-
-    public static final int AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 30;
-
-    public static final int BOGUS_DOCTYPE = 31;
-
-    public static final int COMMENT_START = 32;
-
-    public static final int COMMENT_START_DASH = 33;
-
-    public static final int COMMENT = 34;
-
-    public static final int COMMENT_END_DASH = 35;
-
-    public static final int COMMENT_END = 36;
-
-    public static final int COMMENT_END_BANG = 37;
-
-    public static final int NON_DATA_END_TAG_NAME = 38;
-
-    public static final int MARKUP_DECLARATION_HYPHEN = 39;
-
-    public static final int MARKUP_DECLARATION_OCTYPE = 40;
-
-    public static final int DOCTYPE_UBLIC = 41;
-
-    public static final int DOCTYPE_YSTEM = 42;
-
-    public static final int AFTER_DOCTYPE_PUBLIC_KEYWORD = 43;
-
-    public static final int BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 44;
-
-    public static final int AFTER_DOCTYPE_SYSTEM_KEYWORD = 45;
-
-    public static final int CONSUME_CHARACTER_REFERENCE = 46;
-
-    public static final int CONSUME_NCR = 47;
-
-    public static final int CHARACTER_REFERENCE_TAIL = 48;
-
-    public static final int HEX_NCR_LOOP = 49;
-
-    public static final int DECIMAL_NRC_LOOP = 50;
-
-    public static final int HANDLE_NCR_VALUE = 51;
-
-    public static final int HANDLE_NCR_VALUE_RECONSUME = 52;
-
-    public static final int CHARACTER_REFERENCE_HILO_LOOKUP = 53;
-
-    public static final int SELF_CLOSING_START_TAG = 54;
-
-    public static final int CDATA_START = 55;
-
-    public static final int CDATA_SECTION = 56;
-
-    public static final int CDATA_RSQB = 57;
-
-    public static final int CDATA_RSQB_RSQB = 58;
-
-    public static final int SCRIPT_DATA_LESS_THAN_SIGN = 59;
-
-    public static final int SCRIPT_DATA_ESCAPE_START = 60;
-
-    public static final int SCRIPT_DATA_ESCAPE_START_DASH = 61;
-
-    public static final int SCRIPT_DATA_ESCAPED_DASH = 62;
-
-    public static final int SCRIPT_DATA_ESCAPED_DASH_DASH = 63;
-
-    public static final int BOGUS_COMMENT_HYPHEN = 64;
-
-    public static final int RAWTEXT_RCDATA_LESS_THAN_SIGN = 65;
-
-    public static final int SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 66;
-
-    public static final int SCRIPT_DATA_DOUBLE_ESCAPE_START = 67;
-
-    public static final int SCRIPT_DATA_DOUBLE_ESCAPED = 68;
-
-    public static final int SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 69;
-
-    public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 70;
-
-    public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 71;
-
-    public static final int SCRIPT_DATA_DOUBLE_ESCAPE_END = 72;
-
-    public static final int PROCESSING_INSTRUCTION = 73;
-
-    public static final int PROCESSING_INSTRUCTION_QUESTION_MARK = 74;
-
-    /**
-     * Magic value for UTF-16 operations.
-     */
-    private static final int LEAD_OFFSET = (0xD800 - (0x10000 >> 10));
-
-    /**
-     * UTF-16 code unit array containing less than and greater than for emitting
-     * those characters on certain parse errors.
-     */
-    private static final @NoLength char[] LT_GT = { '<', '>' };
-
-    /**
-     * UTF-16 code unit array containing less than and solidus for emitting
-     * those characters on certain parse errors.
-     */
-    private static final @NoLength char[] LT_SOLIDUS = { '<', '/' };
-
-    /**
-     * UTF-16 code unit array containing ]] for emitting those characters on
-     * state transitions.
-     */
-    private static final @NoLength char[] RSQB_RSQB = { ']', ']' };
-
-    /**
-     * Array version of U+FFFD.
-     */
-    private static final @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
-
-    // [NOCPP[
-
-    /**
-     * Array version of space.
-     */
-    private static final @NoLength char[] SPACE = { ' ' };
-
-    // ]NOCPP]
-
-    /**
-     * Array version of line feed.
-     */
-    private static final @NoLength char[] LF = { '\n' };
-
-    /**
-     * "CDATA[" as <code>char[]</code>
-     */
-    private static final @NoLength char[] CDATA_LSQB = { 'C', 'D', 'A', 'T',
-            'A', '[' };
-
-    /**
-     * "octype" as <code>char[]</code>
-     */
-    private static final @NoLength char[] OCTYPE = { 'o', 'c', 't', 'y', 'p',
-            'e' };
-
-    /**
-     * "ublic" as <code>char[]</code>
-     */
-    private static final @NoLength char[] UBLIC = { 'u', 'b', 'l', 'i', 'c' };
-
-    /**
-     * "ystem" as <code>char[]</code>
-     */
-    private static final @NoLength char[] YSTEM = { 'y', 's', 't', 'e', 'm' };
-
-    private static final char[] TITLE_ARR = { 't', 'i', 't', 'l', 'e' };
-
-    private static final char[] SCRIPT_ARR = { 's', 'c', 'r', 'i', 'p', 't' };
-
-    private static final char[] STYLE_ARR = { 's', 't', 'y', 'l', 'e' };
-
-    private static final char[] PLAINTEXT_ARR = { 'p', 'l', 'a', 'i', 'n', 't',
-            'e', 'x', 't' };
-
-    private static final char[] XMP_ARR = { 'x', 'm', 'p' };
-
-    private static final char[] TEXTAREA_ARR = { 't', 'e', 'x', 't', 'a', 'r',
-            'e', 'a' };
-
-    private static final char[] IFRAME_ARR = { 'i', 'f', 'r', 'a', 'm', 'e' };
-
-    private static final char[] NOEMBED_ARR = { 'n', 'o', 'e', 'm', 'b', 'e',
-            'd' };
-
-    private static final char[] NOSCRIPT_ARR = { 'n', 'o', 's', 'c', 'r', 'i',
-            'p', 't' };
-
-    private static final char[] NOFRAMES_ARR = { 'n', 'o', 'f', 'r', 'a', 'm',
-            'e', 's' };
-
-    /**
-     * The token handler.
-     */
-    protected final TokenHandler tokenHandler;
-
-    protected EncodingDeclarationHandler encodingDeclarationHandler;
-
-    // [NOCPP[
-
-    /**
-     * The error handler.
-     */
-    protected ErrorHandler errorHandler;
-
-    // ]NOCPP]
-
-    /**
-     * Whether the previous char read was CR.
-     */
-    protected boolean lastCR;
-
-    protected int stateSave;
-
-    private int returnStateSave;
-
-    protected int index;
-
-    private boolean forceQuirks;
-
-    private char additional;
-
-    private int entCol;
-
-    private int firstCharKey;
-
-    private int lo;
-
-    private int hi;
-
-    private int candidate;
-
-    private int charRefBufMark;
-
-    protected int value;
-
-    private boolean seenDigits;
-
-    protected int cstart;
-
-    /**
-     * The SAX public id for the resource being tokenized. (Only passed to back
-     * as part of locator data.)
-     */
-    private String publicId;
-
-    /**
-     * The SAX system id for the resource being tokenized. (Only passed to back
-     * as part of locator data.)
-     */
-    private String systemId;
-
-    /**
-     * Buffer for bufferable things other than those that fit the description
-     * of <code>charRefBuf</code>.
-     */
-    private @Auto char[] strBuf;
-
-    /**
-     * Number of significant <code>char</code>s in <code>strBuf</code>.
-     */
-    private int strBufLen;
-
-    /**
-     * Buffer for characters that might form a character reference but may
-     * end up not forming one.
-     */
-    private final @Auto char[] charRefBuf;
-
-    /**
-     * Number of significant <code>char</code>s in <code>charRefBuf</code>.
-     */
-    private int charRefBufLen;
-
-    /**
-     * Buffer for expanding NCRs falling into the Basic Multilingual Plane.
-     */
-    private final @Auto char[] bmpChar;
-
-    /**
-     * Buffer for expanding astral NCRs.
-     */
-    private final @Auto char[] astralChar;
-
-    /**
-     * The element whose end tag closes the current CDATA or RCDATA element.
-     */
-    protected ElementName endTagExpectation = null;
-
-    private char[] endTagExpectationAsArray; // not @Auto!
-
-    /**
-     * <code>true</code> if tokenizing an end tag
-     */
-    protected boolean endTag;
-
-    /**
-     * The current tag token name.
-     */
-    private ElementName tagName = null;
-
-    /**
-     * The current attribute name.
-     */
-    protected AttributeName attributeName = null;
-
-    // [NOCPP[
-
-    /**
-     * Whether comment tokens are emitted.
-     */
-    private boolean wantsComments = false;
-
-    /**
-     * <code>true</code> when HTML4-specific additional errors are requested.
-     */
-    protected boolean html4;
-
-    /**
-     * Whether the stream is past the first 1024 bytes.
-     */
-    private boolean metaBoundaryPassed;
-
-    // ]NOCPP]
-
-    /**
-     * The name of the current doctype token.
-     */
-    private @Local String doctypeName;
-
-    /**
-     * The public id of the current doctype token.
-     */
-    private String publicIdentifier;
-
-    /**
-     * The system id of the current doctype token.
-     */
-    private String systemIdentifier;
-
-    /**
-     * The attribute holder.
-     */
-    private HtmlAttributes attributes;
-
-    // [NOCPP[
-
-    /**
-     * The policy for vertical tab and form feed.
-     */
-    private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.ALTER_INFOSET;
-
-    /**
-     * The policy for comments.
-     */
-    private XmlViolationPolicy commentPolicy = XmlViolationPolicy.ALTER_INFOSET;
-
-    private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.ALTER_INFOSET;
-
-    private XmlViolationPolicy namePolicy = XmlViolationPolicy.ALTER_INFOSET;
-
-    private boolean html4ModeCompatibleWithXhtml1Schemata;
-
-    private int mappingLangToXmlLang;
-
-    // ]NOCPP]
-
-    private final boolean newAttributesEachTime;
-
-    private boolean shouldSuspend;
-
-    protected boolean confident;
-
-    private int line;
-
-    /*
-     * The line number of the current attribute. First set to the line of the
-     * attribute name and if there is a value, set to the line the value
-     * started on.
-     */
-    // CPPONLY: private int attributeLine;
-
-    private Interner interner;
-
-    // CPPONLY: private boolean viewingXmlSource;
-
-    // [NOCPP[
-
-    protected LocatorImpl ampersandLocation;
-
-    public Tokenizer(TokenHandler tokenHandler, boolean newAttributesEachTime) {
-        this.tokenHandler = tokenHandler;
-        this.encodingDeclarationHandler = null;
-        this.newAttributesEachTime = newAttributesEachTime;
-        // &CounterClockwiseContourIntegral; is the longest valid char ref and
-        // the semicolon never gets appended to the buffer.
-        this.charRefBuf = new char[32];
-        this.bmpChar = new char[1];
-        this.astralChar = new char[2];
-        this.tagName = null;
-        this.attributeName = null;
-        this.doctypeName = null;
-        this.publicIdentifier = null;
-        this.systemIdentifier = null;
-        this.attributes = null;
-    }
-
-    // ]NOCPP]
-
-    /**
-     * The constructor.
-     *
-     * @param tokenHandler
-     *            the handler for receiving tokens
-     */
-    public Tokenizer(TokenHandler tokenHandler
-    // CPPONLY: , boolean viewingXmlSource
-    ) {
-        this.tokenHandler = tokenHandler;
-        this.encodingDeclarationHandler = null;
-        // [NOCPP[
-        this.newAttributesEachTime = false;
-        // ]NOCPP]
-        // &CounterClockwiseContourIntegral; is the longest valid char ref and
-        // the semicolon never gets appended to the buffer.
-        this.charRefBuf = new char[32];
-        this.bmpChar = new char[1];
-        this.astralChar = new char[2];
-        this.tagName = null;
-        this.attributeName = null;
-        this.doctypeName = null;
-        this.publicIdentifier = null;
-        this.systemIdentifier = null;
-        // [NOCPP[
-        this.attributes = null;
-        // ]NOCPP]
-        // CPPONLY: this.attributes = tokenHandler.HasBuilder() ? new HtmlAttributes(mappingLangToXmlLang) : null;
-        // CPPONLY: this.newAttributesEachTime = !tokenHandler.HasBuilder();
-        // CPPONLY: this.viewingXmlSource = viewingXmlSource;
-    }
-
-    public void setInterner(Interner interner) {
-        this.interner = interner;
-    }
-
-    public void initLocation(String newPublicId, String newSystemId) {
-        this.systemId = newSystemId;
-        this.publicId = newPublicId;
-
-    }
-
-    // CPPONLY: boolean isViewingXmlSource() {
-    // CPPONLY: return viewingXmlSource;
-    // CPPONLY: }
-
-    // [NOCPP[
-
-    /**
-     * Returns the mappingLangToXmlLang.
-     *
-     * @return the mappingLangToXmlLang
-     */
-    public boolean isMappingLangToXmlLang() {
-        return mappingLangToXmlLang == AttributeName.HTML_LANG;
-    }
-
-    /**
-     * Sets the mappingLangToXmlLang.
-     *
-     * @param mappingLangToXmlLang
-     *            the mappingLangToXmlLang to set
-     */
-    public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
-        this.mappingLangToXmlLang = mappingLangToXmlLang ? AttributeName.HTML_LANG
-                : AttributeName.HTML;
-    }
-
-    /**
-     * Sets the error handler.
-     *
-     * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
-     */
-    public void setErrorHandler(ErrorHandler eh) {
-        this.errorHandler = eh;
-    }
-
-    public ErrorHandler getErrorHandler() {
-        return this.errorHandler;
-    }
-
-    /**
-     * Sets the commentPolicy.
-     *
-     * @param commentPolicy
-     *            the commentPolicy to set
-     */
-    public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
-        this.commentPolicy = commentPolicy;
-    }
-
-    /**
-     * Sets the contentNonXmlCharPolicy.
-     *
-     * @param contentNonXmlCharPolicy
-     *            the contentNonXmlCharPolicy to set
-     */
-    public void setContentNonXmlCharPolicy(
-            XmlViolationPolicy contentNonXmlCharPolicy) {
-        if (contentNonXmlCharPolicy != XmlViolationPolicy.ALLOW) {
-            throw new IllegalArgumentException(
-                    "Must use ErrorReportingTokenizer to set contentNonXmlCharPolicy to non-ALLOW.");
-        }
-    }
-
-    /**
-     * Sets the contentSpacePolicy.
-     *
-     * @param contentSpacePolicy
-     *            the contentSpacePolicy to set
-     */
-    public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
-        this.contentSpacePolicy = contentSpacePolicy;
-    }
-
-    /**
-     * Sets the xmlnsPolicy.
-     *
-     * @param xmlnsPolicy
-     *            the xmlnsPolicy to set
-     */
-    public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
-        if (xmlnsPolicy == XmlViolationPolicy.FATAL) {
-            throw new IllegalArgumentException("Can't use FATAL here.");
-        }
-        this.xmlnsPolicy = xmlnsPolicy;
-    }
-
-    public void setNamePolicy(XmlViolationPolicy namePolicy) {
-        this.namePolicy = namePolicy;
-    }
-
-    /**
-     * Sets the html4ModeCompatibleWithXhtml1Schemata.
-     *
-     * @param html4ModeCompatibleWithXhtml1Schemata
-     *            the html4ModeCompatibleWithXhtml1Schemata to set
-     */
-    public void setHtml4ModeCompatibleWithXhtml1Schemata(
-            boolean html4ModeCompatibleWithXhtml1Schemata) {
-        this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
-    }
-
-    // ]NOCPP]
-
-    // For the token handler to call
-    /**
-     * Sets the tokenizer state and the associated element name. This should
-     * only ever used to put the tokenizer into one of the states that have
-     * a special end tag expectation.
-     *
-     * @param specialTokenizerState
-     *            the tokenizer state to set
-     */
-    public void setState(int specialTokenizerState) {
-        this.stateSave = specialTokenizerState;
-        this.endTagExpectation = null;
-        this.endTagExpectationAsArray = null;
-    }
-
-    // [NOCPP[
-
-    /**
-     * Sets the tokenizer state and the associated element name. This should
-     * only ever used to put the tokenizer into one of the states that have
-     * a special end tag expectation. For use from the tokenizer test harness.
-     *
-     * @param specialTokenizerState
-     *            the tokenizer state to set
-     * @param endTagExpectation
-     *            the expected end tag for transitioning back to normal
-     */
-    public void setStateAndEndTagExpectation(int specialTokenizerState,
-            @Local String endTagExpectation) {
-        this.stateSave = specialTokenizerState;
-        if (specialTokenizerState == Tokenizer.DATA) {
-            return;
-        }
-        @Auto char[] asArray = Portability.newCharArrayFromLocal(endTagExpectation);
-        this.endTagExpectation = ElementName.elementNameByBuffer(asArray, 0,
-                asArray.length, interner);
-        endTagExpectationToArray();
-    }
-
-    // ]NOCPP]
-
-    /**
-     * Sets the tokenizer state and the associated element name. This should
-     * only ever used to put the tokenizer into one of the states that have
-     * a special end tag expectation.
-     *
-     * @param specialTokenizerState
-     *            the tokenizer state to set
-     * @param endTagExpectation
-     *            the expected end tag for transitioning back to normal
-     */
-    public void setStateAndEndTagExpectation(int specialTokenizerState,
-            ElementName endTagExpectation) {
-        this.stateSave = specialTokenizerState;
-        this.endTagExpectation = endTagExpectation;
-        endTagExpectationToArray();
-    }
-
-    private void endTagExpectationToArray() {
-        switch (endTagExpectation.getGroup()) {
-            case TreeBuilder.TITLE:
-                endTagExpectationAsArray = TITLE_ARR;
-                return;
-            case TreeBuilder.SCRIPT:
-                endTagExpectationAsArray = SCRIPT_ARR;
-                return;
-            case TreeBuilder.STYLE:
-                endTagExpectationAsArray = STYLE_ARR;
-                return;
-            case TreeBuilder.PLAINTEXT:
-                endTagExpectationAsArray = PLAINTEXT_ARR;
-                return;
-            case TreeBuilder.XMP:
-                endTagExpectationAsArray = XMP_ARR;
-                return;
-            case TreeBuilder.TEXTAREA:
-                endTagExpectationAsArray = TEXTAREA_ARR;
-                return;
-            case TreeBuilder.IFRAME:
-                endTagExpectationAsArray = IFRAME_ARR;
-                return;
-            case TreeBuilder.NOEMBED:
-                endTagExpectationAsArray = NOEMBED_ARR;
-                return;
-            case TreeBuilder.NOSCRIPT:
-                endTagExpectationAsArray = NOSCRIPT_ARR;
-                return;
-            case TreeBuilder.NOFRAMES:
-                endTagExpectationAsArray = NOFRAMES_ARR;
-                return;
-            default:
-                assert false: "Bad end tag expectation.";
-                return;
-        }
-    }
-
-    /**
-     * For C++ use only.
-     */
-    public void setLineNumber(int line) {
-        // CPPONLY: this.attributeLine = line; // XXX is this needed?
-        this.line = line;
-    }
-
-    // start Locator impl
-
-    /**
-     * @see org.xml.sax.Locator#getLineNumber()
-     */
-    @Inline public int getLineNumber() {
-        return line;
-    }
-
-    // [NOCPP[
-
-    /**
-     * @see org.xml.sax.Locator#getColumnNumber()
-     */
-    @Inline public int getColumnNumber() {
-        return -1;
-    }
-
-    /**
-     * @see org.xml.sax.Locator#getPublicId()
-     */
-    public String getPublicId() {
-        return publicId;
-    }
-
-    /**
-     * @see org.xml.sax.Locator#getSystemId()
-     */
-    public String getSystemId() {
-        return systemId;
-    }
-
-    // end Locator impl
-
-    // end public API
-
-    public void notifyAboutMetaBoundary() {
-        metaBoundaryPassed = true;
-    }
-
-    void turnOnAdditionalHtml4Errors() {
-        html4 = true;
-    }
-
-    // ]NOCPP]
-
-    HtmlAttributes emptyAttributes() {
-        // [NOCPP[
-        if (newAttributesEachTime) {
-            return new HtmlAttributes(mappingLangToXmlLang);
-        } else {
-            // ]NOCPP]
-            return HtmlAttributes.EMPTY_ATTRIBUTES;
-            // [NOCPP[
-        }
-        // ]NOCPP]
-    }
-
-    @Inline private void appendCharRefBuf(char c) {
-        // CPPONLY: assert charRefBufLen < charRefBuf.length:
-        // CPPONLY:     "RELEASE: Attempted to overrun charRefBuf!";
-        charRefBuf[charRefBufLen++] = c;
-    }
-
-    private void emitOrAppendCharRefBuf(int returnState) throws SAXException {
-        if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
-            appendCharRefBufToStrBuf();
-        } else {
-            if (charRefBufLen > 0) {
-                tokenHandler.characters(charRefBuf, 0, charRefBufLen);
-                charRefBufLen = 0;
-            }
-        }
-    }
-
-    @Inline private void clearStrBufAfterUse() {
-        strBufLen = 0;
-    }
-
-    @Inline private void clearStrBufBeforeUse() {
-        assert strBufLen == 0: "strBufLen not reset after previous use!";
-        strBufLen = 0; // no-op in the absence of bugs
-    }
-
-    @Inline private void clearStrBufAfterOneHyphen() {
-        assert strBufLen == 1: "strBufLen length not one!";
-        assert strBuf[0] == '-': "strBuf does not start with a hyphen!";
-        strBufLen = 0;
-    }
-
-    /**
-     * Appends to the buffer.
-     *
-     * @param c
-     *            the UTF-16 code unit to append
-     */
-    @Inline private void appendStrBuf(char c) {
-        // CPPONLY: assert strBufLen < strBuf.length: "Previous buffer length insufficient.";
-        // CPPONLY: if (strBufLen == strBuf.length) {
-        // CPPONLY:     if (!EnsureBufferSpace(1)) {
-        // CPPONLY:         assert false: "RELEASE: Unable to recover from buffer reallocation failure";
-        // CPPONLY:     } // TODO: Add telemetry when outer if fires but inner does not
-        // CPPONLY: }
-        strBuf[strBufLen++] = c;
-    }
-
-    /**
-     * The buffer as a String. Currently only used for error reporting.
-     *
-     * <p>
-     * C++ memory note: The return value must be released.
-     *
-     * @return the buffer as a string
-     */
-    protected String strBufToString() {
-        String str = Portability.newStringFromBuffer(strBuf, 0, strBufLen
-            // CPPONLY: , tokenHandler
-        );
-        clearStrBufAfterUse();
-        return str;
-    }
-
-    /**
-     * Returns the buffer as a local name. The return value is released in
-     * emitDoctypeToken().
-     *
-     * @return the buffer as local name
-     */
-    private void strBufToDoctypeName() {
-        doctypeName = Portability.newLocalNameFromBuffer(strBuf, 0, strBufLen,
-                interner);
-        clearStrBufAfterUse();
-    }
-
-    /**
-     * Emits the buffer as character tokens.
-     *
-     * @throws SAXException
-     *             if the token handler threw
-     */
-    private void emitStrBuf() throws SAXException {
-        if (strBufLen > 0) {
-            tokenHandler.characters(strBuf, 0, strBufLen);
-            clearStrBufAfterUse();
-        }
-    }
-
-    @Inline private void appendSecondHyphenToBogusComment() throws SAXException {
-        // [NOCPP[
-        switch (commentPolicy) {
-            case ALTER_INFOSET:
-                appendStrBuf(' ');
-                // FALLTHROUGH
-            case ALLOW:
-                warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
-                // ]NOCPP]
-                appendStrBuf('-');
-                // [NOCPP[
-                break;
-            case FATAL:
-                fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
-                break;
-        }
-        // ]NOCPP]
-    }
-
-    // [NOCPP[
-    private void maybeAppendSpaceToBogusComment() throws SAXException {
-        switch (commentPolicy) {
-            case ALTER_INFOSET:
-                appendStrBuf(' ');
-                // FALLTHROUGH
-            case ALLOW:
-                warn("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment.");
-                break;
-            case FATAL:
-                fatal("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment.");
-                break;
-        }
-    }
-
-    // ]NOCPP]
-
-    @Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr(char c)
-            throws SAXException {
-        errConsecutiveHyphens();
-        // [NOCPP[
-        switch (commentPolicy) {
-            case ALTER_INFOSET:
-                strBufLen--;
-                // WARNING!!! This expands the worst case of the buffer length
-                // given the length of input!
-                appendStrBuf(' ');
-                appendStrBuf('-');
-                // FALLTHROUGH
-            case ALLOW:
-                warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
-                // ]NOCPP]
-                appendStrBuf(c);
-                // [NOCPP[
-                break;
-            case FATAL:
-                fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
-                break;
-        }
-        // ]NOCPP]
-    }
-
-    private void appendStrBuf(@NoLength char[] buffer, int offset, int length) {
-        int newLen = strBufLen + length;
-        // CPPONLY: assert newLen <= strBuf.length: "Previous buffer length insufficient.";
-        // CPPONLY: if (strBuf.length < newLen) {
-        // CPPONLY:     if (!EnsureBufferSpace(length)) {
-        // CPPONLY:         assert false: "RELEASE: Unable to recover from buffer reallocation failure";
-        // CPPONLY:     } // TODO: Add telemetry when outer if fires but inner does not
-        // CPPONLY: }
-        System.arraycopy(buffer, offset, strBuf, strBufLen, length);
-        strBufLen = newLen;
-    }
-
-    /**
-     * Append the contents of the char reference buffer to the main one.
-     */
-    @Inline private void appendCharRefBufToStrBuf() {
-        appendStrBuf(charRefBuf, 0, charRefBufLen);
-        charRefBufLen = 0;
-    }
-
-    /**
-     * Emits the current comment token.
-     *
-     * @param pos
-     *            TODO
-     *
-     * @throws SAXException
-     */
-    private void emitComment(int provisionalHyphens, int pos)
-            throws SAXException {
-        // [NOCPP[
-        if (wantsComments) {
-            // ]NOCPP]
-            tokenHandler.comment(strBuf, 0, strBufLen
-                    - provisionalHyphens);
-            // [NOCPP[
-        }
-        // ]NOCPP]
-        clearStrBufAfterUse();
-        cstart = pos + 1;
-    }
-
-    /**
-     * Flushes coalesced character tokens.
-     *
-     * @param buf
-     *            TODO
-     * @param pos
-     *            TODO
-     *
-     * @throws SAXException
-     */
-    protected void flushChars(@NoLength char[] buf, int pos)
-            throws SAXException {
-        if (pos > cstart) {
-            tokenHandler.characters(buf, cstart, pos - cstart);
-        }
-        cstart = Integer.MAX_VALUE;
-    }
-
-    /**
-     * Reports an condition that would make the infoset incompatible with XML
-     * 1.0 as fatal.
-     *
-     * @param message
-     *            the message
-     * @throws SAXException
-     * @throws SAXParseException
-     */
-    public void fatal(String message) throws SAXException {
-        SAXParseException spe = new SAXParseException(message, this);
-        if (errorHandler != null) {
-            errorHandler.fatalError(spe);
-        }
-        throw spe;
-    }
-
-    /**
-     * Reports a Parse Error.
-     *
-     * @param message
-     *            the message
-     * @throws SAXException
-     */
-    public void err(String message) throws SAXException {
-        if (errorHandler == null) {
-            return;
-        }
-        SAXParseException spe = new SAXParseException(message, this);
-        errorHandler.error(spe);
-    }
-
-    public void errTreeBuilder(String message) throws SAXException {
-        ErrorHandler eh = null;
-        if (tokenHandler instanceof TreeBuilder<?>) {
-            TreeBuilder<?> treeBuilder = (TreeBuilder<?>) tokenHandler;
-            eh = treeBuilder.getErrorHandler();
-        }
-        if (eh == null) {
-            eh = errorHandler;
-        }
-        if (eh == null) {
-            return;
-        }
-        SAXParseException spe = new SAXParseException(message, this);
-        eh.error(spe);
-    }
-
-    /**
-     * Reports a warning
-     *
-     * @param message
-     *            the message
-     * @throws SAXException
-     */
-    public void warn(String message) throws SAXException {
-        if (errorHandler == null) {
-            return;
-        }
-        SAXParseException spe = new SAXParseException(message, this);
-        errorHandler.warning(spe);
-    }
-
-    private void strBufToElementNameString() {
-        tagName = ElementName.elementNameByBuffer(strBuf, 0, strBufLen,
-                interner);
-        clearStrBufAfterUse();
-    }
-
-    private int emitCurrentTagToken(boolean selfClosing, int pos)
-            throws SAXException {
-        cstart = pos + 1;
-        maybeErrSlashInEndTag(selfClosing);
-        stateSave = Tokenizer.DATA;
-        HtmlAttributes attrs = (attributes == null ? HtmlAttributes.EMPTY_ATTRIBUTES
-                : attributes);
-        if (endTag) {
-            /*
-             * When an end tag token is emitted, the content model flag must be
-             * switched to the PCDATA state.
-             */
-            maybeErrAttributesOnEndTag(attrs);
-            // CPPONLY: if (!viewingXmlSource) {
-            tokenHandler.endTag(tagName);
-            // CPPONLY: }
-            // CPPONLY: if (newAttributesEachTime) {
-            // CPPONLY:   Portability.delete(attributes);
-            // CPPONLY:   attributes = null;
-            // CPPONLY: }
-        } else {
-            // CPPONLY: if (viewingXmlSource) {
-            // CPPONLY:   assert newAttributesEachTime;
-            // CPPONLY:   Portability.delete(attributes);
-            // CPPONLY:   attributes = null;
-            // CPPONLY: } else {
-            tokenHandler.startTag(tagName, attrs, selfClosing);
-            // CPPONLY: }
-        }
-        tagName.release();
-        tagName = null;
-        if (newAttributesEachTime) {
-            attributes = null;
-        } else {
-            attributes.clear(mappingLangToXmlLang);
-        }
-        /*
-         * The token handler may have called setStateAndEndTagExpectation
-         * and changed stateSave since the start of this method.
-         */
-        return stateSave;
-    }
-
-    private void attributeNameComplete() throws SAXException {
-        attributeName = AttributeName.nameByBuffer(strBuf, 0, strBufLen
-        // [NOCPP[
-                , namePolicy != XmlViolationPolicy.ALLOW
-                // ]NOCPP]
-                , interner);
-        clearStrBufAfterUse();
-
-        if (attributes == null) {
-            attributes = new HtmlAttributes(mappingLangToXmlLang);
-        }
-
-        /*
-         * When the user agent leaves the attribute name state (and before
-         * emitting the tag token, if appropriate), the complete attribute's
-         * name must be compared to the other attributes on the same token; if
-         * there is already an attribute on the token with the exact same name,
-         * then this is a parse error and the new attribute must be dropped,
-         * along with the value that gets associated with it (if any).
-         */
-        if (attributes.contains(attributeName)) {
-            errDuplicateAttribute();
-            attributeName.release();
-            attributeName = null;
-        }
-    }
-
-    private void addAttributeWithoutValue() throws SAXException {
-        noteAttributeWithoutValue();
-
-        // [NOCPP[
-        if (metaBoundaryPassed && AttributeName.CHARSET == attributeName
-                && ElementName.META == tagName) {
-            err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes.");
-        }
-        // ]NOCPP]
-        if (attributeName != null) {
-            // [NOCPP[
-            if (html4) {
-                if (attributeName.isBoolean()) {
-                    if (html4ModeCompatibleWithXhtml1Schemata) {
-                        attributes.addAttribute(attributeName,
-                                attributeName.getLocal(AttributeName.HTML),
-                                xmlnsPolicy);
-                    } else {
-                        attributes.addAttribute(attributeName, "", xmlnsPolicy);
-                    }
-                } else {
-                    if (AttributeName.BORDER != attributeName) {
-                        err("Attribute value omitted for a non-boolean attribute. (HTML4-only error.)");
-                        attributes.addAttribute(attributeName, "", xmlnsPolicy);
-                    }
-                }
-            } else {
-                if (AttributeName.SRC == attributeName
-                        || AttributeName.HREF == attributeName) {
-                    warn("Attribute \u201C"
-                            + attributeName.getLocal(AttributeName.HTML)
-                            + "\u201D without an explicit value seen. The attribute may be dropped by IE7.");
-                }
-                // ]NOCPP]
-                attributes.addAttribute(attributeName,
-                        Portability.newEmptyString()
-                        // [NOCPP[
-                        , xmlnsPolicy
-                // ]NOCPP]
-                // CPPONLY: , attributeLine
-                );
-                // [NOCPP[
-            }
-            // ]NOCPP]
-            attributeName = null; // attributeName has been adopted by the
-            // |attributes| object
-        } else {
-            clearStrBufAfterUse();
-        }
-    }
-
-    private void addAttributeWithValue() throws SAXException {
-        // [NOCPP[
-        if (metaBoundaryPassed && ElementName.META == tagName
-                && AttributeName.CHARSET == attributeName) {
-            err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes.");
-        }
-        // ]NOCPP]
-        if (attributeName != null) {
-            String val = strBufToString(); // Ownership transferred to
-            // HtmlAttributes
-            // CPPONLY: if (mViewSource) {
-            // CPPONLY:   mViewSource.MaybeLinkifyAttributeValue(attributeName, val);
-            // CPPONLY: }
-            // [NOCPP[
-            if (!endTag && html4 && html4ModeCompatibleWithXhtml1Schemata
-                    && attributeName.isCaseFolded()) {
-                val = newAsciiLowerCaseStringFromString(val);
-            }
-            // ]NOCPP]
-            attributes.addAttribute(attributeName, val
-            // [NOCPP[
-                    , xmlnsPolicy
-            // ]NOCPP]
-            // CPPONLY: , attributeLine
-            );
-            attributeName = null; // attributeName has been adopted by the
-            // |attributes| object
-        } else {
-            // We have a duplicate attribute. Explicitly discard its value.
-            clearStrBufAfterUse();
-        }
-    }
-
-    // [NOCPP[
-
-    private static String newAsciiLowerCaseStringFromString(String str) {
-        if (str == null) {
-            return null;
-        }
-        char[] buf = new char[str.length()];
-        for (int i = 0; i < str.length(); i++) {
-            char c = str.charAt(i);
-            if (c >= 'A' && c <= 'Z') {
-                c += 0x20;
-            }
-            buf[i] = c;
-        }
-        return new String(buf);
-    }
-
-    protected void startErrorReporting() throws SAXException {
-
-    }
-
-    // ]NOCPP]
-
-    public void start() throws SAXException {
-        initializeWithoutStarting();
-        tokenHandler.startTokenization(this);
-        // [NOCPP[
-        startErrorReporting();
-        // ]NOCPP]
-    }
-
-    public boolean tokenizeBuffer(UTF16Buffer buffer) throws SAXException {
-        int state = stateSave;
-        int returnState = returnStateSave;
-        char c = '\u0000';
-        shouldSuspend = false;
-        lastCR = false;
-
-        int start = buffer.getStart();
-        int end = buffer.getEnd();
-
-        // In C++, the caller of tokenizeBuffer needs to do this explicitly.
-        // [NOCPP[
-        ensureBufferSpace(end - start);
-        // ]NOCPP]
-
-        /**
-         * The index of the last <code>char</code> read from <code>buf</code>.
-         */
-        int pos = start - 1;
-
-        /**
-         * The index of the first <code>char</code> in <code>buf</code> that is
-         * part of a coalesced run of character tokens or
-         * <code>Integer.MAX_VALUE</code> if there is not a current run being
-         * coalesced.
-         */
-        switch (state) {
-            case DATA:
-            case RCDATA:
-            case SCRIPT_DATA:
-            case PLAINTEXT:
-            case RAWTEXT:
-            case CDATA_SECTION:
-            case SCRIPT_DATA_ESCAPED:
-            case SCRIPT_DATA_ESCAPE_START:
-            case SCRIPT_DATA_ESCAPE_START_DASH:
-            case SCRIPT_DATA_ESCAPED_DASH:
-            case SCRIPT_DATA_ESCAPED_DASH_DASH:
-            case SCRIPT_DATA_DOUBLE_ESCAPE_START:
-            case SCRIPT_DATA_DOUBLE_ESCAPED:
-            case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
-            case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
-            case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
-            case SCRIPT_DATA_DOUBLE_ESCAPE_END:
-                cstart = start;
-                break;
-            default:
-                cstart = Integer.MAX_VALUE;
-                break;
-        }
-
-        /**
-         * The number of <code>char</code>s in <code>buf</code> that have
-         * meaning. (The rest of the array is garbage and should not be
-         * examined.)
-         */
-        // CPPONLY: if (mViewSource) {
-        // CPPONLY:   mViewSource.SetBuffer(buffer);
-        // CPPONLY:   pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
-        // CPPONLY:   mViewSource.DropBuffer((pos == buffer.getEnd()) ? pos : pos + 1);
-        // CPPONLY: } else {
-        // CPPONLY:   pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
-        // CPPONLY: }
-        // [NOCPP[
-        pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState,
-                end);
-        // ]NOCPP]
-        if (pos == end) {
-            // exiting due to end of buffer
-            buffer.setStart(pos);
-        } else {
-            buffer.setStart(pos + 1);
-        }
-        return lastCR;
-    }
-
-    // [NOCPP[
-    private void ensureBufferSpace(int inputLength) throws SAXException {
-        // Add 2 to account for emissions of LT_GT, LT_SOLIDUS and RSQB_RSQB.
-        // Adding to the general worst case instead of only the
-        // TreeBuilder-exposed worst case to avoid re-introducing a bug when
-        // unifying the tokenizer and tree builder buffers in the future.
-        int worstCase = strBufLen + inputLength + charRefBufLen + 2;
-        tokenHandler.ensureBufferSpace(worstCase);
-        if (commentPolicy == XmlViolationPolicy.ALTER_INFOSET) {
-            // When altering infoset, if the comment contents are consecutive
-            // hyphens, each hyphen generates a space, too. These buffer
-            // contents never get emitted as characters() to the tokenHandler,
-            // which is why this calculation happens after the call to
-            // ensureBufferSpace on tokenHandler.
-            worstCase *= 2;
-        }
-        if (strBuf == null) {
-            // Add an arbitrary small value to avoid immediate reallocation
-            // once there are a few characters in the buffer.
-            strBuf = new char[worstCase + 128];
-        } else if (worstCase > strBuf.length) {
-            // HotSpot reportedly allocates memory with 8-byte accuracy, so
-            // there's no point in trying to do math here to avoid slop.
-            // Maybe we should add some small constant to worstCase here
-            // but not doing that without profiling. In C++ with jemalloc,
-            // the corresponding method should do math to round up here
-            // to avoid slop.
-            char[] newBuf = new char[worstCase];
-            System.arraycopy(strBuf, 0, newBuf, 0, strBufLen);
-            strBuf = newBuf;
-        }
-    }
-    // ]NOCPP]
-
-    @SuppressWarnings("unused") private int stateLoop(int state, char c,
-            int pos, @NoLength char[] buf, boolean reconsume, int returnState,
-            int endPos) throws SAXException {
-        /*
-         * Idioms used in this code:
-         *
-         *
-         * Consuming the next input character
-         *
-         * To consume the next input character, the code does this: if (++pos ==
-         * endPos) { break stateloop; } c = checkChar(buf, pos);
-         *
-         *
-         * Staying in a state
-         *
-         * When there's a state that the tokenizer may stay in over multiple
-         * input characters, the state has a wrapper |for(;;)| loop and staying
-         * in the state continues the loop.
-         *
-         *
-         * Switching to another state
-         *
-         * To switch to another state, the code sets the state variable to the
-         * magic number of the new state. Then it either continues stateloop or
-         * breaks out of the state's own wrapper loop if the target state is
-         * right after the current state in source order. (This is a partial
-         * workaround for Java's lack of goto.)
-         *
-         *
-         * Reconsume support
-         *
-         * The spec sometimes says that an input character is reconsumed in
-         * another state. If a state can ever be entered so that an input
-         * character can be reconsumed in it, the state's code starts with an
-         * |if (reconsume)| that sets reconsume to false and skips over the
-         * normal code for consuming a new character.
-         *
-         * To reconsume the current character in another state, the code sets
-         * |reconsume| to true and then switches to the other state.
-         *
-         *
-         * Emitting character tokens
-         *
-         * This method emits character tokens lazily. Whenever a new range of
-         * character tokens starts, the field cstart must be set to the start
-         * index of the range. The flushChars() method must be called at the end
-         * of a range to flush it.
-         *
-         *
-         * U+0000 handling
-         *
-         * The various states have to handle the replacement of U+0000 with
-         * U+FFFD. However, if U+0000 would be reconsumed in another state, the
-         * replacement doesn't need to happen, because it's handled by the
-         * reconsuming state.
-         *
-         *
-         * LF handling
-         *
-         * Every state needs to increment the line number upon LF unless the LF
-         * gets reconsumed by another state which increments the line number.
-         *
-         *
-         * CR handling
-         *
-         * Every state needs to handle CR unless the CR gets reconsumed and is
-         * handled by the reconsuming state. The CR needs to be handled as if it
-         * were and LF, the lastCR field must be set to true and then this
-         * method must return. The IO driver will then swallow the next
-         * character if it is an LF to coalesce CRLF.
-         */
-        stateloop: for (;;) {
-            switch (state) {
-                case DATA:
-                    dataloop: for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        switch (c) {
-                            case '&':
-                                /*
-                                 * U+0026 AMPERSAND (&) Switch to the character
-                                 * reference in data state.
-                                 */
-                                flushChars(buf, pos);
-                                assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
-                                appendCharRefBuf(c);
-                                setAdditionalAndRememberAmpersandLocation('\u0000');
-                                returnState = state;
-                                state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
-                                continue stateloop;
-                            case '<':
-                                /*
-                                 * U+003C LESS-THAN SIGN (<) Switch to the tag
-                                 * open state.
-                                 */
-                                flushChars(buf, pos);
-
-                                state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
-                                break dataloop; // FALL THROUGH continue
-                            // stateloop;
-                            case '\u0000':
-                                emitReplacementCharacter(buf, pos);
-                                continue;
-                            case '\r':
-                                emitCarriageReturn(buf, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                            default:
-                                /*
-                                 * Anything else Emit the input character as a
-                                 * character token.
-                                 *
-                                 * Stay in the data state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case TAG_OPEN:
-                    tagopenloop: for (;;) {
-                        /*
-                         * The behavior of this state depends on the content
-                         * model flag.
-                         */
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * If the content model flag is set to the PCDATA state
-                         * Consume the next input character:
-                         */
-                        if (c >= 'A' && c <= 'Z') {
-                            /*
-                             * U+0041 LATIN CAPITAL LETTER A through to U+005A
-                             * LATIN CAPITAL LETTER Z Create a new start tag
-                             * token,
-                             */
-                            endTag = false;
-                            /*
-                             * set its tag name to the lowercase version of the
-                             * input character (add 0x0020 to the character's
-                             * code point),
-                             */
-                            clearStrBufBeforeUse();
-                            appendStrBuf((char) (c + 0x20));
-                            /* then switch to the tag name state. */
-                            state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
-                            /*
-                             * (Don't emit the token yet; further details will
-                             * be filled in before it is emitted.)
-                             */
-                            break tagopenloop;
-                            // continue stateloop;
-                        } else if (c >= 'a' && c <= 'z') {
-                            /*
-                             * U+0061 LATIN SMALL LETTER A through to U+007A
-                             * LATIN SMALL LETTER Z Create a new start tag
-                             * token,
-                             */
-                            endTag = false;
-                            /*
-                             * set its tag name to the input character,
-                             */
-                            clearStrBufBeforeUse();
-                            appendStrBuf(c);
-                            /* then switch to the tag name state. */
-                            state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
-                            /*
-                             * (Don't emit the token yet; further details will
-                             * be filled in before it is emitted.)
-                             */
-                            break tagopenloop;
-                            // continue stateloop;
-                        }
-                        switch (c) {
-                            case '!':
-                                /*
-                                 * U+0021 EXCLAMATION MARK (!) Switch to the
-                                 * markup declaration open state.
-                                 */
-                                state = transition(state, Tokenizer.MARKUP_DECLARATION_OPEN, reconsume, pos);
-                                continue stateloop;
-                            case '/':
-                                /*
-                                 * U+002F SOLIDUS (/) Switch to the close tag
-                                 * open state.
-                                 */
-                                state = transition(state, Tokenizer.CLOSE_TAG_OPEN, reconsume, pos);
-                                continue stateloop;
-                            case '?':
-                                // CPPONLY: if (viewingXmlSource) {
-                                // CPPONLY: state = transition(state,
-                                // CPPONLY: Tokenizer.PROCESSING_INSTRUCTION,
-                                // CPPONLY: reconsume,
-                                // CPPONLY: pos);
-                                // CPPONLY: continue stateloop;
-                                // CPPONLY: }
-                                /*
-                                 * U+003F QUESTION MARK (?) Parse error.
-                                 */
-                                errProcessingInstruction();
-                                /*
-                                 * Switch to the bogus comment state.
-                                 */
-                                clearStrBufBeforeUse();
-                                appendStrBuf(c);
-                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
-                                continue stateloop;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Parse error.
-                                 */
-                                errLtGt();
-                                /*
-                                 * Emit a U+003C LESS-THAN SIGN character token
-                                 * and a U+003E GREATER-THAN SIGN character
-                                 * token.
-                                 */
-                                tokenHandler.characters(Tokenizer.LT_GT, 0, 2);
-                                /* Switch to the data state. */
-                                cstart = pos + 1;
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            default:
-                                /*
-                                 * Anything else Parse error.
-                                 */
-                                errBadCharAfterLt(c);
-                                /*
-                                 * Emit a U+003C LESS-THAN SIGN character token
-                                 */
-                                tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
-                                /*
-                                 * and reconsume the current input character in
-                                 * the data state.
-                                 */
-                                cstart = pos;
-                                reconsume = true;
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // FALL THROUGH DON'T REORDER
-                case TAG_NAME:
-                    tagnameloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\r':
-                                silentCarriageReturn();
-                                strBufToElementNameString();
-                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
-                                 * Switch to the before attribute name state.
-                                 */
-                                strBufToElementNameString();
-                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
-                                break tagnameloop;
-                            // continue stateloop;
-                            case '/':
-                                /*
-                                 * U+002F SOLIDUS (/) Switch to the self-closing
-                                 * start tag state.
-                                 */
-                                strBufToElementNameString();
-                                state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
-                                continue stateloop;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Emit the current
-                                 * tag token.
-                                 */
-                                strBufToElementNameString();
-                                state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
-                                if (shouldSuspend) {
-                                    break stateloop;
-                                }
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                continue stateloop;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            default:
-                                if (c >= 'A' && c <= 'Z') {
-                                    /*
-                                     * U+0041 LATIN CAPITAL LETTER A through to
-                                     * U+005A LATIN CAPITAL LETTER Z Append the
-                                     * lowercase version of the current input
-                                     * character (add 0x0020 to the character's
-                                     * code point) to the current tag token's
-                                     * tag name.
-                                     */
-                                    c += 0x20;
-                                }
-                                /*
-                                 * Anything else Append the current input
-                                 * character to the current tag token's tag
-                                 * name.
-                                 */
-                                appendStrBuf(c);
-                                /*
-                                 * Stay in the tag name state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case BEFORE_ATTRIBUTE_NAME:
-                    beforeattributenameloop: for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\r':
-                                silentCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
-                                 * in the before attribute name state.
-                                 */
-                                continue;
-                            case '/':
-                                /*
-                                 * U+002F SOLIDUS (/) Switch to the self-closing
-                                 * start tag state.
-                                 */
-                                state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
-                                continue stateloop;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Emit the current
-                                 * tag token.
-                                 */
-                                state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
-                                if (shouldSuspend) {
-                                    break stateloop;
-                                }
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                continue stateloop;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            case '\"':
-                            case '\'':
-                            case '<':
-                            case '=':
-                                /*
-                                 * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
-                                 * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
-                                 * SIGN (=) Parse error.
-                                 */
-                                errBadCharBeforeAttributeNameOrNull(c);
-                                /*
-                                 * Treat it as per the "anything else" entry
-                                 * below.
-                                 */
-                            default:
-                                /*
-                                 * Anything else Start a new attribute in the
-                                 * current tag token.
-                                 */
-                                if (c >= 'A' && c <= 'Z') {
-                                    /*
-                                     * U+0041 LATIN CAPITAL LETTER A through to
-                                     * U+005A LATIN CAPITAL LETTER Z Set that
-                                     * attribute's name to the lowercase version
-                                     * of the current input character (add
-                                     * 0x0020 to the character's code point)
-                                     */
-                                    c += 0x20;
-                                }
-                                // CPPONLY: attributeLine = line;
-                                /*
-                                 * Set that attribute's name to the current
-                                 * input character,
-                                 */
-                                clearStrBufBeforeUse();
-                                appendStrBuf(c);
-                                /*
-                                 * and its value to the empty string.
-                                 */
-                                // Will do later.
-                                /*
-                                 * Switch to the attribute name state.
-                                 */
-                                state = transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
-                                break beforeattributenameloop;
-                            // continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case ATTRIBUTE_NAME:
-                    attributenameloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\r':
-                                silentCarriageReturn();
-                                attributeNameComplete();
-                                state = transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
-                                 * Switch to the after attribute name state.
-                                 */
-                                attributeNameComplete();
-                                state = transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
-                                continue stateloop;
-                            case '/':
-                                /*
-                                 * U+002F SOLIDUS (/) Switch to the self-closing
-                                 * start tag state.
-                                 */
-                                attributeNameComplete();
-                                addAttributeWithoutValue();
-                                state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
-                                continue stateloop;
-                            case '=':
-                                /*
-                                 * U+003D EQUALS SIGN (=) Switch to the before
-                                 * attribute value state.
-                                 */
-                                attributeNameComplete();
-                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
-                                break attributenameloop;
-                            // continue stateloop;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Emit the current
-                                 * tag token.
-                                 */
-                                attributeNameComplete();
-                                addAttributeWithoutValue();
-                                state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
-                                if (shouldSuspend) {
-                                    break stateloop;
-                                }
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                continue stateloop;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            case '\"':
-                            case '\'':
-                            case '<':
-                                /*
-                                 * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
-                                 * (') U+003C LESS-THAN SIGN (<) Parse error.
-                                 */
-                                errQuoteOrLtInAttributeNameOrNull(c);
-                                /*
-                                 * Treat it as per the "anything else" entry
-                                 * below.
-                                 */
-                            default:
-                                if (c >= 'A' && c <= 'Z') {
-                                    /*
-                                     * U+0041 LATIN CAPITAL LETTER A through to
-                                     * U+005A LATIN CAPITAL LETTER Z Append the
-                                     * lowercase version of the current input
-                                     * character (add 0x0020 to the character's
-                                     * code point) to the current attribute's
-                                     * name.
-                                     */
-                                    c += 0x20;
-                                }
-                                /*
-                                 * Anything else Append the current input
-                                 * character to the current attribute's name.
-                                 */
-                                appendStrBuf(c);
-                                /*
-                                 * Stay in the attribute name state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case BEFORE_ATTRIBUTE_VALUE:
-                    beforeattributevalueloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\r':
-                                silentCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
-                                 * in the before attribute value state.
-                                 */
-                                continue;
-                            case '"':
-                                /*
-                                 * U+0022 QUOTATION MARK (") Switch to the
-                                 * attribute value (double-quoted) state.
-                                 */
-                                // CPPONLY: attributeLine = line;
-                                clearStrBufBeforeUse();
-                                state = transition(state, Tokenizer.ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, pos);
-                                break beforeattributevalueloop;
-                            // continue stateloop;
-                            case '&':
-                                /*
-                                 * U+0026 AMPERSAND (&) Switch to the attribute
-                                 * value (unquoted) state and reconsume this
-                                 * input character.
-                                 */
-                                // CPPONLY: attributeLine = line;
-                                clearStrBufBeforeUse();
-                                reconsume = true;
-                                state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
-                                noteUnquotedAttributeValue();
-                                continue stateloop;
-                            case '\'':
-                                /*
-                                 * U+0027 APOSTROPHE (') Switch to the attribute
-                                 * value (single-quoted) state.
-                                 */
-                                // CPPONLY: attributeLine = line;
-                                clearStrBufBeforeUse();
-                                state = transition(state, Tokenizer.ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, pos);
-                                continue stateloop;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Parse error.
-                                 */
-                                errAttributeValueMissing();
-                                /*
-                                 * Emit the current tag token.
-                                 */
-                                addAttributeWithoutValue();
-                                state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
-                                if (shouldSuspend) {
-                                    break stateloop;
-                                }
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                continue stateloop;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            case '<':
-                            case '=':
-                            case '`':
-                                /*
-                                 * U+003C LESS-THAN SIGN (<) U+003D EQUALS SIGN
-                                 * (=) U+0060 GRAVE ACCENT (`)
-                                 */
-                                errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c);
-                                /*
-                                 * Treat it as per the "anything else" entry
-                                 * below.
-                                 */
-                            default:
-                                // [NOCPP[
-                                errHtml4NonNameInUnquotedAttribute(c);
-                                // ]NOCPP]
-                                /*
-                                 * Anything else Append the current input
-                                 * character to the current attribute's value.
-                                 */
-                                // CPPONLY: attributeLine = line;
-                                clearStrBufBeforeUse();
-                                appendStrBuf(c);
-                                /*
-                                 * Switch to the attribute value (unquoted)
-                                 * state.
-                                 */
-
-                                state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
-                                noteUnquotedAttributeValue();
-                                continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
-                    attributevaluedoublequotedloop: for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '"':
-                                /*
-                                 * U+0022 QUOTATION MARK (") Switch to the after
-                                 * attribute value (quoted) state.
-                                 */
-                                addAttributeWithValue();
-
-                                state = transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
-                                break attributevaluedoublequotedloop;
-                            // continue stateloop;
-                            case '&':
-                                /*
-                                 * U+0026 AMPERSAND (&) Switch to the character
-                                 * reference in attribute value state, with the
-                                 * additional allowed character being U+0022
-                                 * QUOTATION MARK (").
-                                 */
-                                assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
-                                appendCharRefBuf(c);
-                                setAdditionalAndRememberAmpersandLocation('\"');
-                                returnState = state;
-                                state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
-                                continue stateloop;
-                            case '\r':
-                                appendStrBufCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                appendStrBufLineFeed();
-                                continue;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            default:
-                                /*
-                                 * Anything else Append the current input
-                                 * character to the current attribute's value.
-                                 */
-                                appendStrBuf(c);
-                                /*
-                                 * Stay in the attribute value (double-quoted)
-                                 * state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case AFTER_ATTRIBUTE_VALUE_QUOTED:
-                    afterattributevaluequotedloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\r':
-                                silentCarriageReturn();
-                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
-                                 * Switch to the before attribute name state.
-                                 */
-                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
-                                continue stateloop;
-                            case '/':
-                                /*
-                                 * U+002F SOLIDUS (/) Switch to the self-closing
-                                 * start tag state.
-                                 */
-                                state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
-                                break afterattributevaluequotedloop;
-                            // continue stateloop;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Emit the current
-                                 * tag token.
-                                 */
-                                state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
-                                if (shouldSuspend) {
-                                    break stateloop;
-                                }
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                continue stateloop;
-                            default:
-                                /*
-                                 * Anything else Parse error.
-                                 */
-                                errNoSpaceBetweenAttributes();
-                                /*
-                                 * Reconsume the character in the before
-                                 * attribute name state.
-                                 */
-                                reconsume = true;
-                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case SELF_CLOSING_START_TAG:
-                    if (++pos == endPos) {
-                        break stateloop;
-                    }
-                    c = checkChar(buf, pos);
-                    /*
-                     * Consume the next input character:
-                     */
-                    switch (c) {
-                        case '>':
-                            /*
-                             * U+003E GREATER-THAN SIGN (>) Set the self-closing
-                             * flag of the current tag token. Emit the current
-                             * tag token.
-                             */
-                            // [NOCPP[
-                            errHtml4XmlVoidSyntax();
-                            // ]NOCPP]
-                            state = transition(state, emitCurrentTagToken(true, pos), reconsume, pos);
-                            if (shouldSuspend) {
-                                break stateloop;
-                            }
-                            /*
-                             * Switch to the data state.
-                             */
-                            continue stateloop;
-                        default:
-                            /* Anything else Parse error. */
-                            errSlashNotFollowedByGt();
-                            /*
-                             * Reconsume the character in the before attribute
-                             * name state.
-                             */
-                            reconsume = true;
-                            state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
-                            continue stateloop;
-                    }
-                    // XXX reorder point
-                case ATTRIBUTE_VALUE_UNQUOTED:
-                    for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\r':
-                                silentCarriageReturn();
-                                addAttributeWithValue();
-                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
-                                 * Switch to the before attribute name state.
-                                 */
-                                addAttributeWithValue();
-                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
-                                continue stateloop;
-                            case '&':
-                                /*
-                                 * U+0026 AMPERSAND (&) Switch to the character
-                                 * reference in attribute value state, with the
-                                 * additional allowed character being U+003E
-                                 * GREATER-THAN SIGN (>)
-                                 */
-                                assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
-                                appendCharRefBuf(c);
-                                setAdditionalAndRememberAmpersandLocation('>');
-                                returnState = state;
-                                state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
-                                continue stateloop;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Emit the current
-                                 * tag token.
-                                 */
-                                addAttributeWithValue();
-                                state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
-                                if (shouldSuspend) {
-                                    break stateloop;
-                                }
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                continue stateloop;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            case '<':
-                            case '\"':
-                            case '\'':
-                            case '=':
-                            case '`':
-                                /*
-                                 * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
-                                 * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
-                                 * SIGN (=) U+0060 GRAVE ACCENT (`) Parse error.
-                                 */
-                                errUnquotedAttributeValOrNull(c);
-                                /*
-                                 * Treat it as per the "anything else" entry
-                                 * below.
-                                 */
-                                // fall through
-                            default:
-                                // [NOCPP]
-                                errHtml4NonNameInUnquotedAttribute(c);
-                                // ]NOCPP]
-                                /*
-                                 * Anything else Append the current input
-                                 * character to the current attribute's value.
-                                 */
-                                appendStrBuf(c);
-                                /*
-                                 * Stay in the attribute value (unquoted) state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // XXX reorder point
-                case AFTER_ATTRIBUTE_NAME:
-                    for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\r':
-                                silentCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
-                                 * in the after attribute name state.
-                                 */
-                                continue;
-                            case '/':
-                                /*
-                                 * U+002F SOLIDUS (/) Switch to the self-closing
-                                 * start tag state.
-                                 */
-                                addAttributeWithoutValue();
-                                state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
-                                continue stateloop;
-                            case '=':
-                                /*
-                                 * U+003D EQUALS SIGN (=) Switch to the before
-                                 * attribute value state.
-                                 */
-                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
-                                continue stateloop;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Emit the current
-                                 * tag token.
-                                 */
-                                addAttributeWithoutValue();
-                                state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
-                                if (shouldSuspend) {
-                                    break stateloop;
-                                }
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                continue stateloop;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            case '\"':
-                            case '\'':
-                            case '<':
-                                errQuoteOrLtInAttributeNameOrNull(c);
-                                /*
-                                 * Treat it as per the "anything else" entry
-                                 * below.
-                                 */
-                            default:
-                                addAttributeWithoutValue();
-                                /*
-                                 * Anything else Start a new attribute in the
-                                 * current tag token.
-                                 */
-                                if (c >= 'A' && c <= 'Z') {
-                                    /*
-                                     * U+0041 LATIN CAPITAL LETTER A through to
-                                     * U+005A LATIN CAPITAL LETTER Z Set that
-                                     * attribute's name to the lowercase version
-                                     * of the current input character (add
-                                     * 0x0020 to the character's code point)
-                                     */
-                                    c += 0x20;
-                                }
-                                /*
-                                 * Set that attribute's name to the current
-                                 * input character,
-                                 */
-                                clearStrBufBeforeUse();
-                                appendStrBuf(c);
-                                /*
-                                 * and its value to the empty string.
-                                 */
-                                // Will do later.
-                                /*
-                                 * Switch to the attribute name state.
-                                 */
-                                state = transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // XXX reorder point
-                case MARKUP_DECLARATION_OPEN:
-                    markupdeclarationopenloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * If the next two characters are both U+002D
-                         * HYPHEN-MINUS characters (-), consume those two
-                         * characters, create a comment token whose data is the
-                         * empty string, and switch to the comment start state.
-                         *
-                         * Otherwise, if the next seven characters are an ASCII
-                         * case-insensitive match for the word "DOCTYPE", then
-                         * consume those characters and switch to the DOCTYPE
-                         * state.
-                         *
-                         * Otherwise, if the insertion mode is
-                         * "in foreign content" and the current node is not an
-                         * element in the HTML namespace and the next seven
-                         * characters are an case-sensitive match for the string
-                         * "[CDATA[" (the five uppercase letters "CDATA" with a
-                         * U+005B LEFT SQUARE BRACKET character before and
-                         * after), then consume those characters and switch to
-                         * the CDATA section state.
-                         *
-                         * Otherwise, is is a parse error. Switch to the bogus
-                         * comment state. The next character that is consumed,
-                         * if any, is the first character that will be in the
-                         * comment.
-                         */
-                        switch (c) {
-                            case '-':
-                                clearStrBufBeforeUse();
-                                appendStrBuf(c);
-                                state = transition(state, Tokenizer.MARKUP_DECLARATION_HYPHEN, reconsume, pos);
-                                break markupdeclarationopenloop;
-                            // continue stateloop;
-                            case 'd':
-                            case 'D':
-                                clearStrBufBeforeUse();
-                                appendStrBuf(c);
-                                index = 0;
-                                state = transition(state, Tokenizer.MARKUP_DECLARATION_OCTYPE, reconsume, pos);
-                                continue stateloop;
-                            case '[':
-                                if (tokenHandler.cdataSectionAllowed()) {
-                                    clearStrBufBeforeUse();
-                                    appendStrBuf(c);
-                                    index = 0;
-                                    state = transition(state, Tokenizer.CDATA_START, reconsume, pos);
-                                    continue stateloop;
-                                }
-                                // else fall through
-                            default:
-                                errBogusComment();
-                                clearStrBufBeforeUse();
-                                reconsume = true;
-                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case MARKUP_DECLARATION_HYPHEN:
-                    markupdeclarationhyphenloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        switch (c) {
-                            case '\u0000':
-                                break stateloop;
-                            case '-':
-                                clearStrBufAfterOneHyphen();
-                                state = transition(state, Tokenizer.COMMENT_START, reconsume, pos);
-                                break markupdeclarationhyphenloop;
-                            // continue stateloop;
-                            default:
-                                errBogusComment();
-                                reconsume = true;
-                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case COMMENT_START:
-                    commentstartloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Comment start state
-                         *
-                         *
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '-':
-                                /*
-                                 * U+002D HYPHEN-MINUS (-) Switch to the comment
-                                 * start dash state.
-                                 */
-                                appendStrBuf(c);
-                                state = transition(state, Tokenizer.COMMENT_START_DASH, reconsume, pos);
-                                continue stateloop;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Parse error.
-                                 */
-                                errPrematureEndOfComment();
-                                /* Emit the comment token. */
-                                emitComment(0, pos);
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            case '\r':
-                                appendStrBufCarriageReturn();
-                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                appendStrBufLineFeed();
-                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
-                                break commentstartloop;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            default:
-                                /*
-                                 * Anything else Append the input character to
-                                 * the comment token's data.
-                                 */
-                                appendStrBuf(c);
-                                /*
-                                 * Switch to the comment state.
-                                 */
-                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
-                                break commentstartloop;
-                            // continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case COMMENT:
-                    commentloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Comment state Consume the next input character:
-                         */
-                        switch (c) {
-                            case '-':
-                                /*
-                                 * U+002D HYPHEN-MINUS (-) Switch to the comment
-                                 * end dash state
-                                 */
-                                appendStrBuf(c);
-                                state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
-                                break commentloop;
-                            // continue stateloop;
-                            case '\r':
-                                appendStrBufCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                appendStrBufLineFeed();
-                                continue;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            default:
-                                /*
-                                 * Anything else Append the input character to
-                                 * the comment token's data.
-                                 */
-                                appendStrBuf(c);
-                                /*
-                                 * Stay in the comment state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case COMMENT_END_DASH:
-                    commentenddashloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Comment end dash state Consume the next input
-                         * character:
-                         */
-                        switch (c) {
-                            case '-':
-                                /*
-                                 * U+002D HYPHEN-MINUS (-) Switch to the comment
-                                 * end state
-                                 */
-                                appendStrBuf(c);
-                                state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
-                                break commentenddashloop;
-                            // continue stateloop;
-                            case '\r':
-                                appendStrBufCarriageReturn();
-                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                appendStrBufLineFeed();
-                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
-                                continue stateloop;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            default:
-                                /*
-                                 * Anything else Append a U+002D HYPHEN-MINUS
-                                 * (-) character and the input character to the
-                                 * comment token's data.
-                                 */
-                                appendStrBuf(c);
-                                /*
-                                 * Switch to the comment state.
-                                 */
-                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case COMMENT_END:
-                    commentendloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Comment end dash state Consume the next input
-                         * character:
-                         */
-                        switch (c) {
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Emit the comment
-                                 * token.
-                                 */
-                                emitComment(2, pos);
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            case '-':
-                                /* U+002D HYPHEN-MINUS (-) Parse error. */
-                                /*
-                                 * Append a U+002D HYPHEN-MINUS (-) character to
-                                 * the comment token's data.
-                                 */
-                                adjustDoubleHyphenAndAppendToStrBufAndErr(c);
-                                /*
-                                 * Stay in the comment end state.
-                                 */
-                                continue;
-                            case '\r':
-                                adjustDoubleHyphenAndAppendToStrBufCarriageReturn();
-                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                adjustDoubleHyphenAndAppendToStrBufLineFeed();
-                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
-                                continue stateloop;
-                            case '!':
-                                errHyphenHyphenBang();
-                                appendStrBuf(c);
-                                state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
-                                continue stateloop;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            default:
-                                /*
-                                 * Append two U+002D HYPHEN-MINUS (-) characters
-                                 * and the input character to the comment
-                                 * token's data.
-                                 */
-                                adjustDoubleHyphenAndAppendToStrBufAndErr(c);
-                                /*
-                                 * Switch to the comment state.
-                                 */
-                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // XXX reorder point
-                case COMMENT_END_BANG:
-                    for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Comment end bang state
-                         *
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Emit the comment
-                                 * token.
-                                 */
-                                emitComment(3, pos);
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            case '-':
-                                /*
-                                 * Append two U+002D HYPHEN-MINUS (-) characters
-                                 * and a U+0021 EXCLAMATION MARK (!) character
-                                 * to the comment token's data.
-                                 */
-                                appendStrBuf(c);
-                                /*
-                                 * Switch to the comment end dash state.
-                                 */
-                                state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
-                                continue stateloop;
-                            case '\r':
-                                appendStrBufCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                appendStrBufLineFeed();
-                                continue;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            default:
-                                /*
-                                 * Anything else Append two U+002D HYPHEN-MINUS
-                                 * (-) characters, a U+0021 EXCLAMATION MARK (!)
-                                 * character, and the input character to the
-                                 * comment token's data. Switch to the comment
-                                 * state.
-                                 */
-                                appendStrBuf(c);
-                                /*
-                                 * Switch to the comment state.
-                                 */
-                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // XXX reorder point
-                case COMMENT_START_DASH:
-                    if (++pos == endPos) {
-                        break stateloop;
-                    }
-                    c = checkChar(buf, pos);
-                    /*
-                     * Comment start dash state
-                     *
-                     * Consume the next input character:
-                     */
-                    switch (c) {
-                        case '-':
-                            /*
-                             * U+002D HYPHEN-MINUS (-) Switch to the comment end
-                             * state
-                             */
-                            appendStrBuf(c);
-                            state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
-                            continue stateloop;
-                        case '>':
-                            errPrematureEndOfComment();
-                            /* Emit the comment token. */
-                            emitComment(1, pos);
-                            /*
-                             * Switch to the data state.
-                             */
-                            state = transition(state, Tokenizer.DATA, reconsume, pos);
-                            continue stateloop;
-                        case '\r':
-                            appendStrBufCarriageReturn();
-                            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
-                            break stateloop;
-                        case '\n':
-                            appendStrBufLineFeed();
-                            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
-                            continue stateloop;
-                        case '\u0000':
-                            c = '\uFFFD';
-                            // fall thru
-                        default:
-                            /*
-                             * Append a U+002D HYPHEN-MINUS character (-) and
-                             * the current input character to the comment
-                             * token's data.
-                             */
-                            appendStrBuf(c);
-                            /*
-                             * Switch to the comment state.
-                             */
-                            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
-                            continue stateloop;
-                    }
-                    // XXX reorder point
-                case CDATA_START:
-                    for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        if (index < 6) { // CDATA_LSQB.length
-                            if (c == Tokenizer.CDATA_LSQB[index]) {
-                                appendStrBuf(c);
-                            } else {
-                                errBogusComment();
-                                reconsume = true;
-                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
-                                continue stateloop;
-                            }
-                            index++;
-                            continue;
-                        } else {
-                            clearStrBufAfterUse();
-                            cstart = pos; // start coalescing
-                            reconsume = true;
-                            state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
-                            break; // FALL THROUGH continue stateloop;
-                        }
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case CDATA_SECTION:
-                    cdatasectionloop: for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        switch (c) {
-                            case ']':
-                                flushChars(buf, pos);
-                                state = transition(state, Tokenizer.CDATA_RSQB, reconsume, pos);
-                                break cdatasectionloop; // FALL THROUGH
-                            case '\u0000':
-                                emitReplacementCharacter(buf, pos);
-                                continue;
-                            case '\r':
-                                emitCarriageReturn(buf, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            default:
-                                continue;
-                        }
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case CDATA_RSQB:
-                    cdatarsqb: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        switch (c) {
-                            case ']':
-                                state = transition(state, Tokenizer.CDATA_RSQB_RSQB, reconsume, pos);
-                                break cdatarsqb;
-                            default:
-                                tokenHandler.characters(Tokenizer.RSQB_RSQB, 0,
-                                        1);
-                                cstart = pos;
-                                reconsume = true;
-                                state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case CDATA_RSQB_RSQB:
-                    cdatarsqbrsqb: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        switch (c) {
-                            case ']':
-                                // Saw a third ]. Emit one ] (logically the
-                                // first one) and stay in this state to
-                                // remember that the last two characters seen
-                                // have been ]].
-                                tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1);
-                                continue;
-                            case '>':
-                                cstart = pos + 1;
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            default:
-                                tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2);
-                                cstart = pos;
-                                reconsume = true;
-                                state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // XXX reorder point
-                case ATTRIBUTE_VALUE_SINGLE_QUOTED:
-                    attributevaluesinglequotedloop: for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\'':
-                                /*
-                                 * U+0027 APOSTROPHE (') Switch to the after
-                                 * attribute value (quoted) state.
-                                 */
-                                addAttributeWithValue();
-
-                                state = transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
-                                continue stateloop;
-                            case '&':
-                                /*
-                                 * U+0026 AMPERSAND (&) Switch to the character
-                                 * reference in attribute value state, with the
-                                 * + additional allowed character being U+0027
-                                 * APOSTROPHE (').
-                                 */
-                                assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
-                                appendCharRefBuf(c);
-                                setAdditionalAndRememberAmpersandLocation('\'');
-                                returnState = state;
-                                state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
-                                break attributevaluesinglequotedloop;
-                            // continue stateloop;
-                            case '\r':
-                                appendStrBufCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                appendStrBufLineFeed();
-                                continue;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            default:
-                                /*
-                                 * Anything else Append the current input
-                                 * character to the current attribute's value.
-                                 */
-                                appendStrBuf(c);
-                                /*
-                                 * Stay in the attribute value (double-quoted)
-                                 * state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case CONSUME_CHARACTER_REFERENCE:
-                    if (++pos == endPos) {
-                        break stateloop;
-                    }
-                    c = checkChar(buf, pos);
-                    if (c == '\u0000') {
-                        break stateloop;
-                    }
-                    /*
-                     * Unlike the definition is the spec, this state does not
-                     * return a value and never requires the caller to
-                     * backtrack. This state takes care of emitting characters
-                     * or appending to the current attribute value. It also
-                     * takes care of that in the case when consuming the
-                     * character reference fails.
-                     */
-                    /*
-                     * This section defines how to consume a character
-                     * reference. This definition is used when parsing character
-                     * references in text and in attributes.
-                     *
-                     * The behavior depends on the identity of the next
-                     * character (the one immediately after the U+0026 AMPERSAND
-                     * character):
-                     */
-                    switch (c) {
-                        case ' ':
-                        case '\t':
-                        case '\n':
-                        case '\r': // we'll reconsume!
-                        case '\u000C':
-                        case '<':
-                        case '&':
-                            emitOrAppendCharRefBuf(returnState);
-                            if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
-                                cstart = pos;
-                            }
-                            reconsume = true;
-                            state = transition(state, returnState, reconsume, pos);
-                            continue stateloop;
-                        case '#':
-                            /*
-                             * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER
-                             * SIGN.
-                             */
-                            appendCharRefBuf('#');
-                            state = transition(state, Tokenizer.CONSUME_NCR, reconsume, pos);
-                            continue stateloop;
-                        default:
-                            if (c == additional) {
-                                emitOrAppendCharRefBuf(returnState);
-                                reconsume = true;
-                                state = transition(state, returnState, reconsume, pos);
-                                continue stateloop;
-                            }
-                            if (c >= 'a' && c <= 'z') {
-                                firstCharKey = c - 'a' + 26;
-                            } else if (c >= 'A' && c <= 'Z') {
-                                firstCharKey = c - 'A';
-                            } else {
-                                // No match
-                                /*
-                                 * If no match can be made, then this is a parse
-                                 * error.
-                                 */
-                                errNoNamedCharacterMatch();
-                                emitOrAppendCharRefBuf(returnState);
-                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
-                                    cstart = pos;
-                                }
-                                reconsume = true;
-                                state = transition(state, returnState, reconsume, pos);
-                                continue stateloop;
-                            }
-                            // Didn't fail yet
-                            appendCharRefBuf(c);
-                            state = transition(state, Tokenizer.CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, pos);
-                            // FALL THROUGH continue stateloop;
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case CHARACTER_REFERENCE_HILO_LOOKUP:
-                    {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        if (c == '\u0000') {
-                            break stateloop;
-                        }
-                        /*
-                         * The data structure is as follows:
-                         *
-                         * HILO_ACCEL is a two-dimensional int array whose major
-                         * index corresponds to the second character of the
-                         * character reference (code point as index) and the
-                         * minor index corresponds to the first character of the
-                         * character reference (packed so that A-Z runs from 0
-                         * to 25 and a-z runs from 26 to 51). This layout makes
-                         * it easier to use the sparseness of the data structure
-                         * to omit parts of it: The second dimension of the
-                         * table is null when no character reference starts with
-                         * the character corresponding to that row.
-                         *
-                         * The int value HILO_ACCEL (by these indeces) is zero
-                         * if there exists no character reference starting with
-                         * that two-letter prefix. Otherwise, the value is an
-                         * int that packs two shorts so that the higher short is
-                         * the index of the highest character reference name
-                         * with that prefix in NAMES and the lower short
-                         * corresponds to the index of the lowest character
-                         * reference name with that prefix. (It happens that the
-                         * first two character reference names share their
-                         * prefix so the packed int cannot be 0 by packing the
-                         * two shorts.)
-                         *
-                         * NAMES is an array of byte arrays where each byte
-                         * array encodes the name of a character references as
-                         * ASCII. The names omit the first two letters of the
-                         * name. (Since storing the first two letters would be
-                         * redundant with the data contained in HILO_ACCEL.) The
-                         * entries are lexically sorted.
-                         *
-                         * For a given index in NAMES, the same index in VALUES
-                         * contains the corresponding expansion as an array of
-                         * two UTF-16 code units (either the character and
-                         * U+0000 or a suggogate pair).
-                         */
-                        int hilo = 0;
-                        if (c <= 'z') {
-                            @Const @NoLength int[] row = NamedCharactersAccel.HILO_ACCEL[c];
-                            if (row != null) {
-                                hilo = row[firstCharKey];
-                            }
-                        }
-                        if (hilo == 0) {
-                            /*
-                             * If no match can be made, then this is a parse
-                             * error.
-                             */
-                            errNoNamedCharacterMatch();
-                            emitOrAppendCharRefBuf(returnState);
-                            if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
-                                cstart = pos;
-                            }
-                            reconsume = true;
-                            state = transition(state, returnState, reconsume, pos);
-                            continue stateloop;
-                        }
-                        // Didn't fail yet
-                        appendCharRefBuf(c);
-                        lo = hilo & 0xFFFF;
-                        hi = hilo >> 16;
-                        entCol = -1;
-                        candidate = -1;
-                        charRefBufMark = 0;
-                        state = transition(state, Tokenizer.CHARACTER_REFERENCE_TAIL, reconsume, pos);
-                        // FALL THROUGH continue stateloop;
-                    }
-                case CHARACTER_REFERENCE_TAIL:
-                    outer: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        if (c == '\u0000') {
-                            break stateloop;
-                        }
-                        entCol++;
-                        /*
-                         * Consume the maximum number of characters possible,
-                         * with the consumed characters matching one of the
-                         * identifiers in the first column of the named
-                         * character references table (in a case-sensitive
-                         * manner).
-                         */
-                        loloop: for (;;) {
-                            if (hi < lo) {
-                                break outer;
-                            }
-                            if (entCol == NamedCharacters.NAMES[lo].length()) {
-                                candidate = lo;
-                                charRefBufMark = charRefBufLen;
-                                lo++;
-                            } else if (entCol > NamedCharacters.NAMES[lo].length()) {
-                                break outer;
-                            } else if (c > NamedCharacters.NAMES[lo].charAt(entCol)) {
-                                lo++;
-                            } else {
-                                break loloop;
-                            }
-                        }
-
-                        hiloop: for (;;) {
-                            if (hi < lo) {
-                                break outer;
-                            }
-                            if (entCol == NamedCharacters.NAMES[hi].length()) {
-                                break hiloop;
-                            }
-                            if (entCol > NamedCharacters.NAMES[hi].length()) {
-                                break outer;
-                            } else if (c < NamedCharacters.NAMES[hi].charAt(entCol)) {
-                                hi--;
-                            } else {
-                                break hiloop;
-                            }
-                        }
-
-                        if (c == ';') {
-                            // If we see a semicolon, there cannot be a
-                            // longer match. Break the loop. However, before
-                            // breaking, take the longest match so far as the
-                            // candidate, if we are just about to complete a
-                            // match.
-                            if (entCol + 1 == NamedCharacters.NAMES[lo].length()) {
-                                candidate = lo;
-                                charRefBufMark = charRefBufLen;
-                            }
-                            break outer;
-                        }
-
-                        if (hi < lo) {
-                            break outer;
-                        }
-                        appendCharRefBuf(c);
-                        continue;
-                    }
-
-                    if (candidate == -1) {
-                        // reconsume deals with CR, LF or nul
-                        /*
-                         * If no match can be made, then this is a parse error.
-                         */
-                        errNoNamedCharacterMatch();
-                        emitOrAppendCharRefBuf(returnState);
-                        if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
-                            cstart = pos;
-                        }
-                        reconsume = true;
-                        state = transition(state, returnState, reconsume, pos);
-                        continue stateloop;
-                    } else {
-                        // c can't be CR, LF or nul if we got here
-                        @Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate];
-                        if (candidateName.length() == 0
-                                || candidateName.charAt(candidateName.length() - 1) != ';') {
-                            /*
-                             * If the last character matched is not a U+003B
-                             * SEMICOLON (;), there is a parse error.
-                             */
-                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
-                                /*
-                                 * If the entity is being consumed as part of an
-                                 * attribute, and the last character matched is
-                                 * not a U+003B SEMICOLON (;),
-                                 */
-                                char ch;
-                                if (charRefBufMark == charRefBufLen) {
-                                    ch = c;
-                                } else {
-                                    ch = charRefBuf[charRefBufMark];
-                                }
-                                if (ch == '=' || (ch >= '0' && ch <= '9')
-                                        || (ch >= 'A' && ch <= 'Z')
-                                        || (ch >= 'a' && ch <= 'z')) {
-                                    /*
-                                     * and the next character is either a U+003D
-                                     * EQUALS SIGN character (=) or in the range
-                                     * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
-                                     * U+0041 LATIN CAPITAL LETTER A to U+005A
-                                     * LATIN CAPITAL LETTER Z, or U+0061 LATIN
-                                     * SMALL LETTER A to U+007A LATIN SMALL
-                                     * LETTER Z, then, for historical reasons,
-                                     * all the characters that were matched
-                                     * after the U+0026 AMPERSAND (&) must be
-                                     * unconsumed, and nothing is returned.
-                                     */
-                                    errNoNamedCharacterMatch();
-                                    appendCharRefBufToStrBuf();
-                                    reconsume = true;
-                                    state = transition(state, returnState, reconsume, pos);
-                                    continue stateloop;
-                                }
-                            }
-                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
-                                errUnescapedAmpersandInterpretedAsCharacterReference();
-                            } else {
-                                errNotSemicolonTerminated();
-                            }
-                        }
-
-                        /*
-                         * Otherwise, return a character token for the character
-                         * corresponding to the entity name (as given by the
-                         * second column of the named character references
-                         * table).
-                         */
-                        // CPPONLY: completedNamedCharacterReference();
-                        @Const @NoLength char[] val = NamedCharacters.VALUES[candidate];
-                        if (
-                        // [NOCPP[
-                        val.length == 1
-                        // ]NOCPP]
-                        // CPPONLY: val[1] == 0
-                        ) {
-                            emitOrAppendOne(val, returnState);
-                        } else {
-                            emitOrAppendTwo(val, returnState);
-                        }
-                        // this is so complicated!
-                        if (charRefBufMark < charRefBufLen) {
-                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
-                                appendStrBuf(charRefBuf, charRefBufMark,
-                                        charRefBufLen - charRefBufMark);
-                            } else {
-                                tokenHandler.characters(charRefBuf, charRefBufMark,
-                                        charRefBufLen - charRefBufMark);
-                            }
-                        }
-                        // charRefBufLen will be zeroed below!
-
-                        // Check if we broke out early with c being the last
-                        // character that matched as opposed to being the
-                        // first one that didn't match. In the case of an
-                        // early break, the next run on text should start
-                        // *after* the current character and the current
-                        // character shouldn't be reconsumed.
-                        boolean earlyBreak = (c == ';' && charRefBufMark == charRefBufLen);
-                        charRefBufLen = 0;
-                        if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
-                            cstart = earlyBreak ? pos + 1 : pos;
-                        }
-                        reconsume = !earlyBreak;
-                        state = transition(state, returnState, reconsume, pos);
-                        continue stateloop;
-                        /*
-                         * If the markup contains I'm &notit; I tell you, the
-                         * entity is parsed as "not", as in, I'm ¬it; I tell
-                         * you. But if the markup was I'm &notin; I tell you,
-                         * the entity would be parsed as "notin;", resulting in
-                         * I'm ∉ I tell you.
-                         */
-                    }
-                    // XXX reorder point
-                case CONSUME_NCR:
-                    if (++pos == endPos) {
-                        break stateloop;
-                    }
-                    c = checkChar(buf, pos);
-                    value = 0;
-                    seenDigits = false;
-                    /*
-                     * The behavior further depends on the character after the
-                     * U+0023 NUMBER SIGN:
-                     */
-                    switch (c) {
-                        case 'x':
-                        case 'X':
-
-                            /*
-                             * U+0078 LATIN SMALL LETTER X U+0058 LATIN CAPITAL
-                             * LETTER X Consume the X.
-                             *
-                             * Follow the steps below, but using the range of
-                             * characters U+0030 DIGIT ZERO through to U+0039
-                             * DIGIT NINE, U+0061 LATIN SMALL LETTER A through
-                             * to U+0066 LATIN SMALL LETTER F, and U+0041 LATIN
-                             * CAPITAL LETTER A, through to U+0046 LATIN CAPITAL
-                             * LETTER F (in other words, 0-9, A-F, a-f).
-                             *
-                             * When it comes to interpreting the number,
-                             * interpret it as a hexadecimal number.
-                             */
-                            appendCharRefBuf(c);
-                            state = transition(state, Tokenizer.HEX_NCR_LOOP, reconsume, pos);
-                            continue stateloop;
-                        default:
-                            /*
-                             * Anything else Follow the steps below, but using
-                             * the range of characters U+0030 DIGIT ZERO through
-                             * to U+0039 DIGIT NINE (i.e. just 0-9).
-                             *
-                             * When it comes to interpreting the number,
-                             * interpret it as a decimal number.
-                             */
-                            reconsume = true;
-                            state = transition(state, Tokenizer.DECIMAL_NRC_LOOP, reconsume, pos);
-                            // FALL THROUGH continue stateloop;
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case DECIMAL_NRC_LOOP:
-                    decimalloop: for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        /*
-                         * Consume as many characters as match the range of
-                         * characters given above.
-                         */
-                        assert value >= 0: "value must not become negative.";
-                        if (c >= '0' && c <= '9') {
-                            seenDigits = true;
-                            // Avoid overflow
-                            if (value <= 0x10FFFF) {
-                                value *= 10;
-                                value += c - '0';
-                            }
-                            continue;
-                        } else if (c == ';') {
-                            if (seenDigits) {
-                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
-                                    cstart = pos + 1;
-                                }
-                                state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
-                                // FALL THROUGH continue stateloop;
-                                break decimalloop;
-                            } else {
-                                errNoDigitsInNCR();
-                                appendCharRefBuf(';');
-                                emitOrAppendCharRefBuf(returnState);
-                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
-                                    cstart = pos + 1;
-                                }
-                                state = transition(state, returnState, reconsume, pos);
-                                continue stateloop;
-                            }
-                        } else {
-                            /*
-                             * If no characters match the range, then don't
-                             * consume any characters (and unconsume the U+0023
-                             * NUMBER SIGN character and, if appropriate, the X
-                             * character). This is a parse error; nothing is
-                             * returned.
-                             *
-                             * Otherwise, if the next character is a U+003B
-                             * SEMICOLON, consume that too. If it isn't, there
-                             * is a parse error.
-                             */
-                            if (!seenDigits) {
-                                errNoDigitsInNCR();
-                                emitOrAppendCharRefBuf(returnState);
-                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
-                                    cstart = pos;
-                                }
-                                reconsume = true;
-                                state = transition(state, returnState, reconsume, pos);
-                                continue stateloop;
-                            } else {
-                                errCharRefLacksSemicolon();
-                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
-                                    cstart = pos;
-                                }
-                                reconsume = true;
-                                state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
-                                // FALL THROUGH continue stateloop;
-                                break decimalloop;
-                            }
-                        }
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case HANDLE_NCR_VALUE:
-                    // WARNING previous state sets reconsume
-                    // We are not going to emit the contents of charRefBuf.
-                    charRefBufLen = 0;
-                    // XXX inline this case if the method size can take it
-                    handleNcrValue(returnState);
-                    state = transition(state, returnState, reconsume, pos);
-                    continue stateloop;
-                    // XXX reorder point
-                case HEX_NCR_LOOP:
-                    for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume as many characters as match the range of
-                         * characters given above.
-                         */
-                        assert value >= 0: "value must not become negative.";
-                        if (c >= '0' && c <= '9') {
-                            seenDigits = true;
-                            // Avoid overflow
-                            if (value <= 0x10FFFF) {
-                                value *= 16;
-                                value += c - '0';
-                            }
-                            continue;
-                        } else if (c >= 'A' && c <= 'F') {
-                            seenDigits = true;
-                            // Avoid overflow
-                            if (value <= 0x10FFFF) {
-                                value *= 16;
-                                value += c - 'A' + 10;
-                            }
-                            continue;
-                        } else if (c >= 'a' && c <= 'f') {
-                            seenDigits = true;
-                            // Avoid overflow
-                            if (value <= 0x10FFFF) {
-                                value *= 16;
-                                value += c - 'a' + 10;
-                            }
-                            continue;
-                        } else if (c == ';') {
-                            if (seenDigits) {
-                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
-                                    cstart = pos + 1;
-                                }
-                                state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
-                                continue stateloop;
-                            } else {
-                                errNoDigitsInNCR();
-                                appendCharRefBuf(';');
-                                emitOrAppendCharRefBuf(returnState);
-                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
-                                    cstart = pos + 1;
-                                }
-                                state = transition(state, returnState, reconsume, pos);
-                                continue stateloop;
-                            }
-                        } else {
-                            /*
-                             * If no characters match the range, then don't
-                             * consume any characters (and unconsume the U+0023
-                             * NUMBER SIGN character and, if appropriate, the X
-                             * character). This is a parse error; nothing is
-                             * returned.
-                             *
-                             * Otherwise, if the next character is a U+003B
-                             * SEMICOLON, consume that too. If it isn't, there
-                             * is a parse error.
-                             */
-                            if (!seenDigits) {
-                                errNoDigitsInNCR();
-                                emitOrAppendCharRefBuf(returnState);
-                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
-                                    cstart = pos;
-                                }
-                                reconsume = true;
-                                state = transition(state, returnState, reconsume, pos);
-                                continue stateloop;
-                            } else {
-                                errCharRefLacksSemicolon();
-                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
-                                    cstart = pos;
-                                }
-                                reconsume = true;
-                                state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
-                                continue stateloop;
-                            }
-                        }
-                    }
-                    // XXX reorder point
-                case PLAINTEXT:
-                    plaintextloop: for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        switch (c) {
-                            case '\u0000':
-                                emitPlaintextReplacementCharacter(buf, pos);
-                                continue;
-                            case '\r':
-                                emitCarriageReturn(buf, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                            default:
-                                /*
-                                 * Anything else Emit the current input
-                                 * character as a character token. Stay in the
-                                 * RAWTEXT state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // XXX reorder point
-                case CLOSE_TAG_OPEN:
-                    if (++pos == endPos) {
-                        break stateloop;
-                    }
-                    c = checkChar(buf, pos);
-                    /*
-                     * Otherwise, if the content model flag is set to the PCDATA
-                     * state, or if the next few characters do match that tag
-                     * name, consume the next input character:
-                     */
-                    switch (c) {
-                        case '>':
-                            /* U+003E GREATER-THAN SIGN (>) Parse error. */
-                            errLtSlashGt();
-                            /*
-                             * Switch to the data state.
-                             */
-                            cstart = pos + 1;
-                            state = transition(state, Tokenizer.DATA, reconsume, pos);
-                            continue stateloop;
-                        case '\r':
-                            silentCarriageReturn();
-                            /* Anything else Parse error. */
-                            errGarbageAfterLtSlash();
-                            /*
-                             * Switch to the bogus comment state.
-                             */
-                            clearStrBufBeforeUse();
-                            appendStrBuf('\n');
-                            state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
-                            break stateloop;
-                        case '\n':
-                            silentLineFeed();
-                            /* Anything else Parse error. */
-                            errGarbageAfterLtSlash();
-                            /*
-                             * Switch to the bogus comment state.
-                             */
-                            clearStrBufBeforeUse();
-                            appendStrBuf(c);
-                            state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
-                            continue stateloop;
-                        case '\u0000':
-                            c = '\uFFFD';
-                            // fall thru
-                        default:
-                            if (c >= 'A' && c <= 'Z') {
-                                c += 0x20;
-                            }
-                            if (c >= 'a' && c <= 'z') {
-                                /*
-                                 * U+0061 LATIN SMALL LETTER A through to U+007A
-                                 * LATIN SMALL LETTER Z Create a new end tag
-                                 * token,
-                                 */
-                                endTag = true;
-                                /*
-                                 * set its tag name to the input character,
-                                 */
-                                clearStrBufBeforeUse();
-                                appendStrBuf(c);
-                                /*
-                                 * then switch to the tag name state. (Don't
-                                 * emit the token yet; further details will be
-                                 * filled in before it is emitted.)
-                                 */
-                                state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
-                                continue stateloop;
-                            } else {
-                                /* Anything else Parse error. */
-                                errGarbageAfterLtSlash();
-                                /*
-                                 * Switch to the bogus comment state.
-                                 */
-                                clearStrBufBeforeUse();
-                                appendStrBuf(c);
-                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
-                                continue stateloop;
-                            }
-                    }
-                    // XXX reorder point
-                case RCDATA:
-                    rcdataloop: for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        switch (c) {
-                            case '&':
-                                /*
-                                 * U+0026 AMPERSAND (&) Switch to the character
-                                 * reference in RCDATA state.
-                                 */
-                                flushChars(buf, pos);
-                                assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
-                                appendCharRefBuf(c);
-                                setAdditionalAndRememberAmpersandLocation('\u0000');
-                                returnState = state;
-                                state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
-                                continue stateloop;
-                            case '<':
-                                /*
-                                 * U+003C LESS-THAN SIGN (<) Switch to the
-                                 * RCDATA less-than sign state.
-                                 */
-                                flushChars(buf, pos);
-
-                                returnState = state;
-                                state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
-                                continue stateloop;
-                            case '\u0000':
-                                emitReplacementCharacter(buf, pos);
-                                continue;
-                            case '\r':
-                                emitCarriageReturn(buf, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                            default:
-                                /*
-                                 * Emit the current input character as a
-                                 * character token. Stay in the RCDATA state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // XXX reorder point
-                case RAWTEXT:
-                    rawtextloop: for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        switch (c) {
-                            case '<':
-                                /*
-                                 * U+003C LESS-THAN SIGN (<) Switch to the
-                                 * RAWTEXT less-than sign state.
-                                 */
-                                flushChars(buf, pos);
-
-                                returnState = state;
-                                state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
-                                break rawtextloop;
-                            // FALL THRU continue stateloop;
-                            case '\u0000':
-                                emitReplacementCharacter(buf, pos);
-                                continue;
-                            case '\r':
-                                emitCarriageReturn(buf, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                            default:
-                                /*
-                                 * Emit the current input character as a
-                                 * character token. Stay in the RAWTEXT state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // XXX fallthru don't reorder
-                case RAWTEXT_RCDATA_LESS_THAN_SIGN:
-                    rawtextrcdatalessthansignloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        switch (c) {
-                            case '/':
-                                /*
-                                 * U+002F SOLIDUS (/) Set the temporary buffer
-                                 * to the empty string. Switch to the script
-                                 * data end tag open state.
-                                 */
-                                index = 0;
-                                clearStrBufBeforeUse();
-                                state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
-                                break rawtextrcdatalessthansignloop;
-                            // FALL THRU continue stateloop;
-                            default:
-                                /*
-                                 * Otherwise, emit a U+003C LESS-THAN SIGN
-                                 * character token
-                                 */
-                                tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
-                                /*
-                                 * and reconsume the current input character in
-                                 * the data state.
-                                 */
-                                cstart = pos;
-                                reconsume = true;
-                                state = transition(state, returnState, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // XXX fall thru. don't reorder.
-                case NON_DATA_END_TAG_NAME:
-                    for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * ASSERT! when entering this state, set index to 0 and
-                         * call clearStrBufBeforeUse(); Let's implement the above
-                         * without lookahead. strBuf is the 'temporary buffer'.
-                         */
-                        if (endTagExpectationAsArray == null) {
-                            tokenHandler.characters(Tokenizer.LT_SOLIDUS,
-                                    0, 2);
-                            cstart = pos;
-                            reconsume = true;
-                            state = transition(state, returnState, reconsume, pos);
-                            continue stateloop;
-                        } else if (index < endTagExpectationAsArray.length) {
-                            char e = endTagExpectationAsArray[index];
-                            char folded = c;
-                            if (c >= 'A' && c <= 'Z') {
-                                folded += 0x20;
-                            }
-                            if (folded != e) {
-                                // [NOCPP[
-                                errHtml4LtSlashInRcdata(folded);
-                                // ]NOCPP]
-                                tokenHandler.characters(Tokenizer.LT_SOLIDUS,
-                                        0, 2);
-                                emitStrBuf();
-                                cstart = pos;
-                                reconsume = true;
-                                state = transition(state, returnState, reconsume, pos);
-                                continue stateloop;
-                            }
-                            appendStrBuf(c);
-                            index++;
-                            continue;
-                        } else {
-                            endTag = true;
-                            // XXX replace contentModelElement with different
-                            // type
-                            tagName = endTagExpectation;
-                            switch (c) {
-                                case '\r':
-                                    silentCarriageReturn();
-                                    clearStrBufAfterUse(); // strBuf not used
-                                    state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
-                                    break stateloop;
-                                case '\n':
-                                    silentLineFeed();
-                                    // fall thru
-                                case ' ':
-                                case '\t':
-                                case '\u000C':
-                                    /*
-                                     * U+0009 CHARACTER TABULATION U+000A LINE
-                                     * FEED (LF) U+000C FORM FEED (FF) U+0020
-                                     * SPACE If the current end tag token is an
-                                     * appropriate end tag token, then switch to
-                                     * the before attribute name state.
-                                     */
-                                    clearStrBufAfterUse(); // strBuf not used
-                                    state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
-                                    continue stateloop;
-                                case '/':
-                                    /*
-                                     * U+002F SOLIDUS (/) If the current end tag
-                                     * token is an appropriate end tag token,
-                                     * then switch to the self-closing start tag
-                                     * state.
-                                     */
-                                    clearStrBufAfterUse(); // strBuf not used
-                                    state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
-                                    continue stateloop;
-                                case '>':
-                                    /*
-                                     * U+003E GREATER-THAN SIGN (>) If the
-                                     * current end tag token is an appropriate
-                                     * end tag token, then emit the current tag
-                                     * token and switch to the data state.
-                                     */
-                                    clearStrBufAfterUse(); // strBuf not used
-                                    state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
-                                    if (shouldSuspend) {
-                                        break stateloop;
-                                    }
-                                    continue stateloop;
-                                default:
-                                    /*
-                                     * Emit a U+003C LESS-THAN SIGN character
-                                     * token, a U+002F SOLIDUS character token,
-                                     * a character token for each of the
-                                     * characters in the temporary buffer (in
-                                     * the order they were added to the buffer),
-                                     * and reconsume the current input character
-                                     * in the RAWTEXT state.
-                                     */
-                                    // [NOCPP[
-                                    errWarnLtSlashInRcdata();
-                                    // ]NOCPP]
-                                    tokenHandler.characters(
-                                            Tokenizer.LT_SOLIDUS, 0, 2);
-                                    emitStrBuf();
-                                    cstart = pos; // don't drop the
-                                                  // character
-                                    reconsume = true;
-                                    state = transition(state, returnState, reconsume, pos);
-                                    continue stateloop;
-                            }
-                        }
-                    }
-                    // XXX reorder point
-                    // BEGIN HOTSPOT WORKAROUND
-                case BOGUS_COMMENT:
-                    boguscommentloop: for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        /*
-                         * Consume every character up to and including the first
-                         * U+003E GREATER-THAN SIGN character (>) or the end of
-                         * the file (EOF), whichever comes first. Emit a comment
-                         * token whose data is the concatenation of all the
-                         * characters starting from and including the character
-                         * that caused the state machine to switch into the
-                         * bogus comment state, up to and including the
-                         * character immediately before the last consumed
-                         * character (i.e. up to the character just before the
-                         * U+003E or EOF character). (If the comment was started
-                         * by the end of the file (EOF), the token is empty.)
-                         *
-                         * Switch to the data state.
-                         *
-                         * If the end of the file was reached, reconsume the EOF
-                         * character.
-                         */
-                        switch (c) {
-                            case '>':
-                                emitComment(0, pos);
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            case '-':
-                                appendStrBuf(c);
-                                state = transition(state, Tokenizer.BOGUS_COMMENT_HYPHEN, reconsume, pos);
-                                break boguscommentloop;
-                            case '\r':
-                                appendStrBufCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                appendStrBufLineFeed();
-                                continue;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            default:
-                                appendStrBuf(c);
-                                continue;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case BOGUS_COMMENT_HYPHEN:
-                    boguscommenthyphenloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        switch (c) {
-                            case '>':
-                                // [NOCPP[
-                                maybeAppendSpaceToBogusComment();
-                                // ]NOCPP]
-                                emitComment(0, pos);
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            case '-':
-                                appendSecondHyphenToBogusComment();
-                                continue boguscommenthyphenloop;
-                            case '\r':
-                                appendStrBufCarriageReturn();
-                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                appendStrBufLineFeed();
-                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
-                                continue stateloop;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            default:
-                                appendStrBuf(c);
-                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // XXX reorder point
-                case SCRIPT_DATA:
-                    scriptdataloop: for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        switch (c) {
-                            case '<':
-                                /*
-                                 * U+003C LESS-THAN SIGN (<) Switch to the
-                                 * script data less-than sign state.
-                                 */
-                                flushChars(buf, pos);
-                                returnState = state;
-                                state = transition(state, Tokenizer.SCRIPT_DATA_LESS_THAN_SIGN, reconsume, pos);
-                                break scriptdataloop; // FALL THRU continue
-                            // stateloop;
-                            case '\u0000':
-                                emitReplacementCharacter(buf, pos);
-                                continue;
-                            case '\r':
-                                emitCarriageReturn(buf, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                            default:
-                                /*
-                                 * Anything else Emit the current input
-                                 * character as a character token. Stay in the
-                                 * script data state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case SCRIPT_DATA_LESS_THAN_SIGN:
-                    scriptdatalessthansignloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        switch (c) {
-                            case '/':
-                                /*
-                                 * U+002F SOLIDUS (/) Set the temporary buffer
-                                 * to the empty string. Switch to the script
-                                 * data end tag open state.
-                                 */
-                                index = 0;
-                                clearStrBufBeforeUse();
-                                state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
-                                continue stateloop;
-                            case '!':
-                                tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
-                                cstart = pos;
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START, reconsume, pos);
-                                break scriptdatalessthansignloop; // FALL THRU
-                            // continue
-                            // stateloop;
-                            default:
-                                /*
-                                 * Otherwise, emit a U+003C LESS-THAN SIGN
-                                 * character token
-                                 */
-                                tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
-                                /*
-                                 * and reconsume the current input character in
-                                 * the data state.
-                                 */
-                                cstart = pos;
-                                reconsume = true;
-                                state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case SCRIPT_DATA_ESCAPE_START:
-                    scriptdataescapestartloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '-':
-                                /*
-                                 * U+002D HYPHEN-MINUS (-) Emit a U+002D
-                                 * HYPHEN-MINUS character token. Switch to the
-                                 * script data escape start dash state.
-                                 */
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START_DASH, reconsume, pos);
-                                break scriptdataescapestartloop; // FALL THRU
-                            // continue
-                            // stateloop;
-                            default:
-                                /*
-                                 * Anything else Reconsume the current input
-                                 * character in the script data state.
-                                 */
-                                reconsume = true;
-                                state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case SCRIPT_DATA_ESCAPE_START_DASH:
-                    scriptdataescapestartdashloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '-':
-                                /*
-                                 * U+002D HYPHEN-MINUS (-) Emit a U+002D
-                                 * HYPHEN-MINUS character token. Switch to the
-                                 * script data escaped dash dash state.
-                                 */
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
-                                break scriptdataescapestartdashloop;
-                            // continue stateloop;
-                            default:
-                                /*
-                                 * Anything else Reconsume the current input
-                                 * character in the script data state.
-                                 */
-                                reconsume = true;
-                                state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case SCRIPT_DATA_ESCAPED_DASH_DASH:
-                    scriptdataescapeddashdashloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '-':
-                                /*
-                                 * U+002D HYPHEN-MINUS (-) Emit a U+002D
-                                 * HYPHEN-MINUS character token. Stay in the
-                                 * script data escaped dash dash state.
-                                 */
-                                continue;
-                            case '<':
-                                /*
-                                 * U+003C LESS-THAN SIGN (<) Switch to the
-                                 * script data escaped less-than sign state.
-                                 */
-                                flushChars(buf, pos);
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
-                                continue stateloop;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Emit a U+003E
-                                 * GREATER-THAN SIGN character token. Switch to
-                                 * the script data state.
-                                 */
-                                state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
-                                continue stateloop;
-                            case '\u0000':
-                                emitReplacementCharacter(buf, pos);
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
-                                break scriptdataescapeddashdashloop;
-                            case '\r':
-                                emitCarriageReturn(buf, pos);
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                            default:
-                                /*
-                                 * Anything else Emit the current input
-                                 * character as a character token. Switch to the
-                                 * script data escaped state.
-                                 */
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
-                                break scriptdataescapeddashdashloop;
-                            // continue stateloop;
-                        }
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case SCRIPT_DATA_ESCAPED:
-                    scriptdataescapedloop: for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '-':
-                                /*
-                                 * U+002D HYPHEN-MINUS (-) Emit a U+002D
-                                 * HYPHEN-MINUS character token. Switch to the
-                                 * script data escaped dash state.
-                                 */
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH, reconsume, pos);
-                                break scriptdataescapedloop; // FALL THRU
-                            // continue
-                            // stateloop;
-                            case '<':
-                                /*
-                                 * U+003C LESS-THAN SIGN (<) Switch to the
-                                 * script data escaped less-than sign state.
-                                 */
-                                flushChars(buf, pos);
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
-                                continue stateloop;
-                            case '\u0000':
-                                emitReplacementCharacter(buf, pos);
-                                continue;
-                            case '\r':
-                                emitCarriageReturn(buf, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                            default:
-                                /*
-                                 * Anything else Emit the current input
-                                 * character as a character token. Stay in the
-                                 * script data escaped state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case SCRIPT_DATA_ESCAPED_DASH:
-                    scriptdataescapeddashloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '-':
-                                /*
-                                 * U+002D HYPHEN-MINUS (-) Emit a U+002D
-                                 * HYPHEN-MINUS character token. Switch to the
-                                 * script data escaped dash dash state.
-                                 */
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
-                                continue stateloop;
-                            case '<':
-                                /*
-                                 * U+003C LESS-THAN SIGN (<) Switch to the
-                                 * script data escaped less-than sign state.
-                                 */
-                                flushChars(buf, pos);
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
-                                break scriptdataescapeddashloop;
-                            // continue stateloop;
-                            case '\u0000':
-                                emitReplacementCharacter(buf, pos);
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
-                                continue stateloop;
-                            case '\r':
-                                emitCarriageReturn(buf, pos);
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                            default:
-                                /*
-                                 * Anything else Emit the current input
-                                 * character as a character token. Switch to the
-                                 * script data escaped state.
-                                 */
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
-                    scriptdataescapedlessthanloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '/':
-                                /*
-                                 * U+002F SOLIDUS (/) Set the temporary buffer
-                                 * to the empty string. Switch to the script
-                                 * data escaped end tag open state.
-                                 */
-                                index = 0;
-                                clearStrBufBeforeUse();
-                                returnState = Tokenizer.SCRIPT_DATA_ESCAPED;
-                                state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
-                                continue stateloop;
-                            case 'S':
-                            case 's':
-                                /*
-                                 * U+0041 LATIN CAPITAL LETTER A through to
-                                 * U+005A LATIN CAPITAL LETTER Z Emit a U+003C
-                                 * LESS-THAN SIGN character token and the
-                                 * current input character as a character token.
-                                 */
-                                tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
-                                cstart = pos;
-                                index = 1;
-                                /*
-                                 * Set the temporary buffer to the empty string.
-                                 * Append the lowercase version of the current
-                                 * input character (add 0x0020 to the
-                                 * character's code point) to the temporary
-                                 * buffer. Switch to the script data double
-                                 * escape start state.
-                                 */
-                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_START, reconsume, pos);
-                                break scriptdataescapedlessthanloop;
-                            // continue stateloop;
-                            default:
-                                /*
-                                 * Anything else Emit a U+003C LESS-THAN SIGN
-                                 * character token and reconsume the current
-                                 * input character in the script data escaped
-                                 * state.
-                                 */
-                                tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
-                                cstart = pos;
-                                reconsume = true;
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case SCRIPT_DATA_DOUBLE_ESCAPE_START:
-                    scriptdatadoubleescapestartloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        assert index > 0;
-                        if (index < 6) { // SCRIPT_ARR.length
-                            char folded = c;
-                            if (c >= 'A' && c <= 'Z') {
-                                folded += 0x20;
-                            }
-                            if (folded != Tokenizer.SCRIPT_ARR[index]) {
-                                reconsume = true;
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
-                                continue stateloop;
-                            }
-                            index++;
-                            continue;
-                        }
-                        switch (c) {
-                            case '\r':
-                                emitCarriageReturn(buf, pos);
-                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                            case '/':
-                            case '>':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
-                                 * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
-                                 * (>) Emit the current input character as a
-                                 * character token. If the temporary buffer is
-                                 * the string "script", then switch to the
-                                 * script data double escaped state.
-                                 */
-                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
-                                break scriptdatadoubleescapestartloop;
-                            // continue stateloop;
-                            default:
-                                /*
-                                 * Anything else Reconsume the current input
-                                 * character in the script data escaped state.
-                                 */
-                                reconsume = true;
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case SCRIPT_DATA_DOUBLE_ESCAPED:
-                    scriptdatadoubleescapedloop: for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '-':
-                                /*
-                                 * U+002D HYPHEN-MINUS (-) Emit a U+002D
-                                 * HYPHEN-MINUS character token. Switch to the
-                                 * script data double escaped dash state.
-                                 */
-                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH, reconsume, pos);
-                                break scriptdatadoubleescapedloop; // FALL THRU
-                            // continue
-                            // stateloop;
-                            case '<':
-                                /*
-                                 * U+003C LESS-THAN SIGN (<) Emit a U+003C
-                                 * LESS-THAN SIGN character token. Switch to the
-                                 * script data double escaped less-than sign
-                                 * state.
-                                 */
-                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
-                                continue stateloop;
-                            case '\u0000':
-                                emitReplacementCharacter(buf, pos);
-                                continue;
-                            case '\r':
-                                emitCarriageReturn(buf, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                            default:
-                                /*
-                                 * Anything else Emit the current input
-                                 * character as a character token. Stay in the
-                                 * script data double escaped state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
-                    scriptdatadoubleescapeddashloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '-':
-                                /*
-                                 * U+002D HYPHEN-MINUS (-) Emit a U+002D
-                                 * HYPHEN-MINUS character token. Switch to the
-                                 * script data double escaped dash dash state.
-                                 */
-                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, reconsume, pos);
-                                break scriptdatadoubleescapeddashloop;
-                            // continue stateloop;
-                            case '<':
-                                /*
-                                 * U+003C LESS-THAN SIGN (<) Emit a U+003C
-                                 * LESS-THAN SIGN character token. Switch to the
-                                 * script data double escaped less-than sign
-                                 * state.
-                                 */
-                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
-                                continue stateloop;
-                            case '\u0000':
-                                emitReplacementCharacter(buf, pos);
-                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
-                                continue stateloop;
-                            case '\r':
-                                emitCarriageReturn(buf, pos);
-                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                            default:
-                                /*
-                                 * Anything else Emit the current input
-                                 * character as a character token. Switch to the
-                                 * script data double escaped state.
-                                 */
-                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
-                    scriptdatadoubleescapeddashdashloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '-':
-                                /*
-                                 * U+002D HYPHEN-MINUS (-) Emit a U+002D
-                                 * HYPHEN-MINUS character token. Stay in the
-                                 * script data double escaped dash dash state.
-                                 */
-                                continue;
-                            case '<':
-                                /*
-                                 * U+003C LESS-THAN SIGN (<) Emit a U+003C
-                                 * LESS-THAN SIGN character token. Switch to the
-                                 * script data double escaped less-than sign
-                                 * state.
-                                 */
-                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
-                                break scriptdatadoubleescapeddashdashloop;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Emit a U+003E
-                                 * GREATER-THAN SIGN character token. Switch to
-                                 * the script data state.
-                                 */
-                                state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
-                                continue stateloop;
-                            case '\u0000':
-                                emitReplacementCharacter(buf, pos);
-                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
-                                continue stateloop;
-                            case '\r':
-                                emitCarriageReturn(buf, pos);
-                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                            default:
-                                /*
-                                 * Anything else Emit the current input
-                                 * character as a character token. Switch to the
-                                 * script data double escaped state.
-                                 */
-                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
-                    scriptdatadoubleescapedlessthanloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '/':
-                                /*
-                                 * U+002F SOLIDUS (/) Emit a U+002F SOLIDUS
-                                 * character token. Set the temporary buffer to
-                                 * the empty string. Switch to the script data
-                                 * double escape end state.
-                                 */
-                                index = 0;
-                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_END, reconsume, pos);
-                                break scriptdatadoubleescapedlessthanloop;
-                            default:
-                                /*
-                                 * Anything else Reconsume the current input
-                                 * character in the script data double escaped
-                                 * state.
-                                 */
-                                reconsume = true;
-                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
-                case SCRIPT_DATA_DOUBLE_ESCAPE_END:
-                    scriptdatadoubleescapeendloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        if (index < 6) { // SCRIPT_ARR.length
-                            char folded = c;
-                            if (c >= 'A' && c <= 'Z') {
-                                folded += 0x20;
-                            }
-                            if (folded != Tokenizer.SCRIPT_ARR[index]) {
-                                reconsume = true;
-                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
-                                continue stateloop;
-                            }
-                            index++;
-                            continue;
-                        }
-                        switch (c) {
-                            case '\r':
-                                emitCarriageReturn(buf, pos);
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                            case '/':
-                            case '>':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
-                                 * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
-                                 * (>) Emit the current input character as a
-                                 * character token. If the temporary buffer is
-                                 * the string "script", then switch to the
-                                 * script data escaped state.
-                                 */
-                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
-                                continue stateloop;
-                            default:
-                                /*
-                                 * Reconsume the current input character in the
-                                 * script data double escaped state.
-                                 */
-                                reconsume = true;
-                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // XXX reorder point
-                case MARKUP_DECLARATION_OCTYPE:
-                    markupdeclarationdoctypeloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        if (index < 6) { // OCTYPE.length
-                            char folded = c;
-                            if (c >= 'A' && c <= 'Z') {
-                                folded += 0x20;
-                            }
-                            if (folded == Tokenizer.OCTYPE[index]) {
-                                appendStrBuf(c);
-                            } else {
-                                errBogusComment();
-                                reconsume = true;
-                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
-                                continue stateloop;
-                            }
-                            index++;
-                            continue;
-                        } else {
-                            reconsume = true;
-                            state = transition(state, Tokenizer.DOCTYPE, reconsume, pos);
-                            break markupdeclarationdoctypeloop;
-                            // continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case DOCTYPE:
-                    doctypeloop: for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        initDoctypeFields();
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\r':
-                                silentCarriageReturn();
-                                state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
-                                 * Switch to the before DOCTYPE name state.
-                                 */
-                                state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
-                                break doctypeloop;
-                            // continue stateloop;
-                            default:
-                                /*
-                                 * Anything else Parse error.
-                                 */
-                                errMissingSpaceBeforeDoctypeName();
-                                /*
-                                 * Reconsume the current character in the before
-                                 * DOCTYPE name state.
-                                 */
-                                reconsume = true;
-                                state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
-                                break doctypeloop;
-                            // continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case BEFORE_DOCTYPE_NAME:
-                    beforedoctypenameloop: for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\r':
-                                silentCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
-                                 * in the before DOCTYPE name state.
-                                 */
-                                continue;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Parse error.
-                                 */
-                                errNamelessDoctype();
-                                /*
-                                 * Create a new DOCTYPE token. Set its
-                                 * force-quirks flag to on.
-                                 */
-                                forceQuirks = true;
-                                /*
-                                 * Emit the token.
-                                 */
-                                emitDoctypeToken(pos);
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            default:
-                                if (c >= 'A' && c <= 'Z') {
-                                    /*
-                                     * U+0041 LATIN CAPITAL LETTER A through to
-                                     * U+005A LATIN CAPITAL LETTER Z Create a
-                                     * new DOCTYPE token. Set the token's name
-                                     * to the lowercase version of the input
-                                     * character (add 0x0020 to the character's
-                                     * code point).
-                                     */
-                                    c += 0x20;
-                                }
-                                /* Anything else Create a new DOCTYPE token. */
-                                /*
-                                 * Set the token's name name to the current
-                                 * input character.
-                                 */
-                                clearStrBufBeforeUse();
-                                appendStrBuf(c);
-                                /*
-                                 * Switch to the DOCTYPE name state.
-                                 */
-                                state = transition(state, Tokenizer.DOCTYPE_NAME, reconsume, pos);
-                                break beforedoctypenameloop;
-                            // continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case DOCTYPE_NAME:
-                    doctypenameloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\r':
-                                silentCarriageReturn();
-                                strBufToDoctypeName();
-                                state = transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
-                                 * Switch to the after DOCTYPE name state.
-                                 */
-                                strBufToDoctypeName();
-                                state = transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
-                                break doctypenameloop;
-                            // continue stateloop;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Emit the current
-                                 * DOCTYPE token.
-                                 */
-                                strBufToDoctypeName();
-                                emitDoctypeToken(pos);
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            default:
-                                /*
-                                 * U+0041 LATIN CAPITAL LETTER A through to
-                                 * U+005A LATIN CAPITAL LETTER Z Append the
-                                 * lowercase version of the input character (add
-                                 * 0x0020 to the character's code point) to the
-                                 * current DOCTYPE token's name.
-                                 */
-                                if (c >= 'A' && c <= 'Z') {
-                                    c += 0x0020;
-                                }
-                                /*
-                                 * Anything else Append the current input
-                                 * character to the current DOCTYPE token's
-                                 * name.
-                                 */
-                                appendStrBuf(c);
-                                /*
-                                 * Stay in the DOCTYPE name state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case AFTER_DOCTYPE_NAME:
-                    afterdoctypenameloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\r':
-                                silentCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
-                                 * in the after DOCTYPE name state.
-                                 */
-                                continue;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Emit the current
-                                 * DOCTYPE token.
-                                 */
-                                emitDoctypeToken(pos);
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            case 'p':
-                            case 'P':
-                                index = 0;
-                                state = transition(state, Tokenizer.DOCTYPE_UBLIC, reconsume, pos);
-                                break afterdoctypenameloop;
-                            // continue stateloop;
-                            case 's':
-                            case 'S':
-                                index = 0;
-                                state = transition(state, Tokenizer.DOCTYPE_YSTEM, reconsume, pos);
-                                continue stateloop;
-                            default:
-                                /*
-                                 * Otherwise, this is the parse error.
-                                 */
-                                bogusDoctype();
-
-                                /*
-                                 * Set the DOCTYPE token's force-quirks flag to
-                                 * on.
-                                 */
-                                // done by bogusDoctype();
-                                /*
-                                 * Switch to the bogus DOCTYPE state.
-                                 */
-                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case DOCTYPE_UBLIC:
-                    doctypeublicloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * If the six characters starting from the current input
-                         * character are an ASCII case-insensitive match for the
-                         * word "PUBLIC", then consume those characters and
-                         * switch to the before DOCTYPE public identifier state.
-                         */
-                        if (index < 5) { // UBLIC.length
-                            char folded = c;
-                            if (c >= 'A' && c <= 'Z') {
-                                folded += 0x20;
-                            }
-                            if (folded != Tokenizer.UBLIC[index]) {
-                                bogusDoctype();
-                                // forceQuirks = true;
-                                reconsume = true;
-                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
-                                continue stateloop;
-                            }
-                            index++;
-                            continue;
-                        } else {
-                            reconsume = true;
-                            state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos);
-                            break doctypeublicloop;
-                            // continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case AFTER_DOCTYPE_PUBLIC_KEYWORD:
-                    afterdoctypepublickeywordloop: for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\r':
-                                silentCarriageReturn();
-                                state = transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
-                                 * Switch to the before DOCTYPE public
-                                 * identifier state.
-                                 */
-                                state = transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
-                                break afterdoctypepublickeywordloop;
-                            // FALL THROUGH continue stateloop
-                            case '"':
-                                /*
-                                 * U+0022 QUOTATION MARK (") Parse Error.
-                                 */
-                                errNoSpaceBetweenDoctypePublicKeywordAndQuote();
-                                /*
-                                 * Set the DOCTYPE token's public identifier to
-                                 * the empty string (not missing),
-                                 */
-                                clearStrBufBeforeUse();
-                                /*
-                                 * then switch to the DOCTYPE public identifier
-                                 * (double-quoted) state.
-                                 */
-                                state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
-                                continue stateloop;
-                            case '\'':
-                                /*
-                                 * U+0027 APOSTROPHE (') Parse Error.
-                                 */
-                                errNoSpaceBetweenDoctypePublicKeywordAndQuote();
-                                /*
-                                 * Set the DOCTYPE token's public identifier to
-                                 * the empty string (not missing),
-                                 */
-                                clearStrBufBeforeUse();
-                                /*
-                                 * then switch to the DOCTYPE public identifier
-                                 * (single-quoted) state.
-                                 */
-                                state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
-                                continue stateloop;
-                            case '>':
-                                /* U+003E GREATER-THAN SIGN (>) Parse error. */
-                                errExpectedPublicId();
-                                /*
-                                 * Set the DOCTYPE token's force-quirks flag to
-                                 * on.
-                                 */
-                                forceQuirks = true;
-                                /*
-                                 * Emit that DOCTYPE token.
-                                 */
-                                emitDoctypeToken(pos);
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            default:
-                                bogusDoctype();
-                                /*
-                                 * Set the DOCTYPE token's force-quirks flag to
-                                 * on.
-                                 */
-                                // done by bogusDoctype();
-                                /*
-                                 * Switch to the bogus DOCTYPE state.
-                                 */
-                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
-                    beforedoctypepublicidentifierloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\r':
-                                silentCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
-                                 * in the before DOCTYPE public identifier
-                                 * state.
-                                 */
-                                continue;
-                            case '"':
-                                /*
-                                 * U+0022 QUOTATION MARK (") Set the DOCTYPE
-                                 * token's public identifier to the empty string
-                                 * (not missing),
-                                 */
-                                clearStrBufBeforeUse();
-                                /*
-                                 * then switch to the DOCTYPE public identifier
-                                 * (double-quoted) state.
-                                 */
-                                state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
-                                break beforedoctypepublicidentifierloop;
-                            // continue stateloop;
-                            case '\'':
-                                /*
-                                 * U+0027 APOSTROPHE (') Set the DOCTYPE token's
-                                 * public identifier to the empty string (not
-                                 * missing),
-                                 */
-                                clearStrBufBeforeUse();
-                                /*
-                                 * then switch to the DOCTYPE public identifier
-                                 * (single-quoted) state.
-                                 */
-                                state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
-                                continue stateloop;
-                            case '>':
-                                /* U+003E GREATER-THAN SIGN (>) Parse error. */
-                                errExpectedPublicId();
-                                /*
-                                 * Set the DOCTYPE token's force-quirks flag to
-                                 * on.
-                                 */
-                                forceQuirks = true;
-                                /*
-                                 * Emit that DOCTYPE token.
-                                 */
-                                emitDoctypeToken(pos);
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            default:
-                                bogusDoctype();
-                                /*
-                                 * Set the DOCTYPE token's force-quirks flag to
-                                 * on.
-                                 */
-                                // done by bogusDoctype();
-                                /*
-                                 * Switch to the bogus DOCTYPE state.
-                                 */
-                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
-                    doctypepublicidentifierdoublequotedloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '"':
-                                /*
-                                 * U+0022 QUOTATION MARK (") Switch to the after
-                                 * DOCTYPE public identifier state.
-                                 */
-                                publicIdentifier = strBufToString();
-                                state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
-                                break doctypepublicidentifierdoublequotedloop;
-                            // continue stateloop;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Parse error.
-                                 */
-                                errGtInPublicId();
-                                /*
-                                 * Set the DOCTYPE token's force-quirks flag to
-                                 * on.
-                                 */
-                                forceQuirks = true;
-                                /*
-                                 * Emit that DOCTYPE token.
-                                 */
-                                publicIdentifier = strBufToString();
-                                emitDoctypeToken(pos);
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            case '\r':
-                                appendStrBufCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                appendStrBufLineFeed();
-                                continue;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            default:
-                                /*
-                                 * Anything else Append the current input
-                                 * character to the current DOCTYPE token's
-                                 * public identifier.
-                                 */
-                                appendStrBuf(c);
-                                /*
-                                 * Stay in the DOCTYPE public identifier
-                                 * (double-quoted) state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
-                    afterdoctypepublicidentifierloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\r':
-                                silentCarriageReturn();
-                                state = transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
-                                 * Switch to the between DOCTYPE public and
-                                 * system identifiers state.
-                                 */
-                                state = transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
-                                break afterdoctypepublicidentifierloop;
-                            // continue stateloop;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Emit the current
-                                 * DOCTYPE token.
-                                 */
-                                emitDoctypeToken(pos);
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            case '"':
-                                /*
-                                 * U+0022 QUOTATION MARK (") Parse error.
-                                 */
-                                errNoSpaceBetweenPublicAndSystemIds();
-                                /*
-                                 * Set the DOCTYPE token's system identifier to
-                                 * the empty string (not missing),
-                                 */
-                                clearStrBufBeforeUse();
-                                /*
-                                 * then switch to the DOCTYPE system identifier
-                                 * (double-quoted) state.
-                                 */
-                                state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
-                                continue stateloop;
-                            case '\'':
-                                /*
-                                 * U+0027 APOSTROPHE (') Parse error.
-                                 */
-                                errNoSpaceBetweenPublicAndSystemIds();
-                                /*
-                                 * Set the DOCTYPE token's system identifier to
-                                 * the empty string (not missing),
-                                 */
-                                clearStrBufBeforeUse();
-                                /*
-                                 * then switch to the DOCTYPE system identifier
-                                 * (single-quoted) state.
-                                 */
-                                state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
-                                continue stateloop;
-                            default:
-                                bogusDoctype();
-                                /*
-                                 * Set the DOCTYPE token's force-quirks flag to
-                                 * on.
-                                 */
-                                // done by bogusDoctype();
-                                /*
-                                 * Switch to the bogus DOCTYPE state.
-                                 */
-                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
-                    betweendoctypepublicandsystemidentifiersloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\r':
-                                silentCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
-                                 * in the between DOCTYPE public and system
-                                 * identifiers state.
-                                 */
-                                continue;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Emit the current
-                                 * DOCTYPE token.
-                                 */
-                                emitDoctypeToken(pos);
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            case '"':
-                                /*
-                                 * U+0022 QUOTATION MARK (") Set the DOCTYPE
-                                 * token's system identifier to the empty string
-                                 * (not missing),
-                                 */
-                                clearStrBufBeforeUse();
-                                /*
-                                 * then switch to the DOCTYPE system identifier
-                                 * (double-quoted) state.
-                                 */
-                                state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
-                                break betweendoctypepublicandsystemidentifiersloop;
-                            // continue stateloop;
-                            case '\'':
-                                /*
-                                 * U+0027 APOSTROPHE (') Set the DOCTYPE token's
-                                 * system identifier to the empty string (not
-                                 * missing),
-                                 */
-                                clearStrBufBeforeUse();
-                                /*
-                                 * then switch to the DOCTYPE system identifier
-                                 * (single-quoted) state.
-                                 */
-                                state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
-                                continue stateloop;
-                            default:
-                                bogusDoctype();
-                                /*
-                                 * Set the DOCTYPE token's force-quirks flag to
-                                 * on.
-                                 */
-                                // done by bogusDoctype();
-                                /*
-                                 * Switch to the bogus DOCTYPE state.
-                                 */
-                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
-                    doctypesystemidentifierdoublequotedloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '"':
-                                /*
-                                 * U+0022 QUOTATION MARK (") Switch to the after
-                                 * DOCTYPE system identifier state.
-                                 */
-                                systemIdentifier = strBufToString();
-                                state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
-                                continue stateloop;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Parse error.
-                                 */
-                                errGtInSystemId();
-                                /*
-                                 * Set the DOCTYPE token's force-quirks flag to
-                                 * on.
-                                 */
-                                forceQuirks = true;
-                                /*
-                                 * Emit that DOCTYPE token.
-                                 */
-                                systemIdentifier = strBufToString();
-                                emitDoctypeToken(pos);
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            case '\r':
-                                appendStrBufCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                appendStrBufLineFeed();
-                                continue;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            default:
-                                /*
-                                 * Anything else Append the current input
-                                 * character to the current DOCTYPE token's
-                                 * system identifier.
-                                 */
-                                appendStrBuf(c);
-                                /*
-                                 * Stay in the DOCTYPE system identifier
-                                 * (double-quoted) state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
-                    afterdoctypesystemidentifierloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\r':
-                                silentCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
-                                 * in the after DOCTYPE system identifier state.
-                                 */
-                                continue;
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Emit the current
-                                 * DOCTYPE token.
-                                 */
-                                emitDoctypeToken(pos);
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            default:
-                                /*
-                                 * Switch to the bogus DOCTYPE state. (This does
-                                 * not set the DOCTYPE token's force-quirks flag
-                                 * to on.)
-                                 */
-                                bogusDoctypeWithoutQuirks();
-                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
-                                break afterdoctypesystemidentifierloop;
-                            // continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case BOGUS_DOCTYPE:
-                    for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '>':
-                                /*
-                                 * U+003E GREATER-THAN SIGN (>) Emit that
-                                 * DOCTYPE token.
-                                 */
-                                emitDoctypeToken(pos);
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            case '\r':
-                                silentCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            default:
-                                /*
-                                 * Anything else Stay in the bogus DOCTYPE
-                                 * state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // XXX reorder point
-                case DOCTYPE_YSTEM:
-                    doctypeystemloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Otherwise, if the six characters starting from the
-                         * current input character are an ASCII case-insensitive
-                         * match for the word "SYSTEM", then consume those
-                         * characters and switch to the before DOCTYPE system
-                         * identifier state.
-                         */
-                        if (index < 5) { // YSTEM.length
-                            char folded = c;
-                            if (c >= 'A' && c <= 'Z') {
-                                folded += 0x20;
-                            }
-                            if (folded != Tokenizer.YSTEM[index]) {
-                                bogusDoctype();
-                                reconsume = true;
-                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
-                                continue stateloop;
-                            }
-                            index++;
-                            continue stateloop;
-                        } else {
-                            reconsume = true;
-                            state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos);
-                            break doctypeystemloop;
-                            // continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case AFTER_DOCTYPE_SYSTEM_KEYWORD:
-                    afterdoctypesystemkeywordloop: for (;;) {
-                        if (reconsume) {
-                            reconsume = false;
-                        } else {
-                            if (++pos == endPos) {
-                                break stateloop;
-                            }
-                            c = checkChar(buf, pos);
-                        }
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\r':
-                                silentCarriageReturn();
-                                state = transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
-                                 * Switch to the before DOCTYPE public
-                                 * identifier state.
-                                 */
-                                state = transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
-                                break afterdoctypesystemkeywordloop;
-                            // FALL THROUGH continue stateloop
-                            case '"':
-                                /*
-                                 * U+0022 QUOTATION MARK (") Parse Error.
-                                 */
-                                errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
-                                /*
-                                 * Set the DOCTYPE token's system identifier to
-                                 * the empty string (not missing),
-                                 */
-                                clearStrBufBeforeUse();
-                                /*
-                                 * then switch to the DOCTYPE public identifier
-                                 * (double-quoted) state.
-                                 */
-                                state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
-                                continue stateloop;
-                            case '\'':
-                                /*
-                                 * U+0027 APOSTROPHE (') Parse Error.
-                                 */
-                                errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
-                                /*
-                                 * Set the DOCTYPE token's public identifier to
-                                 * the empty string (not missing),
-                                 */
-                                clearStrBufBeforeUse();
-                                /*
-                                 * then switch to the DOCTYPE public identifier
-                                 * (single-quoted) state.
-                                 */
-                                state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
-                                continue stateloop;
-                            case '>':
-                                /* U+003E GREATER-THAN SIGN (>) Parse error. */
-                                errExpectedPublicId();
-                                /*
-                                 * Set the DOCTYPE token's force-quirks flag to
-                                 * on.
-                                 */
-                                forceQuirks = true;
-                                /*
-                                 * Emit that DOCTYPE token.
-                                 */
-                                emitDoctypeToken(pos);
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            default:
-                                bogusDoctype();
-                                /*
-                                 * Set the DOCTYPE token's force-quirks flag to
-                                 * on.
-                                 */
-                                // done by bogusDoctype();
-                                /*
-                                 * Switch to the bogus DOCTYPE state.
-                                 */
-                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
-                    beforedoctypesystemidentifierloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\r':
-                                silentCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                silentLineFeed();
-                                // fall thru
-                            case ' ':
-                            case '\t':
-                            case '\u000C':
-                                /*
-                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
-                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
-                                 * in the before DOCTYPE system identifier
-                                 * state.
-                                 */
-                                continue;
-                            case '"':
-                                /*
-                                 * U+0022 QUOTATION MARK (") Set the DOCTYPE
-                                 * token's system identifier to the empty string
-                                 * (not missing),
-                                 */
-                                clearStrBufBeforeUse();
-                                /*
-                                 * then switch to the DOCTYPE system identifier
-                                 * (double-quoted) state.
-                                 */
-                                state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
-                                continue stateloop;
-                            case '\'':
-                                /*
-                                 * U+0027 APOSTROPHE (') Set the DOCTYPE token's
-                                 * system identifier to the empty string (not
-                                 * missing),
-                                 */
-                                clearStrBufBeforeUse();
-                                /*
-                                 * then switch to the DOCTYPE system identifier
-                                 * (single-quoted) state.
-                                 */
-                                state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
-                                break beforedoctypesystemidentifierloop;
-                            // continue stateloop;
-                            case '>':
-                                /* U+003E GREATER-THAN SIGN (>) Parse error. */
-                                errExpectedSystemId();
-                                /*
-                                 * Set the DOCTYPE token's force-quirks flag to
-                                 * on.
-                                 */
-                                forceQuirks = true;
-                                /*
-                                 * Emit that DOCTYPE token.
-                                 */
-                                emitDoctypeToken(pos);
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            default:
-                                bogusDoctype();
-                                /*
-                                 * Set the DOCTYPE token's force-quirks flag to
-                                 * on.
-                                 */
-                                // done by bogusDoctype();
-                                /*
-                                 * Switch to the bogus DOCTYPE state.
-                                 */
-                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
-                                continue stateloop;
-                        }
-                    }
-                    // FALLTHRU DON'T REORDER
-                case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
-                    for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\'':
-                                /*
-                                 * U+0027 APOSTROPHE (') Switch to the after
-                                 * DOCTYPE system identifier state.
-                                 */
-                                systemIdentifier = strBufToString();
-                                state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
-                                continue stateloop;
-                            case '>':
-                                errGtInSystemId();
-                                /*
-                                 * Set the DOCTYPE token's force-quirks flag to
-                                 * on.
-                                 */
-                                forceQuirks = true;
-                                /*
-                                 * Emit that DOCTYPE token.
-                                 */
-                                systemIdentifier = strBufToString();
-                                emitDoctypeToken(pos);
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            case '\r':
-                                appendStrBufCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                appendStrBufLineFeed();
-                                continue;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            default:
-                                /*
-                                 * Anything else Append the current input
-                                 * character to the current DOCTYPE token's
-                                 * system identifier.
-                                 */
-                                appendStrBuf(c);
-                                /*
-                                 * Stay in the DOCTYPE system identifier
-                                 * (double-quoted) state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // XXX reorder point
-                case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
-                    for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        /*
-                         * Consume the next input character:
-                         */
-                        switch (c) {
-                            case '\'':
-                                /*
-                                 * U+0027 APOSTROPHE (') Switch to the after
-                                 * DOCTYPE public identifier state.
-                                 */
-                                publicIdentifier = strBufToString();
-                                state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
-                                continue stateloop;
-                            case '>':
-                                errGtInPublicId();
-                                /*
-                                 * Set the DOCTYPE token's force-quirks flag to
-                                 * on.
-                                 */
-                                forceQuirks = true;
-                                /*
-                                 * Emit that DOCTYPE token.
-                                 */
-                                publicIdentifier = strBufToString();
-                                emitDoctypeToken(pos);
-                                /*
-                                 * Switch to the data state.
-                                 */
-                                state = transition(state, Tokenizer.DATA, reconsume, pos);
-                                continue stateloop;
-                            case '\r':
-                                appendStrBufCarriageReturn();
-                                break stateloop;
-                            case '\n':
-                                appendStrBufLineFeed();
-                                continue;
-                            case '\u0000':
-                                c = '\uFFFD';
-                                // fall thru
-                            default:
-                                /*
-                                 * Anything else Append the current input
-                                 * character to the current DOCTYPE token's
-                                 * public identifier.
-                                 */
-                                appendStrBuf(c);
-                                /*
-                                 * Stay in the DOCTYPE public identifier
-                                 * (single-quoted) state.
-                                 */
-                                continue;
-                        }
-                    }
-                    // XXX reorder point
-                case PROCESSING_INSTRUCTION:
-                    processinginstructionloop: for (;;) {
-                        if (++pos == endPos) {
-                            break stateloop;
-                        }
-                        c = checkChar(buf, pos);
-                        switch (c) {
-                            case '?':
-                                state = transition(
-                                        state,
-                                        Tokenizer.PROCESSING_INSTRUCTION_QUESTION_MARK,
-                                        reconsume, pos);
-                                break processinginstructionloop;
-                            // continue stateloop;
-                            default:
-                                continue;
-                        }
-                    }
-                case PROCESSING_INSTRUCTION_QUESTION_MARK:
-                    if (++pos == endPos) {
-                        break stateloop;
-                    }
-                    c = checkChar(buf, pos);
-                    switch (c) {
-                        case '>':
-                            state = transition(state, Tokenizer.DATA,
-                                    reconsume, pos);
-                            continue stateloop;
-                        default:
-                            state = transition(state,
-                                    Tokenizer.PROCESSING_INSTRUCTION,
-                                    reconsume, pos);
-                            continue stateloop;
-                    }
-                    // END HOTSPOT WORKAROUND
-            }
-        }
-        flushChars(buf, pos);
-        /*
-         * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
-         */
-        // Save locals
-        stateSave = state;
-        returnStateSave = returnState;
-        return pos;
-    }
-
-    // HOTSPOT WORKAROUND INSERTION POINT
-
-    // [NOCPP[
-
-    protected int transition(int from, int to, boolean reconsume, int pos) throws SAXException {
-        return to;
-    }
-
-    // ]NOCPP]
-
-    private void initDoctypeFields() {
-        // Discard the characters "DOCTYPE" accumulated as a potential bogus
-        // comment into strBuf.
-        clearStrBufAfterUse();
-        doctypeName = "";
-        if (systemIdentifier != null) {
-            Portability.releaseString(systemIdentifier);
-            systemIdentifier = null;
-        }
-        if (publicIdentifier != null) {
-            Portability.releaseString(publicIdentifier);
-            publicIdentifier = null;
-        }
-        forceQuirks = false;
-    }
-
-    @Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
-            throws SAXException {
-        silentCarriageReturn();
-        adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
-    }
-
-    @Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed()
-            throws SAXException {
-        silentLineFeed();
-        adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
-    }
-
-    @Inline private void appendStrBufLineFeed() {
-        silentLineFeed();
-        appendStrBuf('\n');
-    }
-
-    @Inline private void appendStrBufCarriageReturn() {
-        silentCarriageReturn();
-        appendStrBuf('\n');
-    }
-
-    @Inline protected void silentCarriageReturn() {
-        ++line;
-        lastCR = true;
-    }
-
-    @Inline protected void silentLineFeed() {
-        ++line;
-    }
-
-    private void emitCarriageReturn(@NoLength char[] buf, int pos)
-            throws SAXException {
-        silentCarriageReturn();
-        flushChars(buf, pos);
-        tokenHandler.characters(Tokenizer.LF, 0, 1);
-        cstart = Integer.MAX_VALUE;
-    }
-
-    private void emitReplacementCharacter(@NoLength char[] buf, int pos)
-            throws SAXException {
-        flushChars(buf, pos);
-        tokenHandler.zeroOriginatingReplacementCharacter();
-        cstart = pos + 1;
-    }
-
-    private void emitPlaintextReplacementCharacter(@NoLength char[] buf, int pos)
-            throws SAXException {
-        flushChars(buf, pos);
-        tokenHandler.characters(REPLACEMENT_CHARACTER, 0, 1);
-        cstart = pos + 1;
-    }
-
-    private void setAdditionalAndRememberAmpersandLocation(char add) {
-        additional = add;
-        // [NOCPP[
-        ampersandLocation = new LocatorImpl(this);
-        // ]NOCPP]
-    }
-
-    private void bogusDoctype() throws SAXException {
-        errBogusDoctype();
-        forceQuirks = true;
-    }
-
-    private void bogusDoctypeWithoutQuirks() throws SAXException {
-        errBogusDoctype();
-        forceQuirks = false;
-    }
-
-    private void handleNcrValue(int returnState) throws SAXException {
-        /*
-         * If one or more characters match the range, then take them all and
-         * interpret the string of characters as a number (either hexadecimal or
-         * decimal as appropriate).
-         */
-        if (value <= 0xFFFF) {
-            if (value >= 0x80 && value <= 0x9f) {
-                /*
-                 * If that number is one of the numbers in the first column of
-                 * the following table, then this is a parse error.
-                 */
-                errNcrInC1Range();
-                /*
-                 * Find the row with that number in the first column, and return
-                 * a character token for the Unicode character given in the
-                 * second column of that row.
-                 */
-                @NoLength char[] val = NamedCharacters.WINDOWS_1252[value - 0x80];
-                emitOrAppendOne(val, returnState);
-                // [NOCPP[
-            } else if (value == 0xC
-                    && contentSpacePolicy != XmlViolationPolicy.ALLOW) {
-                if (contentSpacePolicy == XmlViolationPolicy.ALTER_INFOSET) {
-                    emitOrAppendOne(Tokenizer.SPACE, returnState);
-                } else if (contentSpacePolicy == XmlViolationPolicy.FATAL) {
-                    fatal("A character reference expanded to a form feed which is not legal XML 1.0 white space.");
-                }
-                // ]NOCPP]
-            } else if (value == 0x0) {
-                errNcrZero();
-                emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
-            } else if ((value & 0xF800) == 0xD800) {
-                errNcrSurrogate();
-                emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
-            } else {
-                /*
-                 * Otherwise, return a character token for the Unicode character
-                 * whose code point is that number.
-                 */
-                char ch = (char) value;
-                // [NOCPP[
-                if (value == 0x0D) {
-                    errNcrCr();
-                } else if ((value <= 0x0008) || (value == 0x000B)
-                        || (value >= 0x000E && value <= 0x001F)) {
-                    ch = errNcrControlChar(ch);
-                } else if (value >= 0xFDD0 && value <= 0xFDEF) {
-                    errNcrUnassigned();
-                } else if ((value & 0xFFFE) == 0xFFFE) {
-                    ch = errNcrNonCharacter(ch);
-                } else if (value >= 0x007F && value <= 0x009F) {
-                    errNcrControlChar();
-                } else {
-                    maybeWarnPrivateUse(ch);
-                }
-                // ]NOCPP]
-                bmpChar[0] = ch;
-                emitOrAppendOne(bmpChar, returnState);
-            }
-        } else if (value <= 0x10FFFF) {
-            // [NOCPP[
-            maybeWarnPrivateUseAstral();
-            if ((value & 0xFFFE) == 0xFFFE) {
-                errAstralNonCharacter(value);
-            }
-            // ]NOCPP]
-            astralChar[0] = (char) (Tokenizer.LEAD_OFFSET + (value >> 10));
-            astralChar[1] = (char) (0xDC00 + (value & 0x3FF));
-            emitOrAppendTwo(astralChar, returnState);
-        } else {
-            errNcrOutOfRange();
-            emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
-        }
-    }
-
-    public void eof() throws SAXException {
-        int state = stateSave;
-        int returnState = returnStateSave;
-
-        eofloop: for (;;) {
-            switch (state) {
-                case SCRIPT_DATA_LESS_THAN_SIGN:
-                case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
-                    /*
-                     * Otherwise, emit a U+003C LESS-THAN SIGN character token
-                     */
-                    tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
-                    /*
-                     * and reconsume the current input character in the data
-                     * state.
-                     */
-                    break eofloop;
-                case TAG_OPEN:
-                    /*
-                     * The behavior of this state depends on the content model
-                     * flag.
-                     */
-                    /*
-                     * Anything else Parse error.
-                     */
-                    errEofAfterLt();
-                    /*
-                     * Emit a U+003C LESS-THAN SIGN character token
-                     */
-                    tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
-                    /*
-                     * and reconsume the current input character in the data
-                     * state.
-                     */
-                    break eofloop;
-                case RAWTEXT_RCDATA_LESS_THAN_SIGN:
-                    /*
-                     * Emit a U+003C LESS-THAN SIGN character token
-                     */
-                    tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
-                    /*
-                     * and reconsume the current input character in the RCDATA
-                     * state.
-                     */
-                    break eofloop;
-                case NON_DATA_END_TAG_NAME:
-                    /*
-                     * Emit a U+003C LESS-THAN SIGN character token, a U+002F
-                     * SOLIDUS character token,
-                     */
-                    tokenHandler.characters(Tokenizer.LT_SOLIDUS, 0, 2);
-                    /*
-                     * a character token for each of the characters in the
-                     * temporary buffer (in the order they were added to the
-                     * buffer),
-                     */
-                    emitStrBuf();
-                    /*
-                     * and reconsume the current input character in the RCDATA
-                     * state.
-                     */
-                    break eofloop;
-                case CLOSE_TAG_OPEN:
-                    /* EOF Parse error. */
-                    errEofAfterLt();
-                    /*
-                     * Emit a U+003C LESS-THAN SIGN character token and a U+002F
-                     * SOLIDUS character token.
-                     */
-                    tokenHandler.characters(Tokenizer.LT_SOLIDUS, 0, 2);
-                    /*
-                     * Reconsume the EOF character in the data state.
-                     */
-                    break eofloop;
-                case TAG_NAME:
-                    /*
-                     * EOF Parse error.
-                     */
-                    errEofInTagName();
-                    /*
-                     * Reconsume the EOF character in the data state.
-                     */
-                    break eofloop;
-                case BEFORE_ATTRIBUTE_NAME:
-                case AFTER_ATTRIBUTE_VALUE_QUOTED:
-                case SELF_CLOSING_START_TAG:
-                    /* EOF Parse error. */
-                    errEofWithoutGt();
-                    /*
-                     * Reconsume the EOF character in the data state.
-                     */
-                    break eofloop;
-                case ATTRIBUTE_NAME:
-                    /*
-                     * EOF Parse error.
-                     */
-                    errEofInAttributeName();
-                    /*
-                     * Reconsume the EOF character in the data state.
-                     */
-                    break eofloop;
-                case AFTER_ATTRIBUTE_NAME:
-                case BEFORE_ATTRIBUTE_VALUE:
-                    /* EOF Parse error. */
-                    errEofWithoutGt();
-                    /*
-                     * Reconsume the EOF character in the data state.
-                     */
-                    break eofloop;
-                case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
-                case ATTRIBUTE_VALUE_SINGLE_QUOTED:
-                case ATTRIBUTE_VALUE_UNQUOTED:
-                    /* EOF Parse error. */
-                    errEofInAttributeValue();
-                    /*
-                     * Reconsume the EOF character in the data state.
-                     */
-                    break eofloop;
-                case BOGUS_COMMENT:
-                    emitComment(0, 0);
-                    break eofloop;
-                case BOGUS_COMMENT_HYPHEN:
-                    // [NOCPP[
-                    maybeAppendSpaceToBogusComment();
-                    // ]NOCPP]
-                    emitComment(0, 0);
-                    break eofloop;
-                case MARKUP_DECLARATION_OPEN:
-                    errBogusComment();
-                    emitComment(0, 0);
-                    break eofloop;
-                case MARKUP_DECLARATION_HYPHEN:
-                    errBogusComment();
-                    emitComment(0, 0);
-                    break eofloop;
-                case MARKUP_DECLARATION_OCTYPE:
-                    if (index < 6) {
-                        errBogusComment();
-                        emitComment(0, 0);
-                    } else {
-                        /* EOF Parse error. */
-                        errEofInDoctype();
-                        /*
-                         * Create a new DOCTYPE token. Set its force-quirks flag
-                         * to on.
-                         */
-                        doctypeName = "";
-                        if (systemIdentifier != null) {
-                            Portability.releaseString(systemIdentifier);
-                            systemIdentifier = null;
-                        }
-                        if (publicIdentifier != null) {
-                            Portability.releaseString(publicIdentifier);
-                            publicIdentifier = null;
-                        }
-                        forceQuirks = true;
-                        /*
-                         * Emit the token.
-                         */
-                        emitDoctypeToken(0);
-                        /*
-                         * Reconsume the EOF character in the data state.
-                         */
-                        break eofloop;
-                    }
-                    break eofloop;
-                case COMMENT_START:
-                case COMMENT:
-                    /*
-                     * EOF Parse error.
-                     */
-                    errEofInComment();
-                    /* Emit the comment token. */
-                    emitComment(0, 0);
-                    /*
-                     * Reconsume the EOF character in the data state.
-                     */
-                    break eofloop;
-                case COMMENT_END:
-                    errEofInComment();
-                    /* Emit the comment token. */
-                    emitComment(2, 0);
-                    /*
-                     * Reconsume the EOF character in the data state.
-                     */
-                    break eofloop;
-                case COMMENT_END_DASH:
-                case COMMENT_START_DASH:
-                    errEofInComment();
-                    /* Emit the comment token. */
-                    emitComment(1, 0);
-                    /*
-                     * Reconsume the EOF character in the data state.
-                     */
-                    break eofloop;
-                case COMMENT_END_BANG:
-                    errEofInComment();
-                    /* Emit the comment token. */
-                    emitComment(3, 0);
-                    /*
-                     * Reconsume the EOF character in the data state.
-                     */
-                    break eofloop;
-                case DOCTYPE:
-                case BEFORE_DOCTYPE_NAME:
-                    errEofInDoctype();
-                    /*
-                     * Create a new DOCTYPE token. Set its force-quirks flag to
-                     * on.
-                     */
-                    forceQuirks = true;
-                    /*
-                     * Emit the token.
-                     */
-                    emitDoctypeToken(0);
-                    /*
-                     * Reconsume the EOF character in the data state.
-                     */
-                    break eofloop;
-                case DOCTYPE_NAME:
-                    errEofInDoctype();
-                    strBufToDoctypeName();
-                    /*
-                     * Set the DOCTYPE token's force-quirks flag to on.
-                     */
-                    forceQuirks = true;
-                    /*
-                     * Emit that DOCTYPE token.
-                     */
-                    emitDoctypeToken(0);
-                    /*
-                     * Reconsume the EOF character in the data state.
-                     */
-                    break eofloop;
-                case DOCTYPE_UBLIC:
-                case DOCTYPE_YSTEM:
-                case AFTER_DOCTYPE_NAME:
-                case AFTER_DOCTYPE_PUBLIC_KEYWORD:
-                case AFTER_DOCTYPE_SYSTEM_KEYWORD:
-                case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
-                    errEofInDoctype();
-                    /*
-                     * Set the DOCTYPE token's force-quirks flag to on.
-                     */
-                    forceQuirks = true;
-                    /*
-                     * Emit that DOCTYPE token.
-                     */
-                    emitDoctypeToken(0);
-                    /*
-                     * Reconsume the EOF character in the data state.
-                     */
-                    break eofloop;
-                case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
-                case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
-                    /* EOF Parse error. */
-                    errEofInPublicId();
-                    /*
-                     * Set the DOCTYPE token's force-quirks flag to on.
-                     */
-                    forceQuirks = true;
-                    /*
-                     * Emit that DOCTYPE token.
-                     */
-                    publicIdentifier = strBufToString();
-                    emitDoctypeToken(0);
-                    /*
-                     * Reconsume the EOF character in the data state.
-                     */
-                    break eofloop;
-                case AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
-                case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
-                case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
-                    errEofInDoctype();
-                    /*
-                     * Set the DOCTYPE token's force-quirks flag to on.
-                     */
-                    forceQuirks = true;
-                    /*
-                     * Emit that DOCTYPE token.
-                     */
-                    emitDoctypeToken(0);
-                    /*
-                     * Reconsume the EOF character in the data state.
-                     */
-                    break eofloop;
-                case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
-                case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
-                    /* EOF Parse error. */
-                    errEofInSystemId();
-                    /*
-                     * Set the DOCTYPE token's force-quirks flag to on.
-                     */
-                    forceQuirks = true;
-                    /*
-                     * Emit that DOCTYPE token.
-                     */
-                    systemIdentifier = strBufToString();
-                    emitDoctypeToken(0);
-                    /*
-                     * Reconsume the EOF character in the data state.
-                     */
-                    break eofloop;
-                case AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
-                    errEofInDoctype();
-                    /*
-                     * Set the DOCTYPE token's force-quirks flag to on.
-                     */
-                    forceQuirks = true;
-                    /*
-                     * Emit that DOCTYPE token.
-                     */
-                    emitDoctypeToken(0);
-                    /*
-                     * Reconsume the EOF character in the data state.
-                     */
-                    break eofloop;
-                case BOGUS_DOCTYPE:
-                    /*
-                     * Emit that DOCTYPE token.
-                     */
-                    emitDoctypeToken(0);
-                    /*
-                     * Reconsume the EOF character in the data state.
-                     */
-                    break eofloop;
-                case CONSUME_CHARACTER_REFERENCE:
-                    /*
-                     * Unlike the definition is the spec, this state does not
-                     * return a value and never requires the caller to
-                     * backtrack. This state takes care of emitting characters
-                     * or appending to the current attribute value. It also
-                     * takes care of that in the case when consuming the entity
-                     * fails.
-                     */
-                    /*
-                     * This section defines how to consume an entity. This
-                     * definition is used when parsing entities in text and in
-                     * attributes.
-                     *
-                     * The behavior depends on the identity of the next
-                     * character (the one immediately after the U+0026 AMPERSAND
-                     * character):
-                     */
-
-                    emitOrAppendCharRefBuf(returnState);
-                    state = returnState;
-                    continue;
-                case CHARACTER_REFERENCE_HILO_LOOKUP:
-                    errNoNamedCharacterMatch();
-                    emitOrAppendCharRefBuf(returnState);
-                    state = returnState;
-                    continue;
-                case CHARACTER_REFERENCE_TAIL:
-                    outer: for (;;) {
-                        char c = '\u0000';
-                        entCol++;
-                        /*
-                         * Consume the maximum number of characters possible,
-                         * with the consumed characters matching one of the
-                         * identifiers in the first column of the named
-                         * character references table (in a case-sensitive
-                         * manner).
-                         */
-                        hiloop: for (;;) {
-                            if (hi == -1) {
-                                break hiloop;
-                            }
-                            if (entCol == NamedCharacters.NAMES[hi].length()) {
-                                break hiloop;
-                            }
-                            if (entCol > NamedCharacters.NAMES[hi].length()) {
-                                break outer;
-                            } else if (c < NamedCharacters.NAMES[hi].charAt(entCol)) {
-                                hi--;
-                            } else {
-                                break hiloop;
-                            }
-                        }
-
-                        loloop: for (;;) {
-                            if (hi < lo) {
-                                break outer;
-                            }
-                            if (entCol == NamedCharacters.NAMES[lo].length()) {
-                                candidate = lo;
-                                charRefBufMark = charRefBufLen;
-                                lo++;
-                            } else if (entCol > NamedCharacters.NAMES[lo].length()) {
-                                break outer;
-                            } else if (c > NamedCharacters.NAMES[lo].charAt(entCol)) {
-                                lo++;
-                            } else {
-                                break loloop;
-                            }
-                        }
-                        if (hi < lo) {
-                            break outer;
-                        }
-                        continue;
-                    }
-
-                    if (candidate == -1) {
-                        /*
-                         * If no match can be made, then this is a parse error.
-                         */
-                        errNoNamedCharacterMatch();
-                        emitOrAppendCharRefBuf(returnState);
-                        state = returnState;
-                        continue eofloop;
-                    } else {
-                        @Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate];
-                        if (candidateName.length() == 0
-                                || candidateName.charAt(candidateName.length() - 1) != ';') {
-                            /*
-                             * If the last character matched is not a U+003B
-                             * SEMICOLON (;), there is a parse error.
-                             */
-                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
-                                /*
-                                 * If the entity is being consumed as part of an
-                                 * attribute, and the last character matched is
-                                 * not a U+003B SEMICOLON (;),
-                                 */
-                                char ch;
-                                if (charRefBufMark == charRefBufLen) {
-                                    ch = '\u0000';
-                                } else {
-                                    ch = charRefBuf[charRefBufMark];
-                                }
-                                if ((ch >= '0' && ch <= '9')
-                                        || (ch >= 'A' && ch <= 'Z')
-                                        || (ch >= 'a' && ch <= 'z')) {
-                                    /*
-                                     * and the next character is in the range
-                                     * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
-                                     * U+0041 LATIN CAPITAL LETTER A to U+005A
-                                     * LATIN CAPITAL LETTER Z, or U+0061 LATIN
-                                     * SMALL LETTER A to U+007A LATIN SMALL
-                                     * LETTER Z, then, for historical reasons,
-                                     * all the characters that were matched
-                                     * after the U+0026 AMPERSAND (&) must be
-                                     * unconsumed, and nothing is returned.
-                                     */
-                                    errNoNamedCharacterMatch();
-                                    appendCharRefBufToStrBuf();
-                                    state = returnState;
-                                    continue eofloop;
-                                }
-                            }
-                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
-                                errUnescapedAmpersandInterpretedAsCharacterReference();
-                            } else {
-                                errNotSemicolonTerminated();
-                            }
-                        }
-
-                        /*
-                         * Otherwise, return a character token for the character
-                         * corresponding to the entity name (as given by the
-                         * second column of the named character references
-                         * table).
-                         */
-                        @Const @NoLength char[] val = NamedCharacters.VALUES[candidate];
-                        if (
-                        // [NOCPP[
-                        val.length == 1
-                        // ]NOCPP]
-                        // CPPONLY: val[1] == 0
-                        ) {
-                            emitOrAppendOne(val, returnState);
-                        } else {
-                            emitOrAppendTwo(val, returnState);
-                        }
-                        // this is so complicated!
-                        if (charRefBufMark < charRefBufLen) {
-                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
-                                appendStrBuf(charRefBuf, charRefBufMark,
-                                        charRefBufLen - charRefBufMark);
-                            } else {
-                                tokenHandler.characters(charRefBuf, charRefBufMark,
-                                        charRefBufLen - charRefBufMark);
-                            }
-                        }
-                        charRefBufLen = 0;
-                        state = returnState;
-                        continue eofloop;
-                        /*
-                         * If the markup contains I'm &notit; I tell you, the
-                         * entity is parsed as "not", as in, I'm ¬it; I tell
-                         * you. But if the markup was I'm &notin; I tell you,
-                         * the entity would be parsed as "notin;", resulting in
-                         * I'm ∉ I tell you.
-                         */
-                    }
-                case CONSUME_NCR:
-                case DECIMAL_NRC_LOOP:
-                case HEX_NCR_LOOP:
-                    /*
-                     * If no characters match the range, then don't consume any
-                     * characters (and unconsume the U+0023 NUMBER SIGN
-                     * character and, if appropriate, the X character). This is
-                     * a parse error; nothing is returned.
-                     *
-                     * Otherwise, if the next character is a U+003B SEMICOLON,
-                     * consume that too. If it isn't, there is a parse error.
-                     */
-                    if (!seenDigits) {
-                        errNoDigitsInNCR();
-                        emitOrAppendCharRefBuf(returnState);
-                        state = returnState;
-                        continue;
-                    } else {
-                        errCharRefLacksSemicolon();
-                    }
-                    // WARNING previous state sets reconsume
-                    handleNcrValue(returnState);
-                    state = returnState;
-                    continue;
-                case CDATA_RSQB:
-                    tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1);
-                    break eofloop;
-                case CDATA_RSQB_RSQB:
-                    tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2);
-                    break eofloop;
-                case DATA:
-                default:
-                    break eofloop;
-            }
-        }
-        // case DATA:
-        /*
-         * EOF Emit an end-of-file token.
-         */
-        tokenHandler.eof();
-        return;
-    }
-
-    private void emitDoctypeToken(int pos) throws SAXException {
-        cstart = pos + 1;
-        tokenHandler.doctype(doctypeName, publicIdentifier, systemIdentifier,
-                forceQuirks);
-        // It is OK and sufficient to release these here, since
-        // there's no way out of the doctype states than through paths
-        // that call this method.
-        doctypeName = null;
-        Portability.releaseString(publicIdentifier);
-        publicIdentifier = null;
-        Portability.releaseString(systemIdentifier);
-        systemIdentifier = null;
-    }
-
-    @Inline protected char checkChar(@NoLength char[] buf, int pos)
-            throws SAXException {
-        return buf[pos];
-    }
-
-    public boolean internalEncodingDeclaration(String internalCharset)
-            throws SAXException {
-        if (encodingDeclarationHandler != null) {
-            return encodingDeclarationHandler.internalEncodingDeclaration(internalCharset);
-        }
-        return false;
-    }
-
-    /**
-     * @param val
-     * @throws SAXException
-     */
-    private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState)
-            throws SAXException {
-        if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
-            appendStrBuf(val[0]);
-            appendStrBuf(val[1]);
-        } else {
-            tokenHandler.characters(val, 0, 2);
-        }
-    }
-
-    private void emitOrAppendOne(@Const @NoLength char[] val, int returnState)
-            throws SAXException {
-        if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
-            appendStrBuf(val[0]);
-        } else {
-            tokenHandler.characters(val, 0, 1);
-        }
-    }
-
-    public void end() throws SAXException {
-        strBuf = null;
-        doctypeName = null;
-        if (systemIdentifier != null) {
-            Portability.releaseString(systemIdentifier);
-            systemIdentifier = null;
-        }
-        if (publicIdentifier != null) {
-            Portability.releaseString(publicIdentifier);
-            publicIdentifier = null;
-        }
-        if (tagName != null) {
-            tagName.release();
-            tagName = null;
-        }
-        if (attributeName != null) {
-            attributeName.release();
-            attributeName = null;
-        }
-        tokenHandler.endTokenization();
-        if (attributes != null) {
-            // [NOCPP[
-            attributes = null;
-            // ]NOCPP]
-            // CPPONLY: attributes.clear(mappingLangToXmlLang);
-        }
-    }
-
-    public void requestSuspension() {
-        shouldSuspend = true;
-    }
-
-    // [NOCPP[
-
-    public void becomeConfident() {
-        confident = true;
-    }
-
-    /**
-     * Returns the nextCharOnNewLine.
-     *
-     * @return the nextCharOnNewLine
-     */
-    public boolean isNextCharOnNewLine() {
-        return false;
-    }
-
-    public boolean isPrevCR() {
-        return lastCR;
-    }
-
-    /**
-     * Returns the line.
-     *
-     * @return the line
-     */
-    public int getLine() {
-        return -1;
-    }
-
-    /**
-     * Returns the col.
-     *
-     * @return the col
-     */
-    public int getCol() {
-        return -1;
-    }
-
-    // ]NOCPP]
-
-    public boolean isInDataState() {
-        return (stateSave == DATA);
-    }
-
-    public void resetToDataState() {
-        clearStrBufAfterUse();
-        charRefBufLen = 0;
-        stateSave = Tokenizer.DATA;
-        // line = 1; XXX line numbers
-        lastCR = false;
-        index = 0;
-        forceQuirks = false;
-        additional = '\u0000';
-        entCol = -1;
-        firstCharKey = -1;
-        lo = 0;
-        hi = 0; // will always be overwritten before use anyway
-        candidate = -1;
-        charRefBufMark = 0;
-        value = 0;
-        seenDigits = false;
-        endTag = false;
-        shouldSuspend = false;
-        initDoctypeFields();
-        if (tagName != null) {
-            tagName.release();
-            tagName = null;
-        }
-        if (attributeName != null) {
-            attributeName.release();
-            attributeName = null;
-        }
-        if (newAttributesEachTime) {
-            if (attributes != null) {
-                Portability.delete(attributes);
-                attributes = null;
-            }
-        }
-    }
-
-    public void loadState(Tokenizer other) throws SAXException {
-        strBufLen = other.strBufLen;
-        if (strBufLen > strBuf.length) {
-            strBuf = new char[strBufLen];
-        }
-        System.arraycopy(other.strBuf, 0, strBuf, 0, strBufLen);
-
-        charRefBufLen = other.charRefBufLen;
-        System.arraycopy(other.charRefBuf, 0, charRefBuf, 0, charRefBufLen);
-
-        stateSave = other.stateSave;
-        returnStateSave = other.returnStateSave;
-        endTagExpectation = other.endTagExpectation;
-        endTagExpectationAsArray = other.endTagExpectationAsArray;
-        // line = 1; XXX line numbers
-        lastCR = other.lastCR;
-        index = other.index;
-        forceQuirks = other.forceQuirks;
-        additional = other.additional;
-        entCol = other.entCol;
-        firstCharKey = other.firstCharKey;
-        lo = other.lo;
-        hi = other.hi;
-        candidate = other.candidate;
-        charRefBufMark = other.charRefBufMark;
-        value = other.value;
-        seenDigits = other.seenDigits;
-        endTag = other.endTag;
-        shouldSuspend = false;
-
-        if (other.doctypeName == null) {
-            doctypeName = null;
-        } else {
-            doctypeName = Portability.newLocalFromLocal(other.doctypeName,
-                    interner);
-        }
-
-        Portability.releaseString(systemIdentifier);
-        if (other.systemIdentifier == null) {
-            systemIdentifier = null;
-        } else {
-            systemIdentifier = Portability.newStringFromString(other.systemIdentifier);
-        }
-
-        Portability.releaseString(publicIdentifier);
-        if (other.publicIdentifier == null) {
-            publicIdentifier = null;
-        } else {
-            publicIdentifier = Portability.newStringFromString(other.publicIdentifier);
-        }
-
-        if (tagName != null) {
-            tagName.release();
-        }
-        if (other.tagName == null) {
-            tagName = null;
-        } else {
-            tagName = other.tagName.cloneElementName(interner);
-        }
-
-        if (attributeName != null) {
-            attributeName.release();
-        }
-        if (other.attributeName == null) {
-            attributeName = null;
-        } else {
-            attributeName = other.attributeName.cloneAttributeName(interner);
-        }
-
-        Portability.delete(attributes);
-        if (other.attributes == null) {
-            attributes = null;
-        } else {
-            attributes = other.attributes.cloneAttributes(interner);
-        }
-    }
-
-    public void initializeWithoutStarting() throws SAXException {
-        confident = false;
-        strBuf = null;
-        line = 1;
-        // CPPONLY: attributeLine = 1;
-        // [NOCPP[
-        html4 = false;
-        metaBoundaryPassed = false;
-        wantsComments = tokenHandler.wantsComments();
-        if (!newAttributesEachTime) {
-            attributes = new HtmlAttributes(mappingLangToXmlLang);
-        }
-        // ]NOCPP]
-        resetToDataState();
-    }
-
-    protected void errGarbageAfterLtSlash() throws SAXException {
-    }
-
-    protected void errLtSlashGt() throws SAXException {
-    }
-
-    protected void errWarnLtSlashInRcdata() throws SAXException {
-    }
-
-    protected void errHtml4LtSlashInRcdata(char folded) throws SAXException {
-    }
-
-    protected void errCharRefLacksSemicolon() throws SAXException {
-    }
-
-    protected void errNoDigitsInNCR() throws SAXException {
-    }
-
-    protected void errGtInSystemId() throws SAXException {
-    }
-
-    protected void errGtInPublicId() throws SAXException {
-    }
-
-    protected void errNamelessDoctype() throws SAXException {
-    }
-
-    protected void errConsecutiveHyphens() throws SAXException {
-    }
-
-    protected void errPrematureEndOfComment() throws SAXException {
-    }
-
-    protected void errBogusComment() throws SAXException {
-    }
-
-    protected void errUnquotedAttributeValOrNull(char c) throws SAXException {
-    }
-
-    protected void errSlashNotFollowedByGt() throws SAXException {
-    }
-
-    protected void errHtml4XmlVoidSyntax() throws SAXException {
-    }
-
-    protected void errNoSpaceBetweenAttributes() throws SAXException {
-    }
-
-    protected void errHtml4NonNameInUnquotedAttribute(char c)
-            throws SAXException {
-    }
-
-    protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c)
-            throws SAXException {
-    }
-
-    protected void errAttributeValueMissing() throws SAXException {
-    }
-
-    protected void errBadCharBeforeAttributeNameOrNull(char c)
-            throws SAXException {
-    }
-
-    protected void errEqualsSignBeforeAttributeName() throws SAXException {
-    }
-
-    protected void errBadCharAfterLt(char c) throws SAXException {
-    }
-
-    protected void errLtGt() throws SAXException {
-    }
-
-    protected void errProcessingInstruction() throws SAXException {
-    }
-
-    protected void errUnescapedAmpersandInterpretedAsCharacterReference()
-            throws SAXException {
-    }
-
-    protected void errNotSemicolonTerminated() throws SAXException {
-    }
-
-    protected void errNoNamedCharacterMatch() throws SAXException {
-    }
-
-    protected void errQuoteBeforeAttributeName(char c) throws SAXException {
-    }
-
-    protected void errQuoteOrLtInAttributeNameOrNull(char c)
-            throws SAXException {
-    }
-
-    protected void errExpectedPublicId() throws SAXException {
-    }
-
-    protected void errBogusDoctype() throws SAXException {
-    }
-
-    protected void maybeWarnPrivateUseAstral() throws SAXException {
-    }
-
-    protected void maybeWarnPrivateUse(char ch) throws SAXException {
-    }
-
-    protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs)
-            throws SAXException {
-    }
-
-    protected void maybeErrSlashInEndTag(boolean selfClosing)
-            throws SAXException {
-    }
-
-    protected char errNcrNonCharacter(char ch) throws SAXException {
-        return ch;
-    }
-
-    protected void errAstralNonCharacter(int ch) throws SAXException {
-    }
-
-    protected void errNcrSurrogate() throws SAXException {
-    }
-
-    protected char errNcrControlChar(char ch) throws SAXException {
-        return ch;
-    }
-
-    protected void errNcrCr() throws SAXException {
-    }
-
-    protected void errNcrInC1Range() throws SAXException {
-    }
-
-    protected void errEofInPublicId() throws SAXException {
-    }
-
-    protected void errEofInComment() throws SAXException {
-    }
-
-    protected void errEofInDoctype() throws SAXException {
-    }
-
-    protected void errEofInAttributeValue() throws SAXException {
-    }
-
-    protected void errEofInAttributeName() throws SAXException {
-    }
-
-    protected void errEofWithoutGt() throws SAXException {
-    }
-
-    protected void errEofInTagName() throws SAXException {
-    }
-
-    protected void errEofInEndTag() throws SAXException {
-    }
-
-    protected void errEofAfterLt() throws SAXException {
-    }
-
-    protected void errNcrOutOfRange() throws SAXException {
-    }
-
-    protected void errNcrUnassigned() throws SAXException {
-    }
-
-    protected void errDuplicateAttribute() throws SAXException {
-    }
-
-    protected void errEofInSystemId() throws SAXException {
-    }
-
-    protected void errExpectedSystemId() throws SAXException {
-    }
-
-    protected void errMissingSpaceBeforeDoctypeName() throws SAXException {
-    }
-
-    protected void errHyphenHyphenBang() throws SAXException {
-    }
-
-    protected void errNcrControlChar() throws SAXException {
-    }
-
-    protected void errNcrZero() throws SAXException {
-    }
-
-    protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote()
-            throws SAXException {
-    }
-
-    protected void errNoSpaceBetweenPublicAndSystemIds() throws SAXException {
-    }
-
-    protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote()
-            throws SAXException {
-    }
-
-    protected void noteAttributeWithoutValue() throws SAXException {
-    }
-
-    protected void noteUnquotedAttributeValue() throws SAXException {
-    }
-
-    /**
-     * Sets the encodingDeclarationHandler.
-     *
-     * @param encodingDeclarationHandler
-     *            the encodingDeclarationHandler to set
-     */
-    public void setEncodingDeclarationHandler(
-            EncodingDeclarationHandler encodingDeclarationHandler) {
-        this.encodingDeclarationHandler = encodingDeclarationHandler;
-    }
-
-    void destructor() {
-        // The translator will write refcount tracing stuff here
-        Portability.delete(attributes);
-        attributes = null;
-    }
-
-    // [NOCPP[
-
-    /**
-     * Sets an offset to be added to the position reported to
-     * <code>TransitionHandler</code>.
-     *
-     * @param offset the offset
-     */
-    public void setTransitionBaseOffset(int offset) {
-
-    }
-
-    // ]NOCPP]
-
-}
-- 
cgit v1.2.3