summaryrefslogtreecommitdiffstats
path: root/parser/html/javasrc/MetaScanner.java
diff options
context:
space:
mode:
authorMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
committerMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
commit5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree10027f336435511475e392454359edea8e25895d /parser/html/javasrc/MetaScanner.java
parent49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
downloadUXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip
Add m-esr52 at 52.6.0
Diffstat (limited to 'parser/html/javasrc/MetaScanner.java')
-rw-r--r--parser/html/javasrc/MetaScanner.java854
1 files changed, 854 insertions, 0 deletions
diff --git a/parser/html/javasrc/MetaScanner.java b/parser/html/javasrc/MetaScanner.java
new file mode 100644
index 000000000..be9aabfe3
--- /dev/null
+++ b/parser/html/javasrc/MetaScanner.java
@@ -0,0 +1,854 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import java.io.IOException;
+
+import nu.validator.htmlparser.annotation.Auto;
+import nu.validator.htmlparser.annotation.Inline;
+import nu.validator.htmlparser.common.ByteReadable;
+
+import org.xml.sax.SAXException;
+
+public abstract class MetaScanner {
+
+ /**
+ * Constant for "charset".
+ */
+ private static final char[] CHARSET = { 'h', 'a', 'r', 's', 'e', 't' };
+
+ /**
+ * Constant for "content".
+ */
+ private static final char[] CONTENT = { 'o', 'n', 't', 'e', 'n', 't' };
+
+ /**
+ * Constant for "http-equiv".
+ */
+ private static final char[] HTTP_EQUIV = { 't', 't', 'p', '-', 'e', 'q',
+ 'u', 'i', 'v' };
+
+ /**
+ * Constant for "content-type".
+ */
+ private static final char[] CONTENT_TYPE = { 'c', 'o', 'n', 't', 'e', 'n',
+ 't', '-', 't', 'y', 'p', 'e' };
+
+ private static final int NO = 0;
+
+ private static final int M = 1;
+
+ private static final int E = 2;
+
+ private static final int T = 3;
+
+ private static final int A = 4;
+
+ private static final int DATA = 0;
+
+ private static final int TAG_OPEN = 1;
+
+ private static final int SCAN_UNTIL_GT = 2;
+
+ private static final int TAG_NAME = 3;
+
+ private static final int BEFORE_ATTRIBUTE_NAME = 4;
+
+ private static final int ATTRIBUTE_NAME = 5;
+
+ private static final int AFTER_ATTRIBUTE_NAME = 6;
+
+ private static final int BEFORE_ATTRIBUTE_VALUE = 7;
+
+ private static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED = 8;
+
+ private static final int ATTRIBUTE_VALUE_SINGLE_QUOTED = 9;
+
+ private static final int ATTRIBUTE_VALUE_UNQUOTED = 10;
+
+ private static final int AFTER_ATTRIBUTE_VALUE_QUOTED = 11;
+
+ private static final int MARKUP_DECLARATION_OPEN = 13;
+
+ private static final int MARKUP_DECLARATION_HYPHEN = 14;
+
+ private static final int COMMENT_START = 15;
+
+ private static final int COMMENT_START_DASH = 16;
+
+ private static final int COMMENT = 17;
+
+ private static final int COMMENT_END_DASH = 18;
+
+ private static final int COMMENT_END = 19;
+
+ private static final int SELF_CLOSING_START_TAG = 20;
+
+ private static final int HTTP_EQUIV_NOT_SEEN = 0;
+
+ private static final int HTTP_EQUIV_CONTENT_TYPE = 1;
+
+ private static final int HTTP_EQUIV_OTHER = 2;
+
+ /**
+ * The data source.
+ */
+ protected ByteReadable readable;
+
+ /**
+ * The state of the state machine that recognizes the tag name "meta".
+ */
+ private int metaState = NO;
+
+ /**
+ * The current position in recognizing the attribute name "content".
+ */
+ private int contentIndex = Integer.MAX_VALUE;
+
+ /**
+ * The current position in recognizing the attribute name "charset".
+ */
+ private int charsetIndex = Integer.MAX_VALUE;
+
+ /**
+ * The current position in recognizing the attribute name "http-equive".
+ */
+ private int httpEquivIndex = Integer.MAX_VALUE;
+
+ /**
+ * The current position in recognizing the attribute value "content-type".
+ */
+ private int contentTypeIndex = Integer.MAX_VALUE;
+
+ /**
+ * The tokenizer state.
+ */
+ protected int stateSave = DATA;
+
+ /**
+ * The currently filled length of strBuf.
+ */
+ private int strBufLen;
+
+ /**
+ * Accumulation buffer for attribute values.
+ */
+ private @Auto char[] strBuf;
+
+ private String content;
+
+ private String charset;
+
+ private int httpEquivState;
+
+ // CPPONLY: private TreeBuilder treeBuilder;
+
+ public MetaScanner(
+ // CPPONLY: TreeBuilder tb
+ ) {
+ this.readable = null;
+ this.metaState = NO;
+ this.contentIndex = Integer.MAX_VALUE;
+ this.charsetIndex = Integer.MAX_VALUE;
+ this.httpEquivIndex = Integer.MAX_VALUE;
+ this.contentTypeIndex = Integer.MAX_VALUE;
+ this.stateSave = DATA;
+ this.strBufLen = 0;
+ this.strBuf = new char[36];
+ this.content = null;
+ this.charset = null;
+ this.httpEquivState = HTTP_EQUIV_NOT_SEEN;
+ // CPPONLY: this.treeBuilder = tb;
+ }
+
+ @SuppressWarnings("unused") private void destructor() {
+ Portability.releaseString(content);
+ Portability.releaseString(charset);
+ }
+
+ // [NOCPP[
+
+ /**
+ * Reads a byte from the data source.
+ *
+ * -1 means end.
+ * @return
+ * @throws IOException
+ */
+ protected int read() throws IOException {
+ return readable.readByte();
+ }
+
+ // ]NOCPP]
+
+ // WARNING When editing this, makes sure the bytecode length shown by javap
+ // stays under 8000 bytes!
+ /**
+ * The runs the meta scanning algorithm.
+ */
+ protected final void stateLoop(int state)
+ throws SAXException, IOException {
+ int c = -1;
+ boolean reconsume = false;
+ stateloop: for (;;) {
+ switch (state) {
+ case DATA:
+ dataloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ c = read();
+ }
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '<':
+ state = MetaScanner.TAG_OPEN;
+ break dataloop; // FALL THROUGH continue
+ // stateloop;
+ default:
+ continue;
+ }
+ }
+ // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
+ case TAG_OPEN:
+ tagopenloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case 'm':
+ case 'M':
+ metaState = M;
+ state = MetaScanner.TAG_NAME;
+ break tagopenloop;
+ // continue stateloop;
+ case '!':
+ state = MetaScanner.MARKUP_DECLARATION_OPEN;
+ continue stateloop;
+ case '?':
+ case '/':
+ state = MetaScanner.SCAN_UNTIL_GT;
+ continue stateloop;
+ case '>':
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
+ metaState = NO;
+ state = MetaScanner.TAG_NAME;
+ break tagopenloop;
+ // continue stateloop;
+ }
+ state = MetaScanner.DATA;
+ reconsume = true;
+ continue stateloop;
+ }
+ }
+ // FALL THROUGH DON'T REORDER
+ case TAG_NAME:
+ tagnameloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\u000C':
+ state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
+ break tagnameloop;
+ // continue stateloop;
+ case '/':
+ state = MetaScanner.SELF_CLOSING_START_TAG;
+ continue stateloop;
+ case '>':
+ state = MetaScanner.DATA;
+ continue stateloop;
+ case 'e':
+ case 'E':
+ if (metaState == M) {
+ metaState = E;
+ } else {
+ metaState = NO;
+ }
+ continue;
+ case 't':
+ case 'T':
+ if (metaState == E) {
+ metaState = T;
+ } else {
+ metaState = NO;
+ }
+ continue;
+ case 'a':
+ case 'A':
+ if (metaState == T) {
+ metaState = A;
+ } else {
+ metaState = NO;
+ }
+ continue;
+ default:
+ metaState = NO;
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BEFORE_ATTRIBUTE_NAME:
+ beforeattributenameloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ c = read();
+ }
+ /*
+ * Consume the next input character:
+ */
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\u000C':
+ continue;
+ case '/':
+ state = MetaScanner.SELF_CLOSING_START_TAG;
+ continue stateloop;
+ case '>':
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = DATA;
+ continue stateloop;
+ case 'c':
+ case 'C':
+ contentIndex = 0;
+ charsetIndex = 0;
+ httpEquivIndex = Integer.MAX_VALUE;
+ contentTypeIndex = Integer.MAX_VALUE;
+ state = MetaScanner.ATTRIBUTE_NAME;
+ break beforeattributenameloop;
+ case 'h':
+ case 'H':
+ contentIndex = Integer.MAX_VALUE;
+ charsetIndex = Integer.MAX_VALUE;
+ httpEquivIndex = 0;
+ contentTypeIndex = Integer.MAX_VALUE;
+ state = MetaScanner.ATTRIBUTE_NAME;
+ break beforeattributenameloop;
+ default:
+ contentIndex = Integer.MAX_VALUE;
+ charsetIndex = Integer.MAX_VALUE;
+ httpEquivIndex = Integer.MAX_VALUE;
+ contentTypeIndex = Integer.MAX_VALUE;
+ state = MetaScanner.ATTRIBUTE_NAME;
+ break beforeattributenameloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case ATTRIBUTE_NAME:
+ attributenameloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\u000C':
+ state = MetaScanner.AFTER_ATTRIBUTE_NAME;
+ continue stateloop;
+ case '/':
+ state = MetaScanner.SELF_CLOSING_START_TAG;
+ continue stateloop;
+ case '=':
+ strBufLen = 0;
+ contentTypeIndex = 0;
+ state = MetaScanner.BEFORE_ATTRIBUTE_VALUE;
+ break attributenameloop;
+ // continue stateloop;
+ case '>':
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ if (metaState == A) {
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ }
+ if (contentIndex < CONTENT.length && c == CONTENT[contentIndex]) {
+ ++contentIndex;
+ } else {
+ contentIndex = Integer.MAX_VALUE;
+ }
+ if (charsetIndex < CHARSET.length && c == CHARSET[charsetIndex]) {
+ ++charsetIndex;
+ } else {
+ charsetIndex = Integer.MAX_VALUE;
+ }
+ if (httpEquivIndex < HTTP_EQUIV.length && c == HTTP_EQUIV[httpEquivIndex]) {
+ ++httpEquivIndex;
+ } else {
+ httpEquivIndex = Integer.MAX_VALUE;
+ }
+ }
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case BEFORE_ATTRIBUTE_VALUE:
+ beforeattributevalueloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\u000C':
+ continue;
+ case '"':
+ state = MetaScanner.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
+ break beforeattributevalueloop;
+ // continue stateloop;
+ case '\'':
+ state = MetaScanner.ATTRIBUTE_VALUE_SINGLE_QUOTED;
+ continue stateloop;
+ case '>':
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ handleCharInAttributeValue(c);
+ state = MetaScanner.ATTRIBUTE_VALUE_UNQUOTED;
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
+ attributevaluedoublequotedloop: for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ c = read();
+ }
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '"':
+ handleAttributeValue();
+ state = MetaScanner.AFTER_ATTRIBUTE_VALUE_QUOTED;
+ break attributevaluedoublequotedloop;
+ // continue stateloop;
+ default:
+ handleCharInAttributeValue(c);
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case AFTER_ATTRIBUTE_VALUE_QUOTED:
+ afterattributevaluequotedloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\u000C':
+ state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
+ continue stateloop;
+ case '/':
+ state = MetaScanner.SELF_CLOSING_START_TAG;
+ break afterattributevaluequotedloop;
+ // continue stateloop;
+ case '>':
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
+ reconsume = true;
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case SELF_CLOSING_START_TAG:
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '>':
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
+ reconsume = true;
+ continue stateloop;
+ }
+ // XXX reorder point
+ case ATTRIBUTE_VALUE_UNQUOTED:
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ c = read();
+ }
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+
+ case '\u000C':
+ handleAttributeValue();
+ state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
+ continue stateloop;
+ case '>':
+ handleAttributeValue();
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ handleCharInAttributeValue(c);
+ continue;
+ }
+ }
+ // XXX reorder point
+ case AFTER_ATTRIBUTE_NAME:
+ for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\u000C':
+ continue;
+ case '/':
+ handleAttributeValue();
+ state = MetaScanner.SELF_CLOSING_START_TAG;
+ continue stateloop;
+ case '=':
+ strBufLen = 0;
+ contentTypeIndex = 0;
+ state = MetaScanner.BEFORE_ATTRIBUTE_VALUE;
+ continue stateloop;
+ case '>':
+ handleAttributeValue();
+ if (handleTag()) {
+ break stateloop;
+ }
+ state = MetaScanner.DATA;
+ continue stateloop;
+ case 'c':
+ case 'C':
+ contentIndex = 0;
+ charsetIndex = 0;
+ state = MetaScanner.ATTRIBUTE_NAME;
+ continue stateloop;
+ default:
+ contentIndex = Integer.MAX_VALUE;
+ charsetIndex = Integer.MAX_VALUE;
+ state = MetaScanner.ATTRIBUTE_NAME;
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case MARKUP_DECLARATION_OPEN:
+ markupdeclarationopenloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '-':
+ state = MetaScanner.MARKUP_DECLARATION_HYPHEN;
+ break markupdeclarationopenloop;
+ // continue stateloop;
+ default:
+ state = MetaScanner.SCAN_UNTIL_GT;
+ reconsume = true;
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case MARKUP_DECLARATION_HYPHEN:
+ markupdeclarationhyphenloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '-':
+ state = MetaScanner.COMMENT_START;
+ break markupdeclarationhyphenloop;
+ // continue stateloop;
+ default:
+ state = MetaScanner.SCAN_UNTIL_GT;
+ reconsume = true;
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT_START:
+ commentstartloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '-':
+ state = MetaScanner.COMMENT_START_DASH;
+ continue stateloop;
+ case '>':
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ state = MetaScanner.COMMENT;
+ break commentstartloop;
+ // continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT:
+ commentloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '-':
+ state = MetaScanner.COMMENT_END_DASH;
+ break commentloop;
+ // continue stateloop;
+ default:
+ continue;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT_END_DASH:
+ commentenddashloop: for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '-':
+ state = MetaScanner.COMMENT_END;
+ break commentenddashloop;
+ // continue stateloop;
+ default:
+ state = MetaScanner.COMMENT;
+ continue stateloop;
+ }
+ }
+ // FALLTHRU DON'T REORDER
+ case COMMENT_END:
+ for (;;) {
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '>':
+ state = MetaScanner.DATA;
+ continue stateloop;
+ case '-':
+ continue;
+ default:
+ state = MetaScanner.COMMENT;
+ continue stateloop;
+ }
+ }
+ // XXX reorder point
+ case COMMENT_START_DASH:
+ c = read();
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '-':
+ state = MetaScanner.COMMENT_END;
+ continue stateloop;
+ case '>':
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ state = MetaScanner.COMMENT;
+ continue stateloop;
+ }
+ // XXX reorder point
+ case ATTRIBUTE_VALUE_SINGLE_QUOTED:
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ c = read();
+ }
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '\'':
+ handleAttributeValue();
+ state = MetaScanner.AFTER_ATTRIBUTE_VALUE_QUOTED;
+ continue stateloop;
+ default:
+ handleCharInAttributeValue(c);
+ continue;
+ }
+ }
+ // XXX reorder point
+ case SCAN_UNTIL_GT:
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ c = read();
+ }
+ switch (c) {
+ case -1:
+ break stateloop;
+ case '>':
+ state = MetaScanner.DATA;
+ continue stateloop;
+ default:
+ continue;
+ }
+ }
+ }
+ }
+ stateSave = state;
+ }
+
+ private void handleCharInAttributeValue(int c) {
+ if (metaState == A) {
+ if (contentIndex == CONTENT.length || charsetIndex == CHARSET.length) {
+ addToBuffer(c);
+ } else if (httpEquivIndex == HTTP_EQUIV.length) {
+ if (contentTypeIndex < CONTENT_TYPE.length && toAsciiLowerCase(c) == CONTENT_TYPE[contentTypeIndex]) {
+ ++contentTypeIndex;
+ } else {
+ contentTypeIndex = Integer.MAX_VALUE;
+ }
+ }
+ }
+ }
+
+ @Inline private int toAsciiLowerCase(int c) {
+ if (c >= 'A' && c <= 'Z') {
+ return c + 0x20;
+ }
+ return c;
+ }
+
+ /**
+ * Adds a character to the accumulation buffer.
+ * @param c the character to add
+ */
+ private void addToBuffer(int c) {
+ if (strBufLen == strBuf.length) {
+ char[] newBuf = new char[strBuf.length + (strBuf.length << 1)];
+ System.arraycopy(strBuf, 0, newBuf, 0, strBuf.length);
+ strBuf = newBuf;
+ }
+ strBuf[strBufLen++] = (char)c;
+ }
+
+ /**
+ * Attempts to extract a charset name from the accumulation buffer.
+ * @return <code>true</code> if successful
+ * @throws SAXException
+ */
+ private void handleAttributeValue() throws SAXException {
+ if (metaState != A) {
+ return;
+ }
+ if (contentIndex == CONTENT.length && content == null) {
+ content = Portability.newStringFromBuffer(strBuf, 0, strBufLen
+ // CPPONLY: , treeBuilder
+ );
+ return;
+ }
+ if (charsetIndex == CHARSET.length && charset == null) {
+ charset = Portability.newStringFromBuffer(strBuf, 0, strBufLen
+ // CPPONLY: , treeBuilder
+ );
+ return;
+ }
+ if (httpEquivIndex == HTTP_EQUIV.length
+ && httpEquivState == HTTP_EQUIV_NOT_SEEN) {
+ httpEquivState = (contentTypeIndex == CONTENT_TYPE.length) ? HTTP_EQUIV_CONTENT_TYPE
+ : HTTP_EQUIV_OTHER;
+ return;
+ }
+ }
+
+ private boolean handleTag() throws SAXException {
+ boolean stop = handleTagInner();
+ Portability.releaseString(content);
+ content = null;
+ Portability.releaseString(charset);
+ charset = null;
+ httpEquivState = HTTP_EQUIV_NOT_SEEN;
+ return stop;
+ }
+
+ private boolean handleTagInner() throws SAXException {
+ if (charset != null && tryCharset(charset)) {
+ return true;
+ }
+ if (content != null && httpEquivState == HTTP_EQUIV_CONTENT_TYPE) {
+ String extract = TreeBuilder.extractCharsetFromContent(content
+ // CPPONLY: , treeBuilder
+ );
+ if (extract == null) {
+ return false;
+ }
+ boolean success = tryCharset(extract);
+ Portability.releaseString(extract);
+ return success;
+ }
+ return false;
+ }
+
+ /**
+ * Tries to switch to an encoding.
+ *
+ * @param encoding
+ * @return <code>true</code> if successful
+ * @throws SAXException
+ */
+ protected abstract boolean tryCharset(String encoding) throws SAXException;
+
+}