From 3da18fda029a038784525e840c831ffd73b25c33 Mon Sep 17 00:00:00 2001
From: "Matt A. Tobin"
Date: Wed, 15 Jan 2020 18:10:14 -0500
Subject: Bug 1562033 Adjust tokenization of U+0000 (java htmlparser)

---
 .../src/nu/validator/htmlparser/impl/Tokenizer.java | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

(limited to 'parser/html/java/htmlparser/src/nu/validator')

diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java
index 70e1df75c..75ba2e1e4 100644
--- a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java
@@ -2457,8 +2457,6 @@ public class Tokenizer implements Locator {
     }
     c = checkChar(buf, pos);
     switch (c) {
-        case '\u0000':
-            break stateloop;
         case '-':
             clearStrBufAfterOneHyphen();
             state = transition(state, Tokenizer.COMMENT_START, reconsume, pos);
@@ -2963,9 +2961,6 @@ public class Tokenizer implements Locator {
         break stateloop;
     }
     c = checkChar(buf, pos);
-    if (c == '\u0000') {
-        break stateloop;
-    }
     /*
      * Unlike the definition is the spec, this state does not
      * return a value and never requires the caller to
@@ -2991,6 +2986,7 @@ public class Tokenizer implements Locator {
     case '\u000C':
     case '<':
     case '&':
+    case '\u0000':
         emitOrAppendCharRefBuf(returnState);
         if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
             cstart = pos;
@@ -3044,9 +3040,6 @@ public class Tokenizer implements Locator {
         break stateloop;
     }
     c = checkChar(buf, pos);
-    if (c == '\u0000') {
-        break stateloop;
-    }
     /*
      * The data structure is as follows:
      *
@@ -3122,9 +3115,6 @@ public class Tokenizer implements Locator {
         break stateloop;
     }
     c = checkChar(buf, pos);
-    if (c == '\u0000') {
-        break stateloop;
-    }
     entCol++;
     /*
      * Consume the maximum number of characters possible,
-- 
cgit v1.2.3