diff options
Diffstat (limited to 'parser')
-rw-r--r-- | parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java | 7 | ||||
-rw-r--r-- | parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java | 40 |
2 files changed, 39 insertions, 8 deletions
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java
index 0f9940420..e772e7e70 100644
--- a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java
@@ -223,6 +223,8 @@ public final class ElementName
     public static final ElementName ISINDEX = new ElementName("isindex", "isindex", TreeBuilder.OTHER);
     // ]NOCPP]
 
+    public static final ElementName ANNOTATION_XML = new ElementName("annotation-xml", "annotation-xml", TreeBuilder.ANNOTATION_XML | SCOPING_AS_MATHML);
+
     // START CODE ONLY USED FOR GENERATING CODE uncomment and run to regenerate
 
 //    /**
@@ -261,6 +263,9 @@ public final class ElementName
 //        for (int i = 0; i < name.length(); i++) {
 //            char c = name.charAt(i);
 //            if (c == '-') {
+//                // if (!"annotation-xml".equals(name)) {
+//                //     throw new RuntimeException("Non-annotation-xml element name with hyphen: " + name);
+//                // }
 //                buf[i] = '_';
 //            } else if (c >= '0' && c <= '9') {
 //                buf[i] = c;
@@ -702,7 +707,6 @@ public final class ElementName
     public static final ElementName TBREAK = new ElementName("tbreak", "tbreak", TreeBuilder.OTHER);
     public static final ElementName TRACK = new ElementName("track", "track", TreeBuilder.PARAM_OR_SOURCE_OR_TRACK | SPECIAL);
     public static final ElementName DL = new ElementName("dl", "dl", TreeBuilder.UL_OR_OL_OR_DL | SPECIAL);
-    public static final ElementName ANNOTATION_XML = new ElementName("annotation-xml", "annotation-xml", TreeBuilder.ANNOTATION_XML | SCOPING_AS_MATHML);
     public static final ElementName CSYMBOL = new ElementName("csymbol", "csymbol", TreeBuilder.OTHER);
     public static final ElementName CURL = new ElementName("curl", "curl", TreeBuilder.OTHER);
     public static final ElementName FACTORIAL = new ElementName("factorial", "factorial", TreeBuilder.OTHER);
@@ -1076,7 +1080,6 @@ public final class ElementName
     FONT_FACE_URI,
     PI,
     MASK,
-    ANNOTATION_XML,
     FORALL,
     LABEL,
     SYMBOL,
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java
index 435f071e9..c0ab7378e 100644
--- a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java
@@ -417,6 +417,12 @@ public class Tokenizer implements Locator {
     protected boolean endTag;
 
     /**
+     * <code>true</code> iff the current element/attribute name contains
+     * a hyphen.
+     */
+    private boolean containsHyphen;
+
+    /**
      * The current tag token name. One of
      * 1) null,
      * 2) non-owning reference to nonInternedTagName
@@ -529,6 +535,7 @@ public class Tokenizer implements Locator {
         this.charRefBuf = new char[32];
         this.bmpChar = new char[1];
         this.astralChar = new char[2];
+        this.containsHyphen = false;
         this.tagName = null;
         this.nonInternedTagName = new ElementName();
         this.attributeName = null;
@@ -560,6 +567,7 @@ public class Tokenizer implements Locator {
         this.charRefBuf = new char[32];
         this.bmpChar = new char[1];
         this.astralChar = new char[2];
+        this.containsHyphen = false;
         this.tagName = null;
         this.nonInternedTagName = new ElementName();
         this.attributeName = null;
@@ -1125,13 +1133,26 @@ public class Tokenizer implements Locator {
     }
 
     private void strBufToElementNameString() {
-        tagName = ElementName.elementNameByBuffer(strBuf, 0, strBufLen,
-                interner);
-        if (tagName == null) {
-            nonInternedTagName.setNameForNonInterned(Portability.newLocalNameFromBuffer(strBuf, 0, strBufLen,
-                interner));
-            tagName = nonInternedTagName;
+        if (containsHyphen) {
+            // We've got a custom element or annotation-xml.
+            @Local String annotationName = ElementName.ANNOTATION_XML.getName();
+            if (Portability.localEqualsBuffer(annotationName, strBuf, 0, strBufLen)) {
+                tagName = ElementName.ANNOTATION_XML;
+            } else {
+                nonInternedTagName.setNameForNonInterned(Portability.newLocalNameFromBuffer(strBuf, 0, strBufLen,
+                        interner));
+                tagName = nonInternedTagName;
+            }
+        } else {
+            tagName = ElementName.elementNameByBuffer(strBuf, 0, strBufLen,
+                    interner);
+            if (tagName == null) {
+                nonInternedTagName.setNameForNonInterned(Portability.newLocalNameFromBuffer(strBuf, 0, strBufLen,
+                        interner));
+                tagName = nonInternedTagName;
+            }
         }
+        containsHyphen = false;
         clearStrBufAfterUse();
     }
 
@@ -1585,6 +1606,7 @@ public class Tokenizer implements Locator {
                              */
                             clearStrBufBeforeUse();
                             appendStrBuf((char) (c + 0x20));
+                            containsHyphen = false;
                             /* then switch to the tag name state. */
                             state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
                             /*
@@ -1605,6 +1627,7 @@ public class Tokenizer implements Locator {
                              */
                             clearStrBufBeforeUse();
                             appendStrBuf(c);
+                            containsHyphen = false;
                             /* then switch to the tag name state. */
                             state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
                             /*
@@ -1748,6 +1771,8 @@ public class Tokenizer implements Locator {
                                      * tag name.
                                      */
                                     c += 0x20;
+                                } else if (c == '-') {
+                                    containsHyphen = true;
                                 }
                                 /*
                                  * Anything else Append the current input
@@ -1755,6 +1780,7 @@ public class Tokenizer implements Locator {
                                  * name.
                                  */
                                 appendStrBuf(c);
+                                // Leave containsHyphen alone here: strBufToElementNameString() reads it, then resets it.
                                 /*
                                  * Stay in the tag name state.
                                  */
@@ -6752,6 +6778,7 @@ public class Tokenizer implements Locator {
         endTag = false;
         shouldSuspend = false;
         initDoctypeFields();
+        containsHyphen = false;
         tagName = null;
         attributeName = null;
         if (newAttributesEachTime) {
@@ -6813,6 +6840,7 @@ public class Tokenizer implements Locator {
             publicIdentifier = Portability.newStringFromString(other.publicIdentifier);
         }
 
+        containsHyphen = other.containsHyphen;
         if (other.tagName == null) {
             tagName = null;
         } else if (other.tagName.isInterned()) {