summaryrefslogtreecommitdiffstats
path: root/parser
diff options
context:
space:
mode:
Diffstat (limited to 'parser')
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java7
-rw-r--r--parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java40
2 files changed, 39 insertions, 8 deletions
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java
index 0f9940420..e772e7e70 100644
--- a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java
@@ -223,6 +223,8 @@ public final class ElementName
public static final ElementName ISINDEX = new ElementName("isindex", "isindex", TreeBuilder.OTHER);
// ]NOCPP]
+ public static final ElementName ANNOTATION_XML = new ElementName("annotation-xml", "annotation-xml", TreeBuilder.ANNOTATION_XML | SCOPING_AS_MATHML);
+
// START CODE ONLY USED FOR GENERATING CODE uncomment and run to regenerate
// /**
@@ -261,6 +263,9 @@ public final class ElementName
// for (int i = 0; i < name.length(); i++) {
// char c = name.charAt(i);
// if (c == '-') {
+// // if (!"annotation-xml".equals(name)) {
+// // throw new RuntimeException("Non-annotation-xml element name with hyphen: " + name);
+// // }
// buf[i] = '_';
// } else if (c >= '0' && c <= '9') {
// buf[i] = c;
@@ -702,7 +707,6 @@ public final class ElementName
public static final ElementName TBREAK = new ElementName("tbreak", "tbreak", TreeBuilder.OTHER);
public static final ElementName TRACK = new ElementName("track", "track", TreeBuilder.PARAM_OR_SOURCE_OR_TRACK | SPECIAL);
public static final ElementName DL = new ElementName("dl", "dl", TreeBuilder.UL_OR_OL_OR_DL | SPECIAL);
- public static final ElementName ANNOTATION_XML = new ElementName("annotation-xml", "annotation-xml", TreeBuilder.ANNOTATION_XML | SCOPING_AS_MATHML);
public static final ElementName CSYMBOL = new ElementName("csymbol", "csymbol", TreeBuilder.OTHER);
public static final ElementName CURL = new ElementName("curl", "curl", TreeBuilder.OTHER);
public static final ElementName FACTORIAL = new ElementName("factorial", "factorial", TreeBuilder.OTHER);
@@ -1076,7 +1080,6 @@ public final class ElementName
FONT_FACE_URI,
PI,
MASK,
- ANNOTATION_XML,
FORALL,
LABEL,
SYMBOL,
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java
index 435f071e9..c0ab7378e 100644
--- a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java
@@ -417,6 +417,12 @@ public class Tokenizer implements Locator {
protected boolean endTag;
/**
+ * <code>true</code> iff the current element/attribute name contains
+ * a hyphen.
+ */
+ private boolean containsHyphen;
+
+ /**
* The current tag token name. One of
* 1) null,
* 2) non-owning reference to nonInternedTagName
@@ -529,6 +535,7 @@ public class Tokenizer implements Locator {
this.charRefBuf = new char[32];
this.bmpChar = new char[1];
this.astralChar = new char[2];
+ this.containsHyphen = false;
this.tagName = null;
this.nonInternedTagName = new ElementName();
this.attributeName = null;
@@ -560,6 +567,7 @@ public class Tokenizer implements Locator {
this.charRefBuf = new char[32];
this.bmpChar = new char[1];
this.astralChar = new char[2];
+ this.containsHyphen = false;
this.tagName = null;
this.nonInternedTagName = new ElementName();
this.attributeName = null;
@@ -1125,13 +1133,26 @@ public class Tokenizer implements Locator {
}
private void strBufToElementNameString() {
- tagName = ElementName.elementNameByBuffer(strBuf, 0, strBufLen,
- interner);
- if (tagName == null) {
- nonInternedTagName.setNameForNonInterned(Portability.newLocalNameFromBuffer(strBuf, 0, strBufLen,
- interner));
- tagName = nonInternedTagName;
+ if (containsHyphen) {
+ // We've got a custom element or annotation-xml.
+ @Local String annotationName = ElementName.ANNOTATION_XML.getName();
+ if (Portability.localEqualsBuffer(annotationName, strBuf, 0, strBufLen)) {
+ tagName = ElementName.ANNOTATION_XML;
+ } else {
+ nonInternedTagName.setNameForNonInterned(Portability.newLocalNameFromBuffer(strBuf, 0, strBufLen,
+ interner));
+ tagName = nonInternedTagName;
+ }
+ } else {
+ tagName = ElementName.elementNameByBuffer(strBuf, 0, strBufLen,
+ interner);
+ if (tagName == null) {
+ nonInternedTagName.setNameForNonInterned(Portability.newLocalNameFromBuffer(strBuf, 0, strBufLen,
+ interner));
+ tagName = nonInternedTagName;
+ }
}
+ containsHyphen = false;
clearStrBufAfterUse();
}
@@ -1585,6 +1606,7 @@ public class Tokenizer implements Locator {
*/
clearStrBufBeforeUse();
appendStrBuf((char) (c + 0x20));
+ containsHyphen = false;
/* then switch to the tag name state. */
state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
/*
@@ -1605,6 +1627,7 @@ public class Tokenizer implements Locator {
*/
clearStrBufBeforeUse();
appendStrBuf(c);
+ containsHyphen = false;
/* then switch to the tag name state. */
state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
/*
@@ -1748,6 +1771,8 @@ public class Tokenizer implements Locator {
* tag name.
*/
c += 0x20;
+ } else if (c == '-') {
+ containsHyphen = true;
}
/*
* Anything else Append the current input
@@ -1755,6 +1780,7 @@ public class Tokenizer implements Locator {
* name.
*/
appendStrBuf(c);
+ containsHyphen = false;
/*
* Stay in the tag name state.
*/
@@ -6752,6 +6778,7 @@ public class Tokenizer implements Locator {
endTag = false;
shouldSuspend = false;
initDoctypeFields();
+ containsHyphen = false;
tagName = null;
attributeName = null;
if (newAttributesEachTime) {
@@ -6813,6 +6840,7 @@ public class Tokenizer implements Locator {
publicIdentifier = Portability.newStringFromString(other.publicIdentifier);
}
+ containsHyphen = other.containsHyphen;
if (other.tagName == null) {
tagName = null;
} else if (other.tagName.isInterned()) {