summary refs log tree commit diff stats
path: root/parser/html/nsHtml5Tokenizer.h
diff options
context:
space:
mode:
Diffstat (limited to 'parser/html/nsHtml5Tokenizer.h')
-rw-r--r-- parser/html/nsHtml5Tokenizer.h 243
1 files changed, 161 insertions, 82 deletions
diff --git a/parser/html/nsHtml5Tokenizer.h b/parser/html/nsHtml5Tokenizer.h
index 101cd1618..4572cc847 100644
--- a/parser/html/nsHtml5Tokenizer.h
+++ b/parser/html/nsHtml5Tokenizer.h
@@ -51,11 +51,10 @@
class nsHtml5StreamParser;
-class nsHtml5TreeBuilder;
-class nsHtml5MetaScanner;
class nsHtml5AttributeName;
class nsHtml5ElementName;
-class nsHtml5HtmlAttributes;
+class nsHtml5TreeBuilder;
+class nsHtml5MetaScanner;
class nsHtml5UTF16Buffer;
class nsHtml5StateSnapshot;
class nsHtml5Portability;
@@ -64,6 +63,162 @@ class nsHtml5Portability;
class nsHtml5Tokenizer
{
private:
+ static const int32_t DATA_AND_RCDATA_MASK = ~1;
+
+ public:
+ static const int32_t DATA = 0;
+
+ static const int32_t RCDATA = 1;
+
+ static const int32_t SCRIPT_DATA = 2;
+
+ static const int32_t RAWTEXT = 3;
+
+ static const int32_t SCRIPT_DATA_ESCAPED = 4;
+
+ static const int32_t ATTRIBUTE_VALUE_DOUBLE_QUOTED = 5;
+
+ static const int32_t ATTRIBUTE_VALUE_SINGLE_QUOTED = 6;
+
+ static const int32_t ATTRIBUTE_VALUE_UNQUOTED = 7;
+
+ static const int32_t PLAINTEXT = 8;
+
+ static const int32_t TAG_OPEN = 9;
+
+ static const int32_t CLOSE_TAG_OPEN = 10;
+
+ static const int32_t TAG_NAME = 11;
+
+ static const int32_t BEFORE_ATTRIBUTE_NAME = 12;
+
+ static const int32_t ATTRIBUTE_NAME = 13;
+
+ static const int32_t AFTER_ATTRIBUTE_NAME = 14;
+
+ static const int32_t BEFORE_ATTRIBUTE_VALUE = 15;
+
+ static const int32_t AFTER_ATTRIBUTE_VALUE_QUOTED = 16;
+
+ static const int32_t BOGUS_COMMENT = 17;
+
+ static const int32_t MARKUP_DECLARATION_OPEN = 18;
+
+ static const int32_t DOCTYPE = 19;
+
+ static const int32_t BEFORE_DOCTYPE_NAME = 20;
+
+ static const int32_t DOCTYPE_NAME = 21;
+
+ static const int32_t AFTER_DOCTYPE_NAME = 22;
+
+ static const int32_t BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 23;
+
+ static const int32_t DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 24;
+
+ static const int32_t DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 25;
+
+ static const int32_t AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 26;
+
+ static const int32_t BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 27;
+
+ static const int32_t DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 28;
+
+ static const int32_t DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 29;
+
+ static const int32_t AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 30;
+
+ static const int32_t BOGUS_DOCTYPE = 31;
+
+ static const int32_t COMMENT_START = 32;
+
+ static const int32_t COMMENT_START_DASH = 33;
+
+ static const int32_t COMMENT = 34;
+
+ static const int32_t COMMENT_END_DASH = 35;
+
+ static const int32_t COMMENT_END = 36;
+
+ static const int32_t COMMENT_END_BANG = 37;
+
+ static const int32_t NON_DATA_END_TAG_NAME = 38;
+
+ static const int32_t MARKUP_DECLARATION_HYPHEN = 39;
+
+ static const int32_t MARKUP_DECLARATION_OCTYPE = 40;
+
+ static const int32_t DOCTYPE_UBLIC = 41;
+
+ static const int32_t DOCTYPE_YSTEM = 42;
+
+ static const int32_t AFTER_DOCTYPE_PUBLIC_KEYWORD = 43;
+
+ static const int32_t BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 44;
+
+ static const int32_t AFTER_DOCTYPE_SYSTEM_KEYWORD = 45;
+
+ static const int32_t CONSUME_CHARACTER_REFERENCE = 46;
+
+ static const int32_t CONSUME_NCR = 47;
+
+ static const int32_t CHARACTER_REFERENCE_TAIL = 48;
+
+ static const int32_t HEX_NCR_LOOP = 49;
+
+ static const int32_t DECIMAL_NRC_LOOP = 50;
+
+ static const int32_t HANDLE_NCR_VALUE = 51;
+
+ static const int32_t HANDLE_NCR_VALUE_RECONSUME = 52;
+
+ static const int32_t CHARACTER_REFERENCE_HILO_LOOKUP = 53;
+
+ static const int32_t SELF_CLOSING_START_TAG = 54;
+
+ static const int32_t CDATA_START = 55;
+
+ static const int32_t CDATA_SECTION = 56;
+
+ static const int32_t CDATA_RSQB = 57;
+
+ static const int32_t CDATA_RSQB_RSQB = 58;
+
+ static const int32_t SCRIPT_DATA_LESS_THAN_SIGN = 59;
+
+ static const int32_t SCRIPT_DATA_ESCAPE_START = 60;
+
+ static const int32_t SCRIPT_DATA_ESCAPE_START_DASH = 61;
+
+ static const int32_t SCRIPT_DATA_ESCAPED_DASH = 62;
+
+ static const int32_t SCRIPT_DATA_ESCAPED_DASH_DASH = 63;
+
+ static const int32_t BOGUS_COMMENT_HYPHEN = 64;
+
+ static const int32_t RAWTEXT_RCDATA_LESS_THAN_SIGN = 65;
+
+ static const int32_t SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 66;
+
+ static const int32_t SCRIPT_DATA_DOUBLE_ESCAPE_START = 67;
+
+ static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED = 68;
+
+ static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 69;
+
+ static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 70;
+
+ static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 71;
+
+ static const int32_t SCRIPT_DATA_DOUBLE_ESCAPE_END = 72;
+
+ static const int32_t PROCESSING_INSTRUCTION = 73;
+
+ static const int32_t PROCESSING_INSTRUCTION_QUESTION_MARK = 74;
+
+ private:
+ static const int32_t LEAD_OFFSET = (0xD800 - (0x10000 >> 10));
+
static char16_t LT_GT[];
static char16_t LT_SOLIDUS[];
static char16_t RSQB_RSQB[];
@@ -123,10 +278,13 @@ class nsHtml5Tokenizer
protected:
bool endTag;
private:
+ bool containsHyphen;
nsHtml5ElementName* tagName;
+ nsHtml5ElementName* nonInternedTagName;
protected:
nsHtml5AttributeName* attributeName;
private:
+ nsHtml5AttributeName* nonInternedAttributeName;
nsIAtom* doctypeName;
nsHtml5String publicIdentifier;
nsHtml5String systemIdentifier;
@@ -306,84 +464,5 @@ class nsHtml5Tokenizer
#include "nsHtml5TokenizerHSupplement.h"
};
-#define NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK ~1
-#define NS_HTML5TOKENIZER_DATA 0
-#define NS_HTML5TOKENIZER_RCDATA 1
-#define NS_HTML5TOKENIZER_SCRIPT_DATA 2
-#define NS_HTML5TOKENIZER_RAWTEXT 3
-#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED 4
-#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED 5
-#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED 6
-#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED 7
-#define NS_HTML5TOKENIZER_PLAINTEXT 8
-#define NS_HTML5TOKENIZER_TAG_OPEN 9
-#define NS_HTML5TOKENIZER_CLOSE_TAG_OPEN 10
-#define NS_HTML5TOKENIZER_TAG_NAME 11
-#define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME 12
-#define NS_HTML5TOKENIZER_ATTRIBUTE_NAME 13
-#define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME 14
-#define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE 15
-#define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED 16
-#define NS_HTML5TOKENIZER_BOGUS_COMMENT 17
-#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN 18
-#define NS_HTML5TOKENIZER_DOCTYPE 19
-#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME 20
-#define NS_HTML5TOKENIZER_DOCTYPE_NAME 21
-#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME 22
-#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER 23
-#define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED 24
-#define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED 25
-#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER 26
-#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER 27
-#define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED 28
-#define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED 29
-#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER 30
-#define NS_HTML5TOKENIZER_BOGUS_DOCTYPE 31
-#define NS_HTML5TOKENIZER_COMMENT_START 32
-#define NS_HTML5TOKENIZER_COMMENT_START_DASH 33
-#define NS_HTML5TOKENIZER_COMMENT 34
-#define NS_HTML5TOKENIZER_COMMENT_END_DASH 35
-#define NS_HTML5TOKENIZER_COMMENT_END 36
-#define NS_HTML5TOKENIZER_COMMENT_END_BANG 37
-#define NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME 38
-#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN 39
-#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE 40
-#define NS_HTML5TOKENIZER_DOCTYPE_UBLIC 41
-#define NS_HTML5TOKENIZER_DOCTYPE_YSTEM 42
-#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD 43
-#define NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS 44
-#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD 45
-#define NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE 46
-#define NS_HTML5TOKENIZER_CONSUME_NCR 47
-#define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL 48
-#define NS_HTML5TOKENIZER_HEX_NCR_LOOP 49
-#define NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP 50
-#define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE 51
-#define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE_RECONSUME 52
-#define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP 53
-#define NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG 54
-#define NS_HTML5TOKENIZER_CDATA_START 55
-#define NS_HTML5TOKENIZER_CDATA_SECTION 56
-#define NS_HTML5TOKENIZER_CDATA_RSQB 57
-#define NS_HTML5TOKENIZER_CDATA_RSQB_RSQB 58
-#define NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN 59
-#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START 60
-#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH 61
-#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH 62
-#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH 63
-#define NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN 64
-#define NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN 65
-#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN 66
-#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START 67
-#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED 68
-#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN 69
-#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 70
-#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 71
-#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END 72
-#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION 73
-#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK 74
-#define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10))
-
-
#endif