summaryrefslogtreecommitdiffstats
path: root/parser/htmlparser
diff options
context:
space:
mode:
authorMatt A. Tobin <email@mattatobin.com>2020-04-17 06:36:20 -0400
committerMatt A. Tobin <email@mattatobin.com>2020-04-17 06:36:20 -0400
commit846e0fa4f5aca0e75e37c421689d69bc58a2d3f6 (patch)
tree2e8fe86f85b6d9538d5e0ab70ef67e840a2b2f1a /parser/htmlparser
parent5f12940329ba496da5730863cae94cd8c0b145da (diff)
downloadUXP-846e0fa4f5aca0e75e37c421689d69bc58a2d3f6.tar
UXP-846e0fa4f5aca0e75e37c421689d69bc58a2d3f6.tar.gz
UXP-846e0fa4f5aca0e75e37c421689d69bc58a2d3f6.tar.lz
UXP-846e0fa4f5aca0e75e37c421689d69bc58a2d3f6.tar.xz
UXP-846e0fa4f5aca0e75e37c421689d69bc58a2d3f6.zip
Bug 1401097 - Simplify gHTMLElements and remove redundant conditions in nsXHTMLContentSerializer::LineBreakAfterClose()
Tag #1375
Diffstat (limited to 'parser/htmlparser')
-rw-r--r--parser/htmlparser/nsElementTable.cpp369
1 files changed, 166 insertions, 203 deletions
diff --git a/parser/htmlparser/nsElementTable.cpp b/parser/htmlparser/nsElementTable.cpp
index f8522c572..b7742e56f 100644
--- a/parser/htmlparser/nsElementTable.cpp
+++ b/parser/htmlparser/nsElementTable.cpp
@@ -6,233 +6,196 @@
#include "nsElementTable.h"
-static const int kNone= 0x0;
-
-static const int kHTMLContent = 0x0001; // HEAD, (FRAMESET | BODY)
-static const int kHeadContent = 0x0002; // Elements that *must* be in the head.
-static const int kHeadMisc = 0x0004; // Elements that *can* be in the head.
-
-static const int kSpecial = 0x0008; // A, IMG, APPLET, OBJECT, FONT, BASEFONT, BR, SCRIPT,
- // MAP, Q, SUB, SUP, SPAN, BDO, IFRAME
-
-static const int kFormControl = 0x0010; // INPUT SELECT TEXTAREA LABEL BUTTON
-static const int kPreformatted = 0x0020; // PRE
-static const int kPreExclusion = 0x0040; // IMG, OBJECT, APPLET, BIG, SMALL, SUB, SUP, FONT, BASEFONT
-static const int kFontStyle = 0x0080; // TT, I, B, U, S, STRIKE, BIG, SMALL
-static const int kPhrase = 0x0100; // EM, STRONG, DFN, CODE, SAMP, KBD, VAR, CITE, ABBR, ACRONYM
-static const int kHeading = 0x0200; // H1..H6
-static const int kBlockMisc = 0x0400; // OBJECT, SCRIPT
-static const int kBlock = 0x0800; // ADDRESS, BLOCKQUOTE, CENTER, DIV, DL, FIELDSET, FORM,
- // ISINDEX, HR, NOSCRIPT, NOFRAMES, P, TABLE
-static const int kList = 0x1000; // UL, OL, DIR, MENU
-static const int kPCDATA = 0x2000; // plain text and entities...
-static const int kSelf = 0x4000; // whatever THIS tag is...
-static const int kExtensions = 0x8000; // BGSOUND, WBR, NOBR
-static const int kTable = 0x10000;// TR,TD,THEAD,TBODY,TFOOT,CAPTION,TH
-static const int kDLChild = 0x20000;// DL, DT
-static const int kCDATA = 0x40000;// just plain text...
-
-static const int kInlineEntity = (kPCDATA|kFontStyle|kPhrase|kSpecial|kFormControl|kExtensions); // #PCDATA, %fontstyle, %phrase, %special, %formctrl
-static const int kBlockEntity = (kHeading|kList|kPreformatted|kBlock); // %heading, %list, %preformatted, %block
-static const int kFlowEntity = (kBlockEntity|kInlineEntity); // %blockentity, %inlineentity
-static const int kAllTags = 0xffffff;
-
-// Is aTest a member of aBitset?
-static bool
-TestBits(int32_t aBitset, int32_t aTest)
-{
- if (aTest) {
- int32_t result = aBitset & aTest;
- return result == aTest;
- }
- return false;
-}
-
struct HTMLElement
{
- bool IsMemberOf(int32_t aBitset) const
- {
- return TestBits(aBitset, mParentBits);
- }
-
#ifdef DEBUG
nsHTMLTag mTagID;
#endif
- int mParentBits; // defines groups that can contain this element
- bool mLeaf;
+ bool mIsBlock;
+ bool mIsContainer;
};
#ifdef DEBUG
-#define ELEM(tag, parent, leaf) { eHTMLTag_##tag, parent, leaf },
+#define ELEM(tag, block, container) { eHTMLTag_##tag, block, container },
#else
-#define ELEM(tag, parent, leaf) { parent, leaf },
+#define ELEM(tag, block, container) { block, container },
#endif
+#define ____ false // This makes the table easier to read.
+
+// Note that the mIsBlock field disagrees with
+// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements for
+// the following elements: center, details, dialog, dir, dt, figcaption,
+// listing, menu, multicol, noscript, output, summary, tfoot, video.
+//
+// mrbkap thinks that the field values were pulled from the old HTML4 DTD and
+// then got modified in mostly random ways to make the old parser's behavior
+// compatible with the web. So it might make sense to change the mIsBlock
+// values for the abovementioned tags at some point.
+//
+
static const HTMLElement gHTMLElements[] = {
- ELEM(unknown, kNone, true)
- ELEM(a, kSpecial, false)
- ELEM(abbr, kPhrase, false)
- ELEM(acronym, kPhrase, false)
- ELEM(address, kBlock, false)
- ELEM(applet, kSpecial, false)
- ELEM(area, kNone, true)
- ELEM(article, kBlock, false)
- ELEM(aside, kBlock, false)
- ELEM(audio, kSpecial, false)
- ELEM(b, kFontStyle, false)
- ELEM(base, kHeadContent, true)
- ELEM(basefont, kSpecial, true)
- ELEM(bdo, kSpecial, false)
- ELEM(bgsound, (kFlowEntity|kHeadMisc), true)
- ELEM(big, kFontStyle, false)
- ELEM(blockquote, kBlock, false)
- ELEM(body, kHTMLContent, false)
- ELEM(br, kSpecial, true)
- ELEM(button, kFormControl, false)
- ELEM(canvas, kSpecial, false)
- ELEM(caption, kNone, false)
- ELEM(center, kBlock, false)
- ELEM(cite, kPhrase, false)
- ELEM(code, kPhrase, false)
- ELEM(col, kNone, true)
- ELEM(colgroup, kNone, false)
- ELEM(content, kNone, false)
- ELEM(data, kPhrase, false)
- ELEM(datalist, kSpecial, false)
- ELEM(dd, kInlineEntity, false)
- ELEM(del, kFlowEntity, false)
- ELEM(details, kBlock, false)
- ELEM(dfn, kPhrase, false)
- ELEM(dialog, kBlock, false)
- ELEM(dir, kList, false)
- ELEM(div, kBlock, false)
- ELEM(dl, kBlock, false)
- ELEM(dt, kInlineEntity, false)
- ELEM(em, kPhrase, false)
- ELEM(embed, kSpecial, true)
- ELEM(fieldset, kBlock, false)
- ELEM(figcaption, kPhrase, false)
- ELEM(figure, kBlock, false)
- ELEM(font, kFontStyle, false)
- ELEM(footer, kBlock, false)
- ELEM(form, kBlock, false)
- ELEM(frame, kNone, true)
- ELEM(frameset, kHTMLContent, false)
- ELEM(h1, kHeading, false)
- ELEM(h2, kHeading, false)
- ELEM(h3, kHeading, false)
- ELEM(h4, kHeading, false)
- ELEM(h5, kHeading, false)
- ELEM(h6, kHeading, false)
- ELEM(head, kHTMLContent, false)
- ELEM(header, kBlock, false)
- ELEM(hgroup, kBlock, false)
- ELEM(hr, kBlock, true)
- ELEM(html, kNone, false)
- ELEM(i, kFontStyle, false)
- ELEM(iframe, kSpecial, false)
- ELEM(image, kSpecial, true)
- ELEM(img, kSpecial, true)
- ELEM(input, kFormControl, true)
- ELEM(ins, kFlowEntity, false)
- ELEM(kbd, kPhrase, false)
- ELEM(keygen, kFlowEntity, true)
- ELEM(label, kFormControl, false)
- ELEM(legend, kNone, false)
- ELEM(li, kBlockEntity, false)
- ELEM(link, kAllTags - kHeadContent, true)
- ELEM(listing, kPreformatted, false)
- ELEM(main, kBlock, false)
- ELEM(map, kSpecial, false)
- ELEM(mark, kSpecial, false)
- ELEM(marquee, kSpecial, false)
- ELEM(menu, kList, false)
- ELEM(menuitem, kFlowEntity, false)
- ELEM(meta, kHeadContent, true)
- ELEM(meter, kFormControl, false)
- ELEM(multicol, kBlock, false)
- ELEM(nav, kBlock, false)
- ELEM(nobr, kExtensions, false)
- ELEM(noembed, kFlowEntity, false)
- ELEM(noframes, kFlowEntity, false)
- ELEM(noscript, kFlowEntity|kHeadMisc, false)
- ELEM(object, kSpecial, false)
- ELEM(ol, kList, false)
- ELEM(optgroup, kNone, false)
- ELEM(option, kNone, false)
- ELEM(output, kSpecial, false)
- ELEM(p, kBlock, false)
- ELEM(param, kSpecial, true)
- ELEM(picture, kSpecial, false)
- ELEM(plaintext, kExtensions, false)
- ELEM(pre, kBlock|kPreformatted, false)
- ELEM(progress, kFormControl, false)
- ELEM(q, kSpecial, false)
- ELEM(rb, kPhrase, false)
- ELEM(rp, kPhrase, false)
- ELEM(rt, kPhrase, false)
- ELEM(rtc, kPhrase, false)
- ELEM(ruby, kPhrase, false)
- ELEM(s, kFontStyle, false)
- ELEM(samp, kPhrase, false)
- ELEM(script, (kSpecial|kHeadContent), false)
- ELEM(section, kBlock, false)
- ELEM(select, kFormControl, false)
- ELEM(small, kFontStyle, false)
- ELEM(source, kSpecial, true)
- ELEM(span, kSpecial, false)
- ELEM(strike, kFontStyle, false)
- ELEM(strong, kPhrase, false)
- ELEM(style, kAllTags - kHeadContent, false)
- ELEM(sub, kSpecial, false)
- ELEM(summary, kBlock, false)
- ELEM(sup, kSpecial, false)
- ELEM(table, kBlock, false)
- ELEM(tbody, kNone, false)
- ELEM(td, kNone, false)
- ELEM(textarea, kFormControl, false)
- ELEM(tfoot, kNone, false)
- ELEM(th, kNone, false)
- ELEM(thead, kNone, false)
- ELEM(template, kNone, false)
- ELEM(time, kPhrase, false)
- ELEM(title, kHeadContent, false)
- ELEM(tr, kNone, false)
- ELEM(track, kSpecial, true)
- ELEM(tt, kFontStyle, false)
- ELEM(u, kFontStyle, false)
- ELEM(ul, kList, false)
- ELEM(var, kPhrase, false)
- ELEM(video, kSpecial, false)
- ELEM(wbr, kExtensions, true)
- ELEM(xmp, kInlineEntity|kPreformatted, false)
- ELEM(text, kFlowEntity, true)
- ELEM(whitespace, kFlowEntity|kHeadMisc, true)
- ELEM(newline, kFlowEntity|kHeadMisc, true)
- ELEM(comment, kFlowEntity|kHeadMisc, false)
- ELEM(entity, kFlowEntity, false)
- ELEM(doctypeDecl, kFlowEntity, false)
- ELEM(markupDecl, kFlowEntity, false)
- ELEM(instruction, kFlowEntity, false)
- ELEM(userdefined, (kFlowEntity|kHeadMisc), false)
+ ELEM(unknown, ____, ____)
+ ELEM(a, ____, true)
+ ELEM(abbr, ____, true)
+ ELEM(acronym, ____, true)
+ ELEM(address, true, true)
+ ELEM(applet, ____, true)
+ ELEM(area, ____, ____)
+ ELEM(article, true, true)
+ ELEM(aside, true, true)
+ ELEM(audio, ____, true)
+ ELEM(b, ____, true)
+ ELEM(base, ____, ____)
+ ELEM(basefont, ____, ____)
+ ELEM(bdo, ____, true)
+ ELEM(bgsound, ____, ____)
+ ELEM(big, ____, true)
+ ELEM(blockquote, true, true)
+ ELEM(body, ____, true)
+ ELEM(br, ____, ____)
+ ELEM(button, ____, true)
+ ELEM(canvas, ____, true)
+ ELEM(caption, ____, true)
+ ELEM(center, true, true)
+ ELEM(cite, ____, true)
+ ELEM(code, ____, true)
+ ELEM(col, ____, ____)
+ ELEM(colgroup, ____, true)
+ ELEM(content, ____, true)
+ ELEM(data, ____, true)
+ ELEM(datalist, ____, true)
+ ELEM(dd, ____, true)
+ ELEM(del, ____, true)
+ ELEM(details, true, true)
+ ELEM(dfn, ____, true)
+ ELEM(dialog, true, true)
+ ELEM(dir, true, true)
+ ELEM(div, true, true)
+ ELEM(dl, true, true)
+ ELEM(dt, ____, true)
+ ELEM(em, ____, true)
+ ELEM(embed, ____, ____)
+ ELEM(fieldset, true, true)
+ ELEM(figcaption, ____, true)
+ ELEM(figure, true, true)
+ ELEM(font, ____, true)
+ ELEM(footer, true, true)
+ ELEM(form, true, true)
+ ELEM(frame, ____, ____)
+ ELEM(frameset, ____, true)
+ ELEM(h1, true, true)
+ ELEM(h2, true, true)
+ ELEM(h3, true, true)
+ ELEM(h4, true, true)
+ ELEM(h5, true, true)
+ ELEM(h6, true, true)
+ ELEM(head, ____, true)
+ ELEM(header, true, true)
+ ELEM(hgroup, true, true)
+ ELEM(hr, true, ____)
+ ELEM(html, ____, true)
+ ELEM(i, ____, true)
+ ELEM(iframe, ____, true)
+ ELEM(image, ____, ____)
+ ELEM(img, ____, ____)
+ ELEM(input, ____, ____)
+ ELEM(ins, ____, true)
+ ELEM(kbd, ____, true)
+ ELEM(keygen, ____, ____)
+ ELEM(label, ____, true)
+ ELEM(legend, ____, true)
+ ELEM(li, true, true)
+ ELEM(link, ____, ____)
+ ELEM(listing, true, true)
+ ELEM(main, true, true)
+ ELEM(map, ____, true)
+ ELEM(mark, ____, true)
+ ELEM(marquee, ____, true)
+ ELEM(menu, true, true)
+ ELEM(menuitem, ____, true)
+ ELEM(meta, ____, ____)
+ ELEM(meter, ____, true)
+ ELEM(multicol, true, true)
+ ELEM(nav, true, true)
+ ELEM(nobr, ____, true)
+ ELEM(noembed, ____, true)
+ ELEM(noframes, ____, true)
+ ELEM(noscript, ____, true)
+ ELEM(object, ____, true)
+ ELEM(ol, true, true)
+ ELEM(optgroup, ____, true)
+ ELEM(option, ____, true)
+ ELEM(output, ____, true)
+ ELEM(p, true, true)
+ ELEM(param, ____, ____)
+ ELEM(picture, ____, true)
+ ELEM(plaintext, ____, true)
+ ELEM(pre, true, true)
+ ELEM(progress, ____, true)
+ ELEM(q, ____, true)
+ ELEM(rb, ____, true)
+ ELEM(rp, ____, true)
+ ELEM(rt, ____, true)
+ ELEM(rtc, ____, true)
+ ELEM(ruby, ____, true)
+ ELEM(s, ____, true)
+ ELEM(samp, ____, true)
+ ELEM(script, ____, true)
+ ELEM(section, true, true)
+ ELEM(select, ____, true)
+ ELEM(small, ____, true)
+ ELEM(source, ____, ____)
+ ELEM(span, ____, true)
+ ELEM(strike, ____, true)
+ ELEM(strong, ____, true)
+ ELEM(style, ____, true)
+ ELEM(sub, ____, true)
+ ELEM(summary, true, true)
+ ELEM(sup, ____, true)
+ ELEM(table, true, true)
+ ELEM(tbody, ____, true)
+ ELEM(td, ____, true)
+ ELEM(textarea, ____, true)
+ ELEM(tfoot, ____, true)
+ ELEM(th, ____, true)
+ ELEM(thead, ____, true)
+ ELEM(template, ____, true)
+ ELEM(time, ____, true)
+ ELEM(title, ____, true)
+ ELEM(tr, ____, true)
+ ELEM(track, ____, ____)
+ ELEM(tt, ____, true)
+ ELEM(u, ____, true)
+ ELEM(ul, true, true)
+ ELEM(var, ____, true)
+ ELEM(video, ____, true)
+ ELEM(wbr, ____, ____)
+ ELEM(xmp, ____, true)
+ ELEM(text, ____, ____)
+ ELEM(whitespace, ____, ____)
+ ELEM(newline, ____, ____)
+ ELEM(comment, ____, true)
+ ELEM(entity, ____, true)
+ ELEM(doctypeDecl, ____, true)
+ ELEM(markupDecl, ____, true)
+ ELEM(instruction, ____, true)
+ ELEM(userdefined, ____, true)
};
#undef ELEM
+#undef ____
bool
nsHTMLElement::IsContainer(nsHTMLTag aId)
{
- return !gHTMLElements[aId].mLeaf;
+ return gHTMLElements[aId].mIsContainer;
}
bool
nsHTMLElement::IsBlock(nsHTMLTag aId)
{
- return gHTMLElements[aId].IsMemberOf(kBlock) ||
- gHTMLElements[aId].IsMemberOf(kBlockEntity) ||
- gHTMLElements[aId].IsMemberOf(kHeading) ||
- gHTMLElements[aId].IsMemberOf(kPreformatted)||
- gHTMLElements[aId].IsMemberOf(kList);
+ return gHTMLElements[aId].mIsBlock;
}
#ifdef DEBUG