From 56925e1f4ad0024a1883bf80e7ad3a85b8b0fd86 Mon Sep 17 00:00:00 2001 From: Gaming4JC Date: Sat, 18 Jan 2020 12:22:37 -0500 Subject: Bug 1355493 - Tweak bufToHash() and reduce the number of pre-interned elements. Tag UXP Issue #1344 --- .../validator/htmlparser/impl/AttributeName.java | 64 +++++++++++++++------- .../nu/validator/htmlparser/impl/ElementName.java | 55 +++++++++++++------ 2 files changed, 83 insertions(+), 36 deletions(-) (limited to 'parser/html/java') diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java index 9cab8c3d0..b699bcf8e 100644 --- a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2011 Mozilla Foundation + * Copyright (c) 2008-2017 Mozilla Foundation * Copyright (c) 2018-2020 Moonchild Productions * Copyright (c) 2020 Binary Outcast * @@ -26,6 +26,7 @@ package nu.validator.htmlparser.impl; import java.util.Arrays; +import nu.validator.htmlparser.annotation.Inline; import nu.validator.htmlparser.annotation.Local; import nu.validator.htmlparser.annotation.NoLength; import nu.validator.htmlparser.annotation.NsUri; @@ -306,27 +307,43 @@ public final class AttributeName } /** - * This method has to return a unique integer for each well-known + * This method has to return a unique positive integer for each well-known * lower-cased attribute name. * * @param buf * @param len * @return */ - private static @Unsigned int bufToHash(@NoLength char[] buf, int len) { - @Unsigned int hash2 = 0; - @Unsigned int hash = len; - hash <<= 5; - hash += buf[0] - 0x60; - int j = len; - for (int i = 0; i < 4 && j > 0; i++) { - j--; - hash <<= 5; - hash += buf[j] - 0x60; - hash2 <<= 6; - hash2 += buf[i] - 0x5F; + @Inline private static @Unsigned int bufToHash(@NoLength char[] buf, int length) { + @Unsigned int len = length; + @Unsigned int first = buf[0]; + first <<= 19; + @Unsigned int second = 1 << 23; + @Unsigned int third = 0; + @Unsigned int fourth = 0; + @Unsigned int fifth = 0; + @Unsigned int sixth = 0; + if (length >= 4) { + second = buf[length - 4]; + second <<= 4; + third = buf[1]; + third <<= 9; + fourth = buf[length - 2]; + fourth <<= 14; + fifth = buf[3]; + fifth <<= 24; + sixth = buf[length - 1]; + sixth <<= 11; + } else if (length == 3) { + second = buf[1]; + second <<= 4; + third = buf[2]; + third <<= 9; + } else if (length == 2) { + second = buf[1]; + second <<= 24; } - return hash ^ hash2; + return len + first + second + third + fourth + fifth + sixth; } /** @@ -691,13 +708,20 @@ public final class AttributeName // */ // public static void main(String[] args) { // Arrays.sort(ATTRIBUTE_NAMES); -// for (int i = 1; i < ATTRIBUTE_NAMES.length; i++) { -// if (ATTRIBUTE_NAMES[i].hash() == ATTRIBUTE_NAMES[i - 1].hash()) { -// System.err.println("Hash collision: " -// + ATTRIBUTE_NAMES[i].getLocal(HTML) + ", " -// + ATTRIBUTE_NAMES[i - 1].getLocal(HTML)); +// for (int i = 0; i < ATTRIBUTE_NAMES.length; i++) { +// int hash = ATTRIBUTE_NAMES[i].hash(); +// if (hash < 0) { +// System.err.println("Negative hash: " + ATTRIBUTE_NAMES[i].local[0]); // return; // } +// for (int j = i + 1; j < ATTRIBUTE_NAMES.length; j++) { +// if (hash == ATTRIBUTE_NAMES[j].hash()) { +// System.err.println( +// "Hash collision: " + ATTRIBUTE_NAMES[i].local[0] + ", " +// + ATTRIBUTE_NAMES[j].local[0]); +// return; +// } +// } // } // for (int i = 0; i < ATTRIBUTE_NAMES.length; i++) { // AttributeName att = ATTRIBUTE_NAMES[i]; diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java index 39cff44ee..293eaf638 100644 --- a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2016 Mozilla Foundation + * Copyright (c) 2008-2017 Mozilla Foundation * Copyright (c) 2018-2020 Moonchild Productions * Copyright (c) 2020 Binary Outcast * @@ -127,24 +127,40 @@ public final class ElementName } /** - * This method has to return a unique integer for each well-known + * This method has to return a unique positive integer for each well-known * lower-cased element name. * * @param buf * @param len * @return */ - private static @Unsigned int bufToHash(@NoLength char[] buf, int len) { - @Unsigned int hash = len; - hash <<= 5; - hash += buf[0] - 0x60; - int j = len; - for (int i = 0; i < 4 && j > 0; i++) { - j--; - hash <<= 5; - hash += buf[j] - 0x60; + @Inline private static @Unsigned int bufToHash(@NoLength char[] buf, int length) { + @Unsigned int len = length; + @Unsigned int first = buf[0]; + first <<= 19; + @Unsigned int second = 1 << 23; + @Unsigned int third = 0; + @Unsigned int fourth = 0; + @Unsigned int fifth = 0; + if (length >= 4) { + second = buf[length - 4]; + second <<= 4; + third = buf[length - 3]; + third <<= 9; + fourth = buf[length - 2]; + fourth <<= 14; + fifth = buf[length - 1]; + fifth <<= 24; + } else if (length == 3) { + second = buf[1]; + second <<= 4; + third = buf[2]; + third <<= 9; + } else if (length == 2) { + second = buf[1]; + second <<= 24; } - return hash; + return len + first + second + third + fourth + fifth; } private ElementName(@Local String name, @Local String camelCaseName, @@ -386,12 +402,19 @@ public final class ElementName // */ // public static void main(String[] args) { // Arrays.sort(ELEMENT_NAMES); -// for (int i = 1; i < ELEMENT_NAMES.length; i++) { -// if (ELEMENT_NAMES[i].hash() == ELEMENT_NAMES[i - 1].hash()) { -// System.err.println("Hash collision: " + ELEMENT_NAMES[i].name -// + ", " + ELEMENT_NAMES[i - 1].name); +// for (int i = 0; i < ELEMENT_NAMES.length; i++) { +// int hash = ELEMENT_NAMES[i].hash(); +// if (hash < 0) { +// System.err.println("Negative hash: " + ELEMENT_NAMES[i].name); // return; // } +// for (int j = i + 1; j < ELEMENT_NAMES.length; j++) { +// if (hash == ELEMENT_NAMES[j].hash()) { +// System.err.println("Hash collision: " + ELEMENT_NAMES[i].name +// + ", " + ELEMENT_NAMES[j].name); +// return; +// } +// } // } // for (int i = 0; i < ELEMENT_NAMES.length; i++) { // ElementName el = ELEMENT_NAMES[i]; -- cgit v1.2.3