From e17a17766ede7e44cbc1050b35268c38af8602ae Mon Sep 17 00:00:00 2001 From: Gaming4JC Date: Sat, 18 Jan 2020 17:24:00 -0500 Subject: Bug 1366241 - Change memory layout of element name and attribute name hashes from sorted to level order BST in order to take advantage of cache during lookup. Tag UXP Issue #1344 --- .../validator/htmlparser/impl/AttributeName.java | 59 ++++++++++++++++++++-- .../nu/validator/htmlparser/impl/ElementName.java | 59 ++++++++++++++++++++-- 2 files changed, 108 insertions(+), 10 deletions(-) diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java index 667e8eb93..83ff7ec31 100644 --- a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java @@ -25,6 +25,9 @@ package nu.validator.htmlparser.impl; import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; import nu.validator.htmlparser.annotation.Inline; import nu.validator.htmlparser.annotation.Local; @@ -254,6 +257,24 @@ public final class AttributeName return arr; } + @Inline static int levelOrderBinarySearch(int[] data, int key) { + int n = data.length; + int i = 0; + + while (i < n) { + int val = data[i]; + if (val < key) { + i = 2 * i + 2; + } else if (val > key) { + i = 2 * i + 1; + } else { + return i; + } + } + + return -1; + } + /** * Returns an attribute name by buffer. * @@ -277,7 +298,9 @@ public final class AttributeName , Interner interner) { // XXX deal with offset @Unsigned int hash = AttributeName.bufToHash(buf, length); - int index = Arrays.binarySearch(AttributeName.ATTRIBUTE_HASHES, hash); + int[] hashes; + hashes = AttributeName.ATTRIBUTE_HASHES; + int index = levelOrderBinarySearch(hashes, hash); if (index < 0) { return null; } @@ -685,6 +708,25 @@ public final class AttributeName // return bufToHash(name.toCharArray(), name.length()); // } // +// private static void fillLevelOrderArray(List sorted, int depth, +// int rootIdx, AttributeName[] levelOrder) { +// if (rootIdx >= levelOrder.length) { +// return; +// } +// +// if (depth > 0) { +// fillLevelOrderArray(sorted, depth - 1, rootIdx * 2 + 1, levelOrder); +// } +// +// if (!sorted.isEmpty()) { +// levelOrder[rootIdx] = sorted.remove(0); +// } +// +// if (depth > 0) { +// fillLevelOrderArray(sorted, depth - 1, rootIdx * 2 + 2, levelOrder); +// } +// } +// // /** // * Regenerate self // * @@ -713,15 +755,22 @@ public final class AttributeName // + att.constName() + " = new AttributeName" + att.toString() // + ";"); // } +// +// LinkedList sortedNames = new LinkedList(); +// Collections.addAll(sortedNames, ATTRIBUTE_NAMES); +// AttributeName[] levelOrder = new AttributeName[ATTRIBUTE_NAMES.length]; +// int bstDepth = (int) Math.ceil(Math.log(ATTRIBUTE_NAMES.length) / Math.log(2)); +// fillLevelOrderArray(sortedNames, bstDepth, 0, levelOrder); +// // System.out.println("private final static @NoLength AttributeName[] ATTRIBUTE_NAMES = {"); -// for (int i = 0; i < ATTRIBUTE_NAMES.length; i++) { -// AttributeName att = ATTRIBUTE_NAMES[i]; +// for (int i = 0; i < levelOrder.length; i++) { +// AttributeName att = levelOrder[i]; // System.out.println(att.constName() + ","); // } // System.out.println("};"); // System.out.println("private final static int[] ATTRIBUTE_HASHES = {"); -// for (int i = 0; i < ATTRIBUTE_NAMES.length; i++) { -// AttributeName att = ATTRIBUTE_NAMES[i]; +// for (int i = 0; i < levelOrder.length; i++) { +// AttributeName att = levelOrder[i]; // System.out.println(Integer.toString(att.hash()) + ","); // } // System.out.println("};"); diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java index cc363acde..46b7a901e 100644 --- a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java @@ -25,6 +25,9 @@ package nu.validator.htmlparser.impl; import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; import nu.validator.htmlparser.annotation.Inline; import nu.validator.htmlparser.annotation.Local; @@ -115,9 +118,29 @@ public final class ElementName return (flags & ElementName.NOT_INTERNED) == 0; } + @Inline static int levelOrderBinarySearch(int[] data, int key) { + int n = data.length; + int i = 0; + + while (i < n) { + int val = data[i]; + if (val < key) { + i = 2 * i + 2; + } else if (val > key) { + i = 2 * i + 1; + } else { + return i; + } + } + + return -1; + } + @Inline static ElementName elementNameByBuffer(@NoLength char[] buf, int offset, int length, Interner interner) { @Unsigned int hash = ElementName.bufToHash(buf, length); - int index = Arrays.binarySearch(ElementName.ELEMENT_HASHES, hash); + int[] hashes; + hashes = ElementName.ELEMENT_HASHES; + int index = levelOrderBinarySearch(hashes, hash); if (index < 0) { return null; } else { @@ -406,6 +429,25 @@ public final class ElementName // return null; // } // +// private static void fillLevelOrderArray(List sorted, int depth, +// int rootIdx, ElementName[] levelOrder) { +// if (rootIdx >= levelOrder.length) { +// return; +// } +// +// if (depth > 0) { +// fillLevelOrderArray(sorted, depth - 1, rootIdx * 2 + 1, levelOrder); +// } +// +// if (!sorted.isEmpty()) { +// levelOrder[rootIdx] = sorted.remove(0); +// } +// +// if (depth > 0) { +// fillLevelOrderArray(sorted, depth - 1, rootIdx * 2 + 2, levelOrder); +// } +// } +// // /** // * Regenerate self // * @@ -436,15 +478,22 @@ public final class ElementName // + el.constName() + " = new ElementName" + el.toString() // + ";"); // } +// +// LinkedList sortedNames = new LinkedList(); +// Collections.addAll(sortedNames, ELEMENT_NAMES); +// ElementName[] levelOrder = new ElementName[ELEMENT_NAMES.length]; +// int bstDepth = (int) Math.ceil(Math.log(ELEMENT_NAMES.length) / Math.log(2)); +// fillLevelOrderArray(sortedNames, bstDepth, 0, levelOrder); +// // System.out.println("private final static @NoLength ElementName[] ELEMENT_NAMES = {"); -// for (int i = 0; i < ELEMENT_NAMES.length; i++) { -// ElementName el = ELEMENT_NAMES[i]; +// for (int i = 0; i < levelOrder.length; i++) { +// ElementName el = levelOrder[i]; // System.out.println(el.constName() + ","); // } // System.out.println("};"); // System.out.println("private final static int[] ELEMENT_HASHES = {"); -// for (int i = 0; i < ELEMENT_NAMES.length; i++) { -// ElementName el = ELEMENT_NAMES[i]; +// for (int i = 0; i < levelOrder.length; i++) { +// ElementName el = levelOrder[i]; // System.out.println(Integer.toString(el.hash()) + ","); // } // System.out.println("};"); -- cgit v1.2.3