summaryrefslogtreecommitdiffstats
path: root/dom/canvas/WebGLElementArrayCache.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'dom/canvas/WebGLElementArrayCache.cpp')
-rw-r--r--dom/canvas/WebGLElementArrayCache.cpp622
1 files changed, 622 insertions, 0 deletions
diff --git a/dom/canvas/WebGLElementArrayCache.cpp b/dom/canvas/WebGLElementArrayCache.cpp
new file mode 100644
index 000000000..a0591445a
--- /dev/null
+++ b/dom/canvas/WebGLElementArrayCache.cpp
@@ -0,0 +1,622 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "WebGLElementArrayCache.h"
+
+#include <algorithm>
+#include <cstdlib>
+#include <cstring>
+#include <limits>
+#include "mozilla/Assertions.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/MemoryReporting.h"
+
+namespace mozilla {
+
+/* WebGLElementArrayCacheTree contains most of the implementation of
+ * WebGLElementArrayCache, which performs WebGL element array buffer validation
+ * for drawElements.
+ *
+ * Attention: Here lie nontrivial data structures, bug-prone algorithms, and
+ * non-canonical tweaks! Whence the explanatory comments, and compiled unit
+ * test.
+ *
+ * *** What problem are we solving here? ***
+ *
+ * WebGL::DrawElements has to validate that the elements are in range wrt the
+ * current vertex attribs. This boils down to the problem, given an array of
+ * integers, of computing the maximum in an arbitrary sub-array. The naive
+ * algorithm has linear complexity; this has been a major performance problem,
+ * see bug 569431. In that bug, we took the approach of caching the max for the
+ * whole array, which does cover most cases (DrawElements typically consumes the
+ * whole element array buffer) but doesn't help in other use cases:
+ * - when doing "partial DrawElements" i.e. consuming only part of the element
+ * array buffer
+ * - when doing frequent "partial buffer updates" i.e. bufferSubData calls
+ * updating parts of the element array buffer
+ *
+ * *** The solution: A binary tree ***
+ *
+ * The solution implemented here is to use a binary tree as the cache data
+ * structure. Each tree node contains the max of its two children nodes. In this
+ * way, finding the maximum in any contiguous sub-array has log complexity
+ * instead of linear complexity.
+ *
+ * Simplistically, if the element array is:
+ *
+ * [1 4 3 2]
+ *
+ * then the corresponding tree is:
+ *
+ * 4
+ * _/ \_
+ * 4 3
+ * / \ / \
+ * 1 4 3 2
+ *
+ * In practice, the bottom-most levels of the tree are both the largest to store
+ * (because they have more nodes), and the least useful performance-wise
+ * (because each node in the bottom levels concerns only few entries in the
+ * elements array buffer, it is cheap to compute).
+ *
+ * For this reason, we stop the tree a few levels above, so that each tree leaf
+ * actually corresponds to more than one element array entry.
+ *
+ * The number of levels that we "drop" is |kSkippedBottomTreeLevels| and the
+ * number of element array entries that each leaf corresponds to, is
+ * |kElementsPerLeaf|. This being a binary tree, we have:
+ *
+ * kElementsPerLeaf = 2 ^ kSkippedBottomTreeLevels.
+ *
+ * *** Storage layout of the binary tree ***
+ *
+ * We take advantage of the specifics of the situation to avoid generalist tree
+ * storage and instead store the tree entries in a vector, mTreeData.
+ *
+ * TreeData is always a vector of length:
+ *
+ * 2 * (number of leaves).
+ *
+ * Its data layout is as follows: mTreeData[0] is unused, mTreeData[1] is the
+ * root node, then at offsets 2..3 is the tree level immediately below the root
+ * node, then at offsets 4..7 is the tree level below that, etc.
+ *
+ * The figure below illustrates this by writing at each tree node the offset
+ * into mTreeData at which it is stored:
+ *
+ * 1
+ * _/ \_
+ * 2 3
+ * / \ / \
+ * 4 5 6 7
+ * ...
+ *
+ * Thus, under the convention that the root level is level 0, we see that level
+ * N is stored at offsets:
+ *
+ * [ 2^n .. 2^(n+1) - 1 ]
+ *
+ * in mTreeData. Likewise, all the usual tree operations have simple
+ * mathematical expressions in terms of mTreeData offsets, see all the methods
+ * such as ParentNode, LeftChildNode, etc.
+ *
+ * *** Design constraint: Element types aren't known at buffer-update time ***
+ *
+ * Note that a key constraint that we're operating under, is that we don't know
+ * the types of the elements by the time WebGL bufferData/bufferSubData methods
+ * are called. The type of elements is only specified in the drawElements call.
+ * This means that we may potentially have to store caches for multiple element
+ * types, for the same element array buffer. Since we don't know yet how many
+ * element types we'll eventually support (extensions add more), the concern
+ * about memory usage is serious. This is addressed by kSkippedBottomTreeLevels
+ * as explained above. Of course, in the typical case where each element array
+ * buffer is only ever used with one type, this is also addressed by having
+ * WebGLElementArrayCache lazily create trees for each type only upon first use.
+ *
+ * Another consequence of this constraint is that when updating the trees, we
+ * have to update all existing trees. So if trees for types uint8_t, uint16_t
+ * and uint32_t have ever been constructed for this buffer, every subsequent
+ * update will have to update all trees even if one of the types is never used
+ * again. That's inefficient, but content should not put indices of different
+ * types in the same element array buffer anyways. Different index types can
+ * only be consumed in separate drawElements calls, so nothing particular is
+ * to be achieved by lumping them in the same buffer object.
+ */
+template<typename T>
+struct WebGLElementArrayCacheTree
+{
+ /* A too-high kSkippedBottomTreeLevels would harm the performance of small
+ * drawElements calls. A too-low kSkippedBottomTreeLevels would cause undue
+ * memory usage. The current value has been validated by some benchmarking.
+ * See bug 732660.
+ */
+ static const size_t kSkippedBottomTreeLevels = 3;
+ static const size_t kElementsPerLeaf = 1 << kSkippedBottomTreeLevels;
+ // Since kElementsPerLeaf is POT:
+ static const size_t kElementsPerLeafMask = kElementsPerLeaf - 1;
+
+private:
+ // The WebGLElementArrayCache that owns this tree:
+ WebGLElementArrayCache& mParent;
+
+ // The tree's internal data storage. Its length is 2 * (number of leaves)
+ // because of its data layout explained in the above class comment.
+ FallibleTArray<T> mTreeData;
+
+public:
+ // Constructor. Takes a reference to the WebGLElementArrayCache that is to be
+ // the parent. Does not initialize the tree. Should be followed by a call
+ // to Update() to attempt initializing the tree.
+ explicit WebGLElementArrayCacheTree(WebGLElementArrayCache& value)
+ : mParent(value)
+ {
+ }
+
+ T GlobalMaximum() const {
+ return mTreeData[1];
+ }
+
+ // returns the index of the parent node; if treeIndex=1 (the root node),
+ // the return value is 0.
+ static size_t ParentNode(size_t treeIndex) {
+ MOZ_ASSERT(treeIndex > 1);
+ return treeIndex >> 1;
+ }
+
+ static bool IsRightNode(size_t treeIndex) {
+ MOZ_ASSERT(treeIndex > 1);
+ return treeIndex & 1;
+ }
+
+ static bool IsLeftNode(size_t treeIndex) {
+ MOZ_ASSERT(treeIndex > 1);
+ return !IsRightNode(treeIndex);
+ }
+
+ static size_t SiblingNode(size_t treeIndex) {
+ MOZ_ASSERT(treeIndex > 1);
+ return treeIndex ^ 1;
+ }
+
+ static size_t LeftChildNode(size_t treeIndex) {
+ MOZ_ASSERT(treeIndex);
+ return treeIndex << 1;
+ }
+
+ static size_t RightChildNode(size_t treeIndex) {
+ MOZ_ASSERT(treeIndex);
+ return SiblingNode(LeftChildNode(treeIndex));
+ }
+
+ static size_t LeftNeighborNode(size_t treeIndex, size_t distance = 1) {
+ MOZ_ASSERT(treeIndex > 1);
+ return treeIndex - distance;
+ }
+
+ static size_t RightNeighborNode(size_t treeIndex, size_t distance = 1) {
+ MOZ_ASSERT(treeIndex > 1);
+ return treeIndex + distance;
+ }
+
+ size_t NumLeaves() const {
+ // See class comment for why we the tree storage size is 2 * numLeaves.
+ return mTreeData.Length() >> 1;
+ }
+
+ size_t LeafForElement(size_t element) const {
+ size_t leaf = element / kElementsPerLeaf;
+ MOZ_ASSERT(leaf < NumLeaves());
+ return leaf;
+ }
+
+ size_t LeafForByte(size_t byte) const {
+ return LeafForElement(byte / sizeof(T));
+ }
+
+ // Returns the index, into the tree storage, where a given leaf is stored.
+ size_t TreeIndexForLeaf(size_t leaf) const {
+ // See above class comment. The tree storage is an array of length
+ // 2 * numLeaves. The leaves are stored in its second half.
+ return leaf + NumLeaves();
+ }
+
+ static size_t LastElementUnderSameLeaf(size_t element) {
+ return element | kElementsPerLeafMask;
+ }
+
+ static size_t FirstElementUnderSameLeaf(size_t element) {
+ return element & ~kElementsPerLeafMask;
+ }
+
+ static size_t NextMultipleOfElementsPerLeaf(size_t numElements) {
+ MOZ_ASSERT(numElements >= 1);
+ return ((numElements - 1) | kElementsPerLeafMask) + 1;
+ }
+
+ bool Validate(T maxAllowed, size_t firstLeaf, size_t lastLeaf)
+ {
+ size_t firstTreeIndex = TreeIndexForLeaf(firstLeaf);
+ size_t lastTreeIndex = TreeIndexForLeaf(lastLeaf);
+
+ while (true) {
+ // Given that we tweak these values in nontrivial ways, it doesn't
+ // hurt to do this sanity check.
+ MOZ_ASSERT(firstTreeIndex <= lastTreeIndex);
+
+ // Final case where there is only one node to validate at the
+ // current tree level:
+ if (lastTreeIndex == firstTreeIndex) {
+ const T& curData = mTreeData[firstTreeIndex];
+ return curData <= maxAllowed;
+ }
+
+ // If the first node at current tree level is a right node, handle
+ // it individually and replace it with its right neighbor, which is
+ // a left node.
+ if (IsRightNode(firstTreeIndex)) {
+ const T& curData = mTreeData[firstTreeIndex];
+ if (curData > maxAllowed)
+ return false;
+
+ firstTreeIndex = RightNeighborNode(firstTreeIndex);
+ }
+
+ // If the last node at current tree level is a left node, handle it
+ // individually and replace it with its left neighbor, which is a
+ // right node.
+ if (IsLeftNode(lastTreeIndex)) {
+ const T& curData = mTreeData[lastTreeIndex];
+ if (curData > maxAllowed)
+ return false;
+
+ lastTreeIndex = LeftNeighborNode(lastTreeIndex);
+ }
+
+ /* At this point it can happen that firstTreeIndex and lastTreeIndex
+ * "crossed" eachother. That happens if firstTreeIndex was a right
+ * node and lastTreeIndex was its right neighor: In that case, both
+ * above tweaks happened and as a result, they ended up being
+ * swapped: LastTreeIndex is now the _left_ neighbor of
+ * firstTreeIndex. When that happens, there is nothing left to
+ * validate.
+ */
+ if (lastTreeIndex == LeftNeighborNode(firstTreeIndex))
+ return true;
+
+ // Walk up one level.
+ firstTreeIndex = ParentNode(firstTreeIndex);
+ lastTreeIndex = ParentNode(lastTreeIndex);
+ }
+ }
+
+ // Updates the tree from the parent's buffer contents. Fallible, as it
+ // may have to resize the tree storage.
+ bool Update(size_t firstByte, size_t lastByte);
+
+ size_t SizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const
+ {
+ return mallocSizeOf(this) +
+ mTreeData.ShallowSizeOfExcludingThis(mallocSizeOf);
+ }
+};
+
+// TreeForType: just a template helper to select the right tree object for a given
+// element type.
+template<typename T>
+struct TreeForType {};
+
+template<>
+struct TreeForType<uint8_t>
+{
+ static UniquePtr<WebGLElementArrayCacheTree<uint8_t>>&
+ Value(WebGLElementArrayCache* b) {
+ return b->mUint8Tree;
+ }
+};
+
+template<>
+struct TreeForType<uint16_t>
+{
+ static UniquePtr<WebGLElementArrayCacheTree<uint16_t>>&
+ Value(WebGLElementArrayCache* b) {
+ return b->mUint16Tree;
+ }
+};
+
+template<>
+struct TreeForType<uint32_t>
+{
+ static UniquePtr<WebGLElementArrayCacheTree<uint32_t>>&
+ Value(WebGLElementArrayCache* b) {
+ return b->mUint32Tree;
+ }
+};
+
+// Calling this method will 1) update the leaves in this interval
+// from the raw buffer data, and 2) propagate this update up the tree.
+template<typename T>
+bool
+WebGLElementArrayCacheTree<T>::Update(size_t firstByte, size_t lastByte)
+{
+ MOZ_ASSERT(firstByte <= lastByte);
+ MOZ_ASSERT(lastByte < mParent.mBytes.Length());
+
+ size_t numberOfElements = mParent.mBytes.Length() / sizeof(T);
+ size_t requiredNumLeaves = 0;
+ if (numberOfElements > 0) {
+ /* If we didn't require the number of leaves to be a power of two, then
+ * it would just be equal to
+ *
+ * ceil(numberOfElements / kElementsPerLeaf)
+ *
+ * The way we implement this (division+ceil) operation in integer
+ * arithmetic
+ * is as follows:
+ */
+ size_t numLeavesNonPOT = (numberOfElements + kElementsPerLeaf - 1) / kElementsPerLeaf;
+ // It only remains to round that up to the next power of two:
+ requiredNumLeaves = RoundUpPow2(numLeavesNonPOT);
+ }
+
+ // Step #0: If needed, resize our tree data storage.
+ if (requiredNumLeaves != NumLeaves()) {
+ // See class comment for why we the tree storage size is 2 * numLeaves.
+ if (!mTreeData.SetLength(2 * requiredNumLeaves, fallible)) {
+ mTreeData.Clear();
+ return false;
+ }
+ MOZ_ASSERT(NumLeaves() == requiredNumLeaves);
+
+ if (NumLeaves()) {
+ // When resizing, update the whole tree, not just the subset
+ // corresponding to the part of the buffer being updated.
+ memset(mTreeData.Elements(), 0, mTreeData.Length() * sizeof(T));
+ firstByte = 0;
+ lastByte = mParent.mBytes.Length() - 1;
+ }
+ }
+
+ if (NumLeaves() == 0)
+ return true;
+
+ lastByte = std::min(lastByte, NumLeaves() * kElementsPerLeaf * sizeof(T) - 1);
+ if (firstByte > lastByte)
+ return true;
+
+ size_t firstLeaf = LeafForByte(firstByte);
+ size_t lastLeaf = LeafForByte(lastByte);
+
+ MOZ_ASSERT(firstLeaf <= lastLeaf && lastLeaf < NumLeaves());
+
+ size_t firstTreeIndex = TreeIndexForLeaf(firstLeaf);
+ size_t lastTreeIndex = TreeIndexForLeaf(lastLeaf);
+
+ // Step #1: Initialize the tree leaves from plain buffer data.
+ // That is, each tree leaf must be set to the max of the |kElementsPerLeaf|
+ // corresponding buffer entries.
+
+ // Condition-less scope to prevent leaking this scope's variables into the
+ // code below:
+ {
+ // TreeIndex is the index of the tree leaf we're writing, i.e. the
+ // destination index.
+ size_t treeIndex = firstTreeIndex;
+ // srcIndex is the index in the source buffer.
+ size_t srcIndex = firstLeaf * kElementsPerLeaf;
+ while (treeIndex <= lastTreeIndex) {
+ T m = 0;
+ size_t a = srcIndex;
+ size_t srcIndexNextLeaf = std::min(a + kElementsPerLeaf, numberOfElements);
+ for (; srcIndex < srcIndexNextLeaf; srcIndex++) {
+ m = std::max(m, mParent.Element<T>(srcIndex));
+ }
+ mTreeData[treeIndex] = m;
+ treeIndex++;
+ }
+ }
+
+ // Step #2: Propagate the values up the tree. This is simply a matter of
+ // walking up the tree and setting each node to the max of its two children.
+ while (firstTreeIndex > 1) {
+ // Move up one level.
+ firstTreeIndex = ParentNode(firstTreeIndex);
+ lastTreeIndex = ParentNode(lastTreeIndex);
+
+ // Fast-exit case where only one node is updated at the current level.
+ if (firstTreeIndex == lastTreeIndex) {
+ mTreeData[firstTreeIndex] = std::max(mTreeData[LeftChildNode(firstTreeIndex)], mTreeData[RightChildNode(firstTreeIndex)]);
+ continue;
+ }
+
+ size_t child = LeftChildNode(firstTreeIndex);
+ size_t parent = firstTreeIndex;
+ while (parent <= lastTreeIndex) {
+ T a = mTreeData[child];
+ child = RightNeighborNode(child);
+ T b = mTreeData[child];
+ child = RightNeighborNode(child);
+ mTreeData[parent] = std::max(a, b);
+ parent = RightNeighborNode(parent);
+ }
+ }
+
+ return true;
+}
+
+WebGLElementArrayCache::WebGLElementArrayCache()
+{
+}
+
+WebGLElementArrayCache::~WebGLElementArrayCache()
+{
+}
+
+bool
+WebGLElementArrayCache::BufferData(const void* ptr, size_t byteLength)
+{
+ if (mBytes.Length() != byteLength) {
+ if (!mBytes.SetLength(byteLength, fallible)) {
+ mBytes.Clear();
+ return false;
+ }
+ }
+ MOZ_ASSERT(mBytes.Length() == byteLength);
+ return BufferSubData(0, ptr, byteLength);
+}
+
+bool
+WebGLElementArrayCache::BufferSubData(size_t pos, const void* ptr,
+ size_t updateByteLength)
+{
+ MOZ_ASSERT(pos + updateByteLength <= mBytes.Length());
+ if (!updateByteLength)
+ return true;
+
+ // Note, using memcpy on shared racy data is not well-defined, this
+ // will need to use safe-for-races operations when those become available.
+ // See bug 1225033.
+ if (ptr)
+ memcpy(mBytes.Elements() + pos, ptr, updateByteLength);
+ else
+ memset(mBytes.Elements() + pos, 0, updateByteLength);
+ return UpdateTrees(pos, pos + updateByteLength - 1);
+}
+
+bool
+WebGLElementArrayCache::UpdateTrees(size_t firstByte, size_t lastByte)
+{
+ bool result = true;
+ if (mUint8Tree)
+ result &= mUint8Tree->Update(firstByte, lastByte);
+ if (mUint16Tree)
+ result &= mUint16Tree->Update(firstByte, lastByte);
+ if (mUint32Tree)
+ result &= mUint32Tree->Update(firstByte, lastByte);
+ return result;
+}
+
+template<typename T>
+bool
+WebGLElementArrayCache::Validate(uint32_t maxAllowed, size_t firstElement,
+ size_t countElements)
+{
+ // If maxAllowed is >= the max T value, then there is no way that a T index
+ // could be invalid.
+ uint32_t maxTSize = std::numeric_limits<T>::max();
+ if (maxAllowed >= maxTSize)
+ return true;
+
+ T maxAllowedT(maxAllowed);
+
+ // Integer overflow must have been handled earlier, so we assert that
+ // maxAllowedT is exactly the max allowed value.
+ MOZ_ASSERT(uint32_t(maxAllowedT) == maxAllowed);
+
+ if (!mBytes.Length() || !countElements)
+ return true;
+
+ UniquePtr<WebGLElementArrayCacheTree<T>>& tree = TreeForType<T>::Value(this);
+ if (!tree) {
+ tree = MakeUnique<WebGLElementArrayCacheTree<T>>(*this);
+ if (mBytes.Length()) {
+ bool valid = tree->Update(0, mBytes.Length() - 1);
+ if (!valid) {
+ // Do not assert here. This case would happen if an allocation
+ // failed. We've already settled on fallible allocations around
+ // here.
+ tree = nullptr;
+ return false;
+ }
+ }
+ }
+
+ size_t lastElement = firstElement + countElements - 1;
+
+ // Fast-exit path when the global maximum for the whole element array buffer
+ // falls in the allowed range:
+ T globalMax = tree->GlobalMaximum();
+ if (globalMax <= maxAllowedT)
+ return true;
+
+ const T* elements = Elements<T>();
+
+ // Before calling tree->Validate, we have to validate ourselves the
+ // boundaries of the elements span, to round them to the nearest multiple of
+ // kElementsPerLeaf.
+ size_t firstElementAdjustmentEnd = std::min(lastElement,
+ tree->LastElementUnderSameLeaf(firstElement));
+ while (firstElement <= firstElementAdjustmentEnd) {
+ const T& curData = elements[firstElement];
+ if (curData > maxAllowedT)
+ return false;
+
+ firstElement++;
+ }
+ size_t lastElementAdjustmentEnd = std::max(firstElement,
+ tree->FirstElementUnderSameLeaf(lastElement));
+ while (lastElement >= lastElementAdjustmentEnd) {
+ const T& curData = elements[lastElement];
+ if (curData > maxAllowedT)
+ return false;
+
+ lastElement--;
+ }
+
+ // at this point, for many tiny validations, we're already done.
+ if (firstElement > lastElement)
+ return true;
+
+ // general case
+ return tree->Validate(maxAllowedT, tree->LeafForElement(firstElement),
+ tree->LeafForElement(lastElement));
+}
+
+bool
+WebGLElementArrayCache::Validate(GLenum type, uint32_t maxAllowed,
+ size_t firstElement, size_t countElements)
+{
+ if (type == LOCAL_GL_UNSIGNED_BYTE)
+ return Validate<uint8_t>(maxAllowed, firstElement, countElements);
+ if (type == LOCAL_GL_UNSIGNED_SHORT)
+ return Validate<uint16_t>(maxAllowed, firstElement, countElements);
+ if (type == LOCAL_GL_UNSIGNED_INT)
+ return Validate<uint32_t>(maxAllowed, firstElement, countElements);
+
+ MOZ_ASSERT(false, "Invalid type.");
+ return false;
+}
+
+template<typename T>
+static size_t
+SizeOfNullable(mozilla::MallocSizeOf mallocSizeOf, const T& obj)
+{
+ if (!obj)
+ return 0;
+ return obj->SizeOfIncludingThis(mallocSizeOf);
+}
+
+size_t
+WebGLElementArrayCache::SizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const
+{
+ return mallocSizeOf(this) +
+ mBytes.ShallowSizeOfExcludingThis(mallocSizeOf) +
+ SizeOfNullable(mallocSizeOf, mUint8Tree) +
+ SizeOfNullable(mallocSizeOf, mUint16Tree) +
+ SizeOfNullable(mallocSizeOf, mUint32Tree);
+}
+
+bool
+WebGLElementArrayCache::BeenUsedWithMultipleTypes() const
+{
+ // C++ Standard ($4.7)
+ // "If the source type is bool, the value false is converted to zero and
+ // the value true is converted to one."
+ const int num_types_used = (mUint8Tree != nullptr) +
+ (mUint16Tree != nullptr) +
+ (mUint32Tree != nullptr);
+ return num_types_used > 1;
+}
+
+} // end namespace mozilla