diff options
Diffstat (limited to 'dom/canvas/WebGLElementArrayCache.cpp')
-rw-r--r-- | dom/canvas/WebGLElementArrayCache.cpp | 622 |
1 files changed, 622 insertions, 0 deletions
diff --git a/dom/canvas/WebGLElementArrayCache.cpp b/dom/canvas/WebGLElementArrayCache.cpp new file mode 100644 index 000000000..a0591445a --- /dev/null +++ b/dom/canvas/WebGLElementArrayCache.cpp @@ -0,0 +1,622 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "WebGLElementArrayCache.h" + +#include <algorithm> +#include <cstdlib> +#include <cstring> +#include <limits> +#include "mozilla/Assertions.h" +#include "mozilla/MathAlgorithms.h" +#include "mozilla/MemoryReporting.h" + +namespace mozilla { + +/* WebGLElementArrayCacheTree contains most of the implementation of + * WebGLElementArrayCache, which performs WebGL element array buffer validation + * for drawElements. + * + * Attention: Here lie nontrivial data structures, bug-prone algorithms, and + * non-canonical tweaks! Whence the explanatory comments, and compiled unit + * test. + * + * *** What problem are we solving here? *** + * + * WebGL::DrawElements has to validate that the elements are in range wrt the + * current vertex attribs. This boils down to the problem, given an array of + * integers, of computing the maximum in an arbitrary sub-array. The naive + * algorithm has linear complexity; this has been a major performance problem, + * see bug 569431. In that bug, we took the approach of caching the max for the + * whole array, which does cover most cases (DrawElements typically consumes the + * whole element array buffer) but doesn't help in other use cases: + * - when doing "partial DrawElements" i.e. consuming only part of the element + * array buffer + * - when doing frequent "partial buffer updates" i.e. bufferSubData calls + * updating parts of the element array buffer + * + * *** The solution: A binary tree *** + * + * The solution implemented here is to use a binary tree as the cache data + * structure. Each tree node contains the max of its two children nodes. In this + * way, finding the maximum in any contiguous sub-array has log complexity + * instead of linear complexity. + * + * Simplistically, if the element array is: + * + * [1 4 3 2] + * + * then the corresponding tree is: + * + * 4 + * _/ \_ + * 4 3 + * / \ / \ + * 1 4 3 2 + * + * In practice, the bottom-most levels of the tree are both the largest to store + * (because they have more nodes), and the least useful performance-wise + * (because each node in the bottom levels concerns only few entries in the + * elements array buffer, it is cheap to compute). + * + * For this reason, we stop the tree a few levels above, so that each tree leaf + * actually corresponds to more than one element array entry. + * + * The number of levels that we "drop" is |kSkippedBottomTreeLevels| and the + * number of element array entries that each leaf corresponds to, is + * |kElementsPerLeaf|. This being a binary tree, we have: + * + * kElementsPerLeaf = 2 ^ kSkippedBottomTreeLevels. + * + * *** Storage layout of the binary tree *** + * + * We take advantage of the specifics of the situation to avoid generalist tree + * storage and instead store the tree entries in a vector, mTreeData. + * + * TreeData is always a vector of length: + * + * 2 * (number of leaves). + * + * Its data layout is as follows: mTreeData[0] is unused, mTreeData[1] is the + * root node, then at offsets 2..3 is the tree level immediately below the root + * node, then at offsets 4..7 is the tree level below that, etc. + * + * The figure below illustrates this by writing at each tree node the offset + * into mTreeData at which it is stored: + * + * 1 + * _/ \_ + * 2 3 + * / \ / \ + * 4 5 6 7 + * ... + * + * Thus, under the convention that the root level is level 0, we see that level + * N is stored at offsets: + * + * [ 2^n .. 2^(n+1) - 1 ] + * + * in mTreeData. Likewise, all the usual tree operations have simple + * mathematical expressions in terms of mTreeData offsets, see all the methods + * such as ParentNode, LeftChildNode, etc. + * + * *** Design constraint: Element types aren't known at buffer-update time *** + * + * Note that a key constraint that we're operating under, is that we don't know + * the types of the elements by the time WebGL bufferData/bufferSubData methods + * are called. The type of elements is only specified in the drawElements call. + * This means that we may potentially have to store caches for multiple element + * types, for the same element array buffer. Since we don't know yet how many + * element types we'll eventually support (extensions add more), the concern + * about memory usage is serious. This is addressed by kSkippedBottomTreeLevels + * as explained above. Of course, in the typical case where each element array + * buffer is only ever used with one type, this is also addressed by having + * WebGLElementArrayCache lazily create trees for each type only upon first use. + * + * Another consequence of this constraint is that when updating the trees, we + * have to update all existing trees. So if trees for types uint8_t, uint16_t + * and uint32_t have ever been constructed for this buffer, every subsequent + * update will have to update all trees even if one of the types is never used + * again. That's inefficient, but content should not put indices of different + * types in the same element array buffer anyways. Different index types can + * only be consumed in separate drawElements calls, so nothing particular is + * to be achieved by lumping them in the same buffer object. + */ +template<typename T> +struct WebGLElementArrayCacheTree +{ + /* A too-high kSkippedBottomTreeLevels would harm the performance of small + * drawElements calls. A too-low kSkippedBottomTreeLevels would cause undue + * memory usage. The current value has been validated by some benchmarking. + * See bug 732660. + */ + static const size_t kSkippedBottomTreeLevels = 3; + static const size_t kElementsPerLeaf = 1 << kSkippedBottomTreeLevels; + // Since kElementsPerLeaf is POT: + static const size_t kElementsPerLeafMask = kElementsPerLeaf - 1; + +private: + // The WebGLElementArrayCache that owns this tree: + WebGLElementArrayCache& mParent; + + // The tree's internal data storage. Its length is 2 * (number of leaves) + // because of its data layout explained in the above class comment. + FallibleTArray<T> mTreeData; + +public: + // Constructor. Takes a reference to the WebGLElementArrayCache that is to be + // the parent. Does not initialize the tree. Should be followed by a call + // to Update() to attempt initializing the tree. + explicit WebGLElementArrayCacheTree(WebGLElementArrayCache& value) + : mParent(value) + { + } + + T GlobalMaximum() const { + return mTreeData[1]; + } + + // returns the index of the parent node; if treeIndex=1 (the root node), + // the return value is 0. + static size_t ParentNode(size_t treeIndex) { + MOZ_ASSERT(treeIndex > 1); + return treeIndex >> 1; + } + + static bool IsRightNode(size_t treeIndex) { + MOZ_ASSERT(treeIndex > 1); + return treeIndex & 1; + } + + static bool IsLeftNode(size_t treeIndex) { + MOZ_ASSERT(treeIndex > 1); + return !IsRightNode(treeIndex); + } + + static size_t SiblingNode(size_t treeIndex) { + MOZ_ASSERT(treeIndex > 1); + return treeIndex ^ 1; + } + + static size_t LeftChildNode(size_t treeIndex) { + MOZ_ASSERT(treeIndex); + return treeIndex << 1; + } + + static size_t RightChildNode(size_t treeIndex) { + MOZ_ASSERT(treeIndex); + return SiblingNode(LeftChildNode(treeIndex)); + } + + static size_t LeftNeighborNode(size_t treeIndex, size_t distance = 1) { + MOZ_ASSERT(treeIndex > 1); + return treeIndex - distance; + } + + static size_t RightNeighborNode(size_t treeIndex, size_t distance = 1) { + MOZ_ASSERT(treeIndex > 1); + return treeIndex + distance; + } + + size_t NumLeaves() const { + // See class comment for why we the tree storage size is 2 * numLeaves. + return mTreeData.Length() >> 1; + } + + size_t LeafForElement(size_t element) const { + size_t leaf = element / kElementsPerLeaf; + MOZ_ASSERT(leaf < NumLeaves()); + return leaf; + } + + size_t LeafForByte(size_t byte) const { + return LeafForElement(byte / sizeof(T)); + } + + // Returns the index, into the tree storage, where a given leaf is stored. + size_t TreeIndexForLeaf(size_t leaf) const { + // See above class comment. The tree storage is an array of length + // 2 * numLeaves. The leaves are stored in its second half. + return leaf + NumLeaves(); + } + + static size_t LastElementUnderSameLeaf(size_t element) { + return element | kElementsPerLeafMask; + } + + static size_t FirstElementUnderSameLeaf(size_t element) { + return element & ~kElementsPerLeafMask; + } + + static size_t NextMultipleOfElementsPerLeaf(size_t numElements) { + MOZ_ASSERT(numElements >= 1); + return ((numElements - 1) | kElementsPerLeafMask) + 1; + } + + bool Validate(T maxAllowed, size_t firstLeaf, size_t lastLeaf) + { + size_t firstTreeIndex = TreeIndexForLeaf(firstLeaf); + size_t lastTreeIndex = TreeIndexForLeaf(lastLeaf); + + while (true) { + // Given that we tweak these values in nontrivial ways, it doesn't + // hurt to do this sanity check. + MOZ_ASSERT(firstTreeIndex <= lastTreeIndex); + + // Final case where there is only one node to validate at the + // current tree level: + if (lastTreeIndex == firstTreeIndex) { + const T& curData = mTreeData[firstTreeIndex]; + return curData <= maxAllowed; + } + + // If the first node at current tree level is a right node, handle + // it individually and replace it with its right neighbor, which is + // a left node. + if (IsRightNode(firstTreeIndex)) { + const T& curData = mTreeData[firstTreeIndex]; + if (curData > maxAllowed) + return false; + + firstTreeIndex = RightNeighborNode(firstTreeIndex); + } + + // If the last node at current tree level is a left node, handle it + // individually and replace it with its left neighbor, which is a + // right node. + if (IsLeftNode(lastTreeIndex)) { + const T& curData = mTreeData[lastTreeIndex]; + if (curData > maxAllowed) + return false; + + lastTreeIndex = LeftNeighborNode(lastTreeIndex); + } + + /* At this point it can happen that firstTreeIndex and lastTreeIndex + * "crossed" eachother. That happens if firstTreeIndex was a right + * node and lastTreeIndex was its right neighor: In that case, both + * above tweaks happened and as a result, they ended up being + * swapped: LastTreeIndex is now the _left_ neighbor of + * firstTreeIndex. When that happens, there is nothing left to + * validate. + */ + if (lastTreeIndex == LeftNeighborNode(firstTreeIndex)) + return true; + + // Walk up one level. + firstTreeIndex = ParentNode(firstTreeIndex); + lastTreeIndex = ParentNode(lastTreeIndex); + } + } + + // Updates the tree from the parent's buffer contents. Fallible, as it + // may have to resize the tree storage. + bool Update(size_t firstByte, size_t lastByte); + + size_t SizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const + { + return mallocSizeOf(this) + + mTreeData.ShallowSizeOfExcludingThis(mallocSizeOf); + } +}; + +// TreeForType: just a template helper to select the right tree object for a given +// element type. +template<typename T> +struct TreeForType {}; + +template<> +struct TreeForType<uint8_t> +{ + static UniquePtr<WebGLElementArrayCacheTree<uint8_t>>& + Value(WebGLElementArrayCache* b) { + return b->mUint8Tree; + } +}; + +template<> +struct TreeForType<uint16_t> +{ + static UniquePtr<WebGLElementArrayCacheTree<uint16_t>>& + Value(WebGLElementArrayCache* b) { + return b->mUint16Tree; + } +}; + +template<> +struct TreeForType<uint32_t> +{ + static UniquePtr<WebGLElementArrayCacheTree<uint32_t>>& + Value(WebGLElementArrayCache* b) { + return b->mUint32Tree; + } +}; + +// Calling this method will 1) update the leaves in this interval +// from the raw buffer data, and 2) propagate this update up the tree. +template<typename T> +bool +WebGLElementArrayCacheTree<T>::Update(size_t firstByte, size_t lastByte) +{ + MOZ_ASSERT(firstByte <= lastByte); + MOZ_ASSERT(lastByte < mParent.mBytes.Length()); + + size_t numberOfElements = mParent.mBytes.Length() / sizeof(T); + size_t requiredNumLeaves = 0; + if (numberOfElements > 0) { + /* If we didn't require the number of leaves to be a power of two, then + * it would just be equal to + * + * ceil(numberOfElements / kElementsPerLeaf) + * + * The way we implement this (division+ceil) operation in integer + * arithmetic + * is as follows: + */ + size_t numLeavesNonPOT = (numberOfElements + kElementsPerLeaf - 1) / kElementsPerLeaf; + // It only remains to round that up to the next power of two: + requiredNumLeaves = RoundUpPow2(numLeavesNonPOT); + } + + // Step #0: If needed, resize our tree data storage. + if (requiredNumLeaves != NumLeaves()) { + // See class comment for why we the tree storage size is 2 * numLeaves. + if (!mTreeData.SetLength(2 * requiredNumLeaves, fallible)) { + mTreeData.Clear(); + return false; + } + MOZ_ASSERT(NumLeaves() == requiredNumLeaves); + + if (NumLeaves()) { + // When resizing, update the whole tree, not just the subset + // corresponding to the part of the buffer being updated. + memset(mTreeData.Elements(), 0, mTreeData.Length() * sizeof(T)); + firstByte = 0; + lastByte = mParent.mBytes.Length() - 1; + } + } + + if (NumLeaves() == 0) + return true; + + lastByte = std::min(lastByte, NumLeaves() * kElementsPerLeaf * sizeof(T) - 1); + if (firstByte > lastByte) + return true; + + size_t firstLeaf = LeafForByte(firstByte); + size_t lastLeaf = LeafForByte(lastByte); + + MOZ_ASSERT(firstLeaf <= lastLeaf && lastLeaf < NumLeaves()); + + size_t firstTreeIndex = TreeIndexForLeaf(firstLeaf); + size_t lastTreeIndex = TreeIndexForLeaf(lastLeaf); + + // Step #1: Initialize the tree leaves from plain buffer data. + // That is, each tree leaf must be set to the max of the |kElementsPerLeaf| + // corresponding buffer entries. + + // Condition-less scope to prevent leaking this scope's variables into the + // code below: + { + // TreeIndex is the index of the tree leaf we're writing, i.e. the + // destination index. + size_t treeIndex = firstTreeIndex; + // srcIndex is the index in the source buffer. + size_t srcIndex = firstLeaf * kElementsPerLeaf; + while (treeIndex <= lastTreeIndex) { + T m = 0; + size_t a = srcIndex; + size_t srcIndexNextLeaf = std::min(a + kElementsPerLeaf, numberOfElements); + for (; srcIndex < srcIndexNextLeaf; srcIndex++) { + m = std::max(m, mParent.Element<T>(srcIndex)); + } + mTreeData[treeIndex] = m; + treeIndex++; + } + } + + // Step #2: Propagate the values up the tree. This is simply a matter of + // walking up the tree and setting each node to the max of its two children. + while (firstTreeIndex > 1) { + // Move up one level. + firstTreeIndex = ParentNode(firstTreeIndex); + lastTreeIndex = ParentNode(lastTreeIndex); + + // Fast-exit case where only one node is updated at the current level. + if (firstTreeIndex == lastTreeIndex) { + mTreeData[firstTreeIndex] = std::max(mTreeData[LeftChildNode(firstTreeIndex)], mTreeData[RightChildNode(firstTreeIndex)]); + continue; + } + + size_t child = LeftChildNode(firstTreeIndex); + size_t parent = firstTreeIndex; + while (parent <= lastTreeIndex) { + T a = mTreeData[child]; + child = RightNeighborNode(child); + T b = mTreeData[child]; + child = RightNeighborNode(child); + mTreeData[parent] = std::max(a, b); + parent = RightNeighborNode(parent); + } + } + + return true; +} + +WebGLElementArrayCache::WebGLElementArrayCache() +{ +} + +WebGLElementArrayCache::~WebGLElementArrayCache() +{ +} + +bool +WebGLElementArrayCache::BufferData(const void* ptr, size_t byteLength) +{ + if (mBytes.Length() != byteLength) { + if (!mBytes.SetLength(byteLength, fallible)) { + mBytes.Clear(); + return false; + } + } + MOZ_ASSERT(mBytes.Length() == byteLength); + return BufferSubData(0, ptr, byteLength); +} + +bool +WebGLElementArrayCache::BufferSubData(size_t pos, const void* ptr, + size_t updateByteLength) +{ + MOZ_ASSERT(pos + updateByteLength <= mBytes.Length()); + if (!updateByteLength) + return true; + + // Note, using memcpy on shared racy data is not well-defined, this + // will need to use safe-for-races operations when those become available. + // See bug 1225033. + if (ptr) + memcpy(mBytes.Elements() + pos, ptr, updateByteLength); + else + memset(mBytes.Elements() + pos, 0, updateByteLength); + return UpdateTrees(pos, pos + updateByteLength - 1); +} + +bool +WebGLElementArrayCache::UpdateTrees(size_t firstByte, size_t lastByte) +{ + bool result = true; + if (mUint8Tree) + result &= mUint8Tree->Update(firstByte, lastByte); + if (mUint16Tree) + result &= mUint16Tree->Update(firstByte, lastByte); + if (mUint32Tree) + result &= mUint32Tree->Update(firstByte, lastByte); + return result; +} + +template<typename T> +bool +WebGLElementArrayCache::Validate(uint32_t maxAllowed, size_t firstElement, + size_t countElements) +{ + // If maxAllowed is >= the max T value, then there is no way that a T index + // could be invalid. + uint32_t maxTSize = std::numeric_limits<T>::max(); + if (maxAllowed >= maxTSize) + return true; + + T maxAllowedT(maxAllowed); + + // Integer overflow must have been handled earlier, so we assert that + // maxAllowedT is exactly the max allowed value. + MOZ_ASSERT(uint32_t(maxAllowedT) == maxAllowed); + + if (!mBytes.Length() || !countElements) + return true; + + UniquePtr<WebGLElementArrayCacheTree<T>>& tree = TreeForType<T>::Value(this); + if (!tree) { + tree = MakeUnique<WebGLElementArrayCacheTree<T>>(*this); + if (mBytes.Length()) { + bool valid = tree->Update(0, mBytes.Length() - 1); + if (!valid) { + // Do not assert here. This case would happen if an allocation + // failed. We've already settled on fallible allocations around + // here. + tree = nullptr; + return false; + } + } + } + + size_t lastElement = firstElement + countElements - 1; + + // Fast-exit path when the global maximum for the whole element array buffer + // falls in the allowed range: + T globalMax = tree->GlobalMaximum(); + if (globalMax <= maxAllowedT) + return true; + + const T* elements = Elements<T>(); + + // Before calling tree->Validate, we have to validate ourselves the + // boundaries of the elements span, to round them to the nearest multiple of + // kElementsPerLeaf. + size_t firstElementAdjustmentEnd = std::min(lastElement, + tree->LastElementUnderSameLeaf(firstElement)); + while (firstElement <= firstElementAdjustmentEnd) { + const T& curData = elements[firstElement]; + if (curData > maxAllowedT) + return false; + + firstElement++; + } + size_t lastElementAdjustmentEnd = std::max(firstElement, + tree->FirstElementUnderSameLeaf(lastElement)); + while (lastElement >= lastElementAdjustmentEnd) { + const T& curData = elements[lastElement]; + if (curData > maxAllowedT) + return false; + + lastElement--; + } + + // at this point, for many tiny validations, we're already done. + if (firstElement > lastElement) + return true; + + // general case + return tree->Validate(maxAllowedT, tree->LeafForElement(firstElement), + tree->LeafForElement(lastElement)); +} + +bool +WebGLElementArrayCache::Validate(GLenum type, uint32_t maxAllowed, + size_t firstElement, size_t countElements) +{ + if (type == LOCAL_GL_UNSIGNED_BYTE) + return Validate<uint8_t>(maxAllowed, firstElement, countElements); + if (type == LOCAL_GL_UNSIGNED_SHORT) + return Validate<uint16_t>(maxAllowed, firstElement, countElements); + if (type == LOCAL_GL_UNSIGNED_INT) + return Validate<uint32_t>(maxAllowed, firstElement, countElements); + + MOZ_ASSERT(false, "Invalid type."); + return false; +} + +template<typename T> +static size_t +SizeOfNullable(mozilla::MallocSizeOf mallocSizeOf, const T& obj) +{ + if (!obj) + return 0; + return obj->SizeOfIncludingThis(mallocSizeOf); +} + +size_t +WebGLElementArrayCache::SizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const +{ + return mallocSizeOf(this) + + mBytes.ShallowSizeOfExcludingThis(mallocSizeOf) + + SizeOfNullable(mallocSizeOf, mUint8Tree) + + SizeOfNullable(mallocSizeOf, mUint16Tree) + + SizeOfNullable(mallocSizeOf, mUint32Tree); +} + +bool +WebGLElementArrayCache::BeenUsedWithMultipleTypes() const +{ + // C++ Standard ($4.7) + // "If the source type is bool, the value false is converted to zero and + // the value true is converted to one." + const int num_types_used = (mUint8Tree != nullptr) + + (mUint16Tree != nullptr) + + (mUint32Tree != nullptr); + return num_types_used > 1; +} + +} // end namespace mozilla |