summaryrefslogtreecommitdiffstats
path: root/xpcom/glue/PLDHashTable.h
diff options
context:
space:
mode:
Diffstat (limited to 'xpcom/glue/PLDHashTable.h')
-rw-r--r--xpcom/glue/PLDHashTable.h621
1 files changed, 621 insertions, 0 deletions
diff --git a/xpcom/glue/PLDHashTable.h b/xpcom/glue/PLDHashTable.h
new file mode 100644
index 000000000..cd1323dbe
--- /dev/null
+++ b/xpcom/glue/PLDHashTable.h
@@ -0,0 +1,621 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PLDHashTable_h
+#define PLDHashTable_h
+
+#include "mozilla/Atomics.h"
+#include "mozilla/Attributes.h" // for MOZ_ALWAYS_INLINE
+#include "mozilla/fallible.h"
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/Move.h"
+#include "mozilla/Types.h"
+#include "nscore.h"
+
+typedef uint32_t PLDHashNumber;
+
+class PLDHashTable;
+struct PLDHashTableOps;
+
+// Table entry header structure.
+//
+// In order to allow in-line allocation of key and value, we do not declare
+// either here. Instead, the API uses const void *key as a formal parameter.
+// The key need not be stored in the entry; it may be part of the value, but
+// need not be stored at all.
+//
+// Callback types are defined below and grouped into the PLDHashTableOps
+// structure, for single static initialization per hash table sub-type.
+//
+// Each hash table sub-type should make its entry type a subclass of
+// PLDHashEntryHdr. The mKeyHash member contains the result of multiplying the
+// hash code returned from the hashKey callback (see below) by kGoldenRatio,
+// then constraining the result to avoid the magic 0 and 1 values. The stored
+// mKeyHash value is table size invariant, and it is maintained automatically
+// -- users need never access it.
+struct PLDHashEntryHdr
+{
+private:
+ friend class PLDHashTable;
+
+ PLDHashNumber mKeyHash;
+};
+
+#ifdef DEBUG
+
+// This class does three kinds of checking:
+//
+// - that calls to one of |mOps| or to an enumerator do not cause re-entry into
+// the table in an unsafe way;
+//
+// - that multiple threads do not access the table in an unsafe way;
+//
+// - that a table marked as immutable is not modified.
+//
+// "Safe" here means that multiple concurrent read operations are ok, but a
+// write operation (i.e. one that can cause the entry storage to be reallocated
+// or destroyed) cannot safely run concurrently with another read or write
+// operation. This meaning of "safe" is only partial; for example, it does not
+// cover whether a single entry in the table is modified by two separate
+// threads. (Doing such checking would be much harder.)
+//
+// It does this with two variables:
+//
+// - mState, which embodies a tri-stage tagged union with the following
+// variants:
+// - Idle
+// - Read(n), where 'n' is the number of concurrent read operations
+// - Write
+//
+// - mIsWritable, which indicates if the table is mutable.
+//
+class Checker
+{
+public:
+ constexpr Checker() : mState(kIdle), mIsWritable(1) {}
+
+ Checker& operator=(Checker&& aOther) {
+ // Atomic<> doesn't have an |operator=(Atomic<>&&)|.
+ mState = uint32_t(aOther.mState);
+ mIsWritable = uint32_t(aOther.mIsWritable);
+
+ aOther.mState = kIdle;
+
+ return *this;
+ }
+
+ static bool IsIdle(uint32_t aState) { return aState == kIdle; }
+ static bool IsRead(uint32_t aState) { return kRead1 <= aState &&
+ aState <= kReadMax; }
+ static bool IsRead1(uint32_t aState) { return aState == kRead1; }
+ static bool IsWrite(uint32_t aState) { return aState == kWrite; }
+
+ bool IsIdle() const { return mState == kIdle; }
+
+ bool IsWritable() const { return !!mIsWritable; }
+
+ void SetNonWritable() { mIsWritable = 0; }
+
+ // NOTE: the obvious way to implement these functions is to (a) check
+ // |mState| is reasonable, and then (b) update |mState|. But the lack of
+ // atomicity in such an implementation can cause problems if we get unlucky
+ // thread interleaving between (a) and (b).
+ //
+ // So instead for |mState| we are careful to (a) first get |mState|'s old
+ // value and assign it a new value in single atomic operation, and only then
+ // (b) check the old value was reasonable. This ensures we don't have
+ // interleaving problems.
+ //
+ // For |mIsWritable| we don't need to be as careful because it can only in
+ // transition in one direction (from writable to non-writable).
+
+ void StartReadOp()
+ {
+ uint32_t oldState = mState++; // this is an atomic increment
+ MOZ_ASSERT(IsIdle(oldState) || IsRead(oldState));
+ MOZ_ASSERT(oldState < kReadMax); // check for overflow
+ }
+
+ void EndReadOp()
+ {
+ uint32_t oldState = mState--; // this is an atomic decrement
+ MOZ_ASSERT(IsRead(oldState));
+ }
+
+ void StartWriteOp()
+ {
+ MOZ_ASSERT(IsWritable());
+ uint32_t oldState = mState.exchange(kWrite);
+ MOZ_ASSERT(IsIdle(oldState));
+ }
+
+ void EndWriteOp()
+ {
+ // Check again that the table is writable, in case it was marked as
+ // non-writable just after the IsWritable() assertion in StartWriteOp()
+ // occurred.
+ MOZ_ASSERT(IsWritable());
+ uint32_t oldState = mState.exchange(kIdle);
+ MOZ_ASSERT(IsWrite(oldState));
+ }
+
+ void StartIteratorRemovalOp()
+ {
+ // When doing removals at the end of iteration, we go from Read1 state to
+ // Write and then back.
+ MOZ_ASSERT(IsWritable());
+ uint32_t oldState = mState.exchange(kWrite);
+ MOZ_ASSERT(IsRead1(oldState));
+ }
+
+ void EndIteratorRemovalOp()
+ {
+ // Check again that the table is writable, in case it was marked as
+ // non-writable just after the IsWritable() assertion in
+ // StartIteratorRemovalOp() occurred.
+ MOZ_ASSERT(IsWritable());
+ uint32_t oldState = mState.exchange(kRead1);
+ MOZ_ASSERT(IsWrite(oldState));
+ }
+
+ void StartDestructorOp()
+ {
+ // A destructor op is like a write, but the table doesn't need to be
+ // writable.
+ uint32_t oldState = mState.exchange(kWrite);
+ MOZ_ASSERT(IsIdle(oldState));
+ }
+
+ void EndDestructorOp()
+ {
+ uint32_t oldState = mState.exchange(kIdle);
+ MOZ_ASSERT(IsWrite(oldState));
+ }
+
+private:
+ // Things of note about the representation of |mState|.
+ // - The values between kRead1..kReadMax represent valid Read(n) values.
+ // - kIdle and kRead1 are deliberately chosen so that incrementing the -
+ // former gives the latter.
+ // - 9999 concurrent readers should be enough for anybody.
+ static const uint32_t kIdle = 0;
+ static const uint32_t kRead1 = 1;
+ static const uint32_t kReadMax = 9999;
+ static const uint32_t kWrite = 10000;
+
+ mutable mozilla::Atomic<uint32_t> mState;
+ mutable mozilla::Atomic<uint32_t> mIsWritable;
+};
+#endif
+
+// A PLDHashTable may be allocated on the stack or within another structure or
+// class. No entry storage is allocated until the first element is added. This
+// means that empty hash tables are cheap, which is good because they are
+// common.
+//
+// There used to be a long, math-heavy comment here about the merits of
+// double hashing vs. chaining; it was removed in bug 1058335. In short, double
+// hashing is more space-efficient unless the element size gets large (in which
+// case you should keep using double hashing but switch to using pointer
+// elements). Also, with double hashing, you can't safely hold an entry pointer
+// and use it after an add or remove operation, unless you sample Generation()
+// before adding or removing, and compare the sample after, dereferencing the
+// entry pointer only if Generation() has not changed.
+class PLDHashTable
+{
+private:
+ // This class maintains the invariant that every time the entry store is
+ // changed, the generation is updated.
+ class EntryStore
+ {
+ private:
+ char* mEntryStore;
+ uint32_t mGeneration;
+
+ public:
+ EntryStore() : mEntryStore(nullptr), mGeneration(0) {}
+
+ ~EntryStore()
+ {
+ free(mEntryStore);
+ mEntryStore = nullptr;
+ mGeneration++; // a little paranoid, but why not be extra safe?
+ }
+
+ char* Get() { return mEntryStore; }
+ const char* Get() const { return mEntryStore; }
+
+ void Set(char* aEntryStore)
+ {
+ mEntryStore = aEntryStore;
+ mGeneration++;
+ }
+
+ uint32_t Generation() const { return mGeneration; }
+ };
+
+ const PLDHashTableOps* const mOps; // Virtual operations; see below.
+ int16_t mHashShift; // Multiplicative hash shift.
+ const uint32_t mEntrySize; // Number of bytes in an entry.
+ uint32_t mEntryCount; // Number of entries in table.
+ uint32_t mRemovedCount; // Removed entry sentinels in table.
+ EntryStore mEntryStore; // (Lazy) entry storage and generation.
+
+#ifdef DEBUG
+ mutable Checker mChecker;
+#endif
+
+public:
+ // Table capacity limit; do not exceed. The max capacity used to be 1<<23 but
+ // that occasionally that wasn't enough. Making it much bigger than 1<<26
+ // probably isn't worthwhile -- tables that big are kind of ridiculous.
+ // Also, the growth operation will (deliberately) fail if |capacity *
+ // mEntrySize| overflows a uint32_t, and mEntrySize is always at least 8
+ // bytes.
+ static const uint32_t kMaxCapacity = ((uint32_t)1 << 26);
+
+ static const uint32_t kMinCapacity = 8;
+
+ // Making this half of kMaxCapacity ensures it'll fit. Nobody should need an
+ // initial length anywhere nearly this large, anyway.
+ static const uint32_t kMaxInitialLength = kMaxCapacity / 2;
+
+ // This gives a default initial capacity of 8.
+ static const uint32_t kDefaultInitialLength = 4;
+
+ // Initialize the table with |aOps| and |aEntrySize|. The table's initial
+ // capacity is chosen such that |aLength| elements can be inserted without
+ // rehashing; if |aLength| is a power-of-two, this capacity will be
+ // |2*length|. However, because entry storage is allocated lazily, this
+ // initial capacity won't be relevant until the first element is added; prior
+ // to that the capacity will be zero.
+ //
+ // This will crash if |aEntrySize| and/or |aLength| are too large.
+ PLDHashTable(const PLDHashTableOps* aOps, uint32_t aEntrySize,
+ uint32_t aLength = kDefaultInitialLength);
+
+ PLDHashTable(PLDHashTable&& aOther)
+ // These two fields are |const|. Initialize them here because the
+ // move assignment operator cannot modify them.
+ : mOps(aOther.mOps)
+ , mEntrySize(aOther.mEntrySize)
+ // Initialize this field because it is required for a safe call to the
+ // destructor, which the move assignment operator does.
+ , mEntryStore()
+#ifdef DEBUG
+ , mChecker()
+#endif
+ {
+ *this = mozilla::Move(aOther);
+ }
+
+ PLDHashTable& operator=(PLDHashTable&& aOther);
+
+ ~PLDHashTable();
+
+ // This should be used rarely.
+ const PLDHashTableOps* Ops() const { return mOps; }
+
+ // Size in entries (gross, not net of free and removed sentinels) for table.
+ // This can be zero if no elements have been added yet, in which case the
+ // entry storage will not have yet been allocated.
+ uint32_t Capacity() const
+ {
+ return mEntryStore.Get() ? CapacityFromHashShift() : 0;
+ }
+
+ uint32_t EntrySize() const { return mEntrySize; }
+ uint32_t EntryCount() const { return mEntryCount; }
+ uint32_t Generation() const { return mEntryStore.Generation(); }
+
+ // To search for a |key| in |table|, call:
+ //
+ // entry = table.Search(key);
+ //
+ // If |entry| is non-null, |key| was found. If |entry| is null, key was not
+ // found.
+ PLDHashEntryHdr* Search(const void* aKey);
+
+ // To add an entry identified by |key| to table, call:
+ //
+ // entry = table.Add(key, mozilla::fallible);
+ //
+ // If |entry| is null upon return, then the table is severely overloaded and
+ // memory can't be allocated for entry storage.
+ //
+ // Otherwise, |aEntry->mKeyHash| has been set so that
+ // PLDHashTable::EntryIsFree(entry) is false, and it is up to the caller to
+ // initialize the key and value parts of the entry sub-type, if they have not
+ // been set already (i.e. if entry was not already in the table, and if the
+ // optional initEntry hook was not used).
+ PLDHashEntryHdr* Add(const void* aKey, const mozilla::fallible_t&);
+
+ // This is like the other Add() function, but infallible, and so never
+ // returns null.
+ PLDHashEntryHdr* Add(const void* aKey);
+
+ // To remove an entry identified by |key| from table, call:
+ //
+ // table.Remove(key);
+ //
+ // If |key|'s entry is found, it is cleared (via table->mOps->clearEntry).
+ // The table's capacity may be reduced afterwards.
+ void Remove(const void* aKey);
+
+ // To remove an entry found by a prior search, call:
+ //
+ // table.RemoveEntry(entry);
+ //
+ // The entry, which must be present and in use, is cleared (via
+ // table->mOps->clearEntry). The table's capacity may be reduced afterwards.
+ void RemoveEntry(PLDHashEntryHdr* aEntry);
+
+ // Remove an entry already accessed via Search() or Add().
+ //
+ // NB: this is a "raw" or low-level method. It does not shrink the table if
+ // it is underloaded. Don't use it unless necessary and you know what you are
+ // doing, and if so, please explain in a comment why it is necessary instead
+ // of RemoveEntry().
+ void RawRemove(PLDHashEntryHdr* aEntry);
+
+ // This function is equivalent to
+ // ClearAndPrepareForLength(kDefaultInitialLength).
+ void Clear();
+
+ // This function clears the table's contents and frees its entry storage,
+ // leaving it in a empty state ready to be used again. Afterwards, when the
+ // first element is added the entry storage that gets allocated will have a
+ // capacity large enough to fit |aLength| elements without rehashing.
+ //
+ // It's conceptually the same as calling the destructor and then re-calling
+ // the constructor with the original |aOps| and |aEntrySize| arguments, and
+ // a new |aLength| argument.
+ void ClearAndPrepareForLength(uint32_t aLength);
+
+ // Measure the size of the table's entry storage. If the entries contain
+ // pointers to other heap blocks, you have to iterate over the table and
+ // measure those separately; hence the "Shallow" prefix.
+ size_t ShallowSizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
+
+ // Like ShallowSizeOfExcludingThis(), but includes sizeof(*this).
+ size_t ShallowSizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
+
+#ifdef DEBUG
+ // Mark a table as immutable for the remainder of its lifetime. This
+ // changes the implementation from asserting one set of invariants to
+ // asserting a different set.
+ void MarkImmutable();
+#endif
+
+ // If you use PLDHashEntryStub or a subclass of it as your entry struct, and
+ // if your entries move via memcpy and clear via memset(0), you can use these
+ // stub operations.
+ static const PLDHashTableOps* StubOps();
+
+ // The individual stub operations in StubOps().
+ static PLDHashNumber HashVoidPtrKeyStub(const void* aKey);
+ static bool MatchEntryStub(const PLDHashEntryHdr* aEntry, const void* aKey);
+ static void MoveEntryStub(PLDHashTable* aTable, const PLDHashEntryHdr* aFrom,
+ PLDHashEntryHdr* aTo);
+ static void ClearEntryStub(PLDHashTable* aTable, PLDHashEntryHdr* aEntry);
+
+ // Hash/match operations for tables holding C strings.
+ static PLDHashNumber HashStringKey(const void* aKey);
+ static bool MatchStringKey(const PLDHashEntryHdr* aEntry, const void* aKey);
+
+ // This is an iterator for PLDHashtable. Assertions will detect some, but not
+ // all, mid-iteration table modifications that might invalidate (e.g.
+ // reallocate) the entry storage.
+ //
+ // Any element can be removed during iteration using Remove(). If any
+ // elements are removed, the table may be resized once iteration ends.
+ //
+ // Example usage:
+ //
+ // for (auto iter = table.Iter(); !iter.Done(); iter.Next()) {
+ // auto entry = static_cast<FooEntry*>(iter.Get());
+ // // ... do stuff with |entry| ...
+ // // ... possibly call iter.Remove() once ...
+ // }
+ //
+ // or:
+ //
+ // for (PLDHashTable::Iterator iter(&table); !iter.Done(); iter.Next()) {
+ // auto entry = static_cast<FooEntry*>(iter.Get());
+ // // ... do stuff with |entry| ...
+ // // ... possibly call iter.Remove() once ...
+ // }
+ //
+ // The latter form is more verbose but is easier to work with when
+ // making subclasses of Iterator.
+ //
+ class Iterator
+ {
+ public:
+ explicit Iterator(PLDHashTable* aTable);
+ Iterator(Iterator&& aOther);
+ ~Iterator();
+
+ // Have we finished?
+ bool Done() const { return mNexts == mNextsLimit; }
+
+ // Get the current entry.
+ PLDHashEntryHdr* Get() const
+ {
+ MOZ_ASSERT(!Done());
+
+ PLDHashEntryHdr* entry = reinterpret_cast<PLDHashEntryHdr*>(mCurrent);
+ MOZ_ASSERT(EntryIsLive(entry));
+ return entry;
+ }
+
+ // Advance to the next entry.
+ void Next();
+
+ // Remove the current entry. Must only be called once per entry, and Get()
+ // must not be called on that entry afterwards.
+ void Remove();
+
+ protected:
+ PLDHashTable* mTable; // Main table pointer.
+
+ private:
+ char* mStart; // The first entry.
+ char* mLimit; // One past the last entry.
+ char* mCurrent; // Pointer to the current entry.
+ uint32_t mNexts; // Number of Next() calls.
+ uint32_t mNextsLimit; // Next() call limit.
+
+ bool mHaveRemoved; // Have any elements been removed?
+
+ bool IsOnNonLiveEntry() const;
+ void MoveToNextEntry();
+
+ Iterator() = delete;
+ Iterator(const Iterator&) = delete;
+ Iterator& operator=(const Iterator&) = delete;
+ Iterator& operator=(const Iterator&&) = delete;
+ };
+
+ Iterator Iter() { return Iterator(this); }
+
+ // Use this if you need to initialize an Iterator in a const method. If you
+ // use this case, you should not call Remove() on the iterator.
+ Iterator ConstIter() const
+ {
+ return Iterator(const_cast<PLDHashTable*>(this));
+ }
+
+private:
+ // Multiplicative hash uses an unsigned 32 bit integer and the golden ratio,
+ // expressed as a fixed-point 32-bit fraction.
+ static const uint32_t kHashBits = 32;
+ static const uint32_t kGoldenRatio = 0x9E3779B9U;
+
+ static uint32_t HashShift(uint32_t aEntrySize, uint32_t aLength);
+
+ static const PLDHashNumber kCollisionFlag = 1;
+
+ static bool EntryIsFree(PLDHashEntryHdr* aEntry)
+ {
+ return aEntry->mKeyHash == 0;
+ }
+ static bool EntryIsRemoved(PLDHashEntryHdr* aEntry)
+ {
+ return aEntry->mKeyHash == 1;
+ }
+ static bool EntryIsLive(PLDHashEntryHdr* aEntry)
+ {
+ return aEntry->mKeyHash >= 2;
+ }
+
+ static void MarkEntryFree(PLDHashEntryHdr* aEntry)
+ {
+ aEntry->mKeyHash = 0;
+ }
+ static void MarkEntryRemoved(PLDHashEntryHdr* aEntry)
+ {
+ aEntry->mKeyHash = 1;
+ }
+
+ PLDHashNumber Hash1(PLDHashNumber aHash0);
+ void Hash2(PLDHashNumber aHash, uint32_t& aHash2Out, uint32_t& aSizeMaskOut);
+
+ static bool MatchEntryKeyhash(PLDHashEntryHdr* aEntry, PLDHashNumber aHash);
+ PLDHashEntryHdr* AddressEntry(uint32_t aIndex);
+
+ // We store mHashShift rather than sizeLog2 to optimize the collision-free
+ // case in SearchTable.
+ uint32_t CapacityFromHashShift() const
+ {
+ return ((uint32_t)1 << (kHashBits - mHashShift));
+ }
+
+ PLDHashNumber ComputeKeyHash(const void* aKey);
+
+ enum SearchReason { ForSearchOrRemove, ForAdd };
+
+ template <SearchReason Reason>
+ PLDHashEntryHdr* NS_FASTCALL
+ SearchTable(const void* aKey, PLDHashNumber aKeyHash);
+
+ PLDHashEntryHdr* FindFreeEntry(PLDHashNumber aKeyHash);
+
+ bool ChangeTable(int aDeltaLog2);
+
+ void ShrinkIfAppropriate();
+
+ PLDHashTable(const PLDHashTable& aOther) = delete;
+ PLDHashTable& operator=(const PLDHashTable& aOther) = delete;
+};
+
+// Compute the hash code for a given key to be looked up, added, or removed.
+// A hash code may have any PLDHashNumber value.
+typedef PLDHashNumber (*PLDHashHashKey)(const void* aKey);
+
+// Compare the key identifying aEntry with the provided key parameter. Return
+// true if keys match, false otherwise.
+typedef bool (*PLDHashMatchEntry)(const PLDHashEntryHdr* aEntry,
+ const void* aKey);
+
+// Copy the data starting at aFrom to the new entry storage at aTo. Do not add
+// reference counts for any strong references in the entry, however, as this
+// is a "move" operation: the old entry storage at from will be freed without
+// any reference-decrementing callback shortly.
+typedef void (*PLDHashMoveEntry)(PLDHashTable* aTable,
+ const PLDHashEntryHdr* aFrom,
+ PLDHashEntryHdr* aTo);
+
+// Clear the entry and drop any strong references it holds. This callback is
+// invoked by Remove(), but only if the given key is found in the table.
+typedef void (*PLDHashClearEntry)(PLDHashTable* aTable,
+ PLDHashEntryHdr* aEntry);
+
+// Initialize a new entry, apart from mKeyHash. This function is called when
+// Add() finds no existing entry for the given key, and must add a new one. At
+// that point, |aEntry->mKeyHash| is not set yet, to avoid claiming the last
+// free entry in a severely overloaded table.
+typedef void (*PLDHashInitEntry)(PLDHashEntryHdr* aEntry, const void* aKey);
+
+// Finally, the "vtable" structure for PLDHashTable. The first four hooks
+// must be provided by implementations; they're called unconditionally by the
+// generic PLDHashTable.cpp code. Hooks after these may be null.
+//
+// Summary of allocation-related hook usage with C++ placement new emphasis:
+// initEntry Call placement new using default key-based ctor.
+// moveEntry Call placement new using copy ctor, run dtor on old
+// entry storage.
+// clearEntry Run dtor on entry.
+//
+// Note the reason why initEntry is optional: the default hooks (stubs) clear
+// entry storage: On successful Add(tbl, key), the returned entry pointer
+// addresses an entry struct whose mKeyHash member has been set non-zero, but
+// all other entry members are still clear (null). Add() callers can test such
+// members to see whether the entry was newly created by the Add() call that
+// just succeeded. If placement new or similar initialization is required,
+// define an |initEntry| hook. Of course, the |clearEntry| hook must zero or
+// null appropriately.
+//
+// XXX assumes 0 is null for pointer types.
+struct PLDHashTableOps
+{
+ // Mandatory hooks. All implementations must provide these.
+ PLDHashHashKey hashKey;
+ PLDHashMatchEntry matchEntry;
+ PLDHashMoveEntry moveEntry;
+ PLDHashClearEntry clearEntry;
+
+ // Optional hooks start here. If null, these are not called.
+ PLDHashInitEntry initEntry;
+};
+
+// A minimal entry is a subclass of PLDHashEntryHdr and has a void* key pointer.
+struct PLDHashEntryStub : public PLDHashEntryHdr
+{
+ const void* key;
+};
+
+#endif /* PLDHashTable_h */