diff options
Diffstat (limited to 'xpcom/glue/PLDHashTable.h')
-rw-r--r-- | xpcom/glue/PLDHashTable.h | 621 |
1 files changed, 621 insertions, 0 deletions
diff --git a/xpcom/glue/PLDHashTable.h b/xpcom/glue/PLDHashTable.h new file mode 100644 index 000000000..cd1323dbe --- /dev/null +++ b/xpcom/glue/PLDHashTable.h @@ -0,0 +1,621 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef PLDHashTable_h +#define PLDHashTable_h + +#include "mozilla/Atomics.h" +#include "mozilla/Attributes.h" // for MOZ_ALWAYS_INLINE +#include "mozilla/fallible.h" +#include "mozilla/MemoryReporting.h" +#include "mozilla/Move.h" +#include "mozilla/Types.h" +#include "nscore.h" + +typedef uint32_t PLDHashNumber; + +class PLDHashTable; +struct PLDHashTableOps; + +// Table entry header structure. +// +// In order to allow in-line allocation of key and value, we do not declare +// either here. Instead, the API uses const void *key as a formal parameter. +// The key need not be stored in the entry; it may be part of the value, but +// need not be stored at all. +// +// Callback types are defined below and grouped into the PLDHashTableOps +// structure, for single static initialization per hash table sub-type. +// +// Each hash table sub-type should make its entry type a subclass of +// PLDHashEntryHdr. The mKeyHash member contains the result of multiplying the +// hash code returned from the hashKey callback (see below) by kGoldenRatio, +// then constraining the result to avoid the magic 0 and 1 values. The stored +// mKeyHash value is table size invariant, and it is maintained automatically +// -- users need never access it. +struct PLDHashEntryHdr +{ +private: + friend class PLDHashTable; + + PLDHashNumber mKeyHash; +}; + +#ifdef DEBUG + +// This class does three kinds of checking: +// +// - that calls to one of |mOps| or to an enumerator do not cause re-entry into +// the table in an unsafe way; +// +// - that multiple threads do not access the table in an unsafe way; +// +// - that a table marked as immutable is not modified. +// +// "Safe" here means that multiple concurrent read operations are ok, but a +// write operation (i.e. one that can cause the entry storage to be reallocated +// or destroyed) cannot safely run concurrently with another read or write +// operation. This meaning of "safe" is only partial; for example, it does not +// cover whether a single entry in the table is modified by two separate +// threads. (Doing such checking would be much harder.) +// +// It does this with two variables: +// +// - mState, which embodies a tri-stage tagged union with the following +// variants: +// - Idle +// - Read(n), where 'n' is the number of concurrent read operations +// - Write +// +// - mIsWritable, which indicates if the table is mutable. +// +class Checker +{ +public: + constexpr Checker() : mState(kIdle), mIsWritable(1) {} + + Checker& operator=(Checker&& aOther) { + // Atomic<> doesn't have an |operator=(Atomic<>&&)|. + mState = uint32_t(aOther.mState); + mIsWritable = uint32_t(aOther.mIsWritable); + + aOther.mState = kIdle; + + return *this; + } + + static bool IsIdle(uint32_t aState) { return aState == kIdle; } + static bool IsRead(uint32_t aState) { return kRead1 <= aState && + aState <= kReadMax; } + static bool IsRead1(uint32_t aState) { return aState == kRead1; } + static bool IsWrite(uint32_t aState) { return aState == kWrite; } + + bool IsIdle() const { return mState == kIdle; } + + bool IsWritable() const { return !!mIsWritable; } + + void SetNonWritable() { mIsWritable = 0; } + + // NOTE: the obvious way to implement these functions is to (a) check + // |mState| is reasonable, and then (b) update |mState|. But the lack of + // atomicity in such an implementation can cause problems if we get unlucky + // thread interleaving between (a) and (b). + // + // So instead for |mState| we are careful to (a) first get |mState|'s old + // value and assign it a new value in single atomic operation, and only then + // (b) check the old value was reasonable. This ensures we don't have + // interleaving problems. + // + // For |mIsWritable| we don't need to be as careful because it can only in + // transition in one direction (from writable to non-writable). + + void StartReadOp() + { + uint32_t oldState = mState++; // this is an atomic increment + MOZ_ASSERT(IsIdle(oldState) || IsRead(oldState)); + MOZ_ASSERT(oldState < kReadMax); // check for overflow + } + + void EndReadOp() + { + uint32_t oldState = mState--; // this is an atomic decrement + MOZ_ASSERT(IsRead(oldState)); + } + + void StartWriteOp() + { + MOZ_ASSERT(IsWritable()); + uint32_t oldState = mState.exchange(kWrite); + MOZ_ASSERT(IsIdle(oldState)); + } + + void EndWriteOp() + { + // Check again that the table is writable, in case it was marked as + // non-writable just after the IsWritable() assertion in StartWriteOp() + // occurred. + MOZ_ASSERT(IsWritable()); + uint32_t oldState = mState.exchange(kIdle); + MOZ_ASSERT(IsWrite(oldState)); + } + + void StartIteratorRemovalOp() + { + // When doing removals at the end of iteration, we go from Read1 state to + // Write and then back. + MOZ_ASSERT(IsWritable()); + uint32_t oldState = mState.exchange(kWrite); + MOZ_ASSERT(IsRead1(oldState)); + } + + void EndIteratorRemovalOp() + { + // Check again that the table is writable, in case it was marked as + // non-writable just after the IsWritable() assertion in + // StartIteratorRemovalOp() occurred. + MOZ_ASSERT(IsWritable()); + uint32_t oldState = mState.exchange(kRead1); + MOZ_ASSERT(IsWrite(oldState)); + } + + void StartDestructorOp() + { + // A destructor op is like a write, but the table doesn't need to be + // writable. + uint32_t oldState = mState.exchange(kWrite); + MOZ_ASSERT(IsIdle(oldState)); + } + + void EndDestructorOp() + { + uint32_t oldState = mState.exchange(kIdle); + MOZ_ASSERT(IsWrite(oldState)); + } + +private: + // Things of note about the representation of |mState|. + // - The values between kRead1..kReadMax represent valid Read(n) values. + // - kIdle and kRead1 are deliberately chosen so that incrementing the - + // former gives the latter. + // - 9999 concurrent readers should be enough for anybody. + static const uint32_t kIdle = 0; + static const uint32_t kRead1 = 1; + static const uint32_t kReadMax = 9999; + static const uint32_t kWrite = 10000; + + mutable mozilla::Atomic<uint32_t> mState; + mutable mozilla::Atomic<uint32_t> mIsWritable; +}; +#endif + +// A PLDHashTable may be allocated on the stack or within another structure or +// class. No entry storage is allocated until the first element is added. This +// means that empty hash tables are cheap, which is good because they are +// common. +// +// There used to be a long, math-heavy comment here about the merits of +// double hashing vs. chaining; it was removed in bug 1058335. In short, double +// hashing is more space-efficient unless the element size gets large (in which +// case you should keep using double hashing but switch to using pointer +// elements). Also, with double hashing, you can't safely hold an entry pointer +// and use it after an add or remove operation, unless you sample Generation() +// before adding or removing, and compare the sample after, dereferencing the +// entry pointer only if Generation() has not changed. +class PLDHashTable +{ +private: + // This class maintains the invariant that every time the entry store is + // changed, the generation is updated. + class EntryStore + { + private: + char* mEntryStore; + uint32_t mGeneration; + + public: + EntryStore() : mEntryStore(nullptr), mGeneration(0) {} + + ~EntryStore() + { + free(mEntryStore); + mEntryStore = nullptr; + mGeneration++; // a little paranoid, but why not be extra safe? + } + + char* Get() { return mEntryStore; } + const char* Get() const { return mEntryStore; } + + void Set(char* aEntryStore) + { + mEntryStore = aEntryStore; + mGeneration++; + } + + uint32_t Generation() const { return mGeneration; } + }; + + const PLDHashTableOps* const mOps; // Virtual operations; see below. + int16_t mHashShift; // Multiplicative hash shift. + const uint32_t mEntrySize; // Number of bytes in an entry. + uint32_t mEntryCount; // Number of entries in table. + uint32_t mRemovedCount; // Removed entry sentinels in table. + EntryStore mEntryStore; // (Lazy) entry storage and generation. + +#ifdef DEBUG + mutable Checker mChecker; +#endif + +public: + // Table capacity limit; do not exceed. The max capacity used to be 1<<23 but + // that occasionally that wasn't enough. Making it much bigger than 1<<26 + // probably isn't worthwhile -- tables that big are kind of ridiculous. + // Also, the growth operation will (deliberately) fail if |capacity * + // mEntrySize| overflows a uint32_t, and mEntrySize is always at least 8 + // bytes. + static const uint32_t kMaxCapacity = ((uint32_t)1 << 26); + + static const uint32_t kMinCapacity = 8; + + // Making this half of kMaxCapacity ensures it'll fit. Nobody should need an + // initial length anywhere nearly this large, anyway. + static const uint32_t kMaxInitialLength = kMaxCapacity / 2; + + // This gives a default initial capacity of 8. + static const uint32_t kDefaultInitialLength = 4; + + // Initialize the table with |aOps| and |aEntrySize|. The table's initial + // capacity is chosen such that |aLength| elements can be inserted without + // rehashing; if |aLength| is a power-of-two, this capacity will be + // |2*length|. However, because entry storage is allocated lazily, this + // initial capacity won't be relevant until the first element is added; prior + // to that the capacity will be zero. + // + // This will crash if |aEntrySize| and/or |aLength| are too large. + PLDHashTable(const PLDHashTableOps* aOps, uint32_t aEntrySize, + uint32_t aLength = kDefaultInitialLength); + + PLDHashTable(PLDHashTable&& aOther) + // These two fields are |const|. Initialize them here because the + // move assignment operator cannot modify them. + : mOps(aOther.mOps) + , mEntrySize(aOther.mEntrySize) + // Initialize this field because it is required for a safe call to the + // destructor, which the move assignment operator does. + , mEntryStore() +#ifdef DEBUG + , mChecker() +#endif + { + *this = mozilla::Move(aOther); + } + + PLDHashTable& operator=(PLDHashTable&& aOther); + + ~PLDHashTable(); + + // This should be used rarely. + const PLDHashTableOps* Ops() const { return mOps; } + + // Size in entries (gross, not net of free and removed sentinels) for table. + // This can be zero if no elements have been added yet, in which case the + // entry storage will not have yet been allocated. + uint32_t Capacity() const + { + return mEntryStore.Get() ? CapacityFromHashShift() : 0; + } + + uint32_t EntrySize() const { return mEntrySize; } + uint32_t EntryCount() const { return mEntryCount; } + uint32_t Generation() const { return mEntryStore.Generation(); } + + // To search for a |key| in |table|, call: + // + // entry = table.Search(key); + // + // If |entry| is non-null, |key| was found. If |entry| is null, key was not + // found. + PLDHashEntryHdr* Search(const void* aKey); + + // To add an entry identified by |key| to table, call: + // + // entry = table.Add(key, mozilla::fallible); + // + // If |entry| is null upon return, then the table is severely overloaded and + // memory can't be allocated for entry storage. + // + // Otherwise, |aEntry->mKeyHash| has been set so that + // PLDHashTable::EntryIsFree(entry) is false, and it is up to the caller to + // initialize the key and value parts of the entry sub-type, if they have not + // been set already (i.e. if entry was not already in the table, and if the + // optional initEntry hook was not used). + PLDHashEntryHdr* Add(const void* aKey, const mozilla::fallible_t&); + + // This is like the other Add() function, but infallible, and so never + // returns null. + PLDHashEntryHdr* Add(const void* aKey); + + // To remove an entry identified by |key| from table, call: + // + // table.Remove(key); + // + // If |key|'s entry is found, it is cleared (via table->mOps->clearEntry). + // The table's capacity may be reduced afterwards. + void Remove(const void* aKey); + + // To remove an entry found by a prior search, call: + // + // table.RemoveEntry(entry); + // + // The entry, which must be present and in use, is cleared (via + // table->mOps->clearEntry). The table's capacity may be reduced afterwards. + void RemoveEntry(PLDHashEntryHdr* aEntry); + + // Remove an entry already accessed via Search() or Add(). + // + // NB: this is a "raw" or low-level method. It does not shrink the table if + // it is underloaded. Don't use it unless necessary and you know what you are + // doing, and if so, please explain in a comment why it is necessary instead + // of RemoveEntry(). + void RawRemove(PLDHashEntryHdr* aEntry); + + // This function is equivalent to + // ClearAndPrepareForLength(kDefaultInitialLength). + void Clear(); + + // This function clears the table's contents and frees its entry storage, + // leaving it in a empty state ready to be used again. Afterwards, when the + // first element is added the entry storage that gets allocated will have a + // capacity large enough to fit |aLength| elements without rehashing. + // + // It's conceptually the same as calling the destructor and then re-calling + // the constructor with the original |aOps| and |aEntrySize| arguments, and + // a new |aLength| argument. + void ClearAndPrepareForLength(uint32_t aLength); + + // Measure the size of the table's entry storage. If the entries contain + // pointers to other heap blocks, you have to iterate over the table and + // measure those separately; hence the "Shallow" prefix. + size_t ShallowSizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const; + + // Like ShallowSizeOfExcludingThis(), but includes sizeof(*this). + size_t ShallowSizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const; + +#ifdef DEBUG + // Mark a table as immutable for the remainder of its lifetime. This + // changes the implementation from asserting one set of invariants to + // asserting a different set. + void MarkImmutable(); +#endif + + // If you use PLDHashEntryStub or a subclass of it as your entry struct, and + // if your entries move via memcpy and clear via memset(0), you can use these + // stub operations. + static const PLDHashTableOps* StubOps(); + + // The individual stub operations in StubOps(). + static PLDHashNumber HashVoidPtrKeyStub(const void* aKey); + static bool MatchEntryStub(const PLDHashEntryHdr* aEntry, const void* aKey); + static void MoveEntryStub(PLDHashTable* aTable, const PLDHashEntryHdr* aFrom, + PLDHashEntryHdr* aTo); + static void ClearEntryStub(PLDHashTable* aTable, PLDHashEntryHdr* aEntry); + + // Hash/match operations for tables holding C strings. + static PLDHashNumber HashStringKey(const void* aKey); + static bool MatchStringKey(const PLDHashEntryHdr* aEntry, const void* aKey); + + // This is an iterator for PLDHashtable. Assertions will detect some, but not + // all, mid-iteration table modifications that might invalidate (e.g. + // reallocate) the entry storage. + // + // Any element can be removed during iteration using Remove(). If any + // elements are removed, the table may be resized once iteration ends. + // + // Example usage: + // + // for (auto iter = table.Iter(); !iter.Done(); iter.Next()) { + // auto entry = static_cast<FooEntry*>(iter.Get()); + // // ... do stuff with |entry| ... + // // ... possibly call iter.Remove() once ... + // } + // + // or: + // + // for (PLDHashTable::Iterator iter(&table); !iter.Done(); iter.Next()) { + // auto entry = static_cast<FooEntry*>(iter.Get()); + // // ... do stuff with |entry| ... + // // ... possibly call iter.Remove() once ... + // } + // + // The latter form is more verbose but is easier to work with when + // making subclasses of Iterator. + // + class Iterator + { + public: + explicit Iterator(PLDHashTable* aTable); + Iterator(Iterator&& aOther); + ~Iterator(); + + // Have we finished? + bool Done() const { return mNexts == mNextsLimit; } + + // Get the current entry. + PLDHashEntryHdr* Get() const + { + MOZ_ASSERT(!Done()); + + PLDHashEntryHdr* entry = reinterpret_cast<PLDHashEntryHdr*>(mCurrent); + MOZ_ASSERT(EntryIsLive(entry)); + return entry; + } + + // Advance to the next entry. + void Next(); + + // Remove the current entry. Must only be called once per entry, and Get() + // must not be called on that entry afterwards. + void Remove(); + + protected: + PLDHashTable* mTable; // Main table pointer. + + private: + char* mStart; // The first entry. + char* mLimit; // One past the last entry. + char* mCurrent; // Pointer to the current entry. + uint32_t mNexts; // Number of Next() calls. + uint32_t mNextsLimit; // Next() call limit. + + bool mHaveRemoved; // Have any elements been removed? + + bool IsOnNonLiveEntry() const; + void MoveToNextEntry(); + + Iterator() = delete; + Iterator(const Iterator&) = delete; + Iterator& operator=(const Iterator&) = delete; + Iterator& operator=(const Iterator&&) = delete; + }; + + Iterator Iter() { return Iterator(this); } + + // Use this if you need to initialize an Iterator in a const method. If you + // use this case, you should not call Remove() on the iterator. + Iterator ConstIter() const + { + return Iterator(const_cast<PLDHashTable*>(this)); + } + +private: + // Multiplicative hash uses an unsigned 32 bit integer and the golden ratio, + // expressed as a fixed-point 32-bit fraction. + static const uint32_t kHashBits = 32; + static const uint32_t kGoldenRatio = 0x9E3779B9U; + + static uint32_t HashShift(uint32_t aEntrySize, uint32_t aLength); + + static const PLDHashNumber kCollisionFlag = 1; + + static bool EntryIsFree(PLDHashEntryHdr* aEntry) + { + return aEntry->mKeyHash == 0; + } + static bool EntryIsRemoved(PLDHashEntryHdr* aEntry) + { + return aEntry->mKeyHash == 1; + } + static bool EntryIsLive(PLDHashEntryHdr* aEntry) + { + return aEntry->mKeyHash >= 2; + } + + static void MarkEntryFree(PLDHashEntryHdr* aEntry) + { + aEntry->mKeyHash = 0; + } + static void MarkEntryRemoved(PLDHashEntryHdr* aEntry) + { + aEntry->mKeyHash = 1; + } + + PLDHashNumber Hash1(PLDHashNumber aHash0); + void Hash2(PLDHashNumber aHash, uint32_t& aHash2Out, uint32_t& aSizeMaskOut); + + static bool MatchEntryKeyhash(PLDHashEntryHdr* aEntry, PLDHashNumber aHash); + PLDHashEntryHdr* AddressEntry(uint32_t aIndex); + + // We store mHashShift rather than sizeLog2 to optimize the collision-free + // case in SearchTable. + uint32_t CapacityFromHashShift() const + { + return ((uint32_t)1 << (kHashBits - mHashShift)); + } + + PLDHashNumber ComputeKeyHash(const void* aKey); + + enum SearchReason { ForSearchOrRemove, ForAdd }; + + template <SearchReason Reason> + PLDHashEntryHdr* NS_FASTCALL + SearchTable(const void* aKey, PLDHashNumber aKeyHash); + + PLDHashEntryHdr* FindFreeEntry(PLDHashNumber aKeyHash); + + bool ChangeTable(int aDeltaLog2); + + void ShrinkIfAppropriate(); + + PLDHashTable(const PLDHashTable& aOther) = delete; + PLDHashTable& operator=(const PLDHashTable& aOther) = delete; +}; + +// Compute the hash code for a given key to be looked up, added, or removed. +// A hash code may have any PLDHashNumber value. +typedef PLDHashNumber (*PLDHashHashKey)(const void* aKey); + +// Compare the key identifying aEntry with the provided key parameter. Return +// true if keys match, false otherwise. +typedef bool (*PLDHashMatchEntry)(const PLDHashEntryHdr* aEntry, + const void* aKey); + +// Copy the data starting at aFrom to the new entry storage at aTo. Do not add +// reference counts for any strong references in the entry, however, as this +// is a "move" operation: the old entry storage at from will be freed without +// any reference-decrementing callback shortly. +typedef void (*PLDHashMoveEntry)(PLDHashTable* aTable, + const PLDHashEntryHdr* aFrom, + PLDHashEntryHdr* aTo); + +// Clear the entry and drop any strong references it holds. This callback is +// invoked by Remove(), but only if the given key is found in the table. +typedef void (*PLDHashClearEntry)(PLDHashTable* aTable, + PLDHashEntryHdr* aEntry); + +// Initialize a new entry, apart from mKeyHash. This function is called when +// Add() finds no existing entry for the given key, and must add a new one. At +// that point, |aEntry->mKeyHash| is not set yet, to avoid claiming the last +// free entry in a severely overloaded table. +typedef void (*PLDHashInitEntry)(PLDHashEntryHdr* aEntry, const void* aKey); + +// Finally, the "vtable" structure for PLDHashTable. The first four hooks +// must be provided by implementations; they're called unconditionally by the +// generic PLDHashTable.cpp code. Hooks after these may be null. +// +// Summary of allocation-related hook usage with C++ placement new emphasis: +// initEntry Call placement new using default key-based ctor. +// moveEntry Call placement new using copy ctor, run dtor on old +// entry storage. +// clearEntry Run dtor on entry. +// +// Note the reason why initEntry is optional: the default hooks (stubs) clear +// entry storage: On successful Add(tbl, key), the returned entry pointer +// addresses an entry struct whose mKeyHash member has been set non-zero, but +// all other entry members are still clear (null). Add() callers can test such +// members to see whether the entry was newly created by the Add() call that +// just succeeded. If placement new or similar initialization is required, +// define an |initEntry| hook. Of course, the |clearEntry| hook must zero or +// null appropriately. +// +// XXX assumes 0 is null for pointer types. +struct PLDHashTableOps +{ + // Mandatory hooks. All implementations must provide these. + PLDHashHashKey hashKey; + PLDHashMatchEntry matchEntry; + PLDHashMoveEntry moveEntry; + PLDHashClearEntry clearEntry; + + // Optional hooks start here. If null, these are not called. + PLDHashInitEntry initEntry; +}; + +// A minimal entry is a subclass of PLDHashEntryHdr and has a void* key pointer. +struct PLDHashEntryStub : public PLDHashEntryHdr +{ + const void* key; +}; + +#endif /* PLDHashTable_h */ |