summaryrefslogtreecommitdiffstats
path: root/js/src/vm/SharedImmutableStringsCache.h
diff options
context:
space:
mode:
Diffstat (limited to 'js/src/vm/SharedImmutableStringsCache.h')
-rw-r--r--js/src/vm/SharedImmutableStringsCache.h468
1 files changed, 468 insertions, 0 deletions
diff --git a/js/src/vm/SharedImmutableStringsCache.h b/js/src/vm/SharedImmutableStringsCache.h
new file mode 100644
index 000000000..ddb6ff87f
--- /dev/null
+++ b/js/src/vm/SharedImmutableStringsCache.h
@@ -0,0 +1,468 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef vm_SharedImmutableStringsCache_h
+#define vm_SharedImmutableStringsCache_h
+
+#include "mozilla/Maybe.h"
+#include "mozilla/UniquePtr.h"
+
+#include <cstring>
+#include <new> // for placement new
+
+#include "jsstr.h"
+
+#include "js/HashTable.h"
+#include "js/Utility.h"
+
+#include "threading/ExclusiveData.h"
+
+#include "vm/MutexIDs.h"
+
+namespace js {
+
+class SharedImmutableString;
+class SharedImmutableTwoByteString;
+
+/**
+ * The `SharedImmutableStringsCache` allows for safely sharing and deduplicating
+ * immutable strings (either `const char*` or `const char16_t*`) between
+ * threads.
+ *
+ * The locking mechanism is dead-simple and coarse grained: a single lock guards
+ * all of the internal table itself, the table's entries, and the entries'
+ * reference counts. It is only safe to perform any mutation on the cache or any
+ * data stored within the cache when this lock is acquired.
+ */
+class SharedImmutableStringsCache
+{
+ friend class SharedImmutableString;
+ friend class SharedImmutableTwoByteString;
+ struct Hasher;
+
+ public:
+ using OwnedChars = mozilla::UniquePtr<char[], JS::FreePolicy>;
+ using OwnedTwoByteChars = mozilla::UniquePtr<char16_t[], JS::FreePolicy>;
+
+ /**
+ * Get the canonical, shared, and de-duplicated version of the given `const
+ * char*` string. If such a string does not exist, call `intoOwnedChars` and
+ * add the string it returns to the cache.
+ *
+ * `intoOwnedChars` must create an owned version of the given string, and
+ * must have one of the following types:
+ *
+ * mozilla::UniquePtr<char[], JS::FreePolicy> intoOwnedChars();
+ * mozilla::UniquePtr<char[], JS::FreePolicy>&& intoOwnedChars();
+ *
+ * It can be used by callers to elide a copy of the string when it is safe
+ * to give up ownership of the lookup string to the cache. It must return a
+ * `nullptr` on failure.
+ *
+ * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
+ * returned.
+ */
+ template <typename IntoOwnedChars>
+ MOZ_MUST_USE mozilla::Maybe<SharedImmutableString>
+ getOrCreate(const char* chars, size_t length, IntoOwnedChars intoOwnedChars);
+
+ /**
+ * Take ownership of the given `chars` and return the canonical, shared and
+ * de-duplicated version.
+ *
+ * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
+ * returned.
+ */
+ MOZ_MUST_USE mozilla::Maybe<SharedImmutableString>
+ getOrCreate(OwnedChars&& chars, size_t length);
+
+ /**
+ * Do not take ownership of the given `chars`. Return the canonical, shared
+ * and de-duplicated version. If there is no extant shared version of
+ * `chars`, make a copy and insert it into the cache.
+ *
+ * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
+ * returned.
+ */
+ MOZ_MUST_USE mozilla::Maybe<SharedImmutableString>
+ getOrCreate(const char* chars, size_t length);
+
+ /**
+ * Get the canonical, shared, and de-duplicated version of the given `const
+ * char16_t*` string. If such a string does not exist, call `intoOwnedChars`
+ * and add the string it returns to the cache.
+ *
+ * `intoOwnedTwoByteChars` must create an owned version of the given string,
+ * and must have one of the following types:
+ *
+ * mozilla::UniquePtr<char16_t[], JS::FreePolicy> intoOwnedTwoByteChars();
+ * mozilla::UniquePtr<char16_t[], JS::FreePolicy>&& intoOwnedTwoByteChars();
+ *
+ * It can be used by callers to elide a copy of the string when it is safe
+ * to give up ownership of the lookup string to the cache. It must return a
+ * `nullptr` on failure.
+ *
+ * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
+ * returned.
+ */
+ template <typename IntoOwnedTwoByteChars>
+ MOZ_MUST_USE mozilla::Maybe<SharedImmutableTwoByteString>
+ getOrCreate(const char16_t* chars, size_t length, IntoOwnedTwoByteChars intoOwnedTwoByteChars);
+
+ /**
+ * Take ownership of the given `chars` and return the canonical, shared and
+ * de-duplicated version.
+ *
+ * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
+ * returned.
+ */
+ MOZ_MUST_USE mozilla::Maybe<SharedImmutableTwoByteString>
+ getOrCreate(OwnedTwoByteChars&& chars, size_t length);
+
+ /**
+ * Do not take ownership of the given `chars`. Return the canonical, shared
+ * and de-duplicated version. If there is no extant shared version of
+ * `chars`, then make a copy and insert it into the cache.
+ *
+ * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
+ * returned.
+ */
+ MOZ_MUST_USE mozilla::Maybe<SharedImmutableTwoByteString>
+ getOrCreate(const char16_t* chars, size_t length);
+
+ size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const {
+ MOZ_ASSERT(inner_);
+ size_t n = mallocSizeOf(inner_);
+
+ auto locked = inner_->lock();
+ if (!locked->set.initialized())
+ return n;
+
+ // Size of the table.
+ n += locked->set.sizeOfExcludingThis(mallocSizeOf);
+
+ // Sizes of the strings and their boxes.
+ for (auto r = locked->set.all(); !r.empty(); r.popFront()) {
+ n += mallocSizeOf(r.front().get());
+ if (const char* chars = r.front()->chars())
+ n += mallocSizeOf(chars);
+ }
+
+ return n;
+ }
+
+ /**
+ * Construct a new cache of shared, immutable strings. Returns
+ * `mozilla::Nothing` on out of memory failure.
+ */
+ static mozilla::Maybe<SharedImmutableStringsCache> Create() {
+ auto inner = js_new<ExclusiveData<Inner>>(mutexid::SharedImmutableStringsCache);
+ if (!inner)
+ return mozilla::Nothing();
+
+ auto locked = inner->lock();
+ return mozilla::Some(SharedImmutableStringsCache(locked));
+ }
+
+ SharedImmutableStringsCache(SharedImmutableStringsCache&& rhs)
+ : inner_(rhs.inner_)
+ {
+ MOZ_ASSERT(inner_);
+ rhs.inner_ = nullptr;
+ }
+
+ SharedImmutableStringsCache& operator=(SharedImmutableStringsCache&& rhs) {
+ MOZ_ASSERT(this != &rhs, "self move not allowed");
+ new (this) SharedImmutableStringsCache(mozilla::Move(rhs));
+ return *this;
+ }
+
+ SharedImmutableStringsCache& operator=(const SharedImmutableStringsCache&) = delete;
+
+ SharedImmutableStringsCache clone() {
+ MOZ_ASSERT(inner_);
+ auto locked = inner_->lock();
+ return SharedImmutableStringsCache(locked);
+ }
+
+ ~SharedImmutableStringsCache() {
+ if (!inner_)
+ return;
+
+ bool shouldDestroy = false;
+ {
+ // ~ExclusiveData takes the lock, so be sure to drop the lock before
+ // attempting to destroy the inner.
+ auto locked = inner_->lock();
+ MOZ_ASSERT(locked->refcount > 0);
+ locked->refcount--;
+ if (locked->refcount == 0)
+ shouldDestroy = true;
+ }
+ if (shouldDestroy)
+ js_delete(inner_);
+ }
+
+ /**
+ * Purge the cache of all refcount == 0 entries.
+ */
+ void purge() {
+ auto locked = inner_->lock();
+ MOZ_ASSERT(locked->refcount > 0);
+
+ if (!locked->set.initialized())
+ return;
+
+ for (Inner::Set::Enum e(locked->set); !e.empty(); e.popFront()) {
+ if (e.front()->refcount == 0) {
+ // The chars should be eagerly freed when refcount reaches zero.
+ MOZ_ASSERT(!e.front()->chars());
+ e.removeFront();
+ } else {
+ // The chars should exist as long as the refcount is non-zero.
+ MOZ_ASSERT(e.front()->chars());
+ }
+ }
+ }
+
+ private:
+ class StringBox
+ {
+ friend class SharedImmutableString;
+
+ OwnedChars chars_;
+ size_t length_;
+
+ public:
+ mutable size_t refcount;
+
+ using Ptr = mozilla::UniquePtr<StringBox, JS::DeletePolicy<StringBox>>;
+
+ StringBox(OwnedChars&& chars, size_t length)
+ : chars_(mozilla::Move(chars))
+ , length_(length)
+ , refcount(0)
+ {
+ MOZ_ASSERT(chars_);
+ }
+
+ static Ptr Create(OwnedChars&& chars, size_t length) {
+ return Ptr(js_new<StringBox>(mozilla::Move(chars), length));
+ }
+
+ StringBox(const StringBox&) = delete;
+ StringBox& operator=(const StringBox&) = delete;
+
+ ~StringBox() {
+ MOZ_RELEASE_ASSERT(refcount == 0,
+ "There are `SharedImmutable[TwoByte]String`s outliving their "
+ "associated cache! This always leads to use-after-free in the "
+ "`~SharedImmutableString` destructor!");
+ }
+
+ const char* chars() const { return chars_.get(); }
+ size_t length() const { return length_; }
+ };
+
+ struct Hasher
+ {
+ /**
+ * A structure used when querying for a `const char*` string in the cache.
+ */
+ class Lookup
+ {
+ friend struct Hasher;
+
+ HashNumber hash_;
+ const char* chars_;
+ size_t length_;
+
+ public:
+ Lookup(HashNumber hash, const char* chars, size_t length)
+ : hash_(hash)
+ , chars_(chars)
+ , length_(length)
+ {
+ MOZ_ASSERT(chars_);
+ MOZ_ASSERT(hash == Hasher::hashLongString(chars, length));
+ }
+
+ Lookup(HashNumber hash, const char16_t* chars, size_t length)
+ : Lookup(hash, reinterpret_cast<const char*>(chars), length * sizeof(char16_t))
+ { }
+ };
+
+ static const size_t SHORT_STRING_MAX_LENGTH = 8192;
+ static const size_t HASH_CHUNK_LENGTH = SHORT_STRING_MAX_LENGTH / 2;
+
+ // For strings longer than SHORT_STRING_MAX_LENGTH, we only hash the
+ // first HASH_CHUNK_LENGTH and last HASH_CHUNK_LENGTH characters in the
+ // string. This increases the risk of collisions, but in practice it
+ // should be rare, and it yields a large speedup for hashing long
+ // strings.
+ static HashNumber hashLongString(const char* chars, size_t length) {
+ MOZ_ASSERT(chars);
+ return length <= SHORT_STRING_MAX_LENGTH
+ ? mozilla::HashString(chars, length)
+ : mozilla::AddToHash(mozilla::HashString(chars, HASH_CHUNK_LENGTH),
+ mozilla::HashString(chars + length - HASH_CHUNK_LENGTH,
+ HASH_CHUNK_LENGTH));
+ }
+
+ static HashNumber hash(const Lookup& lookup) {
+ return lookup.hash_;
+ }
+
+ static bool match(const StringBox::Ptr& key, const Lookup& lookup) {
+ MOZ_ASSERT(lookup.chars_);
+
+ if (!key->chars() || key->length() != lookup.length_)
+ return false;
+
+ if (key->chars() == lookup.chars_)
+ return true;
+
+ return memcmp(key->chars(), lookup.chars_, key->length()) == 0;
+ }
+ };
+
+ // The `Inner` struct contains the actual cached contents, and is reference
+ // counted and shared between all `SharedImmutableStringsCache` and
+ // `SharedImmutable[TwoByte]String` holders.
+ struct Inner
+ {
+ using Set = HashSet<StringBox::Ptr, Hasher, SystemAllocPolicy>;
+
+ size_t refcount;
+ Set set;
+
+ Inner()
+ : refcount(0)
+ , set()
+ { }
+
+ Inner(const Inner&) = delete;
+ Inner& operator=(const Inner&) = delete;
+
+ ~Inner()
+ {
+ MOZ_ASSERT(refcount == 0);
+ }
+ };
+
+ const ExclusiveData<Inner>* inner_;
+
+ explicit SharedImmutableStringsCache(ExclusiveData<Inner>::Guard& locked)
+ : inner_(locked.parent())
+ {
+ locked->refcount++;
+ }
+};
+
+/**
+ * The `SharedImmutableString` class holds a reference to a `const char*` string
+ * from the `SharedImmutableStringsCache` and releases the reference upon
+ * destruction.
+ */
+class SharedImmutableString
+{
+ friend class SharedImmutableStringsCache;
+ friend class SharedImmutableTwoByteString;
+
+ mutable SharedImmutableStringsCache cache_;
+ mutable SharedImmutableStringsCache::StringBox* box_;
+
+ SharedImmutableString(ExclusiveData<SharedImmutableStringsCache::Inner>::Guard& locked,
+ SharedImmutableStringsCache::StringBox* box);
+
+ public:
+ /**
+ * `SharedImmutableString`s are move-able. It is an error to use a
+ * `SharedImmutableString` after it has been moved.
+ */
+ SharedImmutableString(SharedImmutableString&& rhs);
+ SharedImmutableString& operator=(SharedImmutableString&& rhs);
+
+ /**
+ * Create another shared reference to the underlying string.
+ */
+ SharedImmutableString clone() const;
+
+ // If you want a copy, take one explicitly with `clone`!
+ SharedImmutableString& operator=(const SharedImmutableString&) = delete;
+
+ ~SharedImmutableString();
+
+ /**
+ * Get a raw pointer to the underlying string. It is only safe to use the
+ * resulting pointer while this `SharedImmutableString` exists.
+ */
+ const char* chars() const {
+ MOZ_ASSERT(box_);
+ MOZ_ASSERT(box_->refcount > 0);
+ MOZ_ASSERT(box_->chars());
+ return box_->chars();
+ }
+
+ /**
+ * Get the length of the underlying string.
+ */
+ size_t length() const {
+ MOZ_ASSERT(box_);
+ MOZ_ASSERT(box_->refcount > 0);
+ MOZ_ASSERT(box_->chars());
+ return box_->length();
+ }
+};
+
+/**
+ * The `SharedImmutableTwoByteString` class holds a reference to a `const
+ * char16_t*` string from the `SharedImmutableStringsCache` and releases the
+ * reference upon destruction.
+ */
+class SharedImmutableTwoByteString
+{
+ friend class SharedImmutableStringsCache;
+
+ // If a `char*` string and `char16_t*` string happen to have the same bytes,
+ // the bytes will be shared but handed out as different types.
+ SharedImmutableString string_;
+
+ explicit SharedImmutableTwoByteString(SharedImmutableString&& string);
+ SharedImmutableTwoByteString(ExclusiveData<SharedImmutableStringsCache::Inner>::Guard& locked,
+ SharedImmutableStringsCache::StringBox* box);
+
+ public:
+ /**
+ * `SharedImmutableTwoByteString`s are move-able. It is an error to use a
+ * `SharedImmutableTwoByteString` after it has been moved.
+ */
+ SharedImmutableTwoByteString(SharedImmutableTwoByteString&& rhs);
+ SharedImmutableTwoByteString& operator=(SharedImmutableTwoByteString&& rhs);
+
+ /**
+ * Create another shared reference to the underlying string.
+ */
+ SharedImmutableTwoByteString clone() const;
+
+ // If you want a copy, take one explicitly with `clone`!
+ SharedImmutableTwoByteString& operator=(const SharedImmutableTwoByteString&) = delete;
+
+ /**
+ * Get a raw pointer to the underlying string. It is only safe to use the
+ * resulting pointer while this `SharedImmutableTwoByteString` exists.
+ */
+ const char16_t* chars() const { return reinterpret_cast<const char16_t*>(string_.chars()); }
+
+ /**
+ * Get the length of the underlying string.
+ */
+ size_t length() const { return string_.length() / sizeof(char16_t); }
+};
+
+} // namespace js
+
+#endif // vm_SharedImmutableStringsCache_h