From 2c72b8859a959629462a58b1385408e25bb89bad Mon Sep 17 00:00:00 2001 From: Moonchild Date: Thu, 11 Feb 2021 13:20:54 +0000 Subject: Issue #1738 - Part 1: Improve performance of JSON stringify - Use some pointer voodoo and instead of stringbuffer append() - Use a lookup table instead of char comparisons for chr < 256 - Stop using a Hashtable/MovableCellHasher for JSON CycleDetector --- js/src/json.cpp | 183 ++++++++++++++++++---------------- js/src/tests/ecma_5/JSON/stringify.js | 3 + js/src/vm/StringBuffer.h | 15 ++- 3 files changed, 111 insertions(+), 90 deletions(-) (limited to 'js') diff --git a/js/src/json.cpp b/js/src/json.cpp index 5f1238f9f..73e37e237 100644 --- a/js/src/json.cpp +++ b/js/src/json.cpp @@ -39,78 +39,81 @@ const Class js::JSONClass = { JSCLASS_HAS_CACHED_PROTO(JSProto_JSON) }; -static inline bool -IsQuoteSpecialCharacter(char16_t c) -{ - static_assert('\b' < ' ', "'\\b' must be treated as special below"); - static_assert('\f' < ' ', "'\\f' must be treated as special below"); - static_assert('\n' < ' ', "'\\n' must be treated as special below"); - static_assert('\r' < ' ', "'\\r' must be treated as special below"); - static_assert('\t' < ' ', "'\\t' must be treated as special below"); - - return c == '"' || c == '\\' || c < ' '; -} - -/* ES5 15.12.3 Quote. */ -template -static bool -Quote(StringBuffer& sb, JSLinearString* str) +/* ES5 15.12.3 Quote. + * Requires that the destination has enough space allocated for src after escaping + * (that is, `2 + 6 * (srcEnd - srcBegin)` characters). + */ +template +static MOZ_ALWAYS_INLINE RangedPtr +InfallibleQuote(RangedPtr srcBegin, RangedPtr srcEnd, RangedPtr dstPtr) { - size_t len = str->length(); + // Maps characters < 256 to the value that must follow the '\\' in the quoted string. + // Entries with 'u' are handled as \\u00xy, and entries with 0 are not escaped in any way. + // Characters >= 256 are all assumed to be unescaped. + static const Latin1Char escapeLookup[256] = { + 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'b', 't', + 'n', 'u', 'f', 'r', 'u', 'u', 'u', 'u', 'u', 'u', + 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', + 'u', 'u', 0, 0, '\"', 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, '\\', // rest are all zeros + }; /* Step 1. */ - if (!sb.append('"')) - return false; + *dstPtr++ = '"'; /* Step 2. */ - JS::AutoCheckCannotGC nogc; - const RangedPtr buf(str->chars(nogc), len); - for (size_t i = 0; i < len; ++i) { - /* Batch-append maximal character sequences containing no escapes. */ - size_t mark = i; - do { - if (IsQuoteSpecialCharacter(buf[i])) - break; - } while (++i < len); - if (i > mark) { - if (!sb.appendSubstring(str, mark, i - mark)) - return false; - if (i == len) - break; + while (srcBegin != srcEnd) { + SrcCharT c = *srcBegin++; + size_t escapeIndex = c % sizeof(escapeLookup); + Latin1Char escaped = escapeLookup[escapeIndex]; + if (MOZ_LIKELY((escapeIndex != size_t(c)) || !escaped)) { + *dstPtr++ = c; + continue; } - - char16_t c = buf[i]; - if (c == '"' || c == '\\') { - if (!sb.append('\\') || !sb.append(c)) - return false; - } else if (c == '\b' || c == '\f' || c == '\n' || c == '\r' || c == '\t') { - char16_t abbrev = (c == '\b') - ? 'b' - : (c == '\f') - ? 'f' - : (c == '\n') - ? 'n' - : (c == '\r') - ? 'r' - : 't'; - if (!sb.append('\\') || !sb.append(abbrev)) - return false; - } else { + *dstPtr++ = '\\'; + *dstPtr++ = escaped; + if (escaped == 'u') { MOZ_ASSERT(c < ' '); - if (!sb.append("\\u00")) - return false; MOZ_ASSERT((c >> 4) < 10); uint8_t x = c >> 4, y = c % 16; - if (!sb.append(Latin1Char('0' + x)) || - !sb.append(Latin1Char(y < 10 ? '0' + y : 'a' + (y - 10)))) - { - return false; - } + *dstPtr++ = '0'; + *dstPtr++ = '0'; + *dstPtr++ = '0' + x; + *dstPtr++ = y < 10 ? '0' + y : 'a' + (y - 10); } } /* Steps 3-4. */ - return sb.append('"'); + *dstPtr++ = '"'; + return dstPtr; +} + +template +static bool +Quote(CharVectorT& sb, JSLinearString* str) +{ + // We resize the backing buffer to the maximum size we could possibly need, + // write the escaped string into it, and shrink it back to the size we ended + // up needing. + size_t len = str->length(); + size_t sbInitialLen = sb.length(); + if (!sb.growByUninitialized(len * 6 + 2)) + return false; + + typedef typename CharVectorT::ElementType DstCharT; + + JS::AutoCheckCannotGC nogc; + RangedPtr srcBegin{str->chars(nogc), len}; + RangedPtr dstBegin{sb.begin(), sb.begin(), sb.end()}; + RangedPtr dstEnd = InfallibleQuote(srcBegin, srcBegin + len, dstBegin + sbInitialLen); + size_t newSize = dstEnd - dstBegin; + sb.shrinkTo(newSize); + return true; } static bool @@ -120,14 +123,23 @@ Quote(JSContext* cx, StringBuffer& sb, JSString* str) if (!linear) return false; - return linear->hasLatin1Chars() - ? Quote(sb, linear) - : Quote(sb, linear); + // Check if either has non-latin1 before calling ensure, so that the buffer's + // hasEnsured flag is set if the converstion to twoByte was automatic. + if (!sb.isUnderlyingBufferLatin1() || linear->hasTwoByteChars()) { + if (!sb.ensureTwoByteChars()) + return false; + } + if (linear->hasTwoByteChars()) + return Quote(sb.rawTwoByteBuffer(), linear); + + return sb.isUnderlyingBufferLatin1() + ? Quote(sb.latin1Chars(), linear) + : Quote(sb.rawTwoByteBuffer(), linear); } namespace { -using ObjectSet = GCHashSet, SystemAllocPolicy>; +using ObjectVector = GCVector; class StringifyContext { @@ -138,7 +150,7 @@ class StringifyContext : sb(sb), gap(gap), replacer(cx, replacer), - stack(cx), + stack(cx, ObjectVector(cx)), propertyList(propertyList), depth(0), maybeSafely(maybeSafely) @@ -147,14 +159,10 @@ class StringifyContext MOZ_ASSERT_IF(maybeSafely, gap.empty()); } - bool init() { - return stack.init(8); - } - StringBuffer& sb; const StringBuffer& gap; RootedObject replacer; - Rooted stack; + Rooted stack; const AutoIdVector& propertyList; uint32_t depth; bool maybeSafely; @@ -302,29 +310,34 @@ class CycleDetector { public: CycleDetector(StringifyContext* scx, HandleObject obj) - : stack(&scx->stack), obj_(obj) { - } - - bool foundCycle(JSContext* cx) { - auto addPtr = stack.lookupForAdd(obj_); - if (addPtr) { - JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_JSON_CYCLIC_VALUE); - return false; - } - if (!stack.add(addPtr, obj_)) { - ReportOutOfMemory(cx); - return false; + : stack_(&scx->stack) + , obj_(obj) + , appended_(false) + {} + + MOZ_ALWAYS_INLINE bool foundCycle(JSContext* cx) { + JSObject* obj = obj_; + for (JSObject* obj2 : stack_) { + if (MOZ_UNLIKELY(obj == obj2)) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_JSON_CYCLIC_VALUE); + return false; + } } - return true; + appended_ = stack_.append(obj); + return appended_; } ~CycleDetector() { - stack.remove(obj_); + if (MOZ_LIKELY(appended_)) { + MOZ_ASSERT(stack_.back() == obj_); + stack_.popBack(); + } } private: - MutableHandle stack; + MutableHandle stack_; HandleObject obj_; + bool appended_; }; /* ES5 15.12.3 JO. */ @@ -747,8 +760,6 @@ js::Stringify(JSContext* cx, MutableHandleValue vp, JSObject* replacer_, const V /* Step 12. */ StringifyContext scx(cx, sb, gap, replacer, propertyList, stringifyBehavior == StringifyBehavior::RestrictedSafe); - if (!scx.init()) - return false; if (!PreprocessValue(cx, wrapper, HandleId(emptyId), vp, &scx)) return false; if (IsFilteredValue(vp)) diff --git a/js/src/tests/ecma_5/JSON/stringify.js b/js/src/tests/ecma_5/JSON/stringify.js index 1a7e9b150..3b2147420 100644 --- a/js/src/tests/ecma_5/JSON/stringify.js +++ b/js/src/tests/ecma_5/JSON/stringify.js @@ -34,6 +34,9 @@ assertStringify({'mmm\\mmm':"hmm"}, '{"mmm\\\\mmm":"hmm"}'); assertStringify({'mmm\\mmm\\mmm':"hmm"}, '{"mmm\\\\mmm\\\\mmm":"hmm"}'); assertStringify({"mm\u000bmm":"hmm"}, '{"mm\\u000bmm":"hmm"}'); assertStringify({"mm\u0000mm":"hmm"}, '{"mm\\u0000mm":"hmm"}'); +assertStringify({"\u0000\u000b":""}, '{"\\u0000\\u000b":""}'); +assertStringify({"\u000b\ufdfd":"hmm"}, '{"\\u000b\ufdfd":"hmm"}'); +assertStringify({"\u000b\ufdfd":"h\xfc\ufdfdm"}, '{"\\u000b\ufdfd":"h\xfc\ufdfdm"}'); var x = {"free":"variable"}; assertStringify(x, '{"free":"variable"}'); diff --git a/js/src/vm/StringBuffer.h b/js/src/vm/StringBuffer.h index f2437384a..502a3bc6f 100644 --- a/js/src/vm/StringBuffer.h +++ b/js/src/vm/StringBuffer.h @@ -61,12 +61,8 @@ class StringBuffer MOZ_ALWAYS_INLINE bool isLatin1() const { return cb.constructed(); } MOZ_ALWAYS_INLINE bool isTwoByte() const { return !isLatin1(); } - MOZ_ALWAYS_INLINE Latin1CharBuffer& latin1Chars() { return cb.ref(); } MOZ_ALWAYS_INLINE TwoByteCharBuffer& twoByteChars() { return cb.ref(); } - MOZ_ALWAYS_INLINE const Latin1CharBuffer& latin1Chars() const { - return cb.ref(); - } MOZ_ALWAYS_INLINE const TwoByteCharBuffer& twoByteChars() const { return cb.ref(); } @@ -84,6 +80,12 @@ class StringBuffer cb.construct(cx); } + MOZ_ALWAYS_INLINE Latin1CharBuffer& latin1Chars() { return cb.ref(); } + + MOZ_ALWAYS_INLINE const Latin1CharBuffer& latin1Chars() const { + return cb.ref(); + } + void clear() { if (isLatin1()) latin1Chars().clear(); @@ -134,6 +136,11 @@ class StringBuffer return append(Latin1Char(c)); } + TwoByteCharBuffer& rawTwoByteBuffer() { + MOZ_ASSERT(hasEnsuredTwoByteChars_); + return twoByteChars(); + } + inline MOZ_MUST_USE bool append(const char16_t* begin, const char16_t* end); MOZ_MUST_USE bool append(const char16_t* chars, size_t len) { -- cgit v1.2.3