/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- * vim: set ts=8 sts=4 et sw=4 tw=99: * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "jsstr.h" #include "mozilla/Attributes.h" #include "mozilla/Casting.h" #include "mozilla/CheckedInt.h" #include "mozilla/FloatingPoint.h" #include "mozilla/PodOperations.h" #include "mozilla/Range.h" #include "mozilla/TypeTraits.h" #include "mozilla/Unused.h" #include <ctype.h> #include <limits> #include <string.h> #include "jsapi.h" #include "jsarray.h" #include "jsatom.h" #include "jsbool.h" #include "jscntxt.h" #include "jsgc.h" #include "jsnum.h" #include "jsobj.h" #include "jsopcode.h" #include "jstypes.h" #include "jsutil.h" #include "builtin/Intl.h" #include "builtin/RegExp.h" #include "jit/InlinableNatives.h" #include "js/Conversions.h" #include "js/UniquePtr.h" #include "unicode/unorm.h" #include "vm/GlobalObject.h" #include "vm/Interpreter.h" #include "vm/Opcodes.h" #include "vm/Printer.h" #include "vm/RegExpObject.h" #include "vm/RegExpStatics.h" #include "vm/StringBuffer.h" #include "vm/Unicode.h" #include "vm/Interpreter-inl.h" #include "vm/String-inl.h" #include "vm/StringObject-inl.h" #include "vm/TypeInference-inl.h" using namespace js; using namespace js::gc; using JS::Symbol; using JS::SymbolCode; using JS::ToInt32; using JS::ToUint32; using mozilla::AssertedCast; using mozilla::CheckedInt; using mozilla::IsNaN; using mozilla::IsNegativeZero; using mozilla::IsSame; using mozilla::Move; using mozilla::PodCopy; using mozilla::PodEqual; using mozilla::RangedPtr; using JS::AutoCheckCannotGC; static JSLinearString* ArgToRootedString(JSContext* cx, const CallArgs& args, unsigned argno) { if (argno >= args.length()) return cx->names().undefined; JSString* str = ToString<CanGC>(cx, args[argno]); if (!str) return nullptr; args[argno].setString(str); return str->ensureLinear(cx); } /* * Forward declarations for URI encode/decode and helper routines */ static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp); static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp); static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp); static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp); /* * Global string methods */ /* ES5 B.2.1 */ template <typename CharT> static Latin1Char* Escape(JSContext* cx, const CharT* chars, uint32_t length, uint32_t* newLengthOut) { static const uint8_t shouldPassThrough[128] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1, /* !"#$%&'()*+,-./ */ 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 0123456789:;<=>? */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* @ABCDEFGHIJKLMNO */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* PQRSTUVWXYZ[\]^_ */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* `abcdefghijklmno */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, /* pqrstuvwxyz{\}~ DEL */ }; /* Take a first pass and see how big the result string will need to be. */ uint32_t newLength = length; for (size_t i = 0; i < length; i++) { char16_t ch = chars[i]; if (ch < 128 && shouldPassThrough[ch]) continue; /* The character will be encoded as %XX or %uXXXX. */ newLength += (ch < 256) ? 2 : 5; /* * newlength is incremented by at most 5 on each iteration, so worst * case newlength == length * 6. This can't overflow. */ static_assert(JSString::MAX_LENGTH < UINT32_MAX / 6, "newlength must not overflow"); } Latin1Char* newChars = cx->pod_malloc<Latin1Char>(newLength + 1); if (!newChars) return nullptr; static const char digits[] = "0123456789ABCDEF"; size_t i, ni; for (i = 0, ni = 0; i < length; i++) { char16_t ch = chars[i]; if (ch < 128 && shouldPassThrough[ch]) { newChars[ni++] = ch; } else if (ch < 256) { newChars[ni++] = '%'; newChars[ni++] = digits[ch >> 4]; newChars[ni++] = digits[ch & 0xF]; } else { newChars[ni++] = '%'; newChars[ni++] = 'u'; newChars[ni++] = digits[ch >> 12]; newChars[ni++] = digits[(ch & 0xF00) >> 8]; newChars[ni++] = digits[(ch & 0xF0) >> 4]; newChars[ni++] = digits[ch & 0xF]; } } MOZ_ASSERT(ni == newLength); newChars[newLength] = 0; *newLengthOut = newLength; return newChars; } static bool str_escape(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); JSLinearString* str = ArgToRootedString(cx, args, 0); if (!str) return false; ScopedJSFreePtr<Latin1Char> newChars; uint32_t newLength = 0; // initialize to silence GCC warning if (str->hasLatin1Chars()) { AutoCheckCannotGC nogc; newChars = Escape(cx, str->latin1Chars(nogc), str->length(), &newLength); } else { AutoCheckCannotGC nogc; newChars = Escape(cx, str->twoByteChars(nogc), str->length(), &newLength); } if (!newChars) return false; JSString* res = NewString<CanGC>(cx, newChars.get(), newLength); if (!res) return false; newChars.forget(); args.rval().setString(res); return true; } template <typename CharT> static inline bool Unhex4(const RangedPtr<const CharT> chars, char16_t* result) { char16_t a = chars[0], b = chars[1], c = chars[2], d = chars[3]; if (!(JS7_ISHEX(a) && JS7_ISHEX(b) && JS7_ISHEX(c) && JS7_ISHEX(d))) return false; *result = (((((JS7_UNHEX(a) << 4) + JS7_UNHEX(b)) << 4) + JS7_UNHEX(c)) << 4) + JS7_UNHEX(d); return true; } template <typename CharT> static inline bool Unhex2(const RangedPtr<const CharT> chars, char16_t* result) { char16_t a = chars[0], b = chars[1]; if (!(JS7_ISHEX(a) && JS7_ISHEX(b))) return false; *result = (JS7_UNHEX(a) << 4) + JS7_UNHEX(b); return true; } template <typename CharT> static bool Unescape(StringBuffer& sb, const mozilla::Range<const CharT> chars) { /* * NB: use signed integers for length/index to allow simple length * comparisons without unsigned-underflow hazards. */ static_assert(JSString::MAX_LENGTH <= INT_MAX, "String length must fit in a signed integer"); int length = AssertedCast<int>(chars.length()); /* * Note that the spec algorithm has been optimized to avoid building * a string in the case where no escapes are present. */ /* Step 4. */ int k = 0; bool building = false; /* Step 5. */ while (k < length) { /* Step 6. */ char16_t c = chars[k]; /* Step 7. */ if (c != '%') goto step_18; /* Step 8. */ if (k > length - 6) goto step_14; /* Step 9. */ if (chars[k + 1] != 'u') goto step_14; #define ENSURE_BUILDING \ do { \ if (!building) { \ building = true; \ if (!sb.reserve(length)) \ return false; \ sb.infallibleAppend(chars.begin().get(), k); \ } \ } while(false); /* Step 10-13. */ if (Unhex4(chars.begin() + k + 2, &c)) { ENSURE_BUILDING; k += 5; goto step_18; } step_14: /* Step 14. */ if (k > length - 3) goto step_18; /* Step 15-17. */ if (Unhex2(chars.begin() + k + 1, &c)) { ENSURE_BUILDING; k += 2; } step_18: if (building && !sb.append(c)) return false; /* Step 19. */ k += 1; } return true; #undef ENSURE_BUILDING } /* ES5 B.2.2 */ static bool str_unescape(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); /* Step 1. */ RootedLinearString str(cx, ArgToRootedString(cx, args, 0)); if (!str) return false; /* Step 3. */ StringBuffer sb(cx); if (str->hasTwoByteChars() && !sb.ensureTwoByteChars()) return false; if (str->hasLatin1Chars()) { AutoCheckCannotGC nogc; if (!Unescape(sb, str->latin1Range(nogc))) return false; } else { AutoCheckCannotGC nogc; if (!Unescape(sb, str->twoByteRange(nogc))) return false; } JSLinearString* result; if (!sb.empty()) { result = sb.finishString(); if (!result) return false; } else { result = str; } args.rval().setString(result); return true; } #if JS_HAS_UNEVAL static bool str_uneval(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); JSString* str = ValueToSource(cx, args.get(0)); if (!str) return false; args.rval().setString(str); return true; } #endif static const JSFunctionSpec string_functions[] = { JS_FN(js_escape_str, str_escape, 1, JSPROP_RESOLVING), JS_FN(js_unescape_str, str_unescape, 1, JSPROP_RESOLVING), #if JS_HAS_UNEVAL JS_FN(js_uneval_str, str_uneval, 1, JSPROP_RESOLVING), #endif JS_FN(js_decodeURI_str, str_decodeURI, 1, JSPROP_RESOLVING), JS_FN(js_encodeURI_str, str_encodeURI, 1, JSPROP_RESOLVING), JS_FN(js_decodeURIComponent_str, str_decodeURI_Component, 1, JSPROP_RESOLVING), JS_FN(js_encodeURIComponent_str, str_encodeURI_Component, 1, JSPROP_RESOLVING), JS_FS_END }; static const unsigned STRING_ELEMENT_ATTRS = JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT; static bool str_enumerate(JSContext* cx, HandleObject obj) { RootedString str(cx, obj->as<StringObject>().unbox()); RootedValue value(cx); for (size_t i = 0, length = str->length(); i < length; i++) { JSString* str1 = NewDependentString(cx, str, i, 1); if (!str1) return false; value.setString(str1); if (!DefineElement(cx, obj, i, value, nullptr, nullptr, STRING_ELEMENT_ATTRS | JSPROP_RESOLVING)) { return false; } } return true; } static bool str_mayResolve(const JSAtomState&, jsid id, JSObject*) { // str_resolve ignores non-integer ids. return JSID_IS_INT(id); } static bool str_resolve(JSContext* cx, HandleObject obj, HandleId id, bool* resolvedp) { if (!JSID_IS_INT(id)) return true; RootedString str(cx, obj->as<StringObject>().unbox()); int32_t slot = JSID_TO_INT(id); if ((size_t)slot < str->length()) { JSString* str1 = cx->staticStrings().getUnitStringForElement(cx, str, size_t(slot)); if (!str1) return false; RootedValue value(cx, StringValue(str1)); if (!DefineElement(cx, obj, uint32_t(slot), value, nullptr, nullptr, STRING_ELEMENT_ATTRS | JSPROP_RESOLVING)) { return false; } *resolvedp = true; } return true; } static const ClassOps StringObjectClassOps = { nullptr, /* addProperty */ nullptr, /* delProperty */ nullptr, /* getProperty */ nullptr, /* setProperty */ str_enumerate, str_resolve, str_mayResolve }; const Class StringObject::class_ = { js_String_str, JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) | JSCLASS_HAS_CACHED_PROTO(JSProto_String), &StringObjectClassOps }; /* * Perform the initial |RequireObjectCoercible(thisv)| and |ToString(thisv)| * from nearly all String.prototype.* functions. */ static MOZ_ALWAYS_INLINE JSString* ToStringForStringFunction(JSContext* cx, HandleValue thisv) { JS_CHECK_RECURSION(cx, return nullptr); if (thisv.isString()) return thisv.toString(); if (thisv.isObject()) { RootedObject obj(cx, &thisv.toObject()); if (obj->is<StringObject>()) { StringObject* nobj = &obj->as<StringObject>(); Rooted<jsid> id(cx, NameToId(cx->names().toString)); if (ClassMethodIsNative(cx, nobj, &StringObject::class_, id, str_toString)) return nobj->unbox(); } } else if (thisv.isNullOrUndefined()) { JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_CANT_CONVERT_TO, thisv.isNull() ? "null" : "undefined", "object"); return nullptr; } return ToStringSlow<CanGC>(cx, thisv); } MOZ_ALWAYS_INLINE bool IsString(HandleValue v) { return v.isString() || (v.isObject() && v.toObject().is<StringObject>()); } #if JS_HAS_TOSOURCE MOZ_ALWAYS_INLINE bool str_toSource_impl(JSContext* cx, const CallArgs& args) { MOZ_ASSERT(IsString(args.thisv())); Rooted<JSString*> str(cx, ToString<CanGC>(cx, args.thisv())); if (!str) return false; str = QuoteString(cx, str, '"'); if (!str) return false; StringBuffer sb(cx); if (!sb.append("(new String(") || !sb.append(str) || !sb.append("))")) return false; str = sb.finishString(); if (!str) return false; args.rval().setString(str); return true; } static bool str_toSource(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); return CallNonGenericMethod<IsString, str_toSource_impl>(cx, args); } #endif /* JS_HAS_TOSOURCE */ MOZ_ALWAYS_INLINE bool str_toString_impl(JSContext* cx, const CallArgs& args) { MOZ_ASSERT(IsString(args.thisv())); args.rval().setString(args.thisv().isString() ? args.thisv().toString() : args.thisv().toObject().as<StringObject>().unbox()); return true; } bool js::str_toString(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); return CallNonGenericMethod<IsString, str_toString_impl>(cx, args); } /* * Java-like string native methods. */ JSString* js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, int32_t lengthInt) { MOZ_ASSERT(0 <= beginInt); MOZ_ASSERT(0 <= lengthInt); MOZ_ASSERT(uint32_t(beginInt) <= str->length()); MOZ_ASSERT(uint32_t(lengthInt) <= str->length() - beginInt); uint32_t begin = beginInt; uint32_t len = lengthInt; /* * Optimization for one level deep ropes. * This is common for the following pattern: * * while() { * text = text.substr(0, x) + "bla" + text.substr(x) * test.charCodeAt(x + 1) * } */ if (str->isRope()) { JSRope* rope = &str->asRope(); /* Substring is totally in leftChild of rope. */ if (begin + len <= rope->leftChild()->length()) return NewDependentString(cx, rope->leftChild(), begin, len); /* Substring is totally in rightChild of rope. */ if (begin >= rope->leftChild()->length()) { begin -= rope->leftChild()->length(); return NewDependentString(cx, rope->rightChild(), begin, len); } /* * Requested substring is partly in the left and partly in right child. * Create a rope of substrings for both childs. */ MOZ_ASSERT(begin < rope->leftChild()->length() && begin + len > rope->leftChild()->length()); size_t lhsLength = rope->leftChild()->length() - begin; size_t rhsLength = begin + len - rope->leftChild()->length(); Rooted<JSRope*> ropeRoot(cx, rope); RootedString lhs(cx, NewDependentString(cx, ropeRoot->leftChild(), begin, lhsLength)); if (!lhs) return nullptr; RootedString rhs(cx, NewDependentString(cx, ropeRoot->rightChild(), 0, rhsLength)); if (!rhs) return nullptr; return JSRope::new_<CanGC>(cx, lhs, rhs, len); } return NewDependentString(cx, str, begin, len); } template <typename CharT> static JSString* ToLowerCase(JSContext* cx, JSLinearString* str) { // Unlike toUpperCase, toLowerCase has the nice invariant that if the input // is a Latin1 string, the output is also a Latin1 string. UniquePtr<CharT[], JS::FreePolicy> newChars; size_t length = str->length(); { AutoCheckCannotGC nogc; const CharT* chars = str->chars<CharT>(nogc); // Look for the first upper case character. size_t i = 0; for (; i < length; i++) { char16_t c = chars[i]; if (!IsSame<CharT, Latin1Char>::value) { if (unicode::IsLeadSurrogate(c) && i + 1 < length) { char16_t trail = chars[i + 1]; if (unicode::IsTrailSurrogate(trail)) { if (unicode::CanLowerCaseNonBMP(c, trail)) break; i++; continue; } } } if (unicode::CanLowerCase(c)) break; } // If all characters are lower case, return the input string. if (i == length) return str; newChars = cx->make_pod_array<CharT>(length + 1); if (!newChars) return nullptr; PodCopy(newChars.get(), chars, i); for (; i < length; i++) { char16_t c = chars[i]; if (!IsSame<CharT, Latin1Char>::value) { if (unicode::IsLeadSurrogate(c) && i + 1 < length) { char16_t trail = chars[i + 1]; if (unicode::IsTrailSurrogate(trail)) { trail = unicode::ToLowerCaseNonBMPTrail(c, trail); newChars[i] = c; newChars[i + 1] = trail; i++; continue; } } } c = unicode::ToLowerCase(c); MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR); newChars[i] = c; } newChars[length] = 0; } JSString* res = NewStringDontDeflate<CanGC>(cx, newChars.get(), length); if (!res) return nullptr; mozilla::Unused << newChars.release(); return res; } static inline bool ToLowerCaseHelper(JSContext* cx, const CallArgs& args) { RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); if (!str) return false; JSLinearString* linear = str->ensureLinear(cx); if (!linear) return false; if (linear->hasLatin1Chars()) str = ToLowerCase<Latin1Char>(cx, linear); else str = ToLowerCase<char16_t>(cx, linear); if (!str) return false; args.rval().setString(str); return true; } bool js::str_toLowerCase(JSContext* cx, unsigned argc, Value* vp) { return ToLowerCaseHelper(cx, CallArgsFromVp(argc, vp)); } bool js::str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); /* * Forcefully ignore the first (or any) argument and return toLowerCase(), * ECMA has reserved that argument, presumably for defining the locale. */ if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToLowerCase) { RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); if (!str) return false; RootedValue result(cx); if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result)) return false; args.rval().set(result); return true; } return ToLowerCaseHelper(cx, args); } template <typename DestChar, typename SrcChar> static void ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t firstLowerCase, size_t length) { MOZ_ASSERT(firstLowerCase < length); for (size_t i = 0; i < firstLowerCase; i++) destChars[i] = srcChars[i]; for (size_t i = firstLowerCase; i < length; i++) { char16_t c = srcChars[i]; if (!IsSame<DestChar, Latin1Char>::value) { if (unicode::IsLeadSurrogate(c) && i + 1 < length) { char16_t trail = srcChars[i + 1]; if (unicode::IsTrailSurrogate(trail)) { trail = unicode::ToUpperCaseNonBMPTrail(c, trail); destChars[i] = c; destChars[i + 1] = trail; i++; continue; } } } c = unicode::ToUpperCase(c); MOZ_ASSERT_IF((IsSame<DestChar, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR); destChars[i] = c; } destChars[length] = '\0'; } template <typename CharT> static JSString* ToUpperCase(JSContext* cx, JSLinearString* str) { typedef UniquePtr<Latin1Char[], JS::FreePolicy> Latin1CharPtr; typedef UniquePtr<char16_t[], JS::FreePolicy> TwoByteCharPtr; mozilla::MaybeOneOf<Latin1CharPtr, TwoByteCharPtr> newChars; size_t length = str->length(); { AutoCheckCannotGC nogc; const CharT* chars = str->chars<CharT>(nogc); // Look for the first lower case character. size_t i = 0; for (; i < length; i++) { char16_t c = chars[i]; if (!IsSame<CharT, Latin1Char>::value) { if (unicode::IsLeadSurrogate(c) && i + 1 < length) { char16_t trail = chars[i + 1]; if (unicode::IsTrailSurrogate(trail)) { if (unicode::CanUpperCaseNonBMP(c, trail)) break; i++; continue; } } } if (unicode::CanUpperCase(c)) break; } // If all characters are upper case, return the input string. if (i == length) return str; // If the string is Latin1, check if it contains the MICRO SIGN (0xb5) // or SMALL LETTER Y WITH DIAERESIS (0xff) character. The corresponding // upper case characters are not in the Latin1 range. bool resultIsLatin1; if (IsSame<CharT, Latin1Char>::value) { resultIsLatin1 = true; for (size_t j = i; j < length; j++) { Latin1Char c = chars[j]; if (c == 0xb5 || c == 0xff) { MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR); resultIsLatin1 = false; break; } else { MOZ_ASSERT(unicode::ToUpperCase(c) <= JSString::MAX_LATIN1_CHAR); } } } else { resultIsLatin1 = false; } if (resultIsLatin1) { Latin1CharPtr buf = cx->make_pod_array<Latin1Char>(length + 1); if (!buf) return nullptr; ToUpperCaseImpl(buf.get(), chars, i, length); newChars.construct<Latin1CharPtr>(Move(buf)); } else { TwoByteCharPtr buf = cx->make_pod_array<char16_t>(length + 1); if (!buf) return nullptr; ToUpperCaseImpl(buf.get(), chars, i, length); newChars.construct<TwoByteCharPtr>(Move(buf)); } } JSString* res; if (newChars.constructed<Latin1CharPtr>()) { res = NewStringDontDeflate<CanGC>(cx, newChars.ref<Latin1CharPtr>().get(), length); if (!res) return nullptr; mozilla::Unused << newChars.ref<Latin1CharPtr>().release(); } else { res = NewStringDontDeflate<CanGC>(cx, newChars.ref<TwoByteCharPtr>().get(), length); if (!res) return nullptr; mozilla::Unused << newChars.ref<TwoByteCharPtr>().release(); } return res; } static bool ToUpperCaseHelper(JSContext* cx, const CallArgs& args) { RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); if (!str) return false; JSLinearString* linear = str->ensureLinear(cx); if (!linear) return false; if (linear->hasLatin1Chars()) str = ToUpperCase<Latin1Char>(cx, linear); else str = ToUpperCase<char16_t>(cx, linear); if (!str) return false; args.rval().setString(str); return true; } bool js::str_toUpperCase(JSContext* cx, unsigned argc, Value* vp) { return ToUpperCaseHelper(cx, CallArgsFromVp(argc, vp)); } bool js::str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); /* * Forcefully ignore the first (or any) argument and return toUpperCase(), * ECMA has reserved that argument, presumably for defining the locale. */ if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToUpperCase) { RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); if (!str) return false; RootedValue result(cx); if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result)) return false; args.rval().set(result); return true; } return ToUpperCaseHelper(cx, args); } /* ES6 20140210 draft 21.1.3.12. */ bool js::str_normalize(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); // Steps 1-3. RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); if (!str) return false; // Step 4. UNormalizationMode form; if (!args.hasDefined(0)) { form = UNORM_NFC; } else { // Steps 5-6. RootedLinearString formStr(cx, ArgToRootedString(cx, args, 0)); if (!formStr) return false; // Step 7. if (EqualStrings(formStr, cx->names().NFC)) { form = UNORM_NFC; } else if (EqualStrings(formStr, cx->names().NFD)) { form = UNORM_NFD; } else if (EqualStrings(formStr, cx->names().NFKC)) { form = UNORM_NFKC; } else if (EqualStrings(formStr, cx->names().NFKD)) { form = UNORM_NFKD; } else { JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_INVALID_NORMALIZE_FORM); return false; } } // Step 8. AutoStableStringChars stableChars(cx); if (!str->ensureFlat(cx) || !stableChars.initTwoByte(cx, str)) return false; static const size_t INLINE_CAPACITY = 32; const UChar* srcChars = Char16ToUChar(stableChars.twoByteRange().begin().get()); int32_t srcLen = AssertedCast<int32_t>(str->length()); Vector<char16_t, INLINE_CAPACITY> chars(cx); if (!chars.resize(INLINE_CAPACITY)) return false; UErrorCode status = U_ZERO_ERROR; int32_t size = unorm_normalize(srcChars, srcLen, form, 0, Char16ToUChar(chars.begin()), INLINE_CAPACITY, &status); if (status == U_BUFFER_OVERFLOW_ERROR) { if (!chars.resize(size)) return false; status = U_ZERO_ERROR; #ifdef DEBUG int32_t finalSize = #endif unorm_normalize(srcChars, srcLen, form, 0, Char16ToUChar(chars.begin()), size, &status); MOZ_ASSERT(size == finalSize || U_FAILURE(status), "unorm_normalize behaved inconsistently"); } if (U_FAILURE(status)) return false; JSString* ns = NewStringCopyN<CanGC>(cx, chars.begin(), size); if (!ns) return false; // Step 9. args.rval().setString(ns); return true; } bool js::str_charAt(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); RootedString str(cx); size_t i; if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) { str = args.thisv().toString(); i = size_t(args[0].toInt32()); if (i >= str->length()) goto out_of_range; } else { str = ToStringForStringFunction(cx, args.thisv()); if (!str) return false; double d = 0.0; if (args.length() > 0 && !ToInteger(cx, args[0], &d)) return false; if (d < 0 || str->length() <= d) goto out_of_range; i = size_t(d); } str = cx->staticStrings().getUnitStringForElement(cx, str, i); if (!str) return false; args.rval().setString(str); return true; out_of_range: args.rval().setString(cx->runtime()->emptyString); return true; } bool js::str_charCodeAt_impl(JSContext* cx, HandleString string, HandleValue index, MutableHandleValue res) { RootedString str(cx); size_t i; if (index.isInt32()) { i = index.toInt32(); if (i >= string->length()) goto out_of_range; } else { double d = 0.0; if (!ToInteger(cx, index, &d)) return false; // check whether d is negative as size_t is unsigned if (d < 0 || string->length() <= d ) goto out_of_range; i = size_t(d); } char16_t c; if (!string->getChar(cx, i , &c)) return false; res.setInt32(c); return true; out_of_range: res.setNaN(); return true; } bool js::str_charCodeAt(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); RootedString str(cx); RootedValue index(cx); if (args.thisv().isString()) { str = args.thisv().toString(); } else { str = ToStringForStringFunction(cx, args.thisv()); if (!str) return false; } if (args.length() != 0) index = args[0]; else index.setInt32(0); return js::str_charCodeAt_impl(cx, str, index, args.rval()); } /* * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen. * The patlen argument must be positive and no greater than sBMHPatLenMax. * * Return the index of pat in text, or -1 if not found. */ static const uint32_t sBMHCharSetSize = 256; /* ISO-Latin-1 */ static const uint32_t sBMHPatLenMax = 255; /* skip table element is uint8_t */ static const int sBMHBadPattern = -2; /* return value if pat is not ISO-Latin-1 */ template <typename TextChar, typename PatChar> static int BoyerMooreHorspool(const TextChar* text, uint32_t textLen, const PatChar* pat, uint32_t patLen) { MOZ_ASSERT(0 < patLen && patLen <= sBMHPatLenMax); uint8_t skip[sBMHCharSetSize]; for (uint32_t i = 0; i < sBMHCharSetSize; i++) skip[i] = uint8_t(patLen); uint32_t patLast = patLen - 1; for (uint32_t i = 0; i < patLast; i++) { char16_t c = pat[i]; if (c >= sBMHCharSetSize) return sBMHBadPattern; skip[c] = uint8_t(patLast - i); } for (uint32_t k = patLast; k < textLen; ) { for (uint32_t i = k, j = patLast; ; i--, j--) { if (text[i] != pat[j]) break; if (j == 0) return static_cast<int>(i); /* safe: max string size */ } char16_t c = text[k]; k += (c >= sBMHCharSetSize) ? patLen : skip[c]; } return -1; } template <typename TextChar, typename PatChar> struct MemCmp { typedef uint32_t Extent; static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar*, uint32_t patLen) { return (patLen - 1) * sizeof(PatChar); } static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t, Extent extent) { MOZ_ASSERT(sizeof(TextChar) == sizeof(PatChar)); return memcmp(p, t, extent) == 0; } }; template <typename TextChar, typename PatChar> struct ManualCmp { typedef const PatChar* Extent; static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar* pat, uint32_t patLen) { return pat + patLen; } static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t, Extent extent) { for (; p != extent; ++p, ++t) { if (*p != *t) return false; } return true; } }; template <typename TextChar, typename PatChar> static const TextChar* FirstCharMatcherUnrolled(const TextChar* text, uint32_t n, const PatChar pat) { const TextChar* textend = text + n; const TextChar* t = text; switch ((textend - t) & 7) { case 0: if (*t++ == pat) return t - 1; MOZ_FALLTHROUGH; case 7: if (*t++ == pat) return t - 1; MOZ_FALLTHROUGH; case 6: if (*t++ == pat) return t - 1; MOZ_FALLTHROUGH; case 5: if (*t++ == pat) return t - 1; MOZ_FALLTHROUGH; case 4: if (*t++ == pat) return t - 1; MOZ_FALLTHROUGH; case 3: if (*t++ == pat) return t - 1; MOZ_FALLTHROUGH; case 2: if (*t++ == pat) return t - 1; MOZ_FALLTHROUGH; case 1: if (*t++ == pat) return t - 1; } while (textend != t) { if (t[0] == pat) return t; if (t[1] == pat) return t + 1; if (t[2] == pat) return t + 2; if (t[3] == pat) return t + 3; if (t[4] == pat) return t + 4; if (t[5] == pat) return t + 5; if (t[6] == pat) return t + 6; if (t[7] == pat) return t + 7; t += 8; } return nullptr; } static const char* FirstCharMatcher8bit(const char* text, uint32_t n, const char pat) { #if defined(__clang__) return FirstCharMatcherUnrolled<char, char>(text, n, pat); #else return reinterpret_cast<const char*>(memchr(text, pat, n)); #endif } static const char16_t* FirstCharMatcher16bit(const char16_t* text, uint32_t n, const char16_t pat) { #if defined(XP_DARWIN) || defined(XP_WIN) /* * Performance of memchr is horrible in OSX. Windows is better, * but it is still better to use UnrolledMatcher. */ return FirstCharMatcherUnrolled<char16_t, char16_t>(text, n, pat); #else /* * For linux the best performance is obtained by slightly hacking memchr. * memchr works only on 8bit char but char16_t is 16bit. So we treat char16_t * in blocks of 8bit and use memchr. */ const char* text8 = (const char*) text; const char* pat8 = reinterpret_cast<const char*>(&pat); MOZ_ASSERT(n < UINT32_MAX/2); n *= 2; uint32_t i = 0; while (i < n) { /* Find the first 8 bits of 16bit character in text. */ const char* pos8 = FirstCharMatcher8bit(text8 + i, n - i, pat8[0]); if (pos8 == nullptr) return nullptr; i = static_cast<uint32_t>(pos8 - text8); /* Incorrect match if it matches the last 8 bits of 16bit char. */ if (i % 2 != 0) { i++; continue; } /* Test if last 8 bits match last 8 bits of 16bit char. */ if (pat8[1] == text8[i + 1]) return (text + (i/2)); i += 2; } return nullptr; #endif } template <class InnerMatch, typename TextChar, typename PatChar> static int Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat, uint32_t patlen) { const typename InnerMatch::Extent extent = InnerMatch::computeExtent(pat, patlen); uint32_t i = 0; uint32_t n = textlen - patlen + 1; while (i < n) { const TextChar* pos; if (sizeof(TextChar) == 2 && sizeof(PatChar) == 2) pos = (TextChar*) FirstCharMatcher16bit((char16_t*)text + i, n - i, pat[0]); else if (sizeof(TextChar) == 1 && sizeof(PatChar) == 1) pos = (TextChar*) FirstCharMatcher8bit((char*) text + i, n - i, pat[0]); else pos = (TextChar*) FirstCharMatcherUnrolled<TextChar, PatChar>(text + i, n - i, pat[0]); if (pos == nullptr) return -1; i = static_cast<uint32_t>(pos - text); if (InnerMatch::match(pat + 1, text + i + 1, extent)) return i; i += 1; } return -1; } template <typename TextChar, typename PatChar> static MOZ_ALWAYS_INLINE int StringMatch(const TextChar* text, uint32_t textLen, const PatChar* pat, uint32_t patLen) { if (patLen == 0) return 0; if (textLen < patLen) return -1; #if defined(__i386__) || defined(_M_IX86) || defined(__i386) /* * Given enough registers, the unrolled loop below is faster than the * following loop. 32-bit x86 does not have enough registers. */ if (patLen == 1) { const PatChar p0 = *pat; const TextChar* end = text + textLen; for (const TextChar* c = text; c != end; ++c) { if (*c == p0) return c - text; } return -1; } #endif /* * If the text or pattern string is short, BMH will be more expensive than * the basic linear scan due to initialization cost and a more complex loop * body. While the correct threshold is input-dependent, we can make a few * conservative observations: * - When |textLen| is "big enough", the initialization time will be * proportionally small, so the worst-case slowdown is minimized. * - When |patLen| is "too small", even the best case for BMH will be * slower than a simple scan for large |textLen| due to the more complex * loop body of BMH. * From this, the values for "big enough" and "too small" are determined * empirically. See bug 526348. */ if (textLen >= 512 && patLen >= 11 && patLen <= sBMHPatLenMax) { int index = BoyerMooreHorspool(text, textLen, pat, patLen); if (index != sBMHBadPattern) return index; } /* * For big patterns with large potential overlap we want the SIMD-optimized * speed of memcmp. For small patterns, a simple loop is faster. We also can't * use memcmp if one of the strings is TwoByte and the other is Latin1. * * FIXME: Linux memcmp performance is sad and the manual loop is faster. */ return #if !defined(__linux__) (patLen > 128 && IsSame<TextChar, PatChar>::value) ? Matcher<MemCmp<TextChar, PatChar>, TextChar, PatChar>(text, textLen, pat, patLen) : #endif Matcher<ManualCmp<TextChar, PatChar>, TextChar, PatChar>(text, textLen, pat, patLen); } static int32_t StringMatch(JSLinearString* text, JSLinearString* pat, uint32_t start = 0) { MOZ_ASSERT(start <= text->length()); uint32_t textLen = text->length() - start; uint32_t patLen = pat->length(); int match; AutoCheckCannotGC nogc; if (text->hasLatin1Chars()) { const Latin1Char* textChars = text->latin1Chars(nogc) + start; if (pat->hasLatin1Chars()) match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen); else match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen); } else { const char16_t* textChars = text->twoByteChars(nogc) + start; if (pat->hasLatin1Chars()) match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen); else match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen); } return (match == -1) ? -1 : start + match; } static const size_t sRopeMatchThresholdRatioLog2 = 5; bool js::StringHasPattern(JSLinearString* text, const char16_t* pat, uint32_t patLen) { AutoCheckCannotGC nogc; return text->hasLatin1Chars() ? StringMatch(text->latin1Chars(nogc), text->length(), pat, patLen) != -1 : StringMatch(text->twoByteChars(nogc), text->length(), pat, patLen) != -1; } int js::StringFindPattern(JSLinearString* text, JSLinearString* pat, size_t start) { return StringMatch(text, pat, start); } // When an algorithm does not need a string represented as a single linear // array of characters, this range utility may be used to traverse the string a // sequence of linear arrays of characters. This avoids flattening ropes. class StringSegmentRange { // If malloc() shows up in any profiles from this vector, we can add a new // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx. Rooted<StringVector> stack; RootedLinearString cur; bool settle(JSString* str) { while (str->isRope()) { JSRope& rope = str->asRope(); if (!stack.append(rope.rightChild())) return false; str = rope.leftChild(); } cur = &str->asLinear(); return true; } public: explicit StringSegmentRange(JSContext* cx) : stack(cx, StringVector(cx)), cur(cx) {} MOZ_MUST_USE bool init(JSString* str) { MOZ_ASSERT(stack.empty()); return settle(str); } bool empty() const { return cur == nullptr; } JSLinearString* front() const { MOZ_ASSERT(!cur->isRope()); return cur; } MOZ_MUST_USE bool popFront() { MOZ_ASSERT(!empty()); if (stack.empty()) { cur = nullptr; return true; } return settle(stack.popCopy()); } }; typedef Vector<JSLinearString*, 16, SystemAllocPolicy> LinearStringVector; template <typename TextChar, typename PatChar> static int RopeMatchImpl(const AutoCheckCannotGC& nogc, LinearStringVector& strings, const PatChar* pat, size_t patLen) { /* Absolute offset from the beginning of the logical text string. */ int pos = 0; for (JSLinearString** outerp = strings.begin(); outerp != strings.end(); ++outerp) { /* Try to find a match within 'outer'. */ JSLinearString* outer = *outerp; const TextChar* chars = outer->chars<TextChar>(nogc); size_t len = outer->length(); int matchResult = StringMatch(chars, len, pat, patLen); if (matchResult != -1) { /* Matched! */ return pos + matchResult; } /* Try to find a match starting in 'outer' and running into other nodes. */ const TextChar* const text = chars + (patLen > len ? 0 : len - patLen + 1); const TextChar* const textend = chars + len; const PatChar p0 = *pat; const PatChar* const p1 = pat + 1; const PatChar* const patend = pat + patLen; for (const TextChar* t = text; t != textend; ) { if (*t++ != p0) continue; JSLinearString** innerp = outerp; const TextChar* ttend = textend; const TextChar* tt = t; for (const PatChar* pp = p1; pp != patend; ++pp, ++tt) { while (tt == ttend) { if (++innerp == strings.end()) return -1; JSLinearString* inner = *innerp; tt = inner->chars<TextChar>(nogc); ttend = tt + inner->length(); } if (*pp != *tt) goto break_continue; } /* Matched! */ return pos + (t - chars) - 1; /* -1 because of *t++ above */ break_continue:; } pos += len; } return -1; } /* * RopeMatch takes the text to search and the pattern to search for in the text. * RopeMatch returns false on OOM and otherwise returns the match index through * the 'match' outparam (-1 for not found). */ static bool RopeMatch(JSContext* cx, JSRope* text, JSLinearString* pat, int* match) { uint32_t patLen = pat->length(); if (patLen == 0) { *match = 0; return true; } if (text->length() < patLen) { *match = -1; return true; } /* * List of leaf nodes in the rope. If we run out of memory when trying to * append to this list, we can still fall back to StringMatch, so use the * system allocator so we don't report OOM in that case. */ LinearStringVector strings; /* * We don't want to do rope matching if there is a poor node-to-char ratio, * since this means spending a lot of time in the match loop below. We also * need to build the list of leaf nodes. Do both here: iterate over the * nodes so long as there are not too many. * * We also don't use rope matching if the rope contains both Latin1 and * TwoByte nodes, to simplify the match algorithm. */ { size_t threshold = text->length() >> sRopeMatchThresholdRatioLog2; StringSegmentRange r(cx); if (!r.init(text)) return false; bool textIsLatin1 = text->hasLatin1Chars(); while (!r.empty()) { if (threshold-- == 0 || r.front()->hasLatin1Chars() != textIsLatin1 || !strings.append(r.front())) { JSLinearString* linear = text->ensureLinear(cx); if (!linear) return false; *match = StringMatch(linear, pat); return true; } if (!r.popFront()) return false; } } AutoCheckCannotGC nogc; if (text->hasLatin1Chars()) { if (pat->hasLatin1Chars()) *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->latin1Chars(nogc), patLen); else *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->twoByteChars(nogc), patLen); } else { if (pat->hasLatin1Chars()) *match = RopeMatchImpl<char16_t>(nogc, strings, pat->latin1Chars(nogc), patLen); else *match = RopeMatchImpl<char16_t>(nogc, strings, pat->twoByteChars(nogc), patLen); } return true; } /* ES6 2015 ST 21.1.3.7 String.prototype.includes */ bool js::str_includes(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); // Steps 1, 2, and 3 RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); if (!str) return false; // Steps 4 and 5 bool isRegExp; if (!IsRegExp(cx, args.get(0), &isRegExp)) return false; // Step 6 if (isRegExp) { JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_INVALID_ARG_TYPE, "first", "", "Regular Expression"); return false; } // Steps 7 and 8 RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0)); if (!searchStr) return false; // Steps 9 and 10 uint32_t pos = 0; if (args.hasDefined(1)) { if (args[1].isInt32()) { int i = args[1].toInt32(); pos = (i < 0) ? 0U : uint32_t(i); } else { double d; if (!ToInteger(cx, args[1], &d)) return false; pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX))); } } // Step 11 uint32_t textLen = str->length(); // Step 12 uint32_t start = Min(Max(pos, 0U), textLen); // Steps 13 and 14 JSLinearString* text = str->ensureLinear(cx); if (!text) return false; args.rval().setBoolean(StringMatch(text, searchStr, start) != -1); return true; } /* ES6 draft <RC4 String.prototype.contains for compatibility */ bool js::str_contains(JSContext* cx, unsigned argc, Value* vp) { return js::str_includes(cx, argc, vp); } /* ES6 20120927 draft 15.5.4.7. */ bool js::str_indexOf(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); // Steps 1, 2, and 3 RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); if (!str) return false; // Steps 4 and 5 RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0)); if (!searchStr) return false; // Steps 6 and 7 uint32_t pos = 0; if (args.hasDefined(1)) { if (args[1].isInt32()) { int i = args[1].toInt32(); pos = (i < 0) ? 0U : uint32_t(i); } else { double d; if (!ToInteger(cx, args[1], &d)) return false; pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX))); } } // Step 8 uint32_t textLen = str->length(); // Step 9 uint32_t start = Min(Max(pos, 0U), textLen); // Steps 10 and 11 JSLinearString* text = str->ensureLinear(cx); if (!text) return false; args.rval().setInt32(StringMatch(text, searchStr, start)); return true; } template <typename TextChar, typename PatChar> static int32_t LastIndexOfImpl(const TextChar* text, size_t textLen, const PatChar* pat, size_t patLen, size_t start) { MOZ_ASSERT(patLen > 0); MOZ_ASSERT(patLen <= textLen); MOZ_ASSERT(start <= textLen - patLen); const PatChar p0 = *pat; const PatChar* patNext = pat + 1; const PatChar* patEnd = pat + patLen; for (const TextChar* t = text + start; t >= text; --t) { if (*t == p0) { const TextChar* t1 = t + 1; for (const PatChar* p1 = patNext; p1 < patEnd; ++p1, ++t1) { if (*t1 != *p1) goto break_continue; } return static_cast<int32_t>(t - text); } break_continue:; } return -1; } // ES2017 draft rev 6859bb9ccaea9c6ede81d71e5320e3833b92cb3e // 21.1.3.9 String.prototype.lastIndexOf ( searchString [ , position ] ) bool js::str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); // Steps 1-2. RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); if (!str) return false; // Step 3. RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0)); if (!searchStr) return false; // Step 6. size_t len = str->length(); // Step 8. size_t searchLen = searchStr->length(); // Steps 4-5, 7. int start = len - searchLen; // Start searching here if (args.hasDefined(1)) { if (args[1].isInt32()) { int i = args[1].toInt32(); if (i <= 0) start = 0; else if (i < start) start = i; } else { double d; if (!ToNumber(cx, args[1], &d)) return false; if (!IsNaN(d)) { d = JS::ToInteger(d); if (d <= 0) start = 0; else if (d < start) start = int(d); } } } if (searchLen > len) { args.rval().setInt32(-1); return true; } if (searchLen == 0) { args.rval().setInt32(start); return true; } MOZ_ASSERT(0 <= start && size_t(start) < len); JSLinearString* text = str->ensureLinear(cx); if (!text) return false; // Step 9. int32_t res; AutoCheckCannotGC nogc; if (text->hasLatin1Chars()) { const Latin1Char* textChars = text->latin1Chars(nogc); if (searchStr->hasLatin1Chars()) res = LastIndexOfImpl(textChars, len, searchStr->latin1Chars(nogc), searchLen, start); else res = LastIndexOfImpl(textChars, len, searchStr->twoByteChars(nogc), searchLen, start); } else { const char16_t* textChars = text->twoByteChars(nogc); if (searchStr->hasLatin1Chars()) res = LastIndexOfImpl(textChars, len, searchStr->latin1Chars(nogc), searchLen, start); else res = LastIndexOfImpl(textChars, len, searchStr->twoByteChars(nogc), searchLen, start); } args.rval().setInt32(res); return true; } bool js::HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start) { MOZ_ASSERT(start + pat->length() <= text->length()); size_t patLen = pat->length(); AutoCheckCannotGC nogc; if (text->hasLatin1Chars()) { const Latin1Char* textChars = text->latin1Chars(nogc) + start; if (pat->hasLatin1Chars()) return PodEqual(textChars, pat->latin1Chars(nogc), patLen); return EqualChars(textChars, pat->twoByteChars(nogc), patLen); } const char16_t* textChars = text->twoByteChars(nogc) + start; if (pat->hasTwoByteChars()) return PodEqual(textChars, pat->twoByteChars(nogc), patLen); return EqualChars(pat->latin1Chars(nogc), textChars, patLen); } /* ES6 draft rc3 21.1.3.18. */ bool js::str_startsWith(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); // Steps 1, 2, and 3 RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); if (!str) return false; // Steps 4 and 5 bool isRegExp; if (!IsRegExp(cx, args.get(0), &isRegExp)) return false; // Step 6 if (isRegExp) { JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_INVALID_ARG_TYPE, "first", "", "Regular Expression"); return false; } // Steps 7 and 8 RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0)); if (!searchStr) return false; // Steps 9 and 10 uint32_t pos = 0; if (args.hasDefined(1)) { if (args[1].isInt32()) { int i = args[1].toInt32(); pos = (i < 0) ? 0U : uint32_t(i); } else { double d; if (!ToInteger(cx, args[1], &d)) return false; pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX))); } } // Step 11 uint32_t textLen = str->length(); // Step 12 uint32_t start = Min(Max(pos, 0U), textLen); // Step 13 uint32_t searchLen = searchStr->length(); // Step 14 if (searchLen + start < searchLen || searchLen + start > textLen) { args.rval().setBoolean(false); return true; } // Steps 15 and 16 JSLinearString* text = str->ensureLinear(cx); if (!text) return false; args.rval().setBoolean(HasSubstringAt(text, searchStr, start)); return true; } /* ES6 draft rc3 21.1.3.6. */ bool js::str_endsWith(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); // Steps 1, 2, and 3 RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); if (!str) return false; // Steps 4 and 5 bool isRegExp; if (!IsRegExp(cx, args.get(0), &isRegExp)) return false; // Step 6 if (isRegExp) { JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_INVALID_ARG_TYPE, "first", "", "Regular Expression"); return false; } // Steps 7 and 8 RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0)); if (!searchStr) return false; // Step 9 uint32_t textLen = str->length(); // Steps 10 and 11 uint32_t pos = textLen; if (args.hasDefined(1)) { if (args[1].isInt32()) { int i = args[1].toInt32(); pos = (i < 0) ? 0U : uint32_t(i); } else { double d; if (!ToInteger(cx, args[1], &d)) return false; pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX))); } } // Step 12 uint32_t end = Min(Max(pos, 0U), textLen); // Step 13 uint32_t searchLen = searchStr->length(); // Step 15 (reordered) if (searchLen > end) { args.rval().setBoolean(false); return true; } // Step 14 uint32_t start = end - searchLen; // Steps 16 and 17 JSLinearString* text = str->ensureLinear(cx); if (!text) return false; args.rval().setBoolean(HasSubstringAt(text, searchStr, start)); return true; } template <typename CharT> static void TrimString(const CharT* chars, bool trimLeft, bool trimRight, size_t length, size_t* pBegin, size_t* pEnd) { size_t begin = 0, end = length; if (trimLeft) { while (begin < length && unicode::IsSpace(chars[begin])) ++begin; } if (trimRight) { while (end > begin && unicode::IsSpace(chars[end - 1])) --end; } *pBegin = begin; *pEnd = end; } static bool TrimString(JSContext* cx, const CallArgs& args, bool trimLeft, bool trimRight) { RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); if (!str) return false; JSLinearString* linear = str->ensureLinear(cx); if (!linear) return false; size_t length = linear->length(); size_t begin, end; if (linear->hasLatin1Chars()) { AutoCheckCannotGC nogc; TrimString(linear->latin1Chars(nogc), trimLeft, trimRight, length, &begin, &end); } else { AutoCheckCannotGC nogc; TrimString(linear->twoByteChars(nogc), trimLeft, trimRight, length, &begin, &end); } str = NewDependentString(cx, str, begin, end - begin); if (!str) return false; args.rval().setString(str); return true; } bool js::str_trim(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); return TrimString(cx, args, true, true); } bool js::str_trimLeft(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); return TrimString(cx, args, true, false); } bool js::str_trimRight(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); return TrimString(cx, args, false, true); } // Utility for building a rope (lazy concatenation) of strings. class RopeBuilder { JSContext* cx; RootedString res; RopeBuilder(const RopeBuilder& other) = delete; void operator=(const RopeBuilder& other) = delete; public: explicit RopeBuilder(JSContext* cx) : cx(cx), res(cx, cx->runtime()->emptyString) {} inline bool append(HandleString str) { res = ConcatStrings<CanGC>(cx, res, str); return !!res; } inline JSString* result() { return res; } }; namespace { template <typename CharT> static uint32_t FindDollarIndex(const CharT* chars, size_t length) { if (const CharT* p = js_strchr_limit(chars, '$', chars + length)) { uint32_t dollarIndex = p - chars; MOZ_ASSERT(dollarIndex < length); return dollarIndex; } return UINT32_MAX; } } /* anonymous namespace */ static JSString* BuildFlatReplacement(JSContext* cx, HandleString textstr, HandleString repstr, size_t match, size_t patternLength) { RopeBuilder builder(cx); size_t matchEnd = match + patternLength; if (textstr->isRope()) { /* * If we are replacing over a rope, avoid flattening it by iterating * through it, building a new rope. */ StringSegmentRange r(cx); if (!r.init(textstr)) return nullptr; size_t pos = 0; while (!r.empty()) { RootedString str(cx, r.front()); size_t len = str->length(); size_t strEnd = pos + len; if (pos < matchEnd && strEnd > match) { /* * We need to special-case any part of the rope that overlaps * with the replacement string. */ if (match >= pos) { /* * If this part of the rope overlaps with the left side of * the pattern, then it must be the only one to overlap with * the first character in the pattern, so we include the * replacement string here. */ RootedString leftSide(cx, NewDependentString(cx, str, 0, match - pos)); if (!leftSide || !builder.append(leftSide) || !builder.append(repstr)) { return nullptr; } } /* * If str runs off the end of the matched string, append the * last part of str. */ if (strEnd > matchEnd) { RootedString rightSide(cx, NewDependentString(cx, str, matchEnd - pos, strEnd - matchEnd)); if (!rightSide || !builder.append(rightSide)) return nullptr; } } else { if (!builder.append(str)) return nullptr; } pos += str->length(); if (!r.popFront()) return nullptr; } } else { RootedString leftSide(cx, NewDependentString(cx, textstr, 0, match)); if (!leftSide) return nullptr; RootedString rightSide(cx); rightSide = NewDependentString(cx, textstr, match + patternLength, textstr->length() - match - patternLength); if (!rightSide || !builder.append(leftSide) || !builder.append(repstr) || !builder.append(rightSide)) { return nullptr; } } return builder.result(); } template <typename CharT> static bool AppendDollarReplacement(StringBuffer& newReplaceChars, size_t firstDollarIndex, size_t matchStart, size_t matchLimit, JSLinearString* text, const CharT* repChars, size_t repLength) { MOZ_ASSERT(firstDollarIndex < repLength); /* Move the pre-dollar chunk in bulk. */ newReplaceChars.infallibleAppend(repChars, firstDollarIndex); /* Move the rest char-by-char, interpreting dollars as we encounter them. */ const CharT* repLimit = repChars + repLength; for (const CharT* it = repChars + firstDollarIndex; it < repLimit; ++it) { if (*it != '$' || it == repLimit - 1) { if (!newReplaceChars.append(*it)) return false; continue; } switch (*(it + 1)) { case '$': /* Eat one of the dollars. */ if (!newReplaceChars.append(*it)) return false; break; case '&': if (!newReplaceChars.appendSubstring(text, matchStart, matchLimit - matchStart)) return false; break; case '`': if (!newReplaceChars.appendSubstring(text, 0, matchStart)) return false; break; case '\'': if (!newReplaceChars.appendSubstring(text, matchLimit, text->length() - matchLimit)) return false; break; default: /* The dollar we saw was not special (no matter what its mother told it). */ if (!newReplaceChars.append(*it)) return false; continue; } ++it; /* We always eat an extra char in the above switch. */ } return true; } /* * Perform a linear-scan dollar substitution on the replacement text, * constructing a result string that looks like: * * newstring = string[:matchStart] + dollarSub(replaceValue) + string[matchLimit:] */ static JSString* BuildDollarReplacement(JSContext* cx, JSString* textstrArg, JSLinearString* repstr, uint32_t firstDollarIndex, size_t matchStart, size_t patternLength) { RootedLinearString textstr(cx, textstrArg->ensureLinear(cx)); if (!textstr) return nullptr; size_t matchLimit = matchStart + patternLength; /* * Most probably: * * len(newstr) >= len(orig) - len(match) + len(replacement) * * Note that dollar vars _could_ make the resulting text smaller than this. */ StringBuffer newReplaceChars(cx); if (repstr->hasTwoByteChars() && !newReplaceChars.ensureTwoByteChars()) return nullptr; if (!newReplaceChars.reserve(textstr->length() - patternLength + repstr->length())) return nullptr; bool res; if (repstr->hasLatin1Chars()) { AutoCheckCannotGC nogc; res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, matchStart, matchLimit, textstr, repstr->latin1Chars(nogc), repstr->length()); } else { AutoCheckCannotGC nogc; res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, matchStart, matchLimit, textstr, repstr->twoByteChars(nogc), repstr->length()); } if (!res) return nullptr; RootedString leftSide(cx, NewDependentString(cx, textstr, 0, matchStart)); if (!leftSide) return nullptr; RootedString newReplace(cx, newReplaceChars.finishString()); if (!newReplace) return nullptr; MOZ_ASSERT(textstr->length() >= matchLimit); RootedString rightSide(cx, NewDependentString(cx, textstr, matchLimit, textstr->length() - matchLimit)); if (!rightSide) return nullptr; RopeBuilder builder(cx); if (!builder.append(leftSide) || !builder.append(newReplace) || !builder.append(rightSide)) return nullptr; return builder.result(); } template <typename StrChar, typename RepChar> static bool StrFlatReplaceGlobal(JSContext *cx, JSLinearString *str, JSLinearString *pat, JSLinearString *rep, StringBuffer &sb) { MOZ_ASSERT(str->length() > 0); AutoCheckCannotGC nogc; const StrChar *strChars = str->chars<StrChar>(nogc); const RepChar *repChars = rep->chars<RepChar>(nogc); // The pattern is empty, so we interleave the replacement string in-between // each character. if (!pat->length()) { CheckedInt<uint32_t> strLength(str->length()); CheckedInt<uint32_t> repLength(rep->length()); CheckedInt<uint32_t> length = repLength * (strLength - 1) + strLength; if (!length.isValid()) { ReportAllocationOverflow(cx); return false; } if (!sb.reserve(length.value())) return false; for (unsigned i = 0; i < str->length() - 1; ++i, ++strChars) { sb.infallibleAppend(*strChars); sb.infallibleAppend(repChars, rep->length()); } sb.infallibleAppend(*strChars); return true; } // If it's true, we are sure that the result's length is, at least, the same // length as |str->length()|. if (rep->length() >= pat->length()) { if (!sb.reserve(str->length())) return false; } uint32_t start = 0; for (;;) { int match = StringMatch(str, pat, start); if (match < 0) break; if (!sb.append(strChars + start, match - start)) return false; if (!sb.append(repChars, rep->length())) return false; start = match + pat->length(); } if (!sb.append(strChars + start, str->length() - start)) return false; return true; } // This is identical to "str.split(pattern).join(replacement)" except that we // do some deforestation optimization in Ion. JSString * js::str_flat_replace_string(JSContext *cx, HandleString string, HandleString pattern, HandleString replacement) { MOZ_ASSERT(string); MOZ_ASSERT(pattern); MOZ_ASSERT(replacement); if (!string->length()) return string; RootedLinearString linearRepl(cx, replacement->ensureLinear(cx)); if (!linearRepl) return nullptr; RootedLinearString linearPat(cx, pattern->ensureLinear(cx)); if (!linearPat) return nullptr; RootedLinearString linearStr(cx, string->ensureLinear(cx)); if (!linearStr) return nullptr; StringBuffer sb(cx); if (linearStr->hasTwoByteChars()) { if (!sb.ensureTwoByteChars()) return nullptr; if (linearRepl->hasTwoByteChars()) { if (!StrFlatReplaceGlobal<char16_t, char16_t>(cx, linearStr, linearPat, linearRepl, sb)) return nullptr; } else { if (!StrFlatReplaceGlobal<char16_t, Latin1Char>(cx, linearStr, linearPat, linearRepl, sb)) return nullptr; } } else { if (linearRepl->hasTwoByteChars()) { if (!sb.ensureTwoByteChars()) return nullptr; if (!StrFlatReplaceGlobal<Latin1Char, char16_t>(cx, linearStr, linearPat, linearRepl, sb)) return nullptr; } else { if (!StrFlatReplaceGlobal<Latin1Char, Latin1Char>(cx, linearStr, linearPat, linearRepl, sb)) return nullptr; } } JSString *str = sb.finishString(); if (!str) return nullptr; return str; } JSString* js::str_replace_string_raw(JSContext* cx, HandleString string, HandleString pattern, HandleString replacement) { RootedLinearString repl(cx, replacement->ensureLinear(cx)); if (!repl) return nullptr; RootedAtom pat(cx, AtomizeString(cx, pattern)); if (!pat) return nullptr; size_t patternLength = pat->length(); int32_t match; uint32_t dollarIndex; { AutoCheckCannotGC nogc; dollarIndex = repl->hasLatin1Chars() ? FindDollarIndex(repl->latin1Chars(nogc), repl->length()) : FindDollarIndex(repl->twoByteChars(nogc), repl->length()); } /* * |string| could be a rope, so we want to avoid flattening it for as * long as possible. */ if (string->isRope()) { if (!RopeMatch(cx, &string->asRope(), pat, &match)) return nullptr; } else { match = StringMatch(&string->asLinear(), pat, 0); } if (match < 0) return string; if (dollarIndex != UINT32_MAX) return BuildDollarReplacement(cx, string, repl, dollarIndex, match, patternLength); return BuildFlatReplacement(cx, string, repl, match, patternLength); } // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18. static JSObject* SplitHelper(JSContext* cx, HandleLinearString str, uint32_t limit, HandleLinearString sep, HandleObjectGroup group) { size_t strLength = str->length(); size_t sepLength = sep->length(); MOZ_ASSERT(sepLength != 0); // Step 12. if (strLength == 0) { // Step 12.a. int match = StringMatch(str, sep, 0); // Step 12.b. if (match != -1) return NewFullyAllocatedArrayTryUseGroup(cx, group, 0); // Steps 12.c-e. RootedValue v(cx, StringValue(str)); return NewCopiedArrayTryUseGroup(cx, group, v.address(), 1); } // Step 3 (reordered). AutoValueVector splits(cx); // Step 8 (reordered). size_t lastEndIndex = 0; // Step 13. size_t index = 0; // Step 14. while (index != strLength) { // Step 14.a. int match = StringMatch(str, sep, index); // Step 14.b. // // Our match algorithm differs from the spec in that it returns the // next index at which a match happens. If no match happens we're // done. // // But what if the match is at the end of the string (and the string is // not empty)? Per 14.c.i this shouldn't be a match, so we have to // specially exclude it. Thus this case should hold: // // var a = "abc".split(/\b/); // assertEq(a.length, 1); // assertEq(a[0], "abc"); if (match == -1) break; // Step 14.c. size_t endIndex = match + sepLength; // Step 14.c.i. if (endIndex == lastEndIndex) { index++; continue; } // Step 14.c.ii. MOZ_ASSERT(lastEndIndex < endIndex); MOZ_ASSERT(sepLength <= strLength); MOZ_ASSERT(lastEndIndex + sepLength <= endIndex); // Step 14.c.ii.1. size_t subLength = size_t(endIndex - sepLength - lastEndIndex); JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength); // Steps 14.c.ii.2-4. if (!sub || !splits.append(StringValue(sub))) return nullptr; // Step 14.c.ii.5. if (splits.length() == limit) return NewCopiedArrayTryUseGroup(cx, group, splits.begin(), splits.length()); // Step 14.c.ii.6. index = endIndex; // Step 14.c.ii.7. lastEndIndex = index; } // Step 15. JSString* sub = NewDependentString(cx, str, lastEndIndex, strLength - lastEndIndex); // Steps 16-17. if (!sub || !splits.append(StringValue(sub))) return nullptr; // Step 18. return NewCopiedArrayTryUseGroup(cx, group, splits.begin(), splits.length()); } // Fast-path for splitting a string into a character array via split(""). static JSObject* CharSplitHelper(JSContext* cx, HandleLinearString str, uint32_t limit, HandleObjectGroup group) { size_t strLength = str->length(); if (strLength == 0) return NewFullyAllocatedArrayTryUseGroup(cx, group, 0); js::StaticStrings& staticStrings = cx->staticStrings(); uint32_t resultlen = (limit < strLength ? limit : strLength); AutoValueVector splits(cx); if (!splits.reserve(resultlen)) return nullptr; for (size_t i = 0; i < resultlen; ++i) { JSString* sub = staticStrings.getUnitStringForElement(cx, str, i); if (!sub) return nullptr; splits.infallibleAppend(StringValue(sub)); } return NewCopiedArrayTryUseGroup(cx, group, splits.begin(), splits.length()); } // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18. JSObject* js::str_split_string(JSContext* cx, HandleObjectGroup group, HandleString str, HandleString sep, uint32_t limit) { RootedLinearString linearStr(cx, str->ensureLinear(cx)); if (!linearStr) return nullptr; RootedLinearString linearSep(cx, sep->ensureLinear(cx)); if (!linearSep) return nullptr; if (linearSep->length() == 0) return CharSplitHelper(cx, linearStr, limit, group); return SplitHelper(cx, linearStr, limit, linearSep, group); } /* * Python-esque sequence operations. */ bool js::str_concat(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); JSString* str = ToStringForStringFunction(cx, args.thisv()); if (!str) return false; for (unsigned i = 0; i < args.length(); i++) { JSString* argStr = ToString<NoGC>(cx, args[i]); if (!argStr) { RootedString strRoot(cx, str); argStr = ToString<CanGC>(cx, args[i]); if (!argStr) return false; str = strRoot; } JSString* next = ConcatStrings<NoGC>(cx, str, argStr); if (next) { str = next; } else { RootedString strRoot(cx, str), argStrRoot(cx, argStr); str = ConcatStrings<CanGC>(cx, strRoot, argStrRoot); if (!str) return false; } } args.rval().setString(str); return true; } static const JSFunctionSpec string_methods[] = { #if JS_HAS_TOSOURCE JS_FN(js_toSource_str, str_toSource, 0,0), #endif /* Java-like methods. */ JS_FN(js_toString_str, str_toString, 0,0), JS_FN(js_valueOf_str, str_toString, 0,0), JS_FN("toLowerCase", str_toLowerCase, 0,0), JS_FN("toUpperCase", str_toUpperCase, 0,0), JS_INLINABLE_FN("charAt", str_charAt, 1,0, StringCharAt), JS_INLINABLE_FN("charCodeAt", str_charCodeAt, 1,0, StringCharCodeAt), JS_SELF_HOSTED_FN("substring", "String_substring", 2,0), JS_SELF_HOSTED_FN("padStart", "String_pad_start", 2,0), JS_SELF_HOSTED_FN("padEnd", "String_pad_end", 2,0), JS_SELF_HOSTED_FN("codePointAt", "String_codePointAt", 1,0), JS_FN("includes", str_includes, 1,0), JS_FN("contains", str_contains, 1,0), JS_FN("indexOf", str_indexOf, 1,0), JS_FN("lastIndexOf", str_lastIndexOf, 1,0), JS_FN("startsWith", str_startsWith, 1,0), JS_FN("endsWith", str_endsWith, 1,0), JS_FN("trim", str_trim, 0,0), JS_FN("trimLeft", str_trimLeft, 0,0), JS_FN("trimRight", str_trimRight, 0,0), JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0,0), JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0,0), JS_SELF_HOSTED_FN("localeCompare", "String_localeCompare", 1,0), JS_SELF_HOSTED_FN("repeat", "String_repeat", 1,0), JS_FN("normalize", str_normalize, 0,0), /* Perl-ish methods (search is actually Python-esque). */ JS_SELF_HOSTED_FN("match", "String_match", 1,0), JS_SELF_HOSTED_FN("search", "String_search", 1,0), JS_SELF_HOSTED_FN("replace", "String_replace", 2,0), JS_SELF_HOSTED_FN("split", "String_split", 2,0), JS_SELF_HOSTED_FN("substr", "String_substr", 2,0), /* Python-esque sequence methods. */ JS_FN("concat", str_concat, 1,0), JS_SELF_HOSTED_FN("slice", "String_slice", 2,0), /* HTML string methods. */ JS_SELF_HOSTED_FN("bold", "String_bold", 0,0), JS_SELF_HOSTED_FN("italics", "String_italics", 0,0), JS_SELF_HOSTED_FN("fixed", "String_fixed", 0,0), JS_SELF_HOSTED_FN("strike", "String_strike", 0,0), JS_SELF_HOSTED_FN("small", "String_small", 0,0), JS_SELF_HOSTED_FN("big", "String_big", 0,0), JS_SELF_HOSTED_FN("blink", "String_blink", 0,0), JS_SELF_HOSTED_FN("sup", "String_sup", 0,0), JS_SELF_HOSTED_FN("sub", "String_sub", 0,0), JS_SELF_HOSTED_FN("anchor", "String_anchor", 1,0), JS_SELF_HOSTED_FN("link", "String_link", 1,0), JS_SELF_HOSTED_FN("fontcolor","String_fontcolor", 1,0), JS_SELF_HOSTED_FN("fontsize", "String_fontsize", 1,0), JS_SELF_HOSTED_SYM_FN(iterator, "String_iterator", 0,0), JS_FS_END }; // ES6 rev 27 (2014 Aug 24) 21.1.1 bool js::StringConstructor(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); RootedString str(cx); if (args.length() > 0) { if (!args.isConstructing() && args[0].isSymbol()) return js::SymbolDescriptiveString(cx, args[0].toSymbol(), args.rval()); str = ToString<CanGC>(cx, args[0]); if (!str) return false; } else { str = cx->runtime()->emptyString; } if (args.isConstructing()) { RootedObject proto(cx); RootedObject newTarget(cx, &args.newTarget().toObject()); if (!GetPrototypeFromConstructor(cx, newTarget, &proto)) return false; StringObject* strobj = StringObject::create(cx, str, proto); if (!strobj) return false; args.rval().setObject(*strobj); return true; } args.rval().setString(str); return true; } static bool str_fromCharCode_few_args(JSContext* cx, const CallArgs& args) { MOZ_ASSERT(args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE); char16_t chars[JSFatInlineString::MAX_LENGTH_TWO_BYTE]; for (unsigned i = 0; i < args.length(); i++) { uint16_t code; if (!ToUint16(cx, args[i], &code)) return false; chars[i] = char16_t(code); } JSString* str = NewStringCopyN<CanGC>(cx, chars, args.length()); if (!str) return false; args.rval().setString(str); return true; } bool js::str_fromCharCode(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); MOZ_ASSERT(args.length() <= ARGS_LENGTH_MAX); // Optimize the single-char case. if (args.length() == 1) return str_fromCharCode_one_arg(cx, args[0], args.rval()); // Optimize the case where the result will definitely fit in an inline // string (thin or fat) and so we don't need to malloc the chars. (We could // cover some cases where args.length() goes up to // JSFatInlineString::MAX_LENGTH_LATIN1 if we also checked if the chars are // all Latin1, but it doesn't seem worth the effort.) if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE) return str_fromCharCode_few_args(cx, args); char16_t* chars = cx->pod_malloc<char16_t>(args.length() + 1); if (!chars) return false; for (unsigned i = 0; i < args.length(); i++) { uint16_t code; if (!ToUint16(cx, args[i], &code)) { js_free(chars); return false; } chars[i] = char16_t(code); } chars[args.length()] = 0; JSString* str = NewString<CanGC>(cx, chars, args.length()); if (!str) { js_free(chars); return false; } args.rval().setString(str); return true; } static inline bool CodeUnitToString(JSContext* cx, uint16_t ucode, MutableHandleValue rval) { if (StaticStrings::hasUnit(ucode)) { rval.setString(cx->staticStrings().getUnit(ucode)); return true; } char16_t c = char16_t(ucode); JSString* str = NewStringCopyN<CanGC>(cx, &c, 1); if (!str) return false; rval.setString(str); return true; } bool js::str_fromCharCode_one_arg(JSContext* cx, HandleValue code, MutableHandleValue rval) { uint16_t ucode; if (!ToUint16(cx, code, &ucode)) return false; return CodeUnitToString(cx, ucode, rval); } static MOZ_ALWAYS_INLINE bool ToCodePoint(JSContext* cx, HandleValue code, uint32_t* codePoint) { // String.fromCodePoint, Steps 5.a-b. double nextCP; if (!ToNumber(cx, code, &nextCP)) return false; // String.fromCodePoint, Steps 5.c-d. if (JS::ToInteger(nextCP) != nextCP || nextCP < 0 || nextCP > unicode::NonBMPMax) { ToCStringBuf cbuf; if (char* numStr = NumberToCString(cx, &cbuf, nextCP)) JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_NOT_A_CODEPOINT, numStr); return false; } *codePoint = uint32_t(nextCP); return true; } bool js::str_fromCodePoint_one_arg(JSContext* cx, HandleValue code, MutableHandleValue rval) { // Steps 1-4 (omitted). // Steps 5.a-d. uint32_t codePoint; if (!ToCodePoint(cx, code, &codePoint)) return false; // Steps 5.e, 6. if (!unicode::IsSupplementary(codePoint)) return CodeUnitToString(cx, uint16_t(codePoint), rval); char16_t chars[] = { unicode::LeadSurrogate(codePoint), unicode::TrailSurrogate(codePoint) }; JSString* str = NewStringCopyNDontDeflate<CanGC>(cx, chars, 2); if (!str) return false; rval.setString(str); return true; } static bool str_fromCodePoint_few_args(JSContext* cx, const CallArgs& args) { MOZ_ASSERT(args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2); // Steps 1-2 (omitted). // Step 3. char16_t elements[JSFatInlineString::MAX_LENGTH_TWO_BYTE]; // Steps 4-5. unsigned length = 0; for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) { // Steps 5.a-d. uint32_t codePoint; if (!ToCodePoint(cx, args[nextIndex], &codePoint)) return false; // Step 5.e. unicode::UTF16Encode(codePoint, elements, &length); } // Step 6. JSString* str = NewStringCopyN<CanGC>(cx, elements, length); if (!str) return false; args.rval().setString(str); return true; } // ES2017 draft rev 40edb3a95a475c1b251141ac681b8793129d9a6d // 21.1.2.2 String.fromCodePoint(...codePoints) bool js::str_fromCodePoint(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); // Optimize the single code-point case. if (args.length() == 1) return str_fromCodePoint_one_arg(cx, args[0], args.rval()); // Optimize the case where the result will definitely fit in an inline // string (thin or fat) and so we don't need to malloc the chars. (We could // cover some cases where |args.length()| goes up to // JSFatInlineString::MAX_LENGTH_LATIN1 / 2 if we also checked if the chars // are all Latin1, but it doesn't seem worth the effort.) if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2) return str_fromCodePoint_few_args(cx, args); // Steps 1-2 (omitted). // Step 3. static_assert(ARGS_LENGTH_MAX < std::numeric_limits<size_t>::max() / 2, "|args.length() * 2 + 1| does not overflow"); char16_t* elements = cx->pod_malloc<char16_t>(args.length() * 2 + 1); if (!elements) return false; // Steps 4-5. unsigned length = 0; for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) { // Steps 5.a-d. uint32_t codePoint; if (!ToCodePoint(cx, args[nextIndex], &codePoint)) { js_free(elements); return false; } // Step 5.e. unicode::UTF16Encode(codePoint, elements, &length); } elements[length] = 0; // Step 6. JSString* str = NewString<CanGC>(cx, elements, length); if (!str) { js_free(elements); return false; } args.rval().setString(str); return true; } static const JSFunctionSpec string_static_methods[] = { JS_INLINABLE_FN("fromCharCode", js::str_fromCharCode, 1, 0, StringFromCharCode), JS_INLINABLE_FN("fromCodePoint", js::str_fromCodePoint, 1, 0, StringFromCodePoint), JS_SELF_HOSTED_FN("raw", "String_static_raw", 2,0), JS_SELF_HOSTED_FN("substring", "String_static_substring", 3,0), JS_SELF_HOSTED_FN("substr", "String_static_substr", 3,0), JS_SELF_HOSTED_FN("slice", "String_static_slice", 3,0), JS_SELF_HOSTED_FN("match", "String_generic_match", 2,0), JS_SELF_HOSTED_FN("replace", "String_generic_replace", 3,0), JS_SELF_HOSTED_FN("search", "String_generic_search", 2,0), JS_SELF_HOSTED_FN("split", "String_generic_split", 3,0), JS_SELF_HOSTED_FN("toLowerCase", "String_static_toLowerCase", 1,0), JS_SELF_HOSTED_FN("toUpperCase", "String_static_toUpperCase", 1,0), JS_SELF_HOSTED_FN("charAt", "String_static_charAt", 2,0), JS_SELF_HOSTED_FN("charCodeAt", "String_static_charCodeAt", 2,0), JS_SELF_HOSTED_FN("includes", "String_static_includes", 2,0), JS_SELF_HOSTED_FN("indexOf", "String_static_indexOf", 2,0), JS_SELF_HOSTED_FN("lastIndexOf", "String_static_lastIndexOf", 2,0), JS_SELF_HOSTED_FN("startsWith", "String_static_startsWith", 2,0), JS_SELF_HOSTED_FN("endsWith", "String_static_endsWith", 2,0), JS_SELF_HOSTED_FN("trim", "String_static_trim", 1,0), JS_SELF_HOSTED_FN("trimLeft", "String_static_trimLeft", 1,0), JS_SELF_HOSTED_FN("trimRight", "String_static_trimRight", 1,0), JS_SELF_HOSTED_FN("toLocaleLowerCase","String_static_toLocaleLowerCase",1,0), JS_SELF_HOSTED_FN("toLocaleUpperCase","String_static_toLocaleUpperCase",1,0), JS_SELF_HOSTED_FN("normalize", "String_static_normalize", 1,0), JS_SELF_HOSTED_FN("concat", "String_static_concat", 2,0), JS_SELF_HOSTED_FN("localeCompare", "String_static_localeCompare", 2,0), JS_FS_END }; /* static */ Shape* StringObject::assignInitialShape(ExclusiveContext* cx, Handle<StringObject*> obj) { MOZ_ASSERT(obj->empty()); return obj->addDataProperty(cx, cx->names().length, LENGTH_SLOT, JSPROP_PERMANENT | JSPROP_READONLY); } JSObject* js::InitStringClass(JSContext* cx, HandleObject obj) { MOZ_ASSERT(obj->isNative()); Rooted<GlobalObject*> global(cx, &obj->as<GlobalObject>()); Rooted<JSString*> empty(cx, cx->runtime()->emptyString); RootedObject proto(cx, global->createBlankPrototype(cx, &StringObject::class_)); if (!proto || !proto->as<StringObject>().init(cx, empty)) return nullptr; /* Now create the String function. */ RootedFunction ctor(cx); ctor = global->createConstructor(cx, StringConstructor, cx->names().String, 1, AllocKind::FUNCTION, &jit::JitInfo_String); if (!ctor) return nullptr; if (!LinkConstructorAndPrototype(cx, ctor, proto)) return nullptr; if (!DefinePropertiesAndFunctions(cx, proto, nullptr, string_methods) || !DefinePropertiesAndFunctions(cx, ctor, nullptr, string_static_methods)) { return nullptr; } /* * Define escape/unescape, the URI encode/decode functions, and maybe * uneval on the global object. */ if (!JS_DefineFunctions(cx, global, string_functions)) return nullptr; if (!GlobalObject::initBuiltinConstructor(cx, global, JSProto_String, ctor, proto)) return nullptr; return proto; } const char* js::ValueToPrintable(JSContext* cx, const Value& vArg, JSAutoByteString* bytes, bool asSource) { RootedValue v(cx, vArg); JSString* str; if (asSource) str = ValueToSource(cx, v); else str = ToString<CanGC>(cx, v); if (!str) return nullptr; str = QuoteString(cx, str, 0); if (!str) return nullptr; return bytes->encodeLatin1(cx, str); } template <AllowGC allowGC> JSString* js::ToStringSlow(ExclusiveContext* cx, typename MaybeRooted<Value, allowGC>::HandleType arg) { /* As with ToObjectSlow, callers must verify that |arg| isn't a string. */ MOZ_ASSERT(!arg.isString()); Value v = arg; if (!v.isPrimitive()) { if (!cx->shouldBeJSContext() || !allowGC) return nullptr; RootedValue v2(cx, v); if (!ToPrimitive(cx->asJSContext(), JSTYPE_STRING, &v2)) return nullptr; v = v2; } JSString* str; if (v.isString()) { str = v.toString(); } else if (v.isInt32()) { str = Int32ToString<allowGC>(cx, v.toInt32()); } else if (v.isDouble()) { str = NumberToString<allowGC>(cx, v.toDouble()); } else if (v.isBoolean()) { str = BooleanToString(cx, v.toBoolean()); } else if (v.isNull()) { str = cx->names().null; } else if (v.isSymbol()) { if (cx->shouldBeJSContext() && allowGC) { JS_ReportErrorNumberASCII(cx->asJSContext(), GetErrorMessage, nullptr, JSMSG_SYMBOL_TO_STRING); } return nullptr; } else { MOZ_ASSERT(v.isUndefined()); str = cx->names().undefined; } return str; } template JSString* js::ToStringSlow<CanGC>(ExclusiveContext* cx, HandleValue arg); template JSString* js::ToStringSlow<NoGC>(ExclusiveContext* cx, const Value& arg); JS_PUBLIC_API(JSString*) js::ToStringSlow(JSContext* cx, HandleValue v) { return ToStringSlow<CanGC>(cx, v); } static JSString* SymbolToSource(JSContext* cx, Symbol* symbol) { RootedString desc(cx, symbol->description()); SymbolCode code = symbol->code(); if (code != SymbolCode::InSymbolRegistry && code != SymbolCode::UniqueSymbol) { // Well-known symbol. MOZ_ASSERT(uint32_t(code) < JS::WellKnownSymbolLimit); return desc; } StringBuffer buf(cx); if (code == SymbolCode::InSymbolRegistry ? !buf.append("Symbol.for(") : !buf.append("Symbol(")) return nullptr; if (desc) { desc = StringToSource(cx, desc); if (!desc || !buf.append(desc)) return nullptr; } if (!buf.append(')')) return nullptr; return buf.finishString(); } JSString* js::ValueToSource(JSContext* cx, HandleValue v) { JS_CHECK_RECURSION(cx, return nullptr); assertSameCompartment(cx, v); if (v.isUndefined()) return cx->names().void0; if (v.isString()) return StringToSource(cx, v.toString()); if (v.isSymbol()) return SymbolToSource(cx, v.toSymbol()); if (v.isPrimitive()) { /* Special case to preserve negative zero, _contra_ toString. */ if (v.isDouble() && IsNegativeZero(v.toDouble())) { /* NB: _ucNstr rather than _ucstr to indicate non-terminated. */ static const char16_t js_negzero_ucNstr[] = {'-', '0'}; return NewStringCopyN<CanGC>(cx, js_negzero_ucNstr, 2); } return ToString<CanGC>(cx, v); } RootedValue fval(cx); RootedObject obj(cx, &v.toObject()); if (!GetProperty(cx, obj, obj, cx->names().toSource, &fval)) return nullptr; if (IsCallable(fval)) { RootedValue v(cx); if (!js::Call(cx, fval, obj, &v)) return nullptr; return ToString<CanGC>(cx, v); } return ObjectToSource(cx, obj); } JSString* js::StringToSource(JSContext* cx, JSString* str) { return QuoteString(cx, str, '"'); } bool js::EqualChars(JSLinearString* str1, JSLinearString* str2) { MOZ_ASSERT(str1->length() == str2->length()); size_t len = str1->length(); AutoCheckCannotGC nogc; if (str1->hasTwoByteChars()) { if (str2->hasTwoByteChars()) return PodEqual(str1->twoByteChars(nogc), str2->twoByteChars(nogc), len); return EqualChars(str2->latin1Chars(nogc), str1->twoByteChars(nogc), len); } if (str2->hasLatin1Chars()) return PodEqual(str1->latin1Chars(nogc), str2->latin1Chars(nogc), len); return EqualChars(str1->latin1Chars(nogc), str2->twoByteChars(nogc), len); } bool js::EqualStrings(JSContext* cx, JSString* str1, JSString* str2, bool* result) { if (str1 == str2) { *result = true; return true; } size_t length1 = str1->length(); if (length1 != str2->length()) { *result = false; return true; } JSLinearString* linear1 = str1->ensureLinear(cx); if (!linear1) return false; JSLinearString* linear2 = str2->ensureLinear(cx); if (!linear2) return false; *result = EqualChars(linear1, linear2); return true; } bool js::EqualStrings(JSLinearString* str1, JSLinearString* str2) { if (str1 == str2) return true; size_t length1 = str1->length(); if (length1 != str2->length()) return false; return EqualChars(str1, str2); } static int32_t CompareStringsImpl(JSLinearString* str1, JSLinearString* str2) { size_t len1 = str1->length(); size_t len2 = str2->length(); AutoCheckCannotGC nogc; if (str1->hasLatin1Chars()) { const Latin1Char* chars1 = str1->latin1Chars(nogc); return str2->hasLatin1Chars() ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2) : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2); } const char16_t* chars1 = str1->twoByteChars(nogc); return str2->hasLatin1Chars() ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2) : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2); } int32_t js::CompareChars(const char16_t* s1, size_t len1, JSLinearString* s2) { AutoCheckCannotGC nogc; return s2->hasLatin1Chars() ? CompareChars(s1, len1, s2->latin1Chars(nogc), s2->length()) : CompareChars(s1, len1, s2->twoByteChars(nogc), s2->length()); } bool js::CompareStrings(JSContext* cx, JSString* str1, JSString* str2, int32_t* result) { MOZ_ASSERT(str1); MOZ_ASSERT(str2); if (str1 == str2) { *result = 0; return true; } JSLinearString* linear1 = str1->ensureLinear(cx); if (!linear1) return false; JSLinearString* linear2 = str2->ensureLinear(cx); if (!linear2) return false; *result = CompareStringsImpl(linear1, linear2); return true; } int32_t js::CompareAtoms(JSAtom* atom1, JSAtom* atom2) { return CompareStringsImpl(atom1, atom2); } bool js::StringEqualsAscii(JSLinearString* str, const char* asciiBytes) { size_t length = strlen(asciiBytes); #ifdef DEBUG for (size_t i = 0; i != length; ++i) MOZ_ASSERT(unsigned(asciiBytes[i]) <= 127); #endif if (length != str->length()) return false; const Latin1Char* latin1 = reinterpret_cast<const Latin1Char*>(asciiBytes); AutoCheckCannotGC nogc; return str->hasLatin1Chars() ? PodEqual(latin1, str->latin1Chars(nogc), length) : EqualChars(latin1, str->twoByteChars(nogc), length); } size_t js_strlen(const char16_t* s) { const char16_t* t; for (t = s; *t != 0; t++) continue; return (size_t)(t - s); } int32_t js_strcmp(const char16_t* lhs, const char16_t* rhs) { while (true) { if (*lhs != *rhs) return int32_t(*lhs) - int32_t(*rhs); if (*lhs == 0) return 0; ++lhs, ++rhs; } } int32_t js_fputs(const char16_t* s, FILE* f) { while (*s != 0) { if (fputwc(wchar_t(*s), f) == WEOF) return WEOF; s++; } return 1; } UniqueChars js::DuplicateString(js::ExclusiveContext* cx, const char* s) { size_t n = strlen(s) + 1; auto ret = cx->make_pod_array<char>(n); if (!ret) return ret; PodCopy(ret.get(), s, n); return ret; } UniqueTwoByteChars js::DuplicateString(js::ExclusiveContext* cx, const char16_t* s) { size_t n = js_strlen(s) + 1; auto ret = cx->make_pod_array<char16_t>(n); if (!ret) return ret; PodCopy(ret.get(), s, n); return ret; } UniqueChars js::DuplicateString(const char* s) { return UniqueChars(js_strdup(s)); } UniqueChars js::DuplicateString(const char* s, size_t n) { UniqueChars ret(js_pod_malloc<char>(n + 1)); if (!ret) return nullptr; PodCopy(ret.get(), s, n); ret[n] = 0; return ret; } UniqueTwoByteChars js::DuplicateString(const char16_t* s) { return DuplicateString(s, js_strlen(s)); } UniqueTwoByteChars js::DuplicateString(const char16_t* s, size_t n) { UniqueTwoByteChars ret(js_pod_malloc<char16_t>(n + 1)); if (!ret) return nullptr; PodCopy(ret.get(), s, n); ret[n] = 0; return ret; } template <typename CharT> const CharT* js_strchr_limit(const CharT* s, char16_t c, const CharT* limit) { while (s < limit) { if (*s == c) return s; s++; } return nullptr; } template const Latin1Char* js_strchr_limit(const Latin1Char* s, char16_t c, const Latin1Char* limit); template const char16_t* js_strchr_limit(const char16_t* s, char16_t c, const char16_t* limit); char16_t* js::InflateString(ExclusiveContext* cx, const char* bytes, size_t* lengthp) { size_t nchars; char16_t* chars; size_t nbytes = *lengthp; nchars = nbytes; chars = cx->pod_malloc<char16_t>(nchars + 1); if (!chars) goto bad; for (size_t i = 0; i < nchars; i++) chars[i] = (unsigned char) bytes[i]; *lengthp = nchars; chars[nchars] = 0; return chars; bad: // For compatibility with callers of JS_DecodeBytes we must zero lengthp // on errors. *lengthp = 0; return nullptr; } template <typename CharT> bool js::DeflateStringToBuffer(JSContext* maybecx, const CharT* src, size_t srclen, char* dst, size_t* dstlenp) { size_t dstlen = *dstlenp; if (srclen > dstlen) { for (size_t i = 0; i < dstlen; i++) dst[i] = char(src[i]); if (maybecx) { AutoSuppressGC suppress(maybecx); JS_ReportErrorNumberASCII(maybecx, GetErrorMessage, nullptr, JSMSG_BUFFER_TOO_SMALL); } return false; } for (size_t i = 0; i < srclen; i++) dst[i] = char(src[i]); *dstlenp = srclen; return true; } template bool js::DeflateStringToBuffer(JSContext* maybecx, const Latin1Char* src, size_t srclen, char* dst, size_t* dstlenp); template bool js::DeflateStringToBuffer(JSContext* maybecx, const char16_t* src, size_t srclen, char* dst, size_t* dstlenp); #define ____ false /* * Identifier start chars: * - 36: $ * - 65..90: A..Z * - 95: _ * - 97..122: a..z */ const bool js_isidstart[] = { /* 0 1 2 3 4 5 6 7 8 9 */ /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 3 */ ____, ____, ____, ____, ____, ____, true, ____, ____, ____, /* 4 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true, /* 7 */ true, true, true, true, true, true, true, true, true, true, /* 8 */ true, true, true, true, true, true, true, true, true, true, /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true, /* 10 */ true, true, true, true, true, true, true, true, true, true, /* 11 */ true, true, true, true, true, true, true, true, true, true, /* 12 */ true, true, true, ____, ____, ____, ____, ____ }; /* * Identifier chars: * - 36: $ * - 48..57: 0..9 * - 65..90: A..Z * - 95: _ * - 97..122: a..z */ const bool js_isident[] = { /* 0 1 2 3 4 5 6 7 8 9 */ /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 3 */ ____, ____, ____, ____, ____, ____, true, ____, ____, ____, /* 4 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true, /* 5 */ true, true, true, true, true, true, true, true, ____, ____, /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true, /* 7 */ true, true, true, true, true, true, true, true, true, true, /* 8 */ true, true, true, true, true, true, true, true, true, true, /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true, /* 10 */ true, true, true, true, true, true, true, true, true, true, /* 11 */ true, true, true, true, true, true, true, true, true, true, /* 12 */ true, true, true, ____, ____, ____, ____, ____ }; /* Whitespace chars: '\t', '\n', '\v', '\f', '\r', ' '. */ const bool js_isspace[] = { /* 0 1 2 3 4 5 6 7 8 9 */ /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, true, /* 1 */ true, true, true, true, ____, ____, ____, ____, ____, ____, /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 3 */ ____, ____, true, ____, ____, ____, ____, ____, ____, ____, /* 4 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 6 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____ }; /* * Uri reserved chars + #: * - 35: # * - 36: $ * - 38: & * - 43: + * - 44: , * - 47: / * - 58: : * - 59: ; * - 61: = * - 63: ? * - 64: @ */ static const bool js_isUriReservedPlusPound[] = { /* 0 1 2 3 4 5 6 7 8 9 */ /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 3 */ ____, ____, ____, ____, ____, true, true, ____, true, ____, /* 4 */ ____, ____, ____, true, true, ____, ____, true, ____, ____, /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true, /* 6 */ ____, true, ____, true, true, ____, ____, ____, ____, ____, /* 7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____ }; /* * Uri unescaped chars: * - 33: ! * - 39: ' * - 40: ( * - 41: ) * - 42: * * - 45: - * - 46: . * - 48..57: 0-9 * - 65..90: A-Z * - 95: _ * - 97..122: a-z * - 126: ~ */ static const bool js_isUriUnescaped[] = { /* 0 1 2 3 4 5 6 7 8 9 */ /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, /* 3 */ ____, ____, ____, true, ____, ____, ____, ____, ____, true, /* 4 */ true, true, true, ____, ____, true, true, ____, true, true, /* 5 */ true, true, true, true, true, true, true, true, ____, ____, /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true, /* 7 */ true, true, true, true, true, true, true, true, true, true, /* 8 */ true, true, true, true, true, true, true, true, true, true, /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true, /* 10 */ true, true, true, true, true, true, true, true, true, true, /* 11 */ true, true, true, true, true, true, true, true, true, true, /* 12 */ true, true, true, ____, ____, ____, true, ____ }; #undef ____ #define URI_CHUNK 64U static inline bool TransferBufferToString(StringBuffer& sb, MutableHandleValue rval) { JSString* str = sb.finishString(); if (!str) return false; rval.setString(str); return true; } /* * ECMA 3, 15.1.3 URI Handling Function Properties * * The following are implementations of the algorithms * given in the ECMA specification for the hidden functions * 'Encode' and 'Decode'. */ enum EncodeResult { Encode_Failure, Encode_BadUri, Encode_Success }; template <typename CharT> static EncodeResult Encode(StringBuffer& sb, const CharT* chars, size_t length, const bool* unescapedSet, const bool* unescapedSet2) { static const char HexDigits[] = "0123456789ABCDEF"; /* NB: uppercase */ char16_t hexBuf[4]; hexBuf[0] = '%'; hexBuf[3] = 0; for (size_t k = 0; k < length; k++) { char16_t c = chars[k]; if (c < 128 && (unescapedSet[c] || (unescapedSet2 && unescapedSet2[c]))) { if (!sb.append(c)) return Encode_Failure; } else { if (unicode::IsTrailSurrogate(c)) return Encode_BadUri; uint32_t v; if (!unicode::IsLeadSurrogate(c)) { v = c; } else { k++; if (k == length) return Encode_BadUri; char16_t c2 = chars[k]; if (!unicode::IsTrailSurrogate(c2)) return Encode_BadUri; v = unicode::UTF16Decode(c, c2); } uint8_t utf8buf[4]; size_t L = OneUcs4ToUtf8Char(utf8buf, v); for (size_t j = 0; j < L; j++) { hexBuf[1] = HexDigits[utf8buf[j] >> 4]; hexBuf[2] = HexDigits[utf8buf[j] & 0xf]; if (!sb.append(hexBuf, 3)) return Encode_Failure; } } } return Encode_Success; } static bool Encode(JSContext* cx, HandleLinearString str, const bool* unescapedSet, const bool* unescapedSet2, MutableHandleValue rval) { size_t length = str->length(); if (length == 0) { rval.setString(cx->runtime()->emptyString); return true; } StringBuffer sb(cx); if (!sb.reserve(length)) return false; EncodeResult res; if (str->hasLatin1Chars()) { AutoCheckCannotGC nogc; res = Encode(sb, str->latin1Chars(nogc), str->length(), unescapedSet, unescapedSet2); } else { AutoCheckCannotGC nogc; res = Encode(sb, str->twoByteChars(nogc), str->length(), unescapedSet, unescapedSet2); } if (res == Encode_Failure) return false; if (res == Encode_BadUri) { JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI); return false; } MOZ_ASSERT(res == Encode_Success); return TransferBufferToString(sb, rval); } enum DecodeResult { Decode_Failure, Decode_BadUri, Decode_Success }; template <typename CharT> static DecodeResult Decode(StringBuffer& sb, const CharT* chars, size_t length, const bool* reservedSet) { for (size_t k = 0; k < length; k++) { char16_t c = chars[k]; if (c == '%') { size_t start = k; if ((k + 2) >= length) return Decode_BadUri; if (!JS7_ISHEX(chars[k+1]) || !JS7_ISHEX(chars[k+2])) return Decode_BadUri; uint32_t B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]); k += 2; if (!(B & 0x80)) { c = char16_t(B); } else { int n = 1; while (B & (0x80 >> n)) n++; if (n == 1 || n > 4) return Decode_BadUri; uint8_t octets[4]; octets[0] = (uint8_t)B; if (k + 3 * (n - 1) >= length) return Decode_BadUri; for (int j = 1; j < n; j++) { k++; if (chars[k] != '%') return Decode_BadUri; if (!JS7_ISHEX(chars[k+1]) || !JS7_ISHEX(chars[k+2])) return Decode_BadUri; B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]); if ((B & 0xC0) != 0x80) return Decode_BadUri; k += 2; octets[j] = char(B); } uint32_t v = JS::Utf8ToOneUcs4Char(octets, n); if (v >= unicode::NonBMPMin) { if (v > unicode::NonBMPMax) return Decode_BadUri; char16_t H = unicode::LeadSurrogate(v); if (!sb.append(H)) return Decode_Failure; c = unicode::TrailSurrogate(v); } else { c = char16_t(v); } } if (c < 128 && reservedSet && reservedSet[c]) { if (!sb.append(chars + start, k - start + 1)) return Decode_Failure; } else { if (!sb.append(c)) return Decode_Failure; } } else { if (!sb.append(c)) return Decode_Failure; } } return Decode_Success; } static bool Decode(JSContext* cx, HandleLinearString str, const bool* reservedSet, MutableHandleValue rval) { size_t length = str->length(); if (length == 0) { rval.setString(cx->runtime()->emptyString); return true; } StringBuffer sb(cx); DecodeResult res; if (str->hasLatin1Chars()) { AutoCheckCannotGC nogc; res = Decode(sb, str->latin1Chars(nogc), str->length(), reservedSet); } else { AutoCheckCannotGC nogc; res = Decode(sb, str->twoByteChars(nogc), str->length(), reservedSet); } if (res == Decode_Failure) return false; if (res == Decode_BadUri) { JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI); return false; } MOZ_ASSERT(res == Decode_Success); return TransferBufferToString(sb, rval); } static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); RootedLinearString str(cx, ArgToRootedString(cx, args, 0)); if (!str) return false; return Decode(cx, str, js_isUriReservedPlusPound, args.rval()); } static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); RootedLinearString str(cx, ArgToRootedString(cx, args, 0)); if (!str) return false; return Decode(cx, str, nullptr, args.rval()); } static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); RootedLinearString str(cx, ArgToRootedString(cx, args, 0)); if (!str) return false; return Encode(cx, str, js_isUriUnescaped, js_isUriReservedPlusPound, args.rval()); } static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); RootedLinearString str(cx, ArgToRootedString(cx, args, 0)); if (!str) return false; return Encode(cx, str, js_isUriUnescaped, nullptr, args.rval()); } /* * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at * least 4 bytes long. Return the number of UTF-8 bytes of data written. */ uint32_t js::OneUcs4ToUtf8Char(uint8_t* utf8Buffer, uint32_t ucs4Char) { MOZ_ASSERT(ucs4Char <= unicode::NonBMPMax); if (ucs4Char < 0x80) { utf8Buffer[0] = uint8_t(ucs4Char); return 1; } uint32_t a = ucs4Char >> 11; uint32_t utf8Length = 2; while (a) { a >>= 5; utf8Length++; } MOZ_ASSERT(utf8Length <= 4); uint32_t i = utf8Length; while (--i) { utf8Buffer[i] = uint8_t((ucs4Char & 0x3F) | 0x80); ucs4Char >>= 6; } utf8Buffer[0] = uint8_t(0x100 - (1 << (8 - utf8Length)) + ucs4Char); return utf8Length; } size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, JSLinearString* str, uint32_t quote) { size_t len = str->length(); AutoCheckCannotGC nogc; return str->hasLatin1Chars() ? PutEscapedStringImpl(buffer, bufferSize, out, str->latin1Chars(nogc), len, quote) : PutEscapedStringImpl(buffer, bufferSize, out, str->twoByteChars(nogc), len, quote); } template <typename CharT> size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, const CharT* chars, size_t length, uint32_t quote) { enum { STOP, FIRST_QUOTE, LAST_QUOTE, CHARS, ESCAPE_START, ESCAPE_MORE } state; MOZ_ASSERT(quote == 0 || quote == '\'' || quote == '"'); MOZ_ASSERT_IF(!buffer, bufferSize == 0); MOZ_ASSERT_IF(out, !buffer); if (bufferSize == 0) buffer = nullptr; else bufferSize--; const CharT* charsEnd = chars + length; size_t n = 0; state = FIRST_QUOTE; unsigned shift = 0; unsigned hex = 0; unsigned u = 0; char c = 0; /* to quell GCC warnings */ for (;;) { switch (state) { case STOP: goto stop; case FIRST_QUOTE: state = CHARS; goto do_quote; case LAST_QUOTE: state = STOP; do_quote: if (quote == 0) continue; c = (char)quote; break; case CHARS: if (chars == charsEnd) { state = LAST_QUOTE; continue; } u = *chars++; if (u < ' ') { if (u != 0) { const char* escape = strchr(js_EscapeMap, (int)u); if (escape) { u = escape[1]; goto do_escape; } } goto do_hex_escape; } if (u < 127) { if (u == quote || u == '\\') goto do_escape; c = (char)u; } else if (u < 0x100) { goto do_hex_escape; } else { shift = 16; hex = u; u = 'u'; goto do_escape; } break; do_hex_escape: shift = 8; hex = u; u = 'x'; do_escape: c = '\\'; state = ESCAPE_START; break; case ESCAPE_START: MOZ_ASSERT(' ' <= u && u < 127); c = (char)u; state = ESCAPE_MORE; break; case ESCAPE_MORE: if (shift == 0) { state = CHARS; continue; } shift -= 4; u = 0xF & (hex >> shift); c = (char)(u + (u < 10 ? '0' : 'A' - 10)); break; } if (buffer) { MOZ_ASSERT(n <= bufferSize); if (n != bufferSize) { buffer[n] = c; } else { buffer[n] = '\0'; buffer = nullptr; } } else if (out) { if (out->put(&c, 1) < 0) return size_t(-1); } n++; } stop: if (buffer) buffer[n] = '\0'; return n; } template size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, const Latin1Char* chars, size_t length, uint32_t quote); template size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, const char* chars, size_t length, uint32_t quote); template size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, const char16_t* chars, size_t length, uint32_t quote); template size_t js::PutEscapedString(char* buffer, size_t bufferSize, const Latin1Char* chars, size_t length, uint32_t quote); template size_t js::PutEscapedString(char* buffer, size_t bufferSize, const char16_t* chars, size_t length, uint32_t quote); static bool FlatStringMatchHelper(JSContext* cx, HandleString str, HandleString pattern, bool* isFlat, int32_t* match) { RootedLinearString linearPattern(cx, pattern->ensureLinear(cx)); if (!linearPattern) return false; static const size_t MAX_FLAT_PAT_LEN = 256; if (linearPattern->length() > MAX_FLAT_PAT_LEN || StringHasRegExpMetaChars(linearPattern)) { *isFlat = false; return true; } *isFlat = true; if (str->isRope()) { if (!RopeMatch(cx, &str->asRope(), linearPattern, match)) return false; } else { *match = StringMatch(&str->asLinear(), linearPattern); } return true; } static bool BuildFlatMatchArray(JSContext* cx, HandleString str, HandleString pattern, int32_t match, MutableHandleValue rval) { if (match < 0) { rval.setNull(); return true; } /* Get the templateObject that defines the shape and type of the output object */ JSObject* templateObject = cx->compartment()->regExps.getOrCreateMatchResultTemplateObject(cx); if (!templateObject) return false; RootedArrayObject arr(cx, NewDenseFullyAllocatedArrayWithTemplate(cx, 1, templateObject)); if (!arr) return false; /* Store a Value for each pair. */ arr->setDenseInitializedLength(1); arr->initDenseElement(0, StringValue(pattern)); /* Set the |index| property. (TemplateObject positions it in slot 0) */ arr->setSlot(0, Int32Value(match)); /* Set the |input| property. (TemplateObject positions it in slot 1) */ arr->setSlot(1, StringValue(str)); #ifdef DEBUG RootedValue test(cx); RootedId id(cx, NameToId(cx->names().index)); if (!NativeGetProperty(cx, arr, id, &test)) return false; MOZ_ASSERT(test == arr->getSlot(0)); id = NameToId(cx->names().input); if (!NativeGetProperty(cx, arr, id, &test)) return false; MOZ_ASSERT(test == arr->getSlot(1)); #endif rval.setObject(*arr); return true; } #ifdef DEBUG static bool CallIsStringOptimizable(JSContext* cx, const char* name, bool* result) { JSAtom* atom = Atomize(cx, name, strlen(name)); if (!atom) return false; RootedPropertyName propName(cx, atom->asPropertyName()); RootedValue funcVal(cx); if (!GlobalObject::getSelfHostedFunction(cx, cx->global(), propName, propName, 0, &funcVal)) return false; FixedInvokeArgs<0> args(cx); RootedValue rval(cx); if (!Call(cx, funcVal, UndefinedHandleValue, args, &rval)) return false; *result = rval.toBoolean(); return true; } #endif bool js::FlatStringMatch(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); MOZ_ASSERT(args.length() == 2); MOZ_ASSERT(args[0].isString()); MOZ_ASSERT(args[1].isString()); #ifdef DEBUG bool isOptimizable = false; if (!CallIsStringOptimizable(cx, "IsStringMatchOptimizable", &isOptimizable)) return false; MOZ_ASSERT(isOptimizable); #endif RootedString str(cx,args[0].toString()); RootedString pattern(cx, args[1].toString()); bool isFlat = false; int32_t match = 0; if (!FlatStringMatchHelper(cx, str, pattern, &isFlat, &match)) return false; if (!isFlat) { args.rval().setUndefined(); return true; } return BuildFlatMatchArray(cx, str, pattern, match, args.rval()); } bool js::FlatStringSearch(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); MOZ_ASSERT(args.length() == 2); MOZ_ASSERT(args[0].isString()); MOZ_ASSERT(args[1].isString()); #ifdef DEBUG bool isOptimizable = false; if (!CallIsStringOptimizable(cx, "IsStringSearchOptimizable", &isOptimizable)) return false; MOZ_ASSERT(isOptimizable); #endif RootedString str(cx,args[0].toString()); RootedString pattern(cx, args[1].toString()); bool isFlat = false; int32_t match = 0; if (!FlatStringMatchHelper(cx, str, pattern, &isFlat, &match)) return false; if (!isFlat) { args.rval().setInt32(-2); return true; } args.rval().setInt32(match); return true; }