diff options
author | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
---|---|---|
committer | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
commit | 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch) | |
tree | 10027f336435511475e392454359edea8e25895d /xpcom/string | |
parent | 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff) | |
download | UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip |
Add m-esr52 at 52.6.0
Diffstat (limited to 'xpcom/string')
55 files changed, 11456 insertions, 0 deletions
diff --git a/xpcom/string/README.html b/xpcom/string/README.html new file mode 100644 index 000000000..4a0927c65 --- /dev/null +++ b/xpcom/string/README.html @@ -0,0 +1,11 @@ +<html> +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this + - file, You can obtain one at http://mozilla.org/MPL/2.0/. --> +<body> + <h1><span class="LXRSHORTDESC">managing sequences of characters</span></h1> +<p> + <span class="LXRLONGDESC"></span> +</p> +</body> +</html> diff --git a/xpcom/string/crashtests/1113005-frame.html b/xpcom/string/crashtests/1113005-frame.html new file mode 100644 index 000000000..505fc22f1 --- /dev/null +++ b/xpcom/string/crashtests/1113005-frame.html @@ -0,0 +1,5 @@ +<form method=post enctype=multipart/form-data action="data:text/html,"><textarea name='file"; filename="filename.ext + '></textarea> +<script> +document.forms[0].submit(); +</script> diff --git a/xpcom/string/crashtests/1113005.html b/xpcom/string/crashtests/1113005.html new file mode 100644 index 000000000..e377bb637 --- /dev/null +++ b/xpcom/string/crashtests/1113005.html @@ -0,0 +1,2 @@ +<!DOCTYPE html> +<iframe src="1113005-frame.html"></iframe> diff --git a/xpcom/string/crashtests/394275-1.html b/xpcom/string/crashtests/394275-1.html new file mode 100644 index 000000000..4f2afd1a6 --- /dev/null +++ b/xpcom/string/crashtests/394275-1.html @@ -0,0 +1,9 @@ +<html> +<body> +<script> +style = document.createElement("style"); +document.documentElement.appendChild(style); +style.textContent = "tz\uDAB2 "; +</script> +</body> +</html> diff --git a/xpcom/string/crashtests/395651-1.html b/xpcom/string/crashtests/395651-1.html new file mode 100644 index 000000000..3f89a0836 --- /dev/null +++ b/xpcom/string/crashtests/395651-1.html @@ -0,0 +1,31 @@ +<html> +<head> +<script> + +function X() { dump("X\n"); } +function Y() { dump("Y\n"); } + +function boom() +{ + dump("Start9\n"); + + var div = 
document.getElementById("v"); + + var textNode = document.createTextNode(String.fromCharCode(0xDAAF)); // high surrogate + div.appendChild(textNode); + + document.addEventListener("DOMCharacterDataModified", X, true); + textNode.data += 'B'; + document.removeEventListener("DOMCharacterDataModified", X, true); + + document.addEventListener("DOMAttrModified", Y, true); + textNode.data += String.fromCharCode(0xDF53); // low surrogate + document.removeEventListener("DOMAttrModified", Y, true); +} + +</script> +</head> + +<body onload="boom();"><div id="v"></div></body> + +</html> diff --git a/xpcom/string/crashtests/crashtests.list b/xpcom/string/crashtests/crashtests.list new file mode 100644 index 000000000..8562f1ad8 --- /dev/null +++ b/xpcom/string/crashtests/crashtests.list @@ -0,0 +1,3 @@ +load 394275-1.html +load 395651-1.html +load 1113005.html diff --git a/xpcom/string/moz.build b/xpcom/string/moz.build new file mode 100644 index 000000000..6ad7d7cc8 --- /dev/null +++ b/xpcom/string/moz.build @@ -0,0 +1,61 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +with Files('**'): + BUG_COMPONENT = ('Core', 'String') + +EXPORTS += [ + 'nsAString.h', + 'nsCharTraits.h', + 'nsDependentString.h', + 'nsDependentSubstring.h', + 'nsEmbedString.h', + 'nsLiteralString.h', + 'nsPrintfCString.h', + 'nsPromiseFlatString.h', + 'nsReadableUtils.h', + 'nsString.h', + 'nsStringBuffer.h', + 'nsStringFwd.h', + 'nsStringIterator.h', + 'nsSubstring.h', + 'nsSubstringTuple.h', + 'nsTDependentString.h', + 'nsTDependentSubstring.h', + 'nsTLiteralString.h', + 'nsTPromiseFlatString.h', + 'nsTString.h', + 'nsTSubstring.h', + 'nsTSubstringTuple.h', + 'nsUTF8Utils.h', + 'nsXPCOMStrings.h', + 'nsXPIDLString.h', + 'string-template-def-char.h', + 'string-template-def-unichar.h', + 'string-template-undef.h', +] + +UNIFIED_SOURCES += [ + 'nsDependentString.cpp', + 'nsDependentSubstring.cpp', + 'nsPromiseFlatString.cpp', + 'nsReadableUtils.cpp', + 'nsString.cpp', + 'nsStringComparator.cpp', + 'nsStringObsolete.cpp', + 'nsSubstring.cpp', + 'nsSubstringTuple.cpp', +] + +# Are we targeting x86 or x86-64? If so, compile the SSE2 functions for +# nsUTF8Utils.cpp and nsReadableUtils.cpp. +if CONFIG['INTEL_ARCHITECTURE']: + SOURCES += ['nsUTF8UtilsSSE2.cpp'] + SOURCES['nsUTF8UtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS'] + SOURCES += ['nsReadableUtilsSSE2.cpp'] + SOURCES['nsReadableUtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS'] + +FINAL_LIBRARY = 'xul' diff --git a/xpcom/string/nsAString.h b/xpcom/string/nsAString.h new file mode 100644 index 000000000..0cbea0dc7 --- /dev/null +++ b/xpcom/string/nsAString.h @@ -0,0 +1,62 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +// IWYU pragma: private, include "nsString.h" + +#ifndef nsAString_h___ +#define nsAString_h___ + +#include "nsStringFwd.h" +#include "nsStringIterator.h" + +#include <string.h> +#include <stdarg.h> + +#define kNotFound -1 + +// declare nsAString +#include "string-template-def-unichar.h" +#include "nsTSubstring.h" +#include "string-template-undef.h" + +// declare nsACString +#include "string-template-def-char.h" +#include "nsTSubstring.h" +#include "string-template-undef.h" + + +/** + * ASCII case-insensitive comparator. (for Unicode case-insensitive + * comparison, see nsUnicharUtils.h) + */ +class nsCaseInsensitiveCStringComparator + : public nsCStringComparator +{ +public: + nsCaseInsensitiveCStringComparator() + { + } + typedef char char_type; + + virtual int operator()(const char_type*, const char_type*, + uint32_t, uint32_t) const override; +}; + +class nsCaseInsensitiveCStringArrayComparator +{ +public: + template<class A, class B> + bool Equals(const A& aStrA, const B& aStrB) const + { + return aStrA.Equals(aStrB, nsCaseInsensitiveCStringComparator()); + } +}; + +// included here for backwards compatibility +#ifndef nsSubstringTuple_h___ +#include "nsSubstringTuple.h" +#endif + +#endif // !defined(nsAString_h___) diff --git a/xpcom/string/nsCharTraits.h b/xpcom/string/nsCharTraits.h new file mode 100644 index 000000000..d93e1f5dc --- /dev/null +++ b/xpcom/string/nsCharTraits.h @@ -0,0 +1,587 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsCharTraits_h___ +#define nsCharTraits_h___ + +#include <ctype.h> // for |EOF|, |WEOF| +#include <string.h> // for |memcpy|, et al + +#include "nscore.h" // for |char16_t| + +// This file may be used (through nsUTF8Utils.h) from non-XPCOM code, in +// particular the standalone software updater. In that case stub out +// the macros provided by nsDebug.h which are only usable when linking XPCOM + +#ifdef NS_NO_XPCOM +#define NS_WARNING(msg) +#define NS_ASSERTION(cond, msg) +#define NS_ERROR(msg) +#else +#include "nsDebug.h" // for NS_ASSERTION +#endif + +/* + * Some macros for converting char16_t (UTF-16) to and from Unicode scalar + * values. + * + * Note that UTF-16 represents all Unicode scalar values up to U+10FFFF by + * using "surrogate pairs". These consist of a high surrogate, i.e. a code + * point in the range U+D800 - U+DBFF, and a low surrogate, i.e. a code point + * in the range U+DC00 - U+DFFF, like this: + * + * U+D800 U+DC00 = U+10000 + * U+D800 U+DC01 = U+10001 + * ... + * U+DBFF U+DFFE = U+10FFFE + * U+DBFF U+DFFF = U+10FFFF + * + * These surrogate code points U+D800 - U+DFFF are not themselves valid Unicode + * scalar values and are not well-formed UTF-16 except as high-surrogate / + * low-surrogate pairs. 
+ */ + +#define PLANE1_BASE uint32_t(0x00010000) +// High surrogates are in the range 0xD800 -- 0xDBFF +#define NS_IS_HIGH_SURROGATE(u) ((uint32_t(u) & 0xFFFFFC00) == 0xD800) +// Low surrogates are in the range 0xDC00 -- 0xDFFF +#define NS_IS_LOW_SURROGATE(u) ((uint32_t(u) & 0xFFFFFC00) == 0xDC00) +// Faster than testing NS_IS_HIGH_SURROGATE || NS_IS_LOW_SURROGATE +#define IS_SURROGATE(u) ((uint32_t(u) & 0xFFFFF800) == 0xD800) + +// Everything else is not a surrogate: 0x000 -- 0xD7FF, 0xE000 -- 0xFFFF + +// N = (H - 0xD800) * 0x400 + 0x10000 + (L - 0xDC00) +// I wonder whether we could somehow assert that H is a high surrogate +// and L is a low surrogate +#define SURROGATE_TO_UCS4(h, l) (((uint32_t(h) & 0x03FF) << 10) + \ + (uint32_t(l) & 0x03FF) + PLANE1_BASE) + +// Extract surrogates from a UCS4 char +// Reference: the Unicode standard 4.0, section 3.9 +// Since (c - 0x10000) >> 10 == (c >> 10) - 0x0080 and +// 0xD7C0 == 0xD800 - 0x0080, +// ((c - 0x10000) >> 10) + 0xD800 can be simplified to +#define H_SURROGATE(c) char16_t(char16_t(uint32_t(c) >> 10) + \ + char16_t(0xD7C0)) +// where it's to be noted that 0xD7C0 is not bitwise-OR'd +// but added. + +// Since 0x10000 & 0x03FF == 0, +// (c - 0x10000) & 0x03FF == c & 0x03FF so that +// ((c - 0x10000) & 0x03FF) | 0xDC00 is equivalent to +#define L_SURROGATE(c) char16_t(char16_t(uint32_t(c) & uint32_t(0x03FF)) | \ + char16_t(0xDC00)) + +#define IS_IN_BMP(ucs) (uint32_t(ucs) < PLANE1_BASE) +#define UCS2_REPLACEMENT_CHAR char16_t(0xFFFD) + +#define UCS_END uint32_t(0x00110000) +#define IS_VALID_CHAR(c) ((uint32_t(c) < UCS_END) && !IS_SURROGATE(c)) +#define ENSURE_VALID_CHAR(c) (IS_VALID_CHAR(c) ? 
(c) : UCS2_REPLACEMENT_CHAR) + +template <class CharT> +struct nsCharTraits +{ +}; + +template <> +struct nsCharTraits<char16_t> +{ + typedef char16_t char_type; + typedef uint16_t unsigned_char_type; + typedef char incompatible_char_type; + + static char_type* const sEmptyBuffer; + + static void + assign(char_type& aLhs, char_type aRhs) + { + aLhs = aRhs; + } + + + // integer representation of characters: + typedef int int_type; + + static char_type + to_char_type(int_type aChar) + { + return char_type(aChar); + } + + static int_type + to_int_type(char_type aChar) + { + return int_type(static_cast<unsigned_char_type>(aChar)); + } + + static bool + eq_int_type(int_type aLhs, int_type aRhs) + { + return aLhs == aRhs; + } + + + // |char_type| comparisons: + + static bool + eq(char_type aLhs, char_type aRhs) + { + return aLhs == aRhs; + } + + static bool + lt(char_type aLhs, char_type aRhs) + { + return aLhs < aRhs; + } + + + // operations on s[n] arrays: + + static char_type* + move(char_type* aStr1, const char_type* aStr2, size_t aN) + { + return static_cast<char_type*>(memmove(aStr1, aStr2, + aN * sizeof(char_type))); + } + + static char_type* + copy(char_type* aStr1, const char_type* aStr2, size_t aN) + { + return static_cast<char_type*>(memcpy(aStr1, aStr2, + aN * sizeof(char_type))); + } + + static char_type* + copyASCII(char_type* aStr1, const char* aStr2, size_t aN) + { + for (char_type* s = aStr1; aN--; ++s, ++aStr2) { + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + *s = static_cast<char_type>(*aStr2); + } + return aStr1; + } + + static char_type* + assign(char_type* aStr, size_t aN, char_type aChar) + { + char_type* result = aStr; + while (aN--) { + assign(*aStr++, aChar); + } + return result; + } + + static int + compare(const char_type* aStr1, const char_type* aStr2, size_t aN) + { + for (; aN--; ++aStr1, ++aStr2) { + if (!eq(*aStr1, *aStr2)) { + return to_int_type(*aStr1) - to_int_type(*aStr2); + } + } + + return 0; + } + + static 
int + compareASCII(const char_type* aStr1, const char* aStr2, size_t aN) + { + for (; aN--; ++aStr1, ++aStr2) { + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + if (!eq_int_type(to_int_type(*aStr1), + to_int_type(static_cast<char_type>(*aStr2)))) { + return to_int_type(*aStr1) - + to_int_type(static_cast<char_type>(*aStr2)); + } + } + + return 0; + } + + // this version assumes that s2 is null-terminated and s1 has length n. + // if s1 is shorter than s2 then we return -1; if s1 is longer than s2, + // we return 1. + static int + compareASCIINullTerminated(const char_type* aStr1, size_t aN, + const char* aStr2) + { + for (; aN--; ++aStr1, ++aStr2) { + if (!*aStr2) { + return 1; + } + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + if (!eq_int_type(to_int_type(*aStr1), + to_int_type(static_cast<char_type>(*aStr2)))) { + return to_int_type(*aStr1) - + to_int_type(static_cast<char_type>(*aStr2)); + } + } + + if (*aStr2) { + return -1; + } + + return 0; + } + + /** + * Convert c to its lower-case form, but only if c is in the ASCII + * range. Otherwise leave it alone. + */ + static char_type + ASCIIToLower(char_type aChar) + { + if (aChar >= 'A' && aChar <= 'Z') { + return char_type(aChar + ('a' - 'A')); + } + + return aChar; + } + + static int + compareLowerCaseToASCII(const char_type* aStr1, const char* aStr2, size_t aN) + { + for (; aN--; ++aStr1, ++aStr2) { + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'), + "Unexpected uppercase character"); + char_type lower_s1 = ASCIIToLower(*aStr1); + if (lower_s1 != static_cast<char_type>(*aStr2)) { + return to_int_type(lower_s1) - + to_int_type(static_cast<char_type>(*aStr2)); + } + } + + return 0; + } + + // this version assumes that s2 is null-terminated and s1 has length n. + // if s1 is shorter than s2 then we return -1; if s1 is longer than s2, + // we return 1. 
+ static int + compareLowerCaseToASCIINullTerminated(const char_type* aStr1, + size_t aN, const char* aStr2) + { + for (; aN--; ++aStr1, ++aStr2) { + if (!*aStr2) { + return 1; + } + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'), + "Unexpected uppercase character"); + char_type lower_s1 = ASCIIToLower(*aStr1); + if (lower_s1 != static_cast<char_type>(*aStr2)) { + return to_int_type(lower_s1) - + to_int_type(static_cast<char_type>(*aStr2)); + } + } + + if (*aStr2) { + return -1; + } + + return 0; + } + + static size_t + length(const char_type* aStr) + { + size_t result = 0; + while (!eq(*aStr++, char_type(0))) { + ++result; + } + return result; + } + + static const char_type* + find(const char_type* aStr, size_t aN, char_type aChar) + { + while (aN--) { + if (eq(*aStr, aChar)) { + return aStr; + } + ++aStr; + } + + return 0; + } +}; + +template <> +struct nsCharTraits<char> +{ + typedef char char_type; + typedef unsigned char unsigned_char_type; + typedef char16_t incompatible_char_type; + + static char_type* const sEmptyBuffer; + + static void + assign(char_type& aLhs, char_type aRhs) + { + aLhs = aRhs; + } + + + // integer representation of characters: + + typedef int int_type; + + static char_type + to_char_type(int_type aChar) + { + return char_type(aChar); + } + + static int_type + to_int_type(char_type aChar) + { + return int_type(static_cast<unsigned_char_type>(aChar)); + } + + static bool + eq_int_type(int_type aLhs, int_type aRhs) + { + return aLhs == aRhs; + } + + + // |char_type| comparisons: + + static bool eq(char_type aLhs, char_type aRhs) + { + return aLhs == aRhs; + } + + static bool + lt(char_type aLhs, char_type aRhs) + { + return aLhs < aRhs; + } + + + // operations on s[n] arrays: + + static char_type* + move(char_type* aStr1, const char_type* aStr2, size_t aN) + { + return static_cast<char_type*>(memmove(aStr1, aStr2, + aN * sizeof(char_type))); + } + + static char_type* + 
copy(char_type* aStr1, const char_type* aStr2, size_t aN) + { + return static_cast<char_type*>(memcpy(aStr1, aStr2, + aN * sizeof(char_type))); + } + + static char_type* + copyASCII(char_type* aStr1, const char* aStr2, size_t aN) + { + return copy(aStr1, aStr2, aN); + } + + static char_type* + assign(char_type* aStr, size_t aN, char_type aChar) + { + return static_cast<char_type*>(memset(aStr, to_int_type(aChar), aN)); + } + + static int + compare(const char_type* aStr1, const char_type* aStr2, size_t aN) + { + return memcmp(aStr1, aStr2, aN); + } + + static int + compareASCII(const char_type* aStr1, const char* aStr2, size_t aN) + { +#ifdef DEBUG + for (size_t i = 0; i < aN; ++i) { + NS_ASSERTION(!(aStr2[i] & ~0x7F), "Unexpected non-ASCII character"); + } +#endif + return compare(aStr1, aStr2, aN); + } + + // this version assumes that s2 is null-terminated and s1 has length n. + // if s1 is shorter than s2 then we return -1; if s1 is longer than s2, + // we return 1. + static int + compareASCIINullTerminated(const char_type* aStr1, size_t aN, + const char* aStr2) + { + // can't use strcmp here because we don't want to stop when aStr1 + // contains a null + for (; aN--; ++aStr1, ++aStr2) { + if (!*aStr2) { + return 1; + } + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + if (*aStr1 != *aStr2) { + return to_int_type(*aStr1) - to_int_type(*aStr2); + } + } + + if (*aStr2) { + return -1; + } + + return 0; + } + + /** + * Convert c to its lower-case form, but only if c is ASCII. 
+ */ + static char_type + ASCIIToLower(char_type aChar) + { + if (aChar >= 'A' && aChar <= 'Z') { + return char_type(aChar + ('a' - 'A')); + } + + return aChar; + } + + static int + compareLowerCaseToASCII(const char_type* aStr1, const char* aStr2, size_t aN) + { + for (; aN--; ++aStr1, ++aStr2) { + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'), + "Unexpected uppercase character"); + char_type lower_s1 = ASCIIToLower(*aStr1); + if (lower_s1 != *aStr2) { + return to_int_type(lower_s1) - to_int_type(*aStr2); + } + } + return 0; + } + + // this version assumes that s2 is null-terminated and s1 has length n. + // if s1 is shorter than s2 then we return -1; if s1 is longer than s2, + // we return 1. + static int + compareLowerCaseToASCIINullTerminated(const char_type* aStr1, size_t aN, + const char* aStr2) + { + for (; aN--; ++aStr1, ++aStr2) { + if (!*aStr2) { + return 1; + } + NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character"); + NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'), + "Unexpected uppercase character"); + char_type lower_s1 = ASCIIToLower(*aStr1); + if (lower_s1 != *aStr2) { + return to_int_type(lower_s1) - to_int_type(*aStr2); + } + } + + if (*aStr2) { + return -1; + } + + return 0; + } + + static size_t + length(const char_type* aStr) + { + return strlen(aStr); + } + + static const char_type* + find(const char_type* aStr, size_t aN, char_type aChar) + { + return reinterpret_cast<const char_type*>(memchr(aStr, to_int_type(aChar), + aN)); + } +}; + +template <class InputIterator> +struct nsCharSourceTraits +{ + typedef typename InputIterator::difference_type difference_type; + + static uint32_t + readable_distance(const InputIterator& aFirst, const InputIterator& aLast) + { + // assumes single fragment + return uint32_t(aLast.get() - aFirst.get()); + } + + static const typename InputIterator::value_type* + read(const InputIterator& aIter) + { + return aIter.get(); + } + + 
static void + advance(InputIterator& aStr, difference_type aN) + { + aStr.advance(aN); + } +}; + +template <class CharT> +struct nsCharSourceTraits<CharT*> +{ + typedef ptrdiff_t difference_type; + + static uint32_t + readable_distance(CharT* aStr) + { + return uint32_t(nsCharTraits<CharT>::length(aStr)); + // return numeric_limits<uint32_t>::max(); + } + + static uint32_t + readable_distance(CharT* aFirst, CharT* aLast) + { + return uint32_t(aLast - aFirst); + } + + static const CharT* + read(CharT* aStr) + { + return aStr; + } + + static void + advance(CharT*& aStr, difference_type aN) + { + aStr += aN; + } +}; + +template <class OutputIterator> +struct nsCharSinkTraits +{ + static void + write(OutputIterator& aIter, const typename OutputIterator::value_type* aStr, + uint32_t aN) + { + aIter.write(aStr, aN); + } +}; + +template <class CharT> +struct nsCharSinkTraits<CharT*> +{ + static void + write(CharT*& aIter, const CharT* aStr, uint32_t aN) + { + nsCharTraits<CharT>::move(aIter, aStr, aN); + aIter += aN; + } +}; + +#endif // !defined(nsCharTraits_h___) diff --git a/xpcom/string/nsDependentString.cpp b/xpcom/string/nsDependentString.cpp new file mode 100644 index 000000000..52240d17b --- /dev/null +++ b/xpcom/string/nsDependentString.cpp @@ -0,0 +1,18 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsDependentString.h" +#include "nsAlgorithm.h" + +// define nsDependentString +#include "string-template-def-unichar.h" +#include "nsTDependentString.cpp" +#include "string-template-undef.h" + +// define nsDependentCString +#include "string-template-def-char.h" +#include "nsTDependentString.cpp" +#include "string-template-undef.h" diff --git a/xpcom/string/nsDependentString.h b/xpcom/string/nsDependentString.h new file mode 100644 index 000000000..20b5997ef --- /dev/null +++ b/xpcom/string/nsDependentString.h @@ -0,0 +1,23 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsDependentString_h___ +#define nsDependentString_h___ + +#include "nsString.h" +#include "nsDebug.h" + +// declare nsDependentString +#include "string-template-def-unichar.h" +#include "nsTDependentString.h" +#include "string-template-undef.h" + +// declare nsDependentCString +#include "string-template-def-char.h" +#include "nsTDependentString.h" +#include "string-template-undef.h" + +#endif /* !defined(nsDependentString_h___) */ diff --git a/xpcom/string/nsDependentSubstring.cpp b/xpcom/string/nsDependentSubstring.cpp new file mode 100644 index 000000000..721cf8f6a --- /dev/null +++ b/xpcom/string/nsDependentSubstring.cpp @@ -0,0 +1,18 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsDependentSubstring.h" +#include "nsAlgorithm.h" + +// define nsDependentSubstring +#include "string-template-def-unichar.h" +#include "nsTDependentSubstring.cpp" +#include "string-template-undef.h" + +// define nsDependentCSubstring +#include "string-template-def-char.h" +#include "nsTDependentSubstring.cpp" +#include "string-template-undef.h" diff --git a/xpcom/string/nsDependentSubstring.h b/xpcom/string/nsDependentSubstring.h new file mode 100644 index 000000000..078b8ab54 --- /dev/null +++ b/xpcom/string/nsDependentSubstring.h @@ -0,0 +1,22 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsDependentSubstring_h___ +#define nsDependentSubstring_h___ + +#include "nsSubstring.h" + +// declare nsDependentSubstring +#include "string-template-def-unichar.h" +#include "nsTDependentSubstring.h" +#include "string-template-undef.h" + +// declare nsDependentCSubstring +#include "string-template-def-char.h" +#include "nsTDependentSubstring.h" +#include "string-template-undef.h" + +#endif /* !defined(nsDependentSubstring_h___) */ diff --git a/xpcom/string/nsEmbedString.h b/xpcom/string/nsEmbedString.h new file mode 100644 index 000000000..caedd50cd --- /dev/null +++ b/xpcom/string/nsEmbedString.h @@ -0,0 +1,18 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsEmbedString_h___ +#define nsEmbedString_h___ + +#include "nsStringAPI.h" + +/** + * compatibility + */ +typedef nsString nsEmbedString; +typedef nsCString nsEmbedCString; + +#endif diff --git a/xpcom/string/nsLiteralString.h b/xpcom/string/nsLiteralString.h new file mode 100644 index 000000000..4a0b39107 --- /dev/null +++ b/xpcom/string/nsLiteralString.h @@ -0,0 +1,37 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsLiteralString_h___ +#define nsLiteralString_h___ + +#include "nscore.h" +#include "nsString.h" + +// declare nsLiteralString +#include "string-template-def-unichar.h" +#include "nsTLiteralString.h" +#include "string-template-undef.h" + +// declare nsLiteralCString +#include "string-template-def-char.h" +#include "nsTLiteralString.h" +#include "string-template-undef.h" + +#include "mozilla/Char16.h" + +#define NS_MULTILINE_LITERAL_STRING(s) static_cast<const nsLiteralString&>(nsLiteralString(s)) +#define NS_MULTILINE_LITERAL_STRING_INIT(n,s) n(s) +#define NS_NAMED_MULTILINE_LITERAL_STRING(n,s) const nsLiteralString n(s) + +#define NS_LITERAL_STRING(s) static_cast<const nsLiteralString&>(nsLiteralString(u"" s)) +#define NS_LITERAL_STRING_INIT(n,s) n(u"" s) +#define NS_NAMED_LITERAL_STRING(n,s) const nsLiteralString n(u"" s) + +#define NS_LITERAL_CSTRING(s) static_cast<const nsLiteralCString&>(nsLiteralCString("" s)) +#define NS_LITERAL_CSTRING_INIT(n,s) n("" s) +#define NS_NAMED_LITERAL_CSTRING(n,s) const nsLiteralCString n("" s) + +#endif /* !defined(nsLiteralString_h___) */ diff --git a/xpcom/string/nsPrintfCString.h b/xpcom/string/nsPrintfCString.h new file mode 100644 index 000000000..ce90ec497 --- /dev/null +++ 
b/xpcom/string/nsPrintfCString.h @@ -0,0 +1,42 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsPrintfCString_h___ +#define nsPrintfCString_h___ + +#include "nsString.h" + +/** + * nsPrintfCString lets you create a nsCString using a printf-style format + * string. For example: + * + * NS_WARNING(nsPrintfCString("Unexpected value: %f", 13.917).get()); + * + * nsPrintfCString has a small built-in auto-buffer. For larger strings, it + * will allocate on the heap. + * + * See also nsCString::AppendPrintf(). + */ +class nsPrintfCString : public nsFixedCString +{ + typedef nsCString string_type; + +public: + explicit nsPrintfCString(const char_type* aFormat, ...) + : nsFixedCString(mLocalBuffer, kLocalBufferSize, 0) + { + va_list ap; + va_start(ap, aFormat); + AppendPrintf(aFormat, ap); + va_end(ap); + } + +private: + static const uint32_t kLocalBufferSize = 16; + char_type mLocalBuffer[kLocalBufferSize]; +}; + +#endif // !defined(nsPrintfCString_h___) diff --git a/xpcom/string/nsPromiseFlatString.cpp b/xpcom/string/nsPromiseFlatString.cpp new file mode 100644 index 000000000..e66d2ef9f --- /dev/null +++ b/xpcom/string/nsPromiseFlatString.cpp @@ -0,0 +1,17 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsPromiseFlatString.h" + +// define nsPromiseFlatString +#include "string-template-def-unichar.h" +#include "nsTPromiseFlatString.cpp" +#include "string-template-undef.h" + +// define nsPromiseFlatCString +#include "string-template-def-char.h" +#include "nsTPromiseFlatString.cpp" +#include "string-template-undef.h" diff --git a/xpcom/string/nsPromiseFlatString.h b/xpcom/string/nsPromiseFlatString.h new file mode 100644 index 000000000..a025bfd26 --- /dev/null +++ b/xpcom/string/nsPromiseFlatString.h @@ -0,0 +1,22 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsPromiseFlatString_h___ +#define nsPromiseFlatString_h___ + +#include "nsString.h" + +// declare nsPromiseFlatString +#include "string-template-def-unichar.h" +#include "nsTPromiseFlatString.h" +#include "string-template-undef.h" + +// declare nsPromiseFlatCString +#include "string-template-def-char.h" +#include "nsTPromiseFlatString.h" +#include "string-template-undef.h" + +#endif /* !defined(nsPromiseFlatString_h___) */ diff --git a/xpcom/string/nsReadableUtils.cpp b/xpcom/string/nsReadableUtils.cpp new file mode 100644 index 000000000..524b1d7fe --- /dev/null +++ b/xpcom/string/nsReadableUtils.cpp @@ -0,0 +1,1383 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsReadableUtils.h" +#include "nsReadableUtilsImpl.h" + +#include <algorithm> + +#include "mozilla/CheckedInt.h" + +#include "nscore.h" +#include "nsMemory.h" +#include "nsString.h" +#include "nsTArray.h" +#include "nsUTF8Utils.h" + +using mozilla::IsASCII; + +/** + * Fallback implementation for finding the first non-ASCII character in a + * UTF-16 string. + */ +static inline int32_t +FirstNonASCIIUnvectorized(const char16_t* aBegin, const char16_t* aEnd) +{ + typedef mozilla::NonASCIIParameters<sizeof(size_t)> p; + const size_t kMask = p::mask(); + const uintptr_t kAlignMask = p::alignMask(); + const size_t kNumUnicharsPerWord = p::numUnicharsPerWord(); + + const char16_t* idx = aBegin; + + // Align ourselves to a word boundary. + for (; idx != aEnd && ((uintptr_t(idx) & kAlignMask) != 0); idx++) { + if (!IsASCII(*idx)) { + return idx - aBegin; + } + } + + // Check one word at a time. + const char16_t* wordWalkEnd = mozilla::aligned(aEnd, kAlignMask); + for (; idx != wordWalkEnd; idx += kNumUnicharsPerWord) { + const size_t word = *reinterpret_cast<const size_t*>(idx); + if (word & kMask) { + return idx - aBegin; + } + } + + // Take care of the remainder one character at a time. + for (; idx != aEnd; idx++) { + if (!IsASCII(*idx)) { + return idx - aBegin; + } + } + + return -1; +} + +/* + * This function returns -1 if all characters in str are ASCII characters. + * Otherwise, it returns a value less than or equal to the index of the first + * non-ASCII character in str. For example, if the first non-ASCII character is at + * position 25, it may return 25, 24, or 16. But it guarantees + * there are only ASCII characters before the returned value. 
*/
static inline int32_t
FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd)
{
  // Use the SSE2 implementation when it is compiled in and the CPU reports
  // support for it; otherwise fall through to the portable scan.
#ifdef MOZILLA_MAY_SUPPORT_SSE2
  if (mozilla::supports_sse2()) {
    return mozilla::SSE2::FirstNonASCII(aBegin, aEnd);
  }
#endif

  return FirstNonASCIIUnvectorized(aBegin, aEnd);
}

// Replaces the contents of aDest: truncates it, then delegates to
// LossyAppendUTF16toASCII.
void
LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest)
{
  aDest.Truncate();
  LossyAppendUTF16toASCII(aSource, aDest);
}

// Replaces the contents of aDest: truncates it, then delegates to
// AppendASCIItoUTF16.
void
CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest)
{
  aDest.Truncate();
  AppendASCIItoUTF16(aSource, aDest);
}

// Raw-pointer overload.  aDest is truncated even when aSource is null; in
// that case nothing is appended.
void
LossyCopyUTF16toASCII(const char16ptr_t aSource, nsACString& aDest)
{
  aDest.Truncate();
  if (aSource) {
    LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
  }
}

// Raw-pointer overload.  aDest is truncated even when aSource is null; in
// that case nothing is appended.
void
CopyASCIItoUTF16(const char* aSource, nsAString& aDest)
{
  aDest.Truncate();
  if (aSource) {
    AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
  }
}

// Infallible front end for the fallible CopyUTF16toUTF8 overload: a failure
// there is reported through aDest.AllocFailed().
void
CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest)
{
  if (!CopyUTF16toUTF8(aSource, aDest, mozilla::fallible)) {
    // Note that this may wildly underestimate the allocation that failed, as
    // we report the length of aSource as UTF-16 instead of UTF-8.
+ aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +bool +CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest, + const mozilla::fallible_t& aFallible) +{ + aDest.Truncate(); + if (!AppendUTF16toUTF8(aSource, aDest, aFallible)) { + return false; + } + return true; +} + +void +CopyUTF8toUTF16(const nsACString& aSource, nsAString& aDest) +{ + aDest.Truncate(); + AppendUTF8toUTF16(aSource, aDest); +} + +void +CopyUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest) +{ + aDest.Truncate(); + AppendUTF16toUTF8(aSource, aDest); +} + +void +CopyUTF8toUTF16(const char* aSource, nsAString& aDest) +{ + aDest.Truncate(); + AppendUTF8toUTF16(aSource, aDest); +} + +void +LossyAppendUTF16toASCII(const nsAString& aSource, nsACString& aDest) +{ + uint32_t old_dest_length = aDest.Length(); + aDest.SetLength(old_dest_length + aSource.Length()); + + nsAString::const_iterator fromBegin, fromEnd; + + nsACString::iterator dest; + aDest.BeginWriting(dest); + + dest.advance(old_dest_length); + + // right now, this won't work on multi-fragment destinations + LossyConvertEncoding16to8 converter(dest.get()); + + copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), + converter); +} + +void +AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest) +{ + if (!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible)) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +bool +AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest, + const mozilla::fallible_t& aFallible) +{ + uint32_t old_dest_length = aDest.Length(); + if (!aDest.SetLength(old_dest_length + aSource.Length(), + aFallible)) { + return false; + } + + nsACString::const_iterator fromBegin, fromEnd; + + nsAString::iterator dest; + aDest.BeginWriting(dest); + + dest.advance(old_dest_length); + + // right now, this won't work on multi-fragment destinations + LossyConvertEncoding8to16 converter(dest.get()); + + copy_string(aSource.BeginReading(fromBegin), 
aSource.EndReading(fromEnd),
              converter);
  return true;
}

// Raw-pointer overload: a null aSource leaves aDest untouched.
void
LossyAppendUTF16toASCII(const char16ptr_t aSource, nsACString& aDest)
{
  if (aSource) {
    LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
  }
}

// Raw-pointer fallible overload: a null aSource is treated as success and
// leaves aDest untouched.
bool
AppendASCIItoUTF16(const char* aSource, nsAString& aDest, const mozilla::fallible_t& aFallible)
{
  if (aSource) {
    return AppendASCIItoUTF16(nsDependentCString(aSource), aDest, aFallible);
  }

  return true;
}

// Raw-pointer overload: a null aSource leaves aDest untouched.
void
AppendASCIItoUTF16(const char* aSource, nsAString& aDest)
{
  if (aSource) {
    AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
  }
}

// Infallible front end for the fallible overload below: a failure there is
// reported through aDest.AllocFailed().
void
AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest)
{
  if (!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible)) {
    // Note that this may wildly underestimate the allocation that failed, as
    // we report the length of aSource as UTF-16 instead of UTF-8.
    aDest.AllocFailed(aDest.Length() + aSource.Length());
  }
}

/**
 * Appends aSource, converted from UTF-16 to UTF-8, to aDest.
 *
 * Sources of at least kFastPathMinLength code units are first probed with
 * FirstNonASCII(); shorter ones skip the probe (firstNonASCII stays 0).
 *
 * @return false if a length computation overflowed or aDest could not be
 *         grown; true otherwise.
 */
bool
AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
                  const mozilla::fallible_t& aFallible)
{
  // At 16 characters analysis showed better performance of both the all ASCII
  // and non-ASCII cases, so we limit calling |FirstNonASCII| to strings of
  // that length.
  const nsAString::size_type kFastPathMinLength = 16;

  int32_t firstNonASCII = 0;
  if (aSource.Length() >= kFastPathMinLength) {
    firstNonASCII = FirstNonASCII(aSource.BeginReading(), aSource.EndReading());
  }

  if (firstNonASCII == -1) {
    // This is all ASCII, we can use the more efficient lossy append.
mozilla::CheckedInt<nsACString::size_type> new_length(aSource.Length());
    new_length += aDest.Length();

    // Reserve the space fallibly here; the lossy append below resizes aDest
    // through the infallible SetLength overload.
    if (!new_length.isValid() ||
        !aDest.SetCapacity(new_length.value(), aFallible)) {
      return false;
    }

    LossyAppendUTF16toASCII(aSource, aDest);
    return true;
  }

  // First pass: measure how many UTF-8 bytes the non-ASCII tail needs.
  nsAString::const_iterator source_start, source_end;
  CalculateUTF8Size calculator;
  aSource.BeginReading(source_start);
  aSource.EndReading(source_end);

  // Skip the characters that we know are single byte.
  source_start.advance(firstNonASCII);

  copy_string(source_start,
              source_end, calculator);

  // Include the ASCII characters that were skipped in the count.
  size_t count = calculator.Size() + firstNonASCII;

  if (count) {
    auto old_dest_length = aDest.Length();
    // Grow the buffer if we need to.
    mozilla::CheckedInt<nsACString::size_type> new_length(count);
    new_length += old_dest_length;

    if (!new_length.isValid() ||
        !aDest.SetLength(new_length.value(), aFallible)) {
      return false;
    }

    // All ready? Time to convert

    // ascii_end marks where the UTF-8 converter starts reading; it stays at
    // the beginning of aSource unless the lossy shortcut below is taken.
    nsAString::const_iterator ascii_end;
    aSource.BeginReading(ascii_end);

    if (firstNonASCII >= static_cast<int32_t>(kFastPathMinLength)) {
      // Use the more efficient lossy converter for the ASCII portion.
      LossyConvertEncoding16to8 lossy_converter(
        aDest.BeginWriting() + old_dest_length);
      nsAString::const_iterator ascii_start;
      aSource.BeginReading(ascii_start);
      ascii_end.advance(firstNonASCII);

      copy_string(ascii_start, ascii_end, lossy_converter);
    } else {
      // Not using the lossy shortcut, we need to include the leading ASCII
      // chars.
+ firstNonASCII = 0; + } + + ConvertUTF16toUTF8 converter( + aDest.BeginWriting() + old_dest_length + firstNonASCII); + copy_string(ascii_end, + aSource.EndReading(source_end), converter); + + NS_ASSERTION(converter.Size() == count - firstNonASCII, + "Unexpected disparity between CalculateUTF8Size and " + "ConvertUTF16toUTF8"); + } + + return true; +} + +void +AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest) +{ + if (!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible)) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +bool +AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest, + const mozilla::fallible_t& aFallible) +{ + nsACString::const_iterator source_start, source_end; + CalculateUTF8Length calculator; + copy_string(aSource.BeginReading(source_start), + aSource.EndReading(source_end), calculator); + + uint32_t count = calculator.Length(); + + // Avoid making the string mutable if we're appending an empty string + if (count) { + uint32_t old_dest_length = aDest.Length(); + + // Grow the buffer if we need to. + if (!aDest.SetLength(old_dest_length + count, aFallible)) { + return false; + } + + // All ready? 
Time to convert + + ConvertUTF8toUTF16 converter(aDest.BeginWriting() + old_dest_length); + copy_string(aSource.BeginReading(source_start), + aSource.EndReading(source_end), converter); + + NS_ASSERTION(converter.ErrorEncountered() || + converter.Length() == count, + "CalculateUTF8Length produced the wrong length"); + + if (converter.ErrorEncountered()) { + NS_ERROR("Input wasn't UTF8 or incorrect length was calculated"); + aDest.SetLength(old_dest_length); + } + } + + return true; +} + +void +AppendUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest) +{ + if (aSource) { + AppendUTF16toUTF8(nsDependentString(aSource), aDest); + } +} + +void +AppendUTF8toUTF16(const char* aSource, nsAString& aDest) +{ + if (aSource) { + AppendUTF8toUTF16(nsDependentCString(aSource), aDest); + } +} + + +/** + * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator). + * + * @param aSource an string you will eventually be making a copy of + * @return a new buffer (of the type specified by the second parameter) which you must free with |free|. 
*
 */
template <class FromStringT, class ToCharT>
inline
ToCharT*
AllocateStringCopy(const FromStringT& aSource, ToCharT*)
{
  // The unnamed second parameter only selects ToCharT; callers pass a typed
  // null pointer.
  return static_cast<ToCharT*>(moz_xmalloc(
    (aSource.Length() + 1) * sizeof(ToCharT)));
}


/**
 * Returns a newly allocated, zero-terminated |char| copy of aSource, with each
 * UTF-16 code unit passed through LossyConvertEncoding16to8.
 */
char*
ToNewCString(const nsAString& aSource)
{
  char* result = AllocateStringCopy(aSource, (char*)0);
  if (!result) {
    return nullptr;
  }

  nsAString::const_iterator fromBegin, fromEnd;
  LossyConvertEncoding16to8 converter(result);
  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
              converter).write_terminator();
  return result;
}

/**
 * Returns a newly allocated, zero-terminated UTF-8 copy of aSource.
 *
 * @param aUTF8Count if non-null, receives the size computed by
 *                   CalculateUTF8Size; it is written before the allocation
 *                   is attempted.
 */
char*
ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count)
{
  // First pass: measure.
  nsAString::const_iterator start, end;
  CalculateUTF8Size calculator;
  copy_string(aSource.BeginReading(start), aSource.EndReading(end),
              calculator);

  if (aUTF8Count) {
    *aUTF8Count = calculator.Size();
  }

  // One extra byte for the terminator.
  char* result = static_cast<char*>
    (moz_xmalloc(calculator.Size() + 1));
  if (!result) {
    return nullptr;
  }

  // Second pass: convert into the new buffer.
  ConvertUTF16toUTF8 converter(result);
  copy_string(aSource.BeginReading(start), aSource.EndReading(end),
              converter).write_terminator();
  NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");

  return result;
}

/**
 * Returns a newly allocated, zero-terminated copy of the 8-bit string aSource.
 */
char*
ToNewCString(const nsACString& aSource)
{
  // no conversion needed, just allocate a buffer of the correct length and copy into it

  char* result = AllocateStringCopy(aSource, (char*)0);
  if (!result) {
    return nullptr;
  }

  nsACString::const_iterator fromBegin, fromEnd;
  char* toBegin = result;
  // The terminator is stored through the reference copy_string() returns.
  *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
               toBegin) = char(0);
  return result;
}

/**
 * Returns a newly allocated, zero-terminated copy of the 16-bit string
 * aSource.
 */
char16_t*
ToNewUnicode(const nsAString& aSource)
{
  // no conversion needed, just allocate a buffer of the correct length and copy into it

  char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
  if (!result) {
    return nullptr;
  }

  nsAString::const_iterator fromBegin, fromEnd;
  char16_t*
toBegin = result;
  // The terminator is stored through the reference copy_string() returns.
  *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
               toBegin) = char16_t(0);
  return result;
}

/**
 * Returns a newly allocated, zero-terminated |char16_t| copy of aSource, with
 * each byte passed through LossyConvertEncoding8to16.
 */
char16_t*
ToNewUnicode(const nsACString& aSource)
{
  char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
  if (!result) {
    return nullptr;
  }

  nsACString::const_iterator fromBegin, fromEnd;
  LossyConvertEncoding8to16 converter(result);
  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
              converter).write_terminator();
  return result;
}

/**
 * Returns the length reported by CalculateUTF8Length after it has been fed
 * all of aSource.
 */
uint32_t
CalcUTF8ToUnicodeLength(const nsACString& aSource)
{
  nsACString::const_iterator start, end;
  CalculateUTF8Length calculator;
  copy_string(aSource.BeginReading(start), aSource.EndReading(end),
              calculator);
  return calculator.Length();
}

/**
 * Converts aSource from UTF-8 into the caller-supplied aBuffer and writes a
 * terminator after the converted data.
 *
 * No capacity is passed in, so aBuffer must already be large enough
 * (UTF8ToNewUnicode sizes it as CalcUTF8ToUnicodeLength(aSource) + 1 units).
 *
 * @param aUTF16Count if non-null, receives converter.Length()
 * @return aBuffer
 */
char16_t*
UTF8ToUnicodeBuffer(const nsACString& aSource, char16_t* aBuffer,
                    uint32_t* aUTF16Count)
{
  nsACString::const_iterator start, end;
  ConvertUTF8toUTF16 converter(aBuffer);
  copy_string(aSource.BeginReading(start),
              aSource.EndReading(end),
              converter).write_terminator();
  if (aUTF16Count) {
    *aUTF16Count = converter.Length();
  }
  return aBuffer;
}

/**
 * Returns a newly allocated, zero-terminated UTF-16 copy of the UTF-8 string
 * aSource.
 *
 * @param aUTF16Count if non-null, receives the number of code units
 *                    UTF8ToUnicodeBuffer reported as converted
 */
char16_t*
UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count)
{
  const uint32_t length = CalcUTF8ToUnicodeLength(aSource);
  // NOTE(review): `length + 1` is evaluated in 32 bits before the multiply;
  // presumably string length limits keep this from wrapping -- confirm.
  const size_t buffer_size = (length + 1) * sizeof(char16_t);
  char16_t* buffer = static_cast<char16_t*>(moz_xmalloc(buffer_size));
  if (!buffer) {
    return nullptr;
  }

  // Filled in by UTF8ToUnicodeBuffer, which receives a non-null pointer.
  uint32_t copied;
  UTF8ToUnicodeBuffer(aSource, buffer, &copied);
  NS_ASSERTION(length == copied, "length mismatch");

  if (aUTF16Count) {
    *aUTF16Count = copied;
  }
  return buffer;
}

/**
 * Copies aLength code units of aSource, starting at aSrcOffset, into aDest.
 * No terminator is written and no bounds are checked here.
 *
 * @return aDest
 */
char16_t*
CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset, char16_t* aDest,
              uint32_t aLength)
{
  nsAString::const_iterator fromBegin, fromEnd;
  char16_t* toBegin = aDest;
  copy_string(aSource.BeginReading(fromBegin).advance(int32_t(aSrcOffset)),
              aSource.BeginReading(fromEnd).advance(int32_t(aSrcOffset
+ aLength)), + toBegin); + return aDest; +} + +void +CopyUnicodeTo(const nsAString::const_iterator& aSrcStart, + const nsAString::const_iterator& aSrcEnd, + nsAString& aDest) +{ + aDest.SetLength(Distance(aSrcStart, aSrcEnd)); + + nsAString::char_iterator dest = aDest.BeginWriting(); + nsAString::const_iterator fromBegin(aSrcStart); + + copy_string(fromBegin, aSrcEnd, dest); +} + +void +AppendUnicodeTo(const nsAString::const_iterator& aSrcStart, + const nsAString::const_iterator& aSrcEnd, + nsAString& aDest) +{ + uint32_t oldLength = aDest.Length(); + aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd)); + + nsAString::char_iterator dest = aDest.BeginWriting() + oldLength; + nsAString::const_iterator fromBegin(aSrcStart); + + copy_string(fromBegin, aSrcEnd, dest); +} + +bool +IsASCII(const nsAString& aString) +{ + static const char16_t NOT_ASCII = char16_t(~0x007F); + + + // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character + + nsAString::const_iterator iter, done_reading; + aString.BeginReading(iter); + aString.EndReading(done_reading); + + const char16_t* c = iter.get(); + const char16_t* end = done_reading.get(); + + while (c < end) { + if (*c++ & NOT_ASCII) { + return false; + } + } + + return true; +} + +bool +IsASCII(const nsACString& aString) +{ + static const char NOT_ASCII = char(~0x7F); + + + // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character + + nsACString::const_iterator iter, done_reading; + aString.BeginReading(iter); + aString.EndReading(done_reading); + + const char* c = iter.get(); + const char* end = done_reading.get(); + + while (c < end) { + if (*c++ & NOT_ASCII) { + return false; + } + } + + return true; +} + +bool +IsUTF8(const nsACString& aString, bool aRejectNonChar) +{ + nsReadingIterator<char> done_reading; + aString.EndReading(done_reading); + + int32_t state = 0; + bool overlong = false; + bool surrogate = false; + bool nonchar = false; 
uint16_t olupper = 0; // overlong byte upper bound.
  uint16_t slower = 0;  // surrogate byte lower bound.

  nsReadingIterator<char> iter;
  aString.BeginReading(iter);

  const char* ptr = iter.get();
  const char* end = done_reading.get();
  // |state| is the number of continuation bytes still expected for the
  // sequence currently being checked; 0 means "expecting a lead byte".
  while (ptr < end) {
    uint8_t c;

    if (0 == state) {
      c = *ptr++;

      if (UTF8traits::isASCII(c)) {
        continue;
      }

      if (c <= 0xC1) { // [80-BF] where not expected, [C0-C1] for overlong.
        return false;
      } else if (UTF8traits::is2byte(c)) {
        state = 1;
      } else if (UTF8traits::is3byte(c)) {
        state = 2;
        if (c == 0xE0) { // to exclude E0[80-9F][80-BF]
          overlong = true;
          olupper = 0x9F;
        } else if (c == 0xED) { // ED[A0-BF][80-BF] : surrogate codepoint
          surrogate = true;
          slower = 0xA0;
        } else if (c == 0xEF) { // EF BF [BE-BF] : non-character
          nonchar = true;
        }
      } else if (c <= 0xF4) { // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090)
        state = 3;
        nonchar = true;
        if (c == 0xF0) { // to exclude F0[80-8F][80-BF]{2}
          overlong = true;
          olupper = 0x8F;
        } else if (c == 0xF4) { // to exclude F4[90-BF][80-BF]
          // actually not surrogates but codepoints beyond 0x10FFFF
          surrogate = true;
          slower = 0x90;
        }
      } else {
        return false; // Not UTF-8 string
      }
    }

    // Non-character rejection is opt-in: drop the flag when the caller did
    // not ask for it.
    if (nonchar && !aRejectNonChar) {
      nonchar = false;
    }

    // Consume the continuation bytes of the current sequence.
    while (ptr < end && state) {
      c = *ptr++;
      --state;

      // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
      if (nonchar &&
          ((!state && c < 0xBE) ||
           (state == 1 && c != 0xBF) ||
           (state == 2 && 0x0F != (0x0F & c)))) {
        nonchar = false;
      }

      if (!UTF8traits::isInSeq(c) || (overlong && c <= olupper) ||
          (surrogate && slower <= c) || (nonchar && !state)) {
        return false; // Not UTF-8 string
      }

      // The overlong / surrogate range checks only apply to the first
      // continuation byte.
      overlong = surrogate = false;
    }
  }
  return !state; // state != 0 at the end indicates an invalid UTF-8 seq.
}

/**
 * A character sink for in-place case conversion.
+ */ +class ConvertToUpperCase +{ +public: + typedef char value_type; + + uint32_t + write(const char* aSource, uint32_t aSourceLength) + { + char* cp = const_cast<char*>(aSource); + const char* end = aSource + aSourceLength; + while (cp != end) { + char ch = *cp; + if (ch >= 'a' && ch <= 'z') { + *cp = ch - ('a' - 'A'); + } + ++cp; + } + return aSourceLength; + } +}; + +void +ToUpperCase(nsCSubstring& aCString) +{ + ConvertToUpperCase converter; + char* start; + converter.write(aCString.BeginWriting(start), aCString.Length()); +} + +/** + * A character sink for copying with case conversion. + */ +class CopyToUpperCase +{ +public: + typedef char value_type; + + explicit CopyToUpperCase(nsACString::iterator& aDestIter, + const nsACString::iterator& aEndIter) + : mIter(aDestIter) + , mEnd(aEndIter) + { + } + + uint32_t + write(const char* aSource, uint32_t aSourceLength) + { + uint32_t len = XPCOM_MIN(uint32_t(mEnd - mIter), aSourceLength); + char* cp = mIter.get(); + const char* end = aSource + len; + while (aSource != end) { + char ch = *aSource; + if ((ch >= 'a') && (ch <= 'z')) { + *cp = ch - ('a' - 'A'); + } else { + *cp = ch; + } + ++aSource; + ++cp; + } + mIter.advance(len); + return len; + } + +protected: + nsACString::iterator& mIter; + const nsACString::iterator& mEnd; +}; + +void +ToUpperCase(const nsACString& aSource, nsACString& aDest) +{ + nsACString::const_iterator fromBegin, fromEnd; + nsACString::iterator toBegin, toEnd; + aDest.SetLength(aSource.Length()); + + CopyToUpperCase converter(aDest.BeginWriting(toBegin), aDest.EndWriting(toEnd)); + copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), + converter); +} + +/** + * A character sink for case conversion. 
+ */ +class ConvertToLowerCase +{ +public: + typedef char value_type; + + uint32_t + write(const char* aSource, uint32_t aSourceLength) + { + char* cp = const_cast<char*>(aSource); + const char* end = aSource + aSourceLength; + while (cp != end) { + char ch = *cp; + if ((ch >= 'A') && (ch <= 'Z')) { + *cp = ch + ('a' - 'A'); + } + ++cp; + } + return aSourceLength; + } +}; + +void +ToLowerCase(nsCSubstring& aCString) +{ + ConvertToLowerCase converter; + char* start; + converter.write(aCString.BeginWriting(start), aCString.Length()); +} + +/** + * A character sink for copying with case conversion. + */ +class CopyToLowerCase +{ +public: + typedef char value_type; + + explicit CopyToLowerCase(nsACString::iterator& aDestIter, + const nsACString::iterator& aEndIter) + : mIter(aDestIter) + , mEnd(aEndIter) + { + } + + uint32_t + write(const char* aSource, uint32_t aSourceLength) + { + uint32_t len = XPCOM_MIN(uint32_t(mEnd - mIter), aSourceLength); + char* cp = mIter.get(); + const char* end = aSource + len; + while (aSource != end) { + char ch = *aSource; + if ((ch >= 'A') && (ch <= 'Z')) { + *cp = ch + ('a' - 'A'); + } else { + *cp = ch; + } + ++aSource; + ++cp; + } + mIter.advance(len); + return len; + } + +protected: + nsACString::iterator& mIter; + const nsACString::iterator& mEnd; +}; + +void +ToLowerCase(const nsACString& aSource, nsACString& aDest) +{ + nsACString::const_iterator fromBegin, fromEnd; + nsACString::iterator toBegin, toEnd; + aDest.SetLength(aSource.Length()); + + CopyToLowerCase converter(aDest.BeginWriting(toBegin), aDest.EndWriting(toEnd)); + copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), + converter); +} + +bool +ParseString(const nsACString& aSource, char aDelimiter, + nsTArray<nsCString>& aArray) +{ + nsACString::const_iterator start, end; + aSource.BeginReading(start); + aSource.EndReading(end); + + uint32_t oldLength = aArray.Length(); + + for (;;) { + nsACString::const_iterator delimiter = start; + 
FindCharInReadable(aDelimiter, delimiter, end);

    // Empty tokens (adjacent delimiters, leading delimiter) are skipped.
    if (delimiter != start) {
      if (!aArray.AppendElement(Substring(start, delimiter))) {
        // Roll back everything this call appended before reporting failure.
        aArray.RemoveElementsAt(oldLength, aArray.Length() - oldLength);
        return false;
      }
    }

    if (delimiter == end) {
      break;
    }
    // Step over the delimiter; a trailing delimiter ends the parse.
    start = ++delimiter;
    if (start == end) {
      break;
    }
  }

  return true;
}

/**
 * Forward search for aPattern inside [aSearchStart, aSearchEnd).
 *
 * aCompare is called on single characters and a non-zero result is treated as
 * a mismatch.  On success both iterators are updated to delimit the match; on
 * failure aSearchStart ends up equal to aSearchEnd.
 *
 * NOTE(review): the first pattern character is read without checking that
 * aPattern is non-empty -- confirm callers never pass an empty pattern.
 */
template <class StringT, class IteratorT, class Comparator>
bool
FindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart,
                    IteratorT& aSearchEnd, const Comparator& aCompare)
{
  bool found_it = false;

  // only bother searching at all if we're given a non-empty range to search
  if (aSearchStart != aSearchEnd) {
    IteratorT aPatternStart, aPatternEnd;
    aPattern.BeginReading(aPatternStart);
    aPattern.EndReading(aPatternEnd);

    // outer loop keeps searching till we find it or run out of string to search
    while (!found_it) {
      // fast inner loop (that's what it's called, not what it is) looks for a potential match
      while (aSearchStart != aSearchEnd &&
             aCompare(aPatternStart.get(), aSearchStart.get(), 1, 1)) {
        ++aSearchStart;
      }

      // if we broke out of the `fast' loop because we're out of string ... we're done: no match
      if (aSearchStart == aSearchEnd) {
        break;
      }

      // otherwise, we're at a potential match, let's see if we really hit one
      IteratorT testPattern(aPatternStart);
      IteratorT testSearch(aSearchStart);

      // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
      for (;;) {
        // we already compared the first character in the outer loop,
        // so we'll advance before the next comparison
        ++testPattern;
        ++testSearch;

        // if we verified all the way to the end of the pattern, then we found it!
+ if (testPattern == aPatternEnd) { + found_it = true; + aSearchEnd = testSearch; // return the exact found range through the parameters + break; + } + + // if we got to end of the string we're searching before we hit the end of the + // pattern, we'll never find what we're looking for + if (testSearch == aSearchEnd) { + aSearchStart = aSearchEnd; + break; + } + + // else if we mismatched ... it's time to advance to the next search position + // and get back into the `fast' loop + if (aCompare(testPattern.get(), testSearch.get(), 1, 1)) { + ++aSearchStart; + break; + } + } + } + } + + return found_it; +} + +/** + * This searches the entire string from right to left, and returns the first match found, if any. + */ +template <class StringT, class IteratorT, class Comparator> +bool +RFindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart, + IteratorT& aSearchEnd, const Comparator& aCompare) +{ + IteratorT patternStart, patternEnd, searchEnd = aSearchEnd; + aPattern.BeginReading(patternStart); + aPattern.EndReading(patternEnd); + + // Point to the last character in the pattern + --patternEnd; + // outer loop keeps searching till we run out of string to search + while (aSearchStart != searchEnd) { + // Point to the end position of the next possible match + --searchEnd; + + // Check last character, if a match, explore further from here + if (aCompare(patternEnd.get(), searchEnd.get(), 1, 1) == 0) { + // We're at a potential match, let's see if we really hit one + IteratorT testPattern(patternEnd); + IteratorT testSearch(searchEnd); + + // inner loop verifies the potential match at the current position + do { + // if we verified all the way to the end of the pattern, then we found it! 
if (testPattern == patternStart) {
          aSearchStart = testSearch;  // point to start of match
          aSearchEnd = ++searchEnd;   // point to end of match
          return true;
        }

        // if we got to end of the string we're searching before we hit the end of the
        // pattern, we'll never find what we're looking for
        if (testSearch == aSearchStart) {
          aSearchStart = aSearchEnd;
          return false;
        }

        // test previous character for a match
        --testPattern;
        --testSearch;
      } while (aCompare(testPattern.get(), testSearch.get(), 1, 1) == 0);
    }
  }

  aSearchStart = aSearchEnd;
  return false;
}

// Forward search over 16-bit strings; forwards to FindInReadable_Impl.
bool
FindInReadable(const nsAString& aPattern,
               nsAString::const_iterator& aSearchStart,
               nsAString::const_iterator& aSearchEnd,
               const nsStringComparator& aComparator)
{
  return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
}

// Forward search over 8-bit strings; forwards to FindInReadable_Impl.
bool
FindInReadable(const nsACString& aPattern,
               nsACString::const_iterator& aSearchStart,
               nsACString::const_iterator& aSearchEnd,
               const nsCStringComparator& aComparator)
{
  return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
}

// Forward search over 8-bit strings using
// nsCaseInsensitiveCStringComparator.
bool
CaseInsensitiveFindInReadable(const nsACString& aPattern,
                              nsACString::const_iterator& aSearchStart,
                              nsACString::const_iterator& aSearchEnd)
{
  return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd,
                             nsCaseInsensitiveCStringComparator());
}

// Backward search over 16-bit strings; forwards to RFindInReadable_Impl.
bool
RFindInReadable(const nsAString& aPattern,
                nsAString::const_iterator& aSearchStart,
                nsAString::const_iterator& aSearchEnd,
                const nsStringComparator& aComparator)
{
  return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
}

// Backward search over 8-bit strings; forwards to RFindInReadable_Impl.
bool
RFindInReadable(const nsACString& aPattern,
                nsACString::const_iterator& aSearchStart,
                nsACString::const_iterator& aSearchEnd,
                const nsCStringComparator& aComparator)
{
  return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
}

bool
FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart,
const nsAString::const_iterator& aSearchEnd) +{ + int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); + + const char16_t* charFoundAt = + nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar); + if (charFoundAt) { + aSearchStart.advance(charFoundAt - aSearchStart.get()); + return true; + } + + aSearchStart.advance(fragmentLength); + return false; +} + +bool +FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart, + const nsACString::const_iterator& aSearchEnd) +{ + int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); + + const char* charFoundAt = + nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar); + if (charFoundAt) { + aSearchStart.advance(charFoundAt - aSearchStart.get()); + return true; + } + + aSearchStart.advance(fragmentLength); + return false; +} + +uint32_t +CountCharInReadable(const nsAString& aStr, char16_t aChar) +{ + uint32_t count = 0; + nsAString::const_iterator begin, end; + + aStr.BeginReading(begin); + aStr.EndReading(end); + + while (begin != end) { + if (*begin == aChar) { + ++count; + } + ++begin; + } + + return count; +} + +uint32_t +CountCharInReadable(const nsACString& aStr, char aChar) +{ + uint32_t count = 0; + nsACString::const_iterator begin, end; + + aStr.BeginReading(begin); + aStr.EndReading(end); + + while (begin != end) { + if (*begin == aChar) { + ++count; + } + ++begin; + } + + return count; +} + +bool +StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring) +{ + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, sub_len).Equals(aSubstring); +} + +bool +StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring, + const nsStringComparator& aComparator) +{ + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, 
sub_len).Equals(aSubstring, aComparator); +} + +bool +StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring) +{ + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, sub_len).Equals(aSubstring); +} + +bool +StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring, + const nsCStringComparator& aComparator) +{ + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator); +} + +bool +StringEndsWith(const nsAString& aSource, const nsAString& aSubstring) +{ + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring); +} + +bool +StringEndsWith(const nsAString& aSource, const nsAString& aSubstring, + const nsStringComparator& aComparator) +{ + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring, + aComparator); +} + +bool +StringEndsWith(const nsACString& aSource, const nsACString& aSubstring) +{ + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring); +} + +bool +StringEndsWith(const nsACString& aSource, const nsACString& aSubstring, + const nsCStringComparator& aComparator) +{ + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring, + aComparator); +} + + + +static const char16_t empty_buffer[1] = { 
'\0' };

// Returns a function-local static 16-bit string built over empty_buffer.
const nsAFlatString&
EmptyString()
{
  static const nsDependentString sEmpty(empty_buffer);

  return sEmpty;
}

// Returns a function-local static 8-bit string.  It reuses the 16-bit
// empty_buffer through a cast; the first byte of that buffer is zero.
const nsAFlatCString&
EmptyCString()
{
  static const nsDependentCString sEmpty((const char*)empty_buffer);

  return sEmpty;
}

// Returns a function-local static default-constructed nsXPIDLString.
const nsAFlatString&
NullString()
{
  static const nsXPIDLString sNull;

  return sNull;
}

// Returns a function-local static default-constructed nsXPIDLCString.
const nsAFlatCString&
NullCString()
{
  static const nsXPIDLCString sNull;

  return sNull;
}

/**
 * Compares a UTF-8 string with a UTF-16 string code point by code point.
 *
 * @return INT32_MIN if the UTF-8 decoder reports an error; otherwise -1, 0 or
 *         1 depending on whether aUTF8String sorts before, equal to or after
 *         aUTF16String.  When one string is a prefix of the other, the
 *         longer one sorts after.
 */
int32_t
CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
                   const nsASingleFragmentString& aUTF16String)
{
  static const uint32_t NOT_ASCII = uint32_t(~0x7F);

  const char* u8;
  const char* u8end;
  aUTF8String.BeginReading(u8);
  aUTF8String.EndReading(u8end);

  const char16_t* u16;
  const char16_t* u16end;
  aUTF16String.BeginReading(u16);
  aUTF16String.EndReading(u16end);

  while (u8 != u8end && u16 != u16end) {
    // Cast away the signedness of *u8 to prevent signextension when
    // converting to uint32_t
    uint32_t c8_32 = (uint8_t)*u8;

    if (c8_32 & NOT_ASCII) {
      // Multi-byte UTF-8 sequence: decode one code point from each side; the
      // enumerators advance u8 / u16 themselves.
      bool err;
      c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err);
      if (err) {
        return INT32_MIN;
      }

      uint32_t c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end);
      // The above UTF16CharEnumerator::NextChar() calls can
      // fail, but if it does for anything other than no data to
      // look at (which can't happen here), it returns the
      // Unicode replacement character 0xFFFD for the invalid
      // data they were fed. Ignore that error and treat invalid
      // UTF16 as 0xFFFD.
      //
      // This matches what our UTF16 to UTF8 conversion code
      // does, and thus a UTF8 string that came from an invalid
      // UTF16 string will compare equal to the invalid UTF16
      // string it came from. Same is true for any other UTF16
      // string differs only in the invalid part of the string.

      if (c8_32 != c16_32) {
        return c8_32 < c16_32 ? -1 : 1;
      }
    } else {
      // ASCII byte: compare it directly with the current UTF-16 code unit.
      if (c8_32 != *u16) {
        return c8_32 > *u16 ?
1 : -1;
      }

      ++u8;
      ++u16;
    }
  }

  if (u8 != u8end) {
    // We got to the end of the UTF16 string, but not to the end of
    // the UTF8 string. The UTF8 string is longer than the UTF16
    // string

    return 1;
  }

  if (u16 != u16end) {
    // We got to the end of the UTF8 string, but not to the end of
    // the UTF16 string. The UTF16 string is longer than the UTF8
    // string

    return -1;
  }

  // The two strings match.

  return 0;
}

/**
 * Appends the code point aSource to aDest: one code unit when IS_IN_BMP()
 * holds, otherwise the H_SURROGATE / L_SURROGATE pair.
 */
void
AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest)
{
  NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");
  if (IS_IN_BMP(aSource)) {
    aDest.Append(char16_t(aSource));
  } else {
    aDest.Append(H_SURROGATE(aSource));
    aDest.Append(L_SURROGATE(aSource));
  }
}

// C-linkage entry points that forward to the infallible append helpers.
// Both pointers are dereferenced without a null check.
extern "C" {

void Gecko_AppendUTF16toCString(nsACString* aThis, const nsAString* aOther)
{
  AppendUTF16toUTF8(*aOther, *aThis);
}

void Gecko_AppendUTF8toString(nsAString* aThis, const nsACString* aOther)
{
  AppendUTF8toUTF16(*aOther, *aThis);
}

}
diff --git a/xpcom/string/nsReadableUtils.h b/xpcom/string/nsReadableUtils.h new file mode 100644 index 000000000..24824d927 --- /dev/null +++ b/xpcom/string/nsReadableUtils.h @@ -0,0 +1,428 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// IWYU pragma: private, include "nsString.h"

#ifndef nsReadableUtils_h___
#define nsReadableUtils_h___

/**
 * I guess all the routines in this file are all mis-named.
 * According to our conventions, they should be |NS_xxx|.
+ */ + +#include "mozilla/Assertions.h" +#include "nsAString.h" + +#include "nsTArrayForwardDeclare.h" + +inline size_t +Distance(const nsReadingIterator<char16_t>& aStart, + const nsReadingIterator<char16_t>& aEnd) +{ + MOZ_ASSERT(aStart.get() <= aEnd.get()); + return static_cast<size_t>(aEnd.get() - aStart.get()); +} +inline size_t +Distance(const nsReadingIterator<char>& aStart, + const nsReadingIterator<char>& aEnd) +{ + MOZ_ASSERT(aStart.get() <= aEnd.get()); + return static_cast<size_t>(aEnd.get() - aStart.get()); +} + +void LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest); +void CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest); + +void LossyCopyUTF16toASCII(const char16ptr_t aSource, nsACString& aDest); +void CopyASCIItoUTF16(const char* aSource, nsAString& aDest); + +void CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest); +MOZ_MUST_USE bool CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest, + const mozilla::fallible_t&); +void CopyUTF8toUTF16(const nsACString& aSource, nsAString& aDest); + +void CopyUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest); +void CopyUTF8toUTF16(const char* aSource, nsAString& aDest); + +void LossyAppendUTF16toASCII(const nsAString& aSource, nsACString& aDest); +void AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest); +MOZ_MUST_USE bool AppendASCIItoUTF16(const nsACString& aSource, + nsAString& aDest, + const mozilla::fallible_t&); + +void LossyAppendUTF16toASCII(const char16ptr_t aSource, nsACString& aDest); +MOZ_MUST_USE bool AppendASCIItoUTF16(const char* aSource, + nsAString& aDest, + const mozilla::fallible_t&); +void AppendASCIItoUTF16(const char* aSource, nsAString& aDest); + +void AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest); +MOZ_MUST_USE bool AppendUTF16toUTF8(const nsAString& aSource, + nsACString& aDest, + const mozilla::fallible_t&); +void AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest); +MOZ_MUST_USE bool 
AppendUTF8toUTF16(const nsACString& aSource, + nsAString& aDest, + const mozilla::fallible_t&); + +void AppendUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest); +void AppendUTF8toUTF16(const char* aSource, nsAString& aDest); + +/** + * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. + * + * Allocates and returns a new |char| buffer which you must free with |free|. + * Performs a lossy encoding conversion by chopping 16-bit wide characters down to 8-bits wide while copying |aSource| to your new buffer. + * This conversion is not well defined; but it reproduces legacy string behavior. + * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls. + * + * @param aSource a 16-bit wide string + * @return a new |char| buffer you must free with |free|. + */ +char* ToNewCString(const nsAString& aSource); + + +/** + * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. + * + * Allocates and returns a new |char| buffer which you must free with |free|. + * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls. + * + * @param aSource an 8-bit wide string + * @return a new |char| buffer you must free with |free|. + */ +char* ToNewCString(const nsACString& aSource); + +/** + * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. + * + * Allocates and returns a new |char| buffer which you must free with + * |free|. + * Performs an encoding conversion from a UTF-16 string to a UTF-8 string + * copying |aSource| to your new buffer. + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. + * + * @param aSource a UTF-16 string (made of char16_t's) + * @param aUTF8Count the number of 8-bit units that was returned + * @return a new |char| buffer you must free with |free|. 
+ */ + +char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count = nullptr); + + +/** + * Returns a new |char16_t| buffer containing a zero-terminated copy of + * |aSource|. + * + * Allocates and returns a new |char16_t| buffer which you must free with + * |free|. + * The new buffer is zero-terminated, but that may not help you if |aSource| + * contains embedded nulls. + * + * @param aSource a UTF-16 string + * @return a new |char16_t| buffer you must free with |free|. + */ +char16_t* ToNewUnicode(const nsAString& aSource); + + +/** + * Returns a new |char16_t| buffer containing a zero-terminated copy of |aSource|. + * + * Allocates and returns a new |char16_t| buffer which you must free with |free|. + * Performs an encoding conversion by 0-padding 8-bit wide characters up to 16-bits wide while copying |aSource| to your new buffer. + * This conversion is not well defined; but it reproduces legacy string behavior. + * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls. + * + * @param aSource an 8-bit wide string (a C-string, NOT UTF-8) + * @return a new |char16_t| buffer you must free with |free|. + */ +char16_t* ToNewUnicode(const nsACString& aSource); + +/** + * Returns the required length for a char16_t buffer holding + * a copy of aSource, using UTF-8 to UTF-16 conversion. + * The length does NOT include any space for zero-termination. + * + * @param aSource an 8-bit wide string, UTF-8 encoded + * @return length of UTF-16 encoded string copy, not zero-terminated + */ +uint32_t CalcUTF8ToUnicodeLength(const nsACString& aSource); + +/** + * Copies the source string into the specified buffer, converting UTF-8 to + * UTF-16 in the process. The conversion is well defined for valid UTF-8 + * strings. + * The copied string will be zero-terminated! Any embedded nulls will be + * copied nonetheless. 
It is the caller's responsibility to ensure the buffer + * is large enough to hold the string copy plus one char16_t for + * zero-termination! + * + * @see CalcUTF8ToUnicodeLength( const nsACString& ) + * @see UTF8ToNewUnicode( const nsACString&, uint32_t* ) + * + * @param aSource an 8-bit wide string, UTF-8 encoded + * @param aBuffer the buffer holding the converted string copy + * @param aUTF16Count receiving optionally the number of 16-bit units that + * were copied + * @return aBuffer pointer, for convenience + */ +char16_t* UTF8ToUnicodeBuffer(const nsACString& aSource, + char16_t* aBuffer, + uint32_t* aUTF16Count = nullptr); + +/** + * Returns a new |char16_t| buffer containing a zero-terminated copy + * of |aSource|. + * + * Allocates and returns a new |char16_t| buffer which you must free with + * |free|. Performs an encoding conversion from UTF-8 to UTF-16 + * while copying |aSource| to your new buffer. This conversion is well defined + * for a valid UTF-8 string. The new buffer is zero-terminated, but that + * may not help you if |aSource| contains embedded nulls. + * + * @param aSource an 8-bit wide string, UTF-8 encoded + * @param aUTF16Count the number of 16-bit units that was returned + * @return a new |char16_t| buffer you must free with |free|. + * (UTF-16 encoded) + */ +char16_t* UTF8ToNewUnicode(const nsACString& aSource, + uint32_t* aUTF16Count = nullptr); + +/** + * Copies |aLength| 16-bit code units from the start of |aSource| to the + * |char16_t| buffer |aDest|. + * + * After this operation |aDest| is not null terminated. 
+ * + * @param aSource a UTF-16 string + * @param aSrcOffset start offset in the source string + * @param aDest a |char16_t| buffer + * @param aLength the number of 16-bit code units to copy + * @return pointer to destination buffer - identical to |aDest| + */ +char16_t* CopyUnicodeTo(const nsAString& aSource, + uint32_t aSrcOffset, + char16_t* aDest, + uint32_t aLength); + + +/** + * Copies 16-bit characters between iterators |aSrcStart| and + * |aSrcEnd| to the writable string |aDest|. Similar to the + * |nsString::Mid| method. + * + * After this operation |aDest| is not null terminated. + * + * @param aSrcStart start source iterator + * @param aSrcEnd end source iterator + * @param aDest destination for the copy + */ +void CopyUnicodeTo(const nsAString::const_iterator& aSrcStart, + const nsAString::const_iterator& aSrcEnd, + nsAString& aDest); + +/** + * Appends 16-bit characters between iterators |aSrcStart| and + * |aSrcEnd| to the writable string |aDest|. + * + * After this operation |aDest| is not null terminated. + * + * @param aSrcStart start source iterator + * @param aSrcEnd end source iterator + * @param aDest destination for the copy + */ +void AppendUnicodeTo(const nsAString::const_iterator& aSrcStart, + const nsAString::const_iterator& aSrcEnd, + nsAString& aDest); + +/** + * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F). + * + * @param aString a 16-bit wide string to scan + */ +bool IsASCII(const nsAString& aString); + +/** + * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F). + * + * @param aString a 8-bit wide string to scan + */ +bool IsASCII(const nsACString& aString); + +/** + * Returns |true| if |aString| is a valid UTF-8 string. + * XXX This is not bullet-proof and nor an all-purpose UTF-8 validator. 
+ * It is mainly written to replace and roughly equivalent to + * + * str.Equals(NS_ConvertUTF16toUTF8(NS_ConvertUTF8toUTF16(str))) + * + * (see bug 191541) + * As such, it does not check for non-UTF-8 7bit encodings such as + * ISO-2022-JP and HZ. + * + * It rejects sequences with the following errors: + * + * byte sequences that cannot be decoded into characters according to + * UTF-8's rules (including cases where the input is part of a valid + * UTF-8 sequence but starts or ends mid-character) + * overlong sequences (i.e., cases where a character was encoded + * non-canonically by using more bytes than necessary) + * surrogate codepoints (i.e., the codepoints reserved for + representing astral characters in UTF-16) + * codepoints above the unicode range (i.e., outside the first 17 + * planes; higher than U+10FFFF), in accordance with + * http://tools.ietf.org/html/rfc3629 + * when aRejectNonChar is true (the default), any codepoint whose low + * 16 bits are 0xFFFE or 0xFFFF + + * + * @param aString an 8-bit wide string to scan + * @param aRejectNonChar a boolean to control the rejection of utf-8 + * non characters + */ +bool IsUTF8(const nsACString& aString, bool aRejectNonChar = true); + +bool ParseString(const nsACString& aAstring, char aDelimiter, + nsTArray<nsCString>& aArray); + +/** + * Converts case in place in the argument string. + */ +void ToUpperCase(nsACString&); + +void ToLowerCase(nsACString&); + +void ToUpperCase(nsCSubstring&); + +void ToLowerCase(nsCSubstring&); + +/** + * Converts case from string aSource to aDest. + */ +void ToUpperCase(const nsACString& aSource, nsACString& aDest); + +void ToLowerCase(const nsACString& aSource, nsACString& aDest); + +/** + * Finds the leftmost occurrence of |aPattern|, if any in the range |aSearchStart|..|aSearchEnd|. + * + * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to + * point to the match. 
If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|. + * + * Currently, this is equivalent to the O(m*n) implementation previously on |ns[C]String|. + * If we need something faster, then we can implement that later. + */ + +bool FindInReadable(const nsAString& aPattern, nsAString::const_iterator&, + nsAString::const_iterator&, + const nsStringComparator& = nsDefaultStringComparator()); +bool FindInReadable(const nsACString& aPattern, nsACString::const_iterator&, + nsACString::const_iterator&, + const nsCStringComparator& = nsDefaultCStringComparator()); + +/* sometimes we don't care about where the string was, just that we + * found it or not */ +inline bool +FindInReadable(const nsAString& aPattern, const nsAString& aSource, + const nsStringComparator& aCompare = nsDefaultStringComparator()) +{ + nsAString::const_iterator start, end; + aSource.BeginReading(start); + aSource.EndReading(end); + return FindInReadable(aPattern, start, end, aCompare); +} + +inline bool +FindInReadable(const nsACString& aPattern, const nsACString& aSource, + const nsCStringComparator& aCompare = nsDefaultCStringComparator()) +{ + nsACString::const_iterator start, end; + aSource.BeginReading(start); + aSource.EndReading(end); + return FindInReadable(aPattern, start, end, aCompare); +} + + +bool CaseInsensitiveFindInReadable(const nsACString& aPattern, + nsACString::const_iterator&, + nsACString::const_iterator&); + +/** + * Finds the rightmost occurrence of |aPattern| + * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to + * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|. 
+ * + */ +bool RFindInReadable(const nsAString& aPattern, nsAString::const_iterator&, + nsAString::const_iterator&, + const nsStringComparator& = nsDefaultStringComparator()); +bool RFindInReadable(const nsACString& aPattern, nsACString::const_iterator&, + nsACString::const_iterator&, + const nsCStringComparator& = nsDefaultCStringComparator()); + +/** +* Finds the leftmost occurrence of |aChar|, if any in the range +* |aSearchStart|..|aSearchEnd|. +* +* Returns |true| if a match was found, and adjusts |aSearchStart| to +* point to the match. If no match was found, returns |false| and +* makes |aSearchStart == aSearchEnd|. +*/ +bool FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart, + const nsAString::const_iterator& aSearchEnd); +bool FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart, + const nsACString::const_iterator& aSearchEnd); + +/** +* Finds the number of occurences of |aChar| in the string |aStr| +*/ +uint32_t CountCharInReadable(const nsAString& aStr, + char16_t aChar); +uint32_t CountCharInReadable(const nsACString& aStr, + char aChar); + +bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring); +bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring, + const nsStringComparator& aComparator); +bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring); +bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring, + const nsCStringComparator& aComparator); +bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring); +bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring, + const nsStringComparator& aComparator); +bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring); +bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring, + const nsCStringComparator& aComparator); + +const nsAFlatString& EmptyString(); +const nsAFlatCString& EmptyCString(); + +const 
nsAFlatString& NullString(); +const nsAFlatCString& NullCString(); + +/** +* Compare a UTF-8 string to an UTF-16 string. +* +* Returns 0 if the strings are equal, -1 if aUTF8String is less +* than aUTF16String, and 1 in the reverse case. In case of fatal +* error (eg aUTF8String is not valid UTF8; invalid UTF16 is treated as U+FFFD), +* this method will return INT32_MIN. +*/ +int32_t CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String, + const nsASingleFragmentString& aUTF16String); + +void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest); + +template<class T> +inline bool +EnsureStringLength(T& aStr, uint32_t aLen) +{ + aStr.SetLength(aLen); + return (aStr.Length() == aLen); +} + +#endif // !defined(nsReadableUtils_h___) diff --git a/xpcom/string/nsReadableUtilsImpl.h b/xpcom/string/nsReadableUtilsImpl.h new file mode 100644 index 000000000..ff1497b51 --- /dev/null +++ b/xpcom/string/nsReadableUtilsImpl.h @@ -0,0 +1,54 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <stdint.h> + +namespace mozilla { + +inline bool IsASCII(char16_t aChar) { + return (aChar & 0xFF80) == 0; +} + +/** + * Provides a pointer before or equal to |aPtr| that is suitably aligned. + */ +inline const char16_t* aligned(const char16_t* aPtr, const uintptr_t aMask) +{ + return reinterpret_cast<const char16_t*>( + reinterpret_cast<const uintptr_t>(aPtr) & ~aMask); +} + +/** + * Structures for word-sized vectorization of ASCII checking for UTF-16 + * strings. 
+ */ +template<size_t size> struct NonASCIIParameters; +template<> struct NonASCIIParameters<4> { + static inline size_t mask() { return 0xff80ff80; } + static inline uintptr_t alignMask() { return 0x3; } + static inline size_t numUnicharsPerWord() { return 2; } +}; + +template<> struct NonASCIIParameters<8> { + static inline size_t mask() { + static const uint64_t maskAsUint64 = UINT64_C(0xff80ff80ff80ff80); + // We have to explicitly cast this 64-bit value to a size_t, or else + // compilers for 32-bit platforms will warn about it being too large to fit + // in the size_t return type. (Fortunately, this code isn't actually + // invoked on 32-bit platforms -- they'll use the <4> specialization above. + // So it is, in fact, OK that this value is too large for a 32-bit size_t.) + return (size_t)maskAsUint64; + } + static inline uintptr_t alignMask() { return 0x7; } + static inline size_t numUnicharsPerWord() { return 4; } +}; + +namespace SSE2 { + +int32_t FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd); + +} // namespace SSE2 +} // namespace mozilla diff --git a/xpcom/string/nsReadableUtilsSSE2.cpp b/xpcom/string/nsReadableUtilsSSE2.cpp new file mode 100644 index 000000000..fe01d57af --- /dev/null +++ b/xpcom/string/nsReadableUtilsSSE2.cpp @@ -0,0 +1,70 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include <emmintrin.h> + +#include "nsReadableUtilsImpl.h" + +namespace mozilla { +namespace SSE2 { + +static inline bool +is_zero (__m128i x) +{ + return + _mm_movemask_epi8(_mm_cmpeq_epi8(x, _mm_setzero_si128())) == 0xffff; +} + +int32_t +FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd) +{ + const size_t kNumUnicharsPerVector = sizeof(__m128i) / sizeof(char16_t); + typedef NonASCIIParameters<sizeof(size_t)> p; + const size_t kMask = p::mask(); + const uintptr_t kXmmAlignMask = 0xf; + const uint16_t kShortMask = 0xff80; + const size_t kNumUnicharsPerWord = p::numUnicharsPerWord(); + + const char16_t* idx = aBegin; + + // Align ourselves to a 16-byte boundary as required by _mm_load_si128 + for (; idx != aEnd && ((uintptr_t(idx) & kXmmAlignMask) != 0); idx++) { + if (!IsASCII(*idx)) { + return idx - aBegin; + } + } + + // Check one XMM register (16 bytes) at a time. + const char16_t* vectWalkEnd = aligned(aEnd, kXmmAlignMask); + __m128i vectmask = _mm_set1_epi16(static_cast<int16_t>(kShortMask)); + for (; idx != vectWalkEnd; idx += kNumUnicharsPerVector) { + const __m128i vect = *reinterpret_cast<const __m128i*>(idx); + if (!is_zero(_mm_and_si128(vect, vectmask))) { + return idx - aBegin; + } + } + + // Check one word at a time. + const char16_t* wordWalkEnd = aligned(aEnd, p::alignMask()); + for(; idx != wordWalkEnd; idx += kNumUnicharsPerWord) { + const size_t word = *reinterpret_cast<const size_t*>(idx); + if (word & kMask) { + return idx - aBegin; + } + } + + // Take care of the remainder one character at a time. 
+ for (; idx != aEnd; idx++) { + if (!IsASCII(*idx)) { + return idx - aBegin; + } + } + + return -1; +} + +} // namespace SSE2 +} // namespace mozilla diff --git a/xpcom/string/nsString.cpp b/xpcom/string/nsString.cpp new file mode 100644 index 000000000..2759eb4ca --- /dev/null +++ b/xpcom/string/nsString.cpp @@ -0,0 +1,17 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsString.h" + +// define nsString +#include "string-template-def-unichar.h" +#include "nsTString.cpp" +#include "string-template-undef.h" + +// define nsCString +#include "string-template-def-char.h" +#include "nsTString.cpp" +#include "string-template-undef.h" diff --git a/xpcom/string/nsString.h b/xpcom/string/nsString.h new file mode 100644 index 000000000..580e4113d --- /dev/null +++ b/xpcom/string/nsString.h @@ -0,0 +1,209 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsString_h___ +#define nsString_h___ + +#include "mozilla/Attributes.h" + +#include "nsSubstring.h" +#include "nsDependentSubstring.h" +#include "nsReadableUtils.h" + +#include <new> + +// enable support for the obsolete string API if not explicitly disabled +#ifndef MOZ_STRING_WITH_OBSOLETE_API +#define MOZ_STRING_WITH_OBSOLETE_API 1 +#endif + +#if MOZ_STRING_WITH_OBSOLETE_API +// radix values for ToInteger/AppendInt +#define kRadix10 (10) +#define kRadix16 (16) +#define kAutoDetect (100) +#define kRadixUnknown (kAutoDetect+1) +#define IGNORE_CASE (true) +#endif + + +// declare nsString, et. al. +#include "string-template-def-unichar.h" +#include "nsTString.h" +#include "string-template-undef.h" + +// declare nsCString, et. al. +#include "string-template-def-char.h" +#include "nsTString.h" +#include "string-template-undef.h" + +static_assert(sizeof(char16_t) == 2, "size of char16_t must be 2"); +static_assert(sizeof(nsString::char_type) == 2, + "size of nsString::char_type must be 2"); +static_assert(nsString::char_type(-1) > nsString::char_type(0), + "nsString::char_type must be unsigned"); +static_assert(sizeof(nsCString::char_type) == 1, + "size of nsCString::char_type must be 1"); + + +/** + * A helper class that converts a UTF-16 string to ASCII in a lossy manner + */ +class NS_LossyConvertUTF16toASCII : public nsAutoCString +{ +public: + explicit NS_LossyConvertUTF16toASCII(const char16ptr_t aString) + { + LossyAppendUTF16toASCII(aString, *this); + } + + NS_LossyConvertUTF16toASCII(const char16ptr_t aString, uint32_t aLength) + { + LossyAppendUTF16toASCII(Substring(aString, aLength), *this); + } + + explicit NS_LossyConvertUTF16toASCII(const nsAString& aString) + { + LossyAppendUTF16toASCII(aString, *this); + } + +private: + // NOT TO BE IMPLEMENTED + NS_LossyConvertUTF16toASCII(char) = delete; +}; + + +class NS_ConvertASCIItoUTF16 : public nsAutoString +{ +public: + explicit NS_ConvertASCIItoUTF16(const char* aCString) + { + 
AppendASCIItoUTF16(aCString, *this); + } + + NS_ConvertASCIItoUTF16(const char* aCString, uint32_t aLength) + { + AppendASCIItoUTF16(Substring(aCString, aLength), *this); + } + + explicit NS_ConvertASCIItoUTF16(const nsACString& aCString) + { + AppendASCIItoUTF16(aCString, *this); + } + +private: + // NOT TO BE IMPLEMENTED + NS_ConvertASCIItoUTF16(char16_t) = delete; +}; + + +/** + * A helper class that converts a UTF-16 string to UTF-8 + */ +class NS_ConvertUTF16toUTF8 : public nsAutoCString +{ +public: + explicit NS_ConvertUTF16toUTF8(const char16ptr_t aString) + { + AppendUTF16toUTF8(aString, *this); + } + + NS_ConvertUTF16toUTF8(const char16ptr_t aString, uint32_t aLength) + { + AppendUTF16toUTF8(Substring(aString, aLength), *this); + } + + explicit NS_ConvertUTF16toUTF8(const nsAString& aString) + { + AppendUTF16toUTF8(aString, *this); + } + +private: + // NOT TO BE IMPLEMENTED + NS_ConvertUTF16toUTF8(char) = delete; +}; + + +class NS_ConvertUTF8toUTF16 : public nsAutoString +{ +public: + explicit NS_ConvertUTF8toUTF16(const char* aCString) + { + AppendUTF8toUTF16(aCString, *this); + } + + NS_ConvertUTF8toUTF16(const char* aCString, uint32_t aLength) + { + AppendUTF8toUTF16(Substring(aCString, aLength), *this); + } + + explicit NS_ConvertUTF8toUTF16(const nsACString& aCString) + { + AppendUTF8toUTF16(aCString, *this); + } + +private: + // NOT TO BE IMPLEMENTED + NS_ConvertUTF8toUTF16(char16_t) = delete; +}; + + +#ifdef MOZ_USE_CHAR16_WRAPPER + +inline char16_t* +wwc(wchar_t* aStr) +{ + return reinterpret_cast<char16_t*>(aStr); +} + +inline wchar_t* +wwc(char16_t* aStr) +{ + return reinterpret_cast<wchar_t*>(aStr); +} + +inline const char16_t* +wwc(const wchar_t* aStr) +{ + return reinterpret_cast<const char16_t*>(aStr); +} + +inline const wchar_t* +wwc(const char16_t* aStr) +{ + return reinterpret_cast<const wchar_t*>(aStr); +} + +#else + +inline char16_t* +wwc(char16_t* aStr) +{ + return aStr; +} + +inline const char16_t* +wwc(const char16_t* aStr) +{ + 
return aStr; +} + +#endif + +// the following are included/declared for backwards compatibility +typedef nsAutoString nsVoidableString; + +#include "nsDependentString.h" +#include "nsLiteralString.h" +#include "nsPromiseFlatString.h" + +// need to include these for backwards compatibility +#include "nsMemory.h" +#include <string.h> +#include <stdio.h> +#include "plhash.h" + +#endif // !defined(nsString_h___) diff --git a/xpcom/string/nsStringBuffer.h b/xpcom/string/nsStringBuffer.h new file mode 100644 index 000000000..432289bf6 --- /dev/null +++ b/xpcom/string/nsStringBuffer.h @@ -0,0 +1,160 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsStringBuffer_h__ +#define nsStringBuffer_h__ + +#include "mozilla/Atomics.h" +#include "mozilla/MemoryReporting.h" + +template<class T> struct already_AddRefed; + +/** + * This structure precedes the string buffers "we" allocate. It may be the + * case that nsTAString::mData does not point to one of these special + * buffers. The mFlags member variable distinguishes the buffer type. + * + * When this header is in use, it enables reference counting, and capacity + * tracking. NOTE: A string buffer can be modified only if its reference + * count is 1. + */ +class nsStringBuffer +{ +private: + friend class CheckStaticAtomSizes; + + mozilla::Atomic<int32_t> mRefCount; + uint32_t mStorageSize; + +public: + + /** + * Allocates a new string buffer, with given size in bytes and a + * reference count of one. When the string buffer is no longer needed, + * it should be released via Release. + * + * It is up to the caller to set the bytes corresponding to the string + * buffer by calling the Data method to fetch the raw data pointer. 
Care + * must be taken to properly null terminate the character array. The + * storage size can be greater than the length of the actual string + * (i.e., it is not required that the null terminator appear in the last + * storage unit of the string buffer's data). + * + * @return new string buffer or null if out of memory. + */ + static already_AddRefed<nsStringBuffer> Alloc(size_t aStorageSize); + + /** + * Resizes the given string buffer to the specified storage size. This + * method must not be called on a readonly string buffer. Use this API + * carefully!! + * + * This method behaves like the ANSI-C realloc function. (i.e., If the + * allocation fails, null will be returned and the given string buffer + * will remain unmodified.) + * + * @see IsReadonly + */ + static nsStringBuffer* Realloc(nsStringBuffer* aBuf, size_t aStorageSize); + + /** + * Increment the reference count on this string buffer. + */ + void NS_FASTCALL AddRef(); + + /** + * Decrement the reference count on this string buffer. The string + * buffer will be destroyed when its reference count reaches zero. + */ + void NS_FASTCALL Release(); + + /** + * This method returns the string buffer corresponding to the given data + * pointer. The data pointer must have been returned previously by a + * call to the nsStringBuffer::Data method. + */ + static nsStringBuffer* FromData(void* aData) + { + return reinterpret_cast<nsStringBuffer*>(aData) - 1; + } + + /** + * This method returns the data pointer for this string buffer. + */ + void* Data() const + { + return const_cast<char*>(reinterpret_cast<const char*>(this + 1)); + } + + /** + * This function returns the storage size of a string buffer in bytes. + * This value is the same value that was originally passed to Alloc (or + * Realloc). 
+ */ + uint32_t StorageSize() const + { + return mStorageSize; + } + + /** + * If this method returns false, then the caller can be sure that their + * reference to the string buffer is the only reference to the string + * buffer, and therefore it has exclusive access to the string buffer and + * associated data. However, if this function returns true, then other + * consumers may rely on the data in this buffer being immutable and + * other threads may access this buffer simultaneously. + */ + bool IsReadonly() const + { + return mRefCount > 1; + } + + /** + * The FromString methods return a string buffer for the given string + * object or null if the string object does not have a string buffer. + * The reference count of the string buffer is NOT incremented by these + * methods. If the caller wishes to hold onto the returned value, then + * the returned string buffer must have its reference count incremented + * via a call to the AddRef method. + */ + static nsStringBuffer* FromString(const nsAString& aStr); + static nsStringBuffer* FromString(const nsACString& aStr); + + /** + * The ToString methods assign this string buffer to a given string + * object. If the string object does not support sharable string + * buffers, then its value will be set to a copy of the given string + * buffer. Otherwise, these methods increment the reference count of the + * given string buffer. It is important to specify the length (in + * storage units) of the string contained in the string buffer since the + * length of the string may be less than its storage size. The string + * must have a null terminator at the offset specified by |len|. + * + * NOTE: storage size is measured in bytes even for wide strings; + * however, string length is always measured in storage units + * (2-byte units for wide strings). 
+ */ + void ToString(uint32_t aLen, nsAString& aStr, bool aMoveOwnership = false); + void ToString(uint32_t aLen, nsACString& aStr, bool aMoveOwnership = false); + + /** + * This measures the size only if the StringBuffer is unshared. + */ + size_t SizeOfIncludingThisIfUnshared(mozilla::MallocSizeOf aMallocSizeOf) const; + + /** + * This measures the size regardless of whether the StringBuffer is + * unshared. + * + * WARNING: Only use this if you really know what you are doing, because + * it can easily lead to double-counting strings. If you do use them, + * please explain clearly in a comment why it's safe and won't lead to + * double-counting. + */ + size_t SizeOfIncludingThisEvenIfShared(mozilla::MallocSizeOf aMallocSizeOf) const; +}; + +#endif /* !defined(nsStringBuffer_h__ */ diff --git a/xpcom/string/nsStringComparator.cpp b/xpcom/string/nsStringComparator.cpp new file mode 100644 index 000000000..81f1629f8 --- /dev/null +++ b/xpcom/string/nsStringComparator.cpp @@ -0,0 +1,39 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <ctype.h> +#include "nsAString.h" +#include "plstr.h" + + +// define nsStringComparator +#include "string-template-def-unichar.h" +#include "nsTStringComparator.cpp" +#include "string-template-undef.h" + +// define nsCStringComparator +#include "string-template-def-char.h" +#include "nsTStringComparator.cpp" +#include "string-template-undef.h" + + +int +nsCaseInsensitiveCStringComparator::operator()(const char_type* aLhs, + const char_type* aRhs, + uint32_t aLhsLength, + uint32_t aRhsLength) const +{ + if (aLhsLength != aRhsLength) { + return (aLhsLength > aRhsLength) ? 
1 : -1; + } + int32_t result = int32_t(PL_strncasecmp(aLhs, aRhs, aLhsLength)); + //Egads. PL_strncasecmp is returning *very* negative numbers. + //Some folks expect -1,0,1, so let's temper its enthusiasm. + if (result < 0) { + result = -1; + } + return result; +} diff --git a/xpcom/string/nsStringFwd.h b/xpcom/string/nsStringFwd.h new file mode 100644 index 000000000..a9162f384 --- /dev/null +++ b/xpcom/string/nsStringFwd.h @@ -0,0 +1,64 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* nsStringFwd.h --- forward declarations for string classes */ + +#ifndef nsStringFwd_h___ +#define nsStringFwd_h___ + +#include "nscore.h" + +#ifndef MOZILLA_INTERNAL_API +#error Internal string headers are not available from external-linkage code. 
+#endif + +/** + * double-byte (char16_t) string types + */ + +class nsAString; +class nsSubstringTuple; +class nsString; +class nsAutoString; +class nsDependentString; +class nsDependentSubstring; +class nsPromiseFlatString; +class nsStringComparator; +class nsDefaultStringComparator; +class nsXPIDLString; + + +/** + * single-byte (char) string types + */ + +class nsACString; +class nsCSubstringTuple; +class nsCString; +class nsAutoCString; +class nsDependentCString; +class nsDependentCSubstring; +class nsPromiseFlatCString; +class nsCStringComparator; +class nsDefaultCStringComparator; +class nsXPIDLCString; + + +/** + * typedefs for backwards compatibility + */ + +typedef nsAString nsSubstring; +typedef nsACString nsCSubstring; + +typedef nsString nsAFlatString; +typedef nsSubstring nsASingleFragmentString; + +typedef nsCString nsAFlatCString; +typedef nsCSubstring nsASingleFragmentCString; + + +#endif /* !defined(nsStringFwd_h___) */ diff --git a/xpcom/string/nsStringIterator.h b/xpcom/string/nsStringIterator.h new file mode 100644 index 000000000..e309a21e9 --- /dev/null +++ b/xpcom/string/nsStringIterator.h @@ -0,0 +1,268 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsStringIterator_h___ +#define nsStringIterator_h___ + +#include "nsCharTraits.h" +#include "nsAlgorithm.h" +#include "nsDebug.h" + +/** + * @see nsTAString + */ + +template <class CharT> +class nsReadingIterator +{ +public: + typedef nsReadingIterator<CharT> self_type; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + typedef CharT value_type; + typedef const CharT* pointer; + typedef const CharT& reference; + +private: + friend class nsAString; + friend class nsACString; + + // unfortunately, the API for nsReadingIterator requires that the + // iterator know its start and end positions. this was needed when + // we supported multi-fragment strings, but now it is really just + // extra baggage. we should remove mStart and mEnd at some point. + + const CharT* mStart; + const CharT* mEnd; + const CharT* mPosition; + +public: + nsReadingIterator() + { + } + // nsReadingIterator( const nsReadingIterator<CharT>& ); // auto-generated copy-constructor OK + // nsReadingIterator<CharT>& operator=( const nsReadingIterator<CharT>& ); // auto-generated copy-assignment operator OK + + pointer get() const + { + return mPosition; + } + + CharT operator*() const + { + return *get(); + } + + self_type& operator++() + { + ++mPosition; + return *this; + } + + self_type operator++(int) + { + self_type result(*this); + ++mPosition; + return result; + } + + self_type& operator--() + { + --mPosition; + return *this; + } + + self_type operator--(int) + { + self_type result(*this); + --mPosition; + return result; + } + + self_type& advance(difference_type aN) + { + if (aN > 0) { + difference_type step = XPCOM_MIN(aN, mEnd - mPosition); + + NS_ASSERTION(step > 0, + "can't advance a reading iterator beyond the end of a string"); + + mPosition += step; + } else if (aN < 0) { + difference_type step = XPCOM_MAX(aN, -(mPosition - mStart)); + + NS_ASSERTION(step < 0, + "can't advance (backward) a reading iterator beyond the end of a string"); + + mPosition += step; 
+ } + return *this; + } + + // We return an unsigned type here (with corresponding assert) rather than + // the more usual difference_type because we want to make this class go + // away in favor of mozilla::RangedPtr. Since RangedPtr has the same + // requirement we are enforcing here, the transition ought to be much + // smoother. + size_type operator-(const self_type& aOther) const + { + MOZ_ASSERT(mPosition >= aOther.mPosition); + return mPosition - aOther.mPosition; + } +}; + +/** + * @see nsTAString + */ + +template <class CharT> +class nsWritingIterator +{ +public: + typedef nsWritingIterator<CharT> self_type; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + typedef CharT value_type; + typedef CharT* pointer; + typedef CharT& reference; + +private: + friend class nsAString; + friend class nsACString; + + // unfortunately, the API for nsWritingIterator requires that the + // iterator know its start and end positions. this was needed when + // we supported multi-fragment strings, but now it is really just + // extra baggage. we should remove mStart and mEnd at some point. 
+ + CharT* mStart; + CharT* mEnd; + CharT* mPosition; + +public: + nsWritingIterator() + { + } + // nsWritingIterator( const nsWritingIterator<CharT>& ); // auto-generated copy-constructor OK + // nsWritingIterator<CharT>& operator=( const nsWritingIterator<CharT>& ); // auto-generated copy-assignment operator OK + + pointer get() const + { + return mPosition; + } + + reference operator*() const + { + return *get(); + } + + self_type& operator++() + { + ++mPosition; + return *this; + } + + self_type operator++(int) + { + self_type result(*this); + ++mPosition; + return result; + } + + self_type& operator--() + { + --mPosition; + return *this; + } + + self_type operator--(int) + { + self_type result(*this); + --mPosition; + return result; + } + + self_type& advance(difference_type aN) + { + if (aN > 0) { + difference_type step = XPCOM_MIN(aN, mEnd - mPosition); + + NS_ASSERTION(step > 0, + "can't advance a writing iterator beyond the end of a string"); + + mPosition += step; + } else if (aN < 0) { + difference_type step = XPCOM_MAX(aN, -(mPosition - mStart)); + + NS_ASSERTION(step < 0, + "can't advance (backward) a writing iterator beyond the end of a string"); + + mPosition += step; + } + return *this; + } + + // We return an unsigned type here (with corresponding assert) rather than + // the more usual difference_type because we want to make this class go + // away in favor of mozilla::RangedPtr. Since RangedPtr has the same + // requirement we are enforcing here, the transition ought to be much + // smoother. 
+ size_type operator-(const self_type& aOther) const + { + MOZ_ASSERT(mPosition >= aOther.mPosition); + return mPosition - aOther.mPosition; + } +}; + +template <class CharT> +struct nsCharSinkTraits<nsWritingIterator<CharT>> +{ + static void + write(nsWritingIterator<CharT>& aIter, const CharT* aStr, uint32_t aN) + { + nsCharTraits<CharT>::move(aIter.get(), aStr, aN); + aIter.advance(aN); + } +}; + +template <class CharT> +inline bool +operator==(const nsReadingIterator<CharT>& aLhs, + const nsReadingIterator<CharT>& aRhs) +{ + return aLhs.get() == aRhs.get(); +} + +template <class CharT> +inline bool +operator!=(const nsReadingIterator<CharT>& aLhs, + const nsReadingIterator<CharT>& aRhs) +{ + return aLhs.get() != aRhs.get(); +} + + +// +// |nsWritingIterator|s +// + +template <class CharT> +inline bool +operator==(const nsWritingIterator<CharT>& aLhs, + const nsWritingIterator<CharT>& aRhs) +{ + return aLhs.get() == aRhs.get(); +} + +template <class CharT> +inline bool +operator!=(const nsWritingIterator<CharT>& aLhs, + const nsWritingIterator<CharT>& aRhs) +{ + return aLhs.get() != aRhs.get(); +} + +#endif /* !defined(nsStringIterator_h___) */ diff --git a/xpcom/string/nsStringObsolete.cpp b/xpcom/string/nsStringObsolete.cpp new file mode 100644 index 000000000..bd6daacab --- /dev/null +++ b/xpcom/string/nsStringObsolete.cpp @@ -0,0 +1,1053 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsString.h" + + +/** + * nsTString obsolete API support + */ + +#if MOZ_STRING_WITH_OBSOLETE_API + +#include "nsDependentString.h" +#include "nsDependentSubstring.h" +#include "nsReadableUtils.h" +#include "nsCRT.h" +#include "nsUTF8Utils.h" +#include "prdtoa.h" + +/* ***** BEGIN RICKG BLOCK ***** + * + * NOTE: This section of code was extracted from rickg's bufferRoutines.h file. + * For the most part it remains unmodified. We want to eliminate (or at + * least clean up) this code at some point. If you find the formatting + * in this section somewhat inconsistent, don't blame me! ;-) + */ + +// avoid STDC's tolower since it may do weird things with non-ASCII bytes +inline char +ascii_tolower(char aChar) +{ + if (aChar >= 'A' && aChar <= 'Z') + return aChar + ('a' - 'A'); + return aChar; +} + +//----------------------------------------------------------------------------- +// +// This set of methods is used to search a buffer looking for a char. +// + + +/** + * This methods cans the given buffer for the given char + * + * @update gess 02/17/00 + * @param aDest is the buffer to be searched + * @param aDestLength is the size (in char-units, not bytes) of the buffer + * @param anOffset is the start pos to begin searching + * @param aChar is the target character we're looking for + * @param aCount tells us how many characters to iterate through (which may be different than aLength); -1 means use full length. + * @return index of pos if found, else -1 (kNotFound) + */ +static int32_t +FindChar1(const char* aDest,uint32_t aDestLength,int32_t anOffset,const char16_t aChar,int32_t aCount) { + + if(anOffset < 0) + anOffset=0; + + if(aCount < 0) + aCount = (int32_t)aDestLength; + + if((aChar < 256) && (0 < aDestLength) && ((uint32_t)anOffset < aDestLength)) { + + //We'll only search if the given aChar is within the normal ascii a range, + //(Since this string is definitely within the ascii range). 
+ + if(0<aCount) { + + const char* left= aDest+anOffset; + const char* last= left+aCount; + const char* max = aDest+aDestLength; + const char* end = (last<max) ? last : max; + + int32_t theMax = end-left; + if(0<theMax) { + + unsigned char theChar = (unsigned char) aChar; + const char* result=(const char*)memchr(left, (int)theChar, theMax); + + if(result) + return result-aDest; + + } + } + } + + return kNotFound; +} + + +/** + * This methods cans the given buffer for the given char + * + * @update gess 3/25/98 + * @param aDest is the buffer to be searched + * @param aDestLength is the size (in char-units, not bytes) of the buffer + * @param anOffset is the start pos to begin searching + * @param aChar is the target character we're looking for + * @param aCount tells us how many characters to iterate through (which may be different than aLength); -1 means use full length. + * @return index of pos if found, else -1 (kNotFound) + */ +static int32_t +FindChar2(const char16_t* aDest,uint32_t aDestLength,int32_t anOffset,const char16_t aChar,int32_t aCount) { + + if(anOffset < 0) + anOffset=0; + + if(aCount < 0) + aCount = (int32_t)aDestLength; + + if((0<aDestLength) && ((uint32_t)anOffset < aDestLength)) { + + if(0<aCount) { + + const char16_t* root = aDest; + const char16_t* left = root+anOffset; + const char16_t* last = left+aCount; + const char16_t* max = root+aDestLength; + const char16_t* end = (last<max) ? 
last : max; + + while(left<end){ + + if(*left==aChar) + return (left-root); + + ++left; + } + } + } + + return kNotFound; +} + + +/** + * This methods cans the given buffer (in reverse) for the given char + * + * @update gess 02/17/00 + * @param aDest is the buffer to be searched + * @param aDestLength is the size (in char-units, not bytes) of the buffer + * @param anOffset is the start pos to begin searching + * @param aChar is the target character we're looking for + * @param aCount tells us how many characters to iterate through (which may be different than aLength); -1 means use full length. + * @return index of pos if found, else -1 (kNotFound) + */ + +static int32_t +RFindChar1(const char* aDest,uint32_t aDestLength,int32_t anOffset,const char16_t aChar,int32_t aCount) { + + if(anOffset < 0) + anOffset=(int32_t)aDestLength-1; + + if(aCount < 0) + aCount = int32_t(aDestLength); + + if((aChar<256) && (0 < aDestLength) && ((uint32_t)anOffset < aDestLength)) { + + //We'll only search if the given aChar is within the normal ascii a range, + //(Since this string is definitely within the ascii range). + + if(0 < aCount) { + + const char* rightmost = aDest + anOffset; + const char* min = rightmost - aCount + 1; + const char* leftmost = (min<aDest) ? aDest: min; + + char theChar=(char)aChar; + while(leftmost <= rightmost){ + + if((*rightmost) == theChar) + return rightmost - aDest; + + --rightmost; + } + } + } + + return kNotFound; +} + + +/** + * This methods cans the given buffer for the given char + * + * @update gess 3/25/98 + * @param aDest is the buffer to be searched + * @param aDestLength is the size (in char-units, not bytes) of the buffer + * @param anOffset is the start pos to begin searching + * @param aChar is the target character we're looking for + * @param aCount tells us how many characters to iterate through (which may be different than aLength); -1 means use full length. 
+ *  @return  index of pos if found, else -1 (kNotFound)
+ */
+static int32_t
+RFindChar2(const char16_t* aDest,uint32_t aDestLength,int32_t anOffset,const char16_t aChar,int32_t aCount) {
+
+  // A negative offset means "start at the last char".
+  if(anOffset < 0)
+    anOffset=(int32_t)aDestLength-1;
+
+  // A negative count means "search the whole buffer".
+  if(aCount < 0)
+    aCount = int32_t(aDestLength);
+
+  if((0 < aDestLength) && ((uint32_t)anOffset < aDestLength)) {
+
+    if(0 < aCount) {
+
+      // Walk by index rather than by pointer: the old code formed
+      // aDest+anOffset-aCount+1 and decremented below aDest, both of which
+      // are undefined even if the pointer is never dereferenced.
+      int32_t leftmost = (aCount > anOffset) ? 0 : anOffset - aCount + 1;
+
+      for(int32_t pos = anOffset; leftmost <= pos; --pos){
+
+        if(aDest[pos] == aChar)
+          return pos;
+      }
+    }
+  }
+
+  return kNotFound;
+}
+
+//-----------------------------------------------------------------------------
+//
+//  This set of methods is used to compare one buffer onto another. The
+//  functions are differentiated by the size of source and dest character
+//  sizes. WARNING: Your destination buffer MUST be big enough to hold all the
+//  source bytes. We don't validate these ranges here (this should be done in
+//  higher level routines).
+// + + +/** + * This method compares the data in one buffer with another + * @update gess 01/04/99 + * @param aStr1 is the first buffer to be compared + * @param aStr2 is the 2nd buffer to be compared + * @param aCount is the number of chars to compare + * @param aIgnoreCase tells us whether to use a case-sensitive comparison + * @return -1,0,1 depending on <,==,> + */ +static +#ifdef __SUNPRO_CC +inline +#endif /* __SUNPRO_CC */ +int32_t +Compare1To1(const char* aStr1,const char* aStr2,uint32_t aCount,bool aIgnoreCase) { + int32_t result=0; + if(aIgnoreCase) + result=int32_t(PL_strncasecmp(aStr1, aStr2, aCount)); + else + result=nsCharTraits<char>::compare(aStr1,aStr2,aCount); + + // alien comparisons may return out-of-bound answers + // instead of the -1, 0, 1 expected by most clients + if ( result < -1 ) + result = -1; + else if ( result > 1 ) + result = 1; + return result; +} + +/** + * This method compares the data in one buffer with another + * @update gess 01/04/99 + * @param aStr1 is the first buffer to be compared + * @param aStr2 is the 2nd buffer to be compared + * @param aCount is the number of chars to compare + * @param aIgnoreCase tells us whether to use a case-sensitive comparison + * @return -1,0,1 depending on <,==,> + */ +static +#ifdef __SUNPRO_CC +inline +#endif /* __SUNPRO_CC */ +int32_t +Compare2To2(const char16_t* aStr1,const char16_t* aStr2,uint32_t aCount){ + int32_t result; + + if ( aStr1 && aStr2 ) + result = nsCharTraits<char16_t>::compare(aStr1, aStr2, aCount); + + // The following cases are rare and survivable caller errors. + // Two null pointers are equal, but any string, even 0 length + // is greater than a null pointer. It might not really matter, + // but we pick something reasonable anyway. 
+ else if ( !aStr1 && !aStr2 ) + result = 0; + else if ( aStr1 ) + result = 1; + else + result = -1; + + // alien comparisons may give answers outside the -1, 0, 1 expected by callers + if ( result < -1 ) + result = -1; + else if ( result > 1 ) + result = 1; + return result; +} + + +/** + * This method compares the data in one buffer with another + * @update gess 01/04/99 + * @param aStr1 is the first buffer to be compared + * @param aStr2 is the 2nd buffer to be compared + * @param aCount is the number of chars to compare + * @param aIgnoreCase tells us whether to use a case-sensitive comparison + * @return -1,0,1 depending on <,==,> + */ +static +#ifdef __SUNPRO_CC +inline +#endif /* __SUNPRO_CC */ +int32_t +Compare2To1(const char16_t* aStr1,const char* aStr2,uint32_t aCount,bool aIgnoreCase){ + const char16_t* s1 = aStr1; + const char *s2 = aStr2; + + if (aStr1 && aStr2) { + if (aCount != 0) { + do { + + char16_t c1 = *s1++; + char16_t c2 = char16_t((unsigned char)*s2++); + + if (c1 != c2) { +#ifdef DEBUG + // we won't warn on c1>=128 (the 2-byte value) because often + // it is just fine to compare an constant, ascii value (i.e. "body") + // against some non-ascii value (i.e. 
a unicode string that + // was downloaded from a web page) + if (aIgnoreCase && c2>=128) + NS_WARNING("got a non-ASCII string, but we can't do an accurate case conversion!"); +#endif + + // can't do case conversion on characters out of our range + if (aIgnoreCase && c1<128 && c2<128) { + + c1 = ascii_tolower(char(c1)); + c2 = ascii_tolower(char(c2)); + + if (c1 == c2) continue; + } + + if (c1 < c2) return -1; + return 1; + } + } while (--aCount); + } + } + return 0; +} + + +/** + * This method compares the data in one buffer with another + * @update gess 01/04/99 + * @param aStr1 is the first buffer to be compared + * @param aStr2 is the 2nd buffer to be compared + * @param aCount is the number of chars to compare + * @param aIgnoreCase tells us whether to use a case-sensitive comparison + * @return -1,0,1 depending on <,==,> + */ +inline int32_t +Compare1To2(const char* aStr1,const char16_t* aStr2,uint32_t aCount,bool aIgnoreCase){ + return Compare2To1(aStr2, aStr1, aCount, aIgnoreCase) * -1; +} + + +//----------------------------------------------------------------------------- +// +// This set of methods is used compress char sequences in a buffer... +// + + +/** + * This method compresses duplicate runs of a given char from the given buffer + * + * @update rickg 03.23.2000 + * @param aString is the buffer to be manipulated + * @param aLength is the length of the buffer + * @param aSet tells us which chars to compress from given buffer + * @param aEliminateLeading tells us whether to strip chars from the start of the buffer + * @param aEliminateTrailing tells us whether to strip chars from the start of the buffer + * @return the new length of the given buffer + */ +static int32_t +CompressChars1(char* aString,uint32_t aLength,const char* aSet){ + + char* from = aString; + char* end = aString + aLength; + char* to = from; + + //this code converts /n, /t, /r into normal space ' '; + //it also compresses runs of whitespace down to a single char... 
+ if(aSet && aString && (0 < aLength)){ + uint32_t aSetLen=strlen(aSet); + + while (from < end) { + char theChar = *from++; + + *to++=theChar; //always copy this char... + + if((kNotFound!=FindChar1(aSet,aSetLen,0,theChar,aSetLen))){ + while (from < end) { + theChar = *from++; + if(kNotFound==FindChar1(aSet,aSetLen,0,theChar,aSetLen)){ + *to++ = theChar; + break; + } + } //while + } //if + } //if + *to = 0; + } + return to - aString; +} + + + +/** + * This method compresses duplicate runs of a given char from the given buffer + * + * @update rickg 03.23.2000 + * @param aString is the buffer to be manipulated + * @param aLength is the length of the buffer + * @param aSet tells us which chars to compress from given buffer + * @param aEliminateLeading tells us whether to strip chars from the start of the buffer + * @param aEliminateTrailing tells us whether to strip chars from the start of the buffer + * @return the new length of the given buffer + */ +static int32_t +CompressChars2(char16_t* aString,uint32_t aLength,const char* aSet) { + + char16_t* from = aString; + char16_t* end = from + aLength; + char16_t* to = from; + + //this code converts /n, /t, /r into normal space ' '; + //it also compresses runs of whitespace down to a single char... + if(aSet && aString && (0 < aLength)){ + uint32_t aSetLen=strlen(aSet); + + while (from < end) { + char16_t theChar = *from++; + + *to++=theChar; //always copy this char... 
+ + if((theChar<256) && (kNotFound!=FindChar1(aSet,aSetLen,0,theChar,aSetLen))){ + while (from < end) { + theChar = *from++; + if(kNotFound==FindChar1(aSet,aSetLen,0,theChar,aSetLen)){ + *to++ = theChar; + break; + } + } //while + } //if + } //if + *to = 0; + } + return to - (char16_t*)aString; +} + +/** + * This method strips chars in a given set from the given buffer + * + * @update gess 01/04/99 + * @param aString is the buffer to be manipulated + * @param aLength is the length of the buffer + * @param aSet tells us which chars to compress from given buffer + * @param aEliminateLeading tells us whether to strip chars from the start of the buffer + * @param aEliminateTrailing tells us whether to strip chars from the start of the buffer + * @return the new length of the given buffer + */ +static int32_t +StripChars1(char* aString,uint32_t aLength,const char* aSet) { + + // XXX(darin): this code should defer writing until necessary. + + char* to = aString; + char* from = aString-1; + char* end = aString + aLength; + + if(aSet && aString && (0 < aLength)){ + uint32_t aSetLen=strlen(aSet); + while (++from < end) { + char theChar = *from; + if(kNotFound==FindChar1(aSet,aSetLen,0,theChar,aSetLen)){ + *to++ = theChar; + } + } + *to = 0; + } + return to - (char*)aString; +} + + +/** + * This method strips chars in a given set from the given buffer + * + * @update gess 01/04/99 + * @param aString is the buffer to be manipulated + * @param aLength is the length of the buffer + * @param aSet tells us which chars to compress from given buffer + * @param aEliminateLeading tells us whether to strip chars from the start of the buffer + * @param aEliminateTrailing tells us whether to strip chars from the start of the buffer + * @return the new length of the given buffer + */ +static int32_t +StripChars2(char16_t* aString,uint32_t aLength,const char* aSet) { + + // XXX(darin): this code should defer writing until necessary. 
+ + char16_t* to = aString; + char16_t* from = aString-1; + char16_t* end = to + aLength; + + if(aSet && aString && (0 < aLength)){ + uint32_t aSetLen=strlen(aSet); + while (++from < end) { + char16_t theChar = *from; + //Note the test for ascii range below. If you have a real unicode char, + //and you're searching for chars in the (given) ascii string, there's no + //point in doing the real search since it's out of the ascii range. + if((255<theChar) || (kNotFound==FindChar1(aSet,aSetLen,0,theChar,aSetLen))){ + *to++ = theChar; + } + } + *to = 0; + } + return to - (char16_t*)aString; +} + +/* ***** END RICKG BLOCK ***** */ + +static const char* kWhitespace="\f\t\r\n "; + +// This function is used to implement FindCharInSet and friends +template <class CharT> +#ifndef __SUNPRO_CC +static +#endif /* !__SUNPRO_CC */ +CharT +GetFindInSetFilter( const CharT* set) +{ + CharT filter = ~CharT(0); // All bits set + while (*set) { + filter &= ~(*set); + ++set; + } + return filter; +} + +// This template class is used by our code to access rickg's buffer routines. 
+template <class CharT> struct nsBufferRoutines {}; + +template <> +struct nsBufferRoutines<char> +{ + static + int32_t compare( const char* a, const char* b, uint32_t max, bool ic ) + { + return Compare1To1(a, b, max, ic); + } + + static + int32_t compare( const char* a, const char16_t* b, uint32_t max, bool ic ) + { + return Compare1To2(a, b, max, ic); + } + + static + int32_t find_char( const char* s, uint32_t max, int32_t offset, const char16_t c, int32_t count ) + { + return FindChar1(s, max, offset, c, count); + } + + static + int32_t rfind_char( const char* s, uint32_t max, int32_t offset, const char16_t c, int32_t count ) + { + return RFindChar1(s, max, offset, c, count); + } + + static + char get_find_in_set_filter( const char* set ) + { + return GetFindInSetFilter(set); + } + + static + int32_t strip_chars( char* s, uint32_t len, const char* set ) + { + return StripChars1(s, len, set); + } + + static + int32_t compress_chars( char* s, uint32_t len, const char* set ) + { + return CompressChars1(s, len, set); + } +}; + +template <> +struct nsBufferRoutines<char16_t> +{ + static + int32_t compare( const char16_t* a, const char16_t* b, uint32_t max, bool ic ) + { + NS_ASSERTION(!ic, "no case-insensitive compare here"); + return Compare2To2(a, b, max); + } + + static + int32_t compare( const char16_t* a, const char* b, uint32_t max, bool ic ) + { + return Compare2To1(a, b, max, ic); + } + + static + int32_t find_char( const char16_t* s, uint32_t max, int32_t offset, const char16_t c, int32_t count ) + { + return FindChar2(s, max, offset, c, count); + } + + static + int32_t rfind_char( const char16_t* s, uint32_t max, int32_t offset, const char16_t c, int32_t count ) + { + return RFindChar2(s, max, offset, c, count); + } + + static + char16_t get_find_in_set_filter( const char16_t* set ) + { + return GetFindInSetFilter(set); + } + + static + char16_t get_find_in_set_filter( const char* set ) + { + return (~char16_t(0)^~char(0)) | GetFindInSetFilter(set); + } + 
+ static + int32_t strip_chars( char16_t* s, uint32_t max, const char* set ) + { + return StripChars2(s, max, set); + } + + static + int32_t compress_chars( char16_t* s, uint32_t len, const char* set ) + { + return CompressChars2(s, len, set); + } +}; + +//----------------------------------------------------------------------------- + +template <class L, class R> +#ifndef __SUNPRO_CC +static +#endif /* !__SUNPRO_CC */ +int32_t +FindSubstring( const L* big, uint32_t bigLen, + const R* little, uint32_t littleLen, + bool ignoreCase ) +{ + if (littleLen > bigLen) + return kNotFound; + + int32_t i, max = int32_t(bigLen - littleLen); + for (i=0; i<=max; ++i, ++big) + { + if (nsBufferRoutines<L>::compare(big, little, littleLen, ignoreCase) == 0) + return i; + } + + return kNotFound; +} + +template <class L, class R> +#ifndef __SUNPRO_CC +static +#endif /* !__SUNPRO_CC */ +int32_t +RFindSubstring( const L* big, uint32_t bigLen, + const R* little, uint32_t littleLen, + bool ignoreCase ) +{ + if (littleLen > bigLen) + return kNotFound; + + int32_t i, max = int32_t(bigLen - littleLen); + + const L* iter = big + max; + for (i=max; iter >= big; --i, --iter) + { + if (nsBufferRoutines<L>::compare(iter, little, littleLen, ignoreCase) == 0) + return i; + } + + return kNotFound; +} + +template <class CharT, class SetCharT> +#ifndef __SUNPRO_CC +static +#endif /* !__SUNPRO_CC */ +int32_t +FindCharInSet( const CharT* data, uint32_t dataLen, const SetCharT* set ) +{ + CharT filter = nsBufferRoutines<CharT>::get_find_in_set_filter(set); + + const CharT* end = data + dataLen; + for (const CharT* iter = data; iter < end; ++iter) + { + CharT currentChar = *iter; + if (currentChar & filter) + continue; // char is not in filter set; go on with next char. + + // test all chars + const SetCharT* charInSet = set; + CharT setChar = CharT(*charInSet); + while (setChar) + { + if (setChar == currentChar) + return iter - data; // found it! return index of the found char. 
+ + setChar = CharT(*(++charInSet)); + } + } + return kNotFound; +} + +template <class CharT, class SetCharT> +#ifndef __SUNPRO_CC +static +#endif /* !__SUNPRO_CC */ +int32_t +RFindCharInSet( const CharT* data, uint32_t dataLen, const SetCharT* set ) +{ + CharT filter = nsBufferRoutines<CharT>::get_find_in_set_filter(set); + + for (const CharT* iter = data + dataLen - 1; iter >= data; --iter) + { + CharT currentChar = *iter; + if (currentChar & filter) + continue; // char is not in filter set; go on with next char. + + // test all chars + const CharT* charInSet = set; + CharT setChar = *charInSet; + while (setChar) + { + if (setChar == currentChar) + return iter - data; // found it! return index of the found char. + + setChar = *(++charInSet); + } + } + return kNotFound; +} + +/** + * this method changes the meaning of |offset| and |count|: + * + * upon return, + * |offset| specifies start of search range + * |count| specifies length of search range + */ +static void +Find_ComputeSearchRange( uint32_t bigLen, uint32_t littleLen, int32_t& offset, int32_t& count ) +{ + // |count| specifies how many iterations to make from |offset| + + if (offset < 0) + { + offset = 0; + } + else if (uint32_t(offset) > bigLen) + { + count = 0; + return; + } + + int32_t maxCount = bigLen - offset; + if (count < 0 || count > maxCount) + { + count = maxCount; + } + else + { + count += littleLen; + if (count > maxCount) + count = maxCount; + } +} + +/** + * this method changes the meaning of |offset| and |count|: + * + * upon entry, + * |offset| specifies the end point from which to search backwards + * |count| specifies the number of iterations from |offset| + * + * upon return, + * |offset| specifies start of search range + * |count| specifies length of search range + * + * + * EXAMPLE + * + * + -- littleLen=4 -- + + * : : + * |____|____|____|____|____|____|____|____|____|____|____|____| + * : : + * offset=5 bigLen=12 + * + * if count = 4, then we expect this function to return offset = 
2 and + * count = 7. + * + */ +static void +RFind_ComputeSearchRange( uint32_t bigLen, uint32_t littleLen, int32_t& offset, int32_t& count ) +{ + if (littleLen > bigLen) + { + offset = 0; + count = 0; + return; + } + + if (offset < 0) + offset = bigLen - littleLen; + if (count < 0) + count = offset + 1; + + int32_t start = offset - count + 1; + if (start < 0) + start = 0; + + count = offset + littleLen - start; + offset = start; +} + +//----------------------------------------------------------------------------- + +// define nsString obsolete methods +#include "string-template-def-unichar.h" +#include "nsTStringObsolete.cpp" +#include "string-template-undef.h" + +// define nsCString obsolete methods +#include "string-template-def-char.h" +#include "nsTStringObsolete.cpp" +#include "string-template-undef.h" + +//----------------------------------------------------------------------------- + +// specialized methods: + +int32_t +nsString::Find( const nsAFlatString& aString, int32_t aOffset, int32_t aCount ) const +{ + // this method changes the meaning of aOffset and aCount: + Find_ComputeSearchRange(mLength, aString.Length(), aOffset, aCount); + + int32_t result = FindSubstring(mData + aOffset, aCount, static_cast<const char16_t*>(aString.get()), aString.Length(), false); + if (result != kNotFound) + result += aOffset; + return result; +} + +int32_t +nsString::Find( const char16_t* aString, int32_t aOffset, int32_t aCount ) const +{ + return Find(nsDependentString(aString), aOffset, aCount); +} + +int32_t +nsString::RFind( const nsAFlatString& aString, int32_t aOffset, int32_t aCount ) const +{ + // this method changes the meaning of aOffset and aCount: + RFind_ComputeSearchRange(mLength, aString.Length(), aOffset, aCount); + + int32_t result = RFindSubstring(mData + aOffset, aCount, static_cast<const char16_t*>(aString.get()), aString.Length(), false); + if (result != kNotFound) + result += aOffset; + return result; +} + +int32_t +nsString::RFind( const char16_t* 
aString, int32_t aOffset, int32_t aCount ) const +{ + return RFind(nsDependentString(aString), aOffset, aCount); +} + +int32_t +nsString::FindCharInSet( const char16_t* aSet, int32_t aOffset ) const +{ + if (aOffset < 0) + aOffset = 0; + else if (aOffset >= int32_t(mLength)) + return kNotFound; + + int32_t result = ::FindCharInSet(mData + aOffset, mLength - aOffset, aSet); + if (result != kNotFound) + result += aOffset; + return result; +} + +void +nsString::ReplaceChar( const char16_t* aSet, char16_t aNewChar ) +{ + if (!EnsureMutable()) // XXX do this lazily? + AllocFailed(mLength); + + char16_t* data = mData; + uint32_t lenRemaining = mLength; + + while (lenRemaining) + { + int32_t i = ::FindCharInSet(data, lenRemaining, aSet); + if (i == kNotFound) + break; + + data[i++] = aNewChar; + data += i; + lenRemaining -= i; + } +} + + +/** + * nsTString::Compare,CompareWithConversion,etc. + */ + +int32_t +nsCString::Compare( const char* aString, bool aIgnoreCase, int32_t aCount ) const +{ + uint32_t strLen = char_traits::length(aString); + + int32_t maxCount = int32_t(XPCOM_MIN(mLength, strLen)); + + int32_t compareCount; + if (aCount < 0 || aCount > maxCount) + compareCount = maxCount; + else + compareCount = aCount; + + int32_t result = + nsBufferRoutines<char>::compare(mData, aString, compareCount, aIgnoreCase); + + if (result == 0 && + (aCount < 0 || strLen < uint32_t(aCount) || mLength < uint32_t(aCount))) + { + // Since the caller didn't give us a length to test, or strings shorter + // than aCount, and compareCount characters matched, we have to assume + // that the longer string is greater. + + if (mLength != strLen) + result = (mLength < strLen) ? 
-1 : 1; + } + return result; +} + +bool +nsString::EqualsIgnoreCase( const char* aString, int32_t aCount ) const +{ + uint32_t strLen = nsCharTraits<char>::length(aString); + + int32_t maxCount = int32_t(XPCOM_MIN(mLength, strLen)); + + int32_t compareCount; + if (aCount < 0 || aCount > maxCount) + compareCount = maxCount; + else + compareCount = aCount; + + int32_t result = + nsBufferRoutines<char16_t>::compare(mData, aString, compareCount, true); + + if (result == 0 && + (aCount < 0 || strLen < uint32_t(aCount) || mLength < uint32_t(aCount))) + { + // Since the caller didn't give us a length to test, or strings shorter + // than aCount, and compareCount characters matched, we have to assume + // that the longer string is greater. + + if (mLength != strLen) + result = 1; // Arbitrarily using any number != 0 + } + return result == 0; +} + + +/** + * nsTString::ToDouble + */ + +double +nsCString::ToDouble(nsresult* aErrorCode) const +{ + double res = 0.0; + if (mLength > 0) + { + char *conv_stopped; + const char *str = mData; + // Use PR_strtod, not strtod, since we don't want locale involved. 
+ res = PR_strtod(str, &conv_stopped); + if (conv_stopped == str+mLength) + *aErrorCode = NS_OK; + else // Not all the string was scanned + *aErrorCode = NS_ERROR_ILLEGAL_VALUE; + } + else + { + // The string was too short (0 characters) + *aErrorCode = NS_ERROR_ILLEGAL_VALUE; + } + return res; +} + +double +nsString::ToDouble(nsresult* aErrorCode) const +{ + return NS_LossyConvertUTF16toASCII(*this).ToDouble(aErrorCode); +} + + +/** + * nsTString::AssignWithConversion + */ + +void +nsCString::AssignWithConversion( const nsAString& aData ) +{ + LossyCopyUTF16toASCII(aData, *this); +} + +void +nsString::AssignWithConversion( const nsACString& aData ) +{ + CopyASCIItoUTF16(aData, *this); +} + +#endif // !MOZ_STRING_WITH_OBSOLETE_API diff --git a/xpcom/string/nsSubstring.cpp b/xpcom/string/nsSubstring.cpp new file mode 100644 index 000000000..5bc69f741 --- /dev/null +++ b/xpcom/string/nsSubstring.cpp @@ -0,0 +1,388 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef DEBUG +#define ENABLE_STRING_STATS +#endif + +#include "mozilla/Atomics.h" +#include "mozilla/MemoryReporting.h" + +#ifdef ENABLE_STRING_STATS +#include <stdio.h> +#endif + +#include <stdlib.h> +#include "nsSubstring.h" +#include "nsString.h" +#include "nsStringBuffer.h" +#include "nsDependentString.h" +#include "nsMemory.h" +#include "prprf.h" +#include "nsStaticAtom.h" +#include "nsCOMPtr.h" + +#include "mozilla/IntegerPrintfMacros.h" +#ifdef XP_WIN +#include <windows.h> +#include <process.h> +#define getpid() _getpid() +#define pthread_self() GetCurrentThreadId() +#else +#include <pthread.h> +#include <unistd.h> +#endif + +using mozilla::Atomic; + +// --------------------------------------------------------------------------- + +static const char16_t gNullChar = 0; + +char* const nsCharTraits<char>::sEmptyBuffer = + (char*)const_cast<char16_t*>(&gNullChar); +char16_t* const nsCharTraits<char16_t>::sEmptyBuffer = + const_cast<char16_t*>(&gNullChar); + +// --------------------------------------------------------------------------- + +#ifdef ENABLE_STRING_STATS +class nsStringStats +{ +public: + nsStringStats() + : mAllocCount(0) + , mReallocCount(0) + , mFreeCount(0) + , mShareCount(0) + { + } + + ~nsStringStats() + { + // this is a hack to suppress duplicate string stats printing + // in seamonkey as a result of the string code being linked + // into seamonkey and libxpcom! 
:-( + if (!mAllocCount && !mAdoptCount) { + return; + } + + printf("nsStringStats\n"); + printf(" => mAllocCount: % 10d\n", int(mAllocCount)); + printf(" => mReallocCount: % 10d\n", int(mReallocCount)); + printf(" => mFreeCount: % 10d", int(mFreeCount)); + if (mAllocCount > mFreeCount) { + printf(" -- LEAKED %d !!!\n", mAllocCount - mFreeCount); + } else { + printf("\n"); + } + printf(" => mShareCount: % 10d\n", int(mShareCount)); + printf(" => mAdoptCount: % 10d\n", int(mAdoptCount)); + printf(" => mAdoptFreeCount: % 10d", int(mAdoptFreeCount)); + if (mAdoptCount > mAdoptFreeCount) { + printf(" -- LEAKED %d !!!\n", mAdoptCount - mAdoptFreeCount); + } else { + printf("\n"); + } + printf(" => Process ID: %" PRIuPTR ", Thread ID: %" PRIuPTR "\n", + uintptr_t(getpid()), uintptr_t(pthread_self())); + } + + Atomic<int32_t> mAllocCount; + Atomic<int32_t> mReallocCount; + Atomic<int32_t> mFreeCount; + Atomic<int32_t> mShareCount; + Atomic<int32_t> mAdoptCount; + Atomic<int32_t> mAdoptFreeCount; +}; +static nsStringStats gStringStats; +#define STRING_STAT_INCREMENT(_s) (gStringStats.m ## _s ## Count)++ +#else +#define STRING_STAT_INCREMENT(_s) +#endif + +// --------------------------------------------------------------------------- + +void +ReleaseData(void* aData, uint32_t aFlags) +{ + if (aFlags & nsSubstring::F_SHARED) { + nsStringBuffer::FromData(aData)->Release(); + } else if (aFlags & nsSubstring::F_OWNED) { + free(aData); + STRING_STAT_INCREMENT(AdoptFree); + // Treat this as destruction of a "StringAdopt" object for leak + // tracking purposes. + MOZ_LOG_DTOR(aData, "StringAdopt", 1); + } + // otherwise, nothing to do. 
+} + +// --------------------------------------------------------------------------- + +// XXX or we could make nsStringBuffer be a friend of nsTAString + +class nsAStringAccessor : public nsAString +{ +private: + nsAStringAccessor(); // NOT IMPLEMENTED + +public: + char_type* data() const + { + return mData; + } + size_type length() const + { + return mLength; + } + uint32_t flags() const + { + return mFlags; + } + + void set(char_type* aData, size_type aLen, uint32_t aFlags) + { + ReleaseData(mData, mFlags); + mData = aData; + mLength = aLen; + mFlags = aFlags; + } +}; + +class nsACStringAccessor : public nsACString +{ +private: + nsACStringAccessor(); // NOT IMPLEMENTED + +public: + char_type* data() const + { + return mData; + } + size_type length() const + { + return mLength; + } + uint32_t flags() const + { + return mFlags; + } + + void set(char_type* aData, size_type aLen, uint32_t aFlags) + { + ReleaseData(mData, mFlags); + mData = aData; + mLength = aLen; + mFlags = aFlags; + } +}; + +// --------------------------------------------------------------------------- + +void +nsStringBuffer::AddRef() +{ + ++mRefCount; + STRING_STAT_INCREMENT(Share); + NS_LOG_ADDREF(this, mRefCount, "nsStringBuffer", sizeof(*this)); +} + +void +nsStringBuffer::Release() +{ + int32_t count = --mRefCount; + NS_LOG_RELEASE(this, count, "nsStringBuffer"); + if (count == 0) { + STRING_STAT_INCREMENT(Free); + free(this); // we were allocated with |malloc| + } +} + +/** + * Alloc returns a pointer to a new string header with set capacity. 
+ */ +already_AddRefed<nsStringBuffer> +nsStringBuffer::Alloc(size_t aSize) +{ + NS_ASSERTION(aSize != 0, "zero capacity allocation not allowed"); + NS_ASSERTION(sizeof(nsStringBuffer) + aSize <= size_t(uint32_t(-1)) && + sizeof(nsStringBuffer) + aSize > aSize, + "mStorageSize will truncate"); + + nsStringBuffer* hdr = + (nsStringBuffer*)malloc(sizeof(nsStringBuffer) + aSize); + if (hdr) { + STRING_STAT_INCREMENT(Alloc); + + hdr->mRefCount = 1; + hdr->mStorageSize = aSize; + NS_LOG_ADDREF(hdr, 1, "nsStringBuffer", sizeof(*hdr)); + } + return dont_AddRef(hdr); +} + +nsStringBuffer* +nsStringBuffer::Realloc(nsStringBuffer* aHdr, size_t aSize) +{ + STRING_STAT_INCREMENT(Realloc); + + NS_ASSERTION(aSize != 0, "zero capacity allocation not allowed"); + NS_ASSERTION(sizeof(nsStringBuffer) + aSize <= size_t(uint32_t(-1)) && + sizeof(nsStringBuffer) + aSize > aSize, + "mStorageSize will truncate"); + + // no point in trying to save ourselves if we hit this assertion + NS_ASSERTION(!aHdr->IsReadonly(), "|Realloc| attempted on readonly string"); + + // Treat this as a release and addref for refcounting purposes, since we + // just asserted that the refcount is 1. If we don't do that, refcount + // logging will claim we've leaked all sorts of stuff. 
+ NS_LOG_RELEASE(aHdr, 0, "nsStringBuffer"); + + aHdr = (nsStringBuffer*)realloc(aHdr, sizeof(nsStringBuffer) + aSize); + if (aHdr) { + NS_LOG_ADDREF(aHdr, 1, "nsStringBuffer", sizeof(*aHdr)); + aHdr->mStorageSize = aSize; + } + + return aHdr; +} + +nsStringBuffer* +nsStringBuffer::FromString(const nsAString& aStr) +{ + const nsAStringAccessor* accessor = + static_cast<const nsAStringAccessor*>(&aStr); + + if (!(accessor->flags() & nsSubstring::F_SHARED)) { + return nullptr; + } + + return FromData(accessor->data()); +} + +nsStringBuffer* +nsStringBuffer::FromString(const nsACString& aStr) +{ + const nsACStringAccessor* accessor = + static_cast<const nsACStringAccessor*>(&aStr); + + if (!(accessor->flags() & nsCSubstring::F_SHARED)) { + return nullptr; + } + + return FromData(accessor->data()); +} + +void +nsStringBuffer::ToString(uint32_t aLen, nsAString& aStr, + bool aMoveOwnership) +{ + char16_t* data = static_cast<char16_t*>(Data()); + + nsAStringAccessor* accessor = static_cast<nsAStringAccessor*>(&aStr); + MOZ_DIAGNOSTIC_ASSERT(data[aLen] == char16_t(0), + "data should be null terminated"); + + // preserve class flags + uint32_t flags = accessor->flags(); + flags = (flags & 0xFFFF0000) | nsSubstring::F_SHARED | nsSubstring::F_TERMINATED; + + if (!aMoveOwnership) { + AddRef(); + } + accessor->set(data, aLen, flags); +} + +void +nsStringBuffer::ToString(uint32_t aLen, nsACString& aStr, + bool aMoveOwnership) +{ + char* data = static_cast<char*>(Data()); + + nsACStringAccessor* accessor = static_cast<nsACStringAccessor*>(&aStr); + MOZ_DIAGNOSTIC_ASSERT(data[aLen] == char(0), + "data should be null terminated"); + + // preserve class flags + uint32_t flags = accessor->flags(); + flags = (flags & 0xFFFF0000) | nsCSubstring::F_SHARED | nsCSubstring::F_TERMINATED; + + if (!aMoveOwnership) { + AddRef(); + } + accessor->set(data, aLen, flags); +} + +size_t +nsStringBuffer::SizeOfIncludingThisIfUnshared(mozilla::MallocSizeOf aMallocSizeOf) const +{ + return 
IsReadonly() ? 0 : aMallocSizeOf(this); +} + +size_t +nsStringBuffer::SizeOfIncludingThisEvenIfShared(mozilla::MallocSizeOf aMallocSizeOf) const +{ + return aMallocSizeOf(this); +} + +// --------------------------------------------------------------------------- + + +// define nsSubstring +#include "string-template-def-unichar.h" +#include "nsTSubstring.cpp" +#include "string-template-undef.h" + +// define nsCSubstring +#include "string-template-def-char.h" +#include "nsTSubstring.cpp" +#include "string-template-undef.h" + +// Check that internal and external strings have the same size. +// See https://bugzilla.mozilla.org/show_bug.cgi?id=430581 + +#include "mozilla/Logging.h" +#include "nsXPCOMStrings.h" + +static_assert(sizeof(nsStringContainer_base) == sizeof(nsSubstring), + "internal and external strings must have the same size"); + +// Provide rust bindings to the nsA[C]String types +extern "C" { + +void Gecko_FinalizeCString(nsACString* aThis) +{ + aThis->~nsACString(); +} + +void Gecko_AssignCString(nsACString* aThis, const nsACString* aOther) +{ + aThis->Assign(*aOther); +} + +void Gecko_AppendCString(nsACString* aThis, const nsACString* aOther) +{ + aThis->Append(*aOther); +} + +void Gecko_FinalizeString(nsAString* aThis) +{ + aThis->~nsAString(); +} + +void Gecko_AssignString(nsAString* aThis, const nsAString* aOther) +{ + aThis->Assign(*aOther); +} + +void Gecko_AppendString(nsAString* aThis, const nsAString* aOther) +{ + aThis->Append(*aOther); +} + +} // extern "C" diff --git a/xpcom/string/nsSubstring.h b/xpcom/string/nsSubstring.h new file mode 100644 index 000000000..67125ba31 --- /dev/null +++ b/xpcom/string/nsSubstring.h @@ -0,0 +1,12 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsSubstring_h___ +#define nsSubstring_h___ + +#include "nsAString.h" + +#endif // !defined(nsSubstring_h___) diff --git a/xpcom/string/nsSubstringTuple.cpp b/xpcom/string/nsSubstringTuple.cpp new file mode 100644 index 000000000..3de928dda --- /dev/null +++ b/xpcom/string/nsSubstringTuple.cpp @@ -0,0 +1,20 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsSubstringTuple.h" + +// convert fragment to |const substring_type&| +#define TO_SUBSTRING(_v) (*(_v)) + +// define nsSubstringTuple +#include "string-template-def-unichar.h" +#include "nsTSubstringTuple.cpp" +#include "string-template-undef.h" + +// define nsCSubstringTuple +#include "string-template-def-char.h" +#include "nsTSubstringTuple.cpp" +#include "string-template-undef.h" diff --git a/xpcom/string/nsSubstringTuple.h b/xpcom/string/nsSubstringTuple.h new file mode 100644 index 000000000..5a61cd831 --- /dev/null +++ b/xpcom/string/nsSubstringTuple.h @@ -0,0 +1,22 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsSubstringTuple_h___ +#define nsSubstringTuple_h___ + +#include "nsSubstring.h" + +// declare nsSubstringTuple +#include "string-template-def-unichar.h" +#include "nsTSubstringTuple.h" +#include "string-template-undef.h" + +// declare nsCSubstringTuple +#include "string-template-def-char.h" +#include "nsTSubstringTuple.h" +#include "string-template-undef.h" + +#endif // !defined(nsSubstringTuple_h___) diff --git a/xpcom/string/nsTDependentString.cpp b/xpcom/string/nsTDependentString.cpp new file mode 100644 index 000000000..2f3a095d1 --- /dev/null +++ b/xpcom/string/nsTDependentString.cpp @@ -0,0 +1,25 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +void +nsTDependentString_CharT::Rebind(const string_type& str, uint32_t startPos) +{ + MOZ_ASSERT(str.Flags() & F_TERMINATED, "Unterminated flat string"); + + // If we currently own a buffer, release it. + Finalize(); + + size_type strLength = str.Length(); + + if (startPos > strLength) { + startPos = strLength; + } + + mData = const_cast<char_type*>(static_cast<const char_type*>(str.Data())) + startPos; + mLength = strLength - startPos; + + SetDataFlags(str.Flags() & (F_TERMINATED | F_LITERAL)); +} diff --git a/xpcom/string/nsTDependentString.h b/xpcom/string/nsTDependentString.h new file mode 100644 index 000000000..44055d5ac --- /dev/null +++ b/xpcom/string/nsTDependentString.h @@ -0,0 +1,106 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + + +/** + * nsTDependentString_CharT + * + * Stores a null-terminated, immutable sequence of characters. + * + * Subclass of nsTString that restricts string value to an immutable + * character sequence. This class does not own its data, so the creator + * of objects of this type must take care to ensure that a + * nsTDependentString continues to reference valid memory for the + * duration of its use. + */ +class nsTDependentString_CharT : public nsTString_CharT +{ +public: + + typedef nsTDependentString_CharT self_type; + +public: + + /** + * constructors + */ + + nsTDependentString_CharT(const char_type* aStart, const char_type* aEnd) + : string_type(const_cast<char_type*>(aStart), + uint32_t(aEnd - aStart), F_TERMINATED) + { + AssertValidDependentString(); + } + + nsTDependentString_CharT(const char_type* aData, uint32_t aLength) + : string_type(const_cast<char_type*>(aData), aLength, F_TERMINATED) + { + AssertValidDependentString(); + } + +#if defined(CharT_is_PRUnichar) && defined(MOZ_USE_CHAR16_WRAPPER) + nsTDependentString_CharT(char16ptr_t aData, uint32_t aLength) + : nsTDependentString_CharT(static_cast<const char16_t*>(aData), aLength) + { + } +#endif + + explicit + nsTDependentString_CharT(const char_type* aData) + : string_type(const_cast<char_type*>(aData), + uint32_t(char_traits::length(aData)), F_TERMINATED) + { + AssertValidDependentString(); + } + +#if defined(CharT_is_PRUnichar) && defined(MOZ_USE_CHAR16_WRAPPER) + explicit + nsTDependentString_CharT(char16ptr_t aData) + : nsTDependentString_CharT(static_cast<const char16_t*>(aData)) + { + } +#endif + + nsTDependentString_CharT(const string_type& aStr, uint32_t aStartPos) + : string_type() + { + Rebind(aStr, aStartPos); + } + + // Create a nsTDependentSubstring to be bound later + nsTDependentString_CharT() + : string_type() + { + } + + // XXX are you sure?? 
+ // auto-generated copy-constructor OK + // auto-generated copy-assignment operator OK + // auto-generated destructor OK + + + /** + * allow this class to be bound to a different string... + */ + + using nsTString_CharT::Rebind; + void Rebind(const char_type* aData) + { + Rebind(aData, uint32_t(char_traits::length(aData))); + } + + void Rebind(const char_type* aStart, const char_type* aEnd) + { + Rebind(aStart, uint32_t(aEnd - aStart)); + } + + void Rebind(const string_type&, uint32_t aStartPos); + +private: + + // NOT USED + nsTDependentString_CharT(const substring_tuple_type&) = delete; +}; diff --git a/xpcom/string/nsTDependentSubstring.cpp b/xpcom/string/nsTDependentSubstring.cpp new file mode 100644 index 000000000..b540c028d --- /dev/null +++ b/xpcom/string/nsTDependentSubstring.cpp @@ -0,0 +1,37 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +void +nsTDependentSubstring_CharT::Rebind(const substring_type& str, + uint32_t startPos, uint32_t length) +{ + // If we currently own a buffer, release it. + Finalize(); + + size_type strLength = str.Length(); + + if (startPos > strLength) { + startPos = strLength; + } + + mData = const_cast<char_type*>(static_cast<const char_type*>(str.Data())) + startPos; + mLength = XPCOM_MIN(length, strLength - startPos); + + SetDataFlags(F_NONE); +} + +void +nsTDependentSubstring_CharT::Rebind(const char_type* data, size_type length) +{ + NS_ASSERTION(data, "nsTDependentSubstring must wrap a non-NULL buffer"); + + // If we currently own a buffer, release it. 
+ Finalize(); + + mData = const_cast<char_type*>(static_cast<const char_type*>(data)); + mLength = length; + SetDataFlags(F_NONE); +} diff --git a/xpcom/string/nsTDependentSubstring.h b/xpcom/string/nsTDependentSubstring.h new file mode 100644 index 000000000..dd28f32f9 --- /dev/null +++ b/xpcom/string/nsTDependentSubstring.h @@ -0,0 +1,124 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +// IWYU pragma: private, include "nsString.h" + +/** + * nsTDependentSubstring_CharT + * + * A string class which wraps an external array of string characters. It + * is the client code's responsibility to ensure that the external buffer + * remains valid for a long as the string is alive. + * + * NAMES: + * nsDependentSubstring for wide characters + * nsDependentCSubstring for narrow characters + */ +class nsTDependentSubstring_CharT : public nsTSubstring_CharT +{ +public: + + typedef nsTDependentSubstring_CharT self_type; + +public: + + void Rebind(const substring_type&, uint32_t aStartPos, + uint32_t aLength = size_type(-1)); + + void Rebind(const char_type* aData, size_type aLength); + + void Rebind(const char_type* aStart, const char_type* aEnd) + { + Rebind(aStart, size_type(aEnd - aStart)); + } + + nsTDependentSubstring_CharT(const substring_type& aStr, uint32_t aStartPos, + uint32_t aLength = size_type(-1)) + : substring_type() + { + Rebind(aStr, aStartPos, aLength); + } + + nsTDependentSubstring_CharT(const char_type* aData, size_type aLength) + : substring_type(const_cast<char_type*>(aData), aLength, F_NONE) + { + } + + nsTDependentSubstring_CharT(const char_type* aStart, const char_type* aEnd) + : substring_type(const_cast<char_type*>(aStart), uint32_t(aEnd - aStart), + F_NONE) + { + } + 
+#if defined(CharT_is_PRUnichar) && defined(MOZ_USE_CHAR16_WRAPPER) + nsTDependentSubstring_CharT(char16ptr_t aData, size_type aLength) + : nsTDependentSubstring_CharT(static_cast<const char16_t*>(aData), aLength) + { + } + + nsTDependentSubstring_CharT(char16ptr_t aStart, char16ptr_t aEnd) + : nsTDependentSubstring_CharT(static_cast<const char16_t*>(aStart), + static_cast<const char16_t*>(aEnd)) + { + } +#endif + + nsTDependentSubstring_CharT(const const_iterator& aStart, + const const_iterator& aEnd) + : substring_type(const_cast<char_type*>(aStart.get()), + uint32_t(aEnd.get() - aStart.get()), F_NONE) + { + } + + // Create a nsTDependentSubstring to be bound later + nsTDependentSubstring_CharT() + : substring_type() + { + } + + // auto-generated copy-constructor OK (XXX really?? what about base class copy-ctor?) + +private: + // NOT USED + void operator=(const self_type&); // we're immutable, you can't assign into a substring +}; + +inline const nsTDependentSubstring_CharT +Substring(const nsTSubstring_CharT& aStr, uint32_t aStartPos, + uint32_t aLength = uint32_t(-1)) +{ + return nsTDependentSubstring_CharT(aStr, aStartPos, aLength); +} + +inline const nsTDependentSubstring_CharT +Substring(const nsReadingIterator<CharT>& aStart, + const nsReadingIterator<CharT>& aEnd) +{ + return nsTDependentSubstring_CharT(aStart.get(), aEnd.get()); +} + +inline const nsTDependentSubstring_CharT +Substring(const CharT* aData, uint32_t aLength) +{ + return nsTDependentSubstring_CharT(aData, aLength); +} + +inline const nsTDependentSubstring_CharT +Substring(const CharT* aStart, const CharT* aEnd) +{ + return nsTDependentSubstring_CharT(aStart, aEnd); +} + +inline const nsTDependentSubstring_CharT +StringHead(const nsTSubstring_CharT& aStr, uint32_t aCount) +{ + return nsTDependentSubstring_CharT(aStr, 0, aCount); +} + +inline const nsTDependentSubstring_CharT +StringTail(const nsTSubstring_CharT& aStr, uint32_t aCount) +{ + return nsTDependentSubstring_CharT(aStr, 
aStr.Length() - aCount, aCount); +} diff --git a/xpcom/string/nsTLiteralString.h b/xpcom/string/nsTLiteralString.h new file mode 100644 index 000000000..fa75ba829 --- /dev/null +++ b/xpcom/string/nsTLiteralString.h @@ -0,0 +1,41 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + + +/** + * nsTLiteralString_CharT + * + * Stores a null-terminated, immutable sequence of characters. + * + * Subclass of nsTString that restricts string value to a literal + * character sequence. This class does not own its data. The data is + * assumed to be permanent. In practice this is true because this code + * is only usable by and for libxul. + */ +class nsTLiteralString_CharT : public nsTString_CharT +{ +public: + + typedef nsTLiteralString_CharT self_type; + +public: + + /** + * constructor + */ + + template<size_type N> + explicit nsTLiteralString_CharT(const char_type (&aStr)[N]) + : string_type(const_cast<char_type*>(aStr), N - 1, F_TERMINATED | F_LITERAL) + { + } + +private: + + // NOT TO BE IMPLEMENTED + template<size_type N> + nsTLiteralString_CharT(char_type (&aStr)[N]) = delete; +}; diff --git a/xpcom/string/nsTPromiseFlatString.cpp b/xpcom/string/nsTPromiseFlatString.cpp new file mode 100644 index 000000000..f02fc925c --- /dev/null +++ b/xpcom/string/nsTPromiseFlatString.cpp @@ -0,0 +1,18 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +void +nsTPromiseFlatString_CharT::Init(const substring_type& str) +{ + if (str.IsTerminated()) { + mData = const_cast<char_type*>(static_cast<const char_type*>(str.Data())); + mLength = str.Length(); + mFlags = str.mFlags & (F_TERMINATED | F_LITERAL); + // does not promote F_VOIDED + } else { + Assign(str); + } +} diff --git a/xpcom/string/nsTPromiseFlatString.h b/xpcom/string/nsTPromiseFlatString.h new file mode 100644 index 000000000..d71ce148d --- /dev/null +++ b/xpcom/string/nsTPromiseFlatString.h @@ -0,0 +1,112 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + + +/** + * NOTE: + * + * Try to avoid flat strings. |PromiseFlat[C]String| will help you as a last + * resort, and this may be necessary when dealing with legacy or OS calls, + * but in general, requiring a null-terminated array of characters kills many + * of the performance wins the string classes offer. Write your own code to + * use |nsA[C]String&|s for parameters. Write your string proccessing + * algorithms to exploit iterators. If you do this, you will benefit from + * being able to chain operations without copying or allocating and your code + * will be significantly more efficient. Remember, a function that takes an + * |const nsA[C]String&| can always be passed a raw character pointer by + * wrapping it (for free) in a |nsDependent[C]String|. But a function that + * takes a character pointer always has the potential to force allocation and + * copying. + * + * + * How to use it: + * + * A |nsPromiseFlat[C]String| doesn't necessarily own the characters it + * promises. You must never use it to promise characters out of a string + * with a shorter lifespan. 
The typical use will be something like this: + * + * SomeOSFunction( PromiseFlatCString(aCSubstring).get() ); // GOOD + * + * Here's a BAD use: + * + * const char* buffer = PromiseFlatCString(aCSubstring).get(); + * SomeOSFunction(buffer); // BAD!! |buffer| is a dangling pointer + * + * The only way to make one is with the function |PromiseFlat[C]String|, + * which produce a |const| instance. ``What if I need to keep a promise + * around for a little while?'' you might ask. In that case, you can keep a + * reference, like so: + * + * const nsCString& flat = PromiseFlatString(aCSubstring); + * // Temporaries usually die after the full expression containing the + * // expression that created the temporary is evaluated. But when a + * // temporary is assigned to a local reference, the temporary's lifetime + * // is extended to the reference's lifetime (C++11 [class.temporary]p5). + * // + * // This reference holds the anonymous temporary alive. But remember: it + * // must _still_ have a lifetime shorter than that of |aCSubstring|, and + * // |aCSubstring| must not be changed while the PromiseFlatString lives. + * + * SomeOSFunction(flat.get()); + * SomeOtherOSFunction(flat.get()); + * + * + * How does it work? + * + * A |nsPromiseFlat[C]String| is just a wrapper for another string. If you + * apply it to a string that happens to be flat, your promise is just a + * dependent reference to the string's data. If you apply it to a non-flat + * string, then a temporary flat string is created for you, by allocating and + * copying. In the event that you end up assigning the result into a sharing + * string (e.g., |nsTString|), the right thing happens. 
+ */ + +class nsTPromiseFlatString_CharT : public nsTString_CharT +{ +public: + + typedef nsTPromiseFlatString_CharT self_type; + +private: + + void Init(const substring_type&); + + // NOT TO BE IMPLEMENTED + void operator=(const self_type&) = delete; + + // NOT TO BE IMPLEMENTED + nsTPromiseFlatString_CharT() = delete; + + // NOT TO BE IMPLEMENTED + nsTPromiseFlatString_CharT(const string_type& aStr) = delete; + +public: + + explicit + nsTPromiseFlatString_CharT(const substring_type& aStr) + : string_type() + { + Init(aStr); + } + + explicit + nsTPromiseFlatString_CharT(const substring_tuple_type& aTuple) + : string_type() + { + // nothing else to do here except assign the value of the tuple + // into ourselves. + Assign(aTuple); + } +}; + +// We template this so that the constructor is chosen based on the type of the +// parameter. This allows us to reject attempts to promise a flat flat string. +template<class T> +const nsTPromiseFlatString_CharT +TPromiseFlatString_CharT(const T& aString) +{ + return nsTPromiseFlatString_CharT(aString); +} diff --git a/xpcom/string/nsTString.cpp b/xpcom/string/nsTString.cpp new file mode 100644 index 000000000..13dd2628e --- /dev/null +++ b/xpcom/string/nsTString.cpp @@ -0,0 +1,47 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +nsTAdoptingString_CharT& +nsTAdoptingString_CharT::operator=(const self_type& str) +{ + // This'll violate the constness of this argument, that's just + // the nature of this class... + self_type* mutable_str = const_cast<self_type*>(&str); + + if (str.mFlags & F_OWNED) { + // We want to do what Adopt() does, but without actually incrementing + // the Adopt count. 
Note that we can be a little more straightforward + // about this than Adopt() is, because we know that str.mData is + // non-null. Should we be able to assert that str is not void here? + NS_ASSERTION(str.mData, "String with null mData?"); + Finalize(); + mData = str.mData; + mLength = str.mLength; + SetDataFlags(F_TERMINATED | F_OWNED); + + // Make str forget the buffer we just took ownership of. + new (mutable_str) self_type(); + } else { + Assign(str); + + mutable_str->Truncate(); + } + + return *this; +} + +void +nsTString_CharT::Rebind(const char_type* data, size_type length) +{ + // If we currently own a buffer, release it. + Finalize(); + + mData = const_cast<char_type*>(data); + mLength = length; + SetDataFlags(F_TERMINATED); + AssertValidDependentString(); +} + diff --git a/xpcom/string/nsTString.h b/xpcom/string/nsTString.h new file mode 100644 index 000000000..6fbb9d3ad --- /dev/null +++ b/xpcom/string/nsTString.h @@ -0,0 +1,883 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +// IWYU pragma: private, include "nsString.h" + +/** + * This is the canonical null-terminated string class. All subclasses + * promise null-terminated storage. Instances of this class allocate + * strings on the heap. + * + * NAMES: + * nsString for wide characters + * nsCString for narrow characters + * + * This class is also known as nsAFlat[C]String, where "flat" is used + * to denote a null-terminated string. 
+ */ +class nsTString_CharT : public nsTSubstring_CharT +{ +public: + + typedef nsTString_CharT self_type; + +public: + + /** + * constructors + */ + + nsTString_CharT() + : substring_type() + { + } + + explicit + nsTString_CharT(const char_type* aData, size_type aLength = size_type(-1)) + : substring_type() + { + Assign(aData, aLength); + } + +#if defined(CharT_is_PRUnichar) && defined(MOZ_USE_CHAR16_WRAPPER) + explicit + nsTString_CharT(char16ptr_t aStr, size_type aLength = size_type(-1)) + : substring_type() + { + Assign(static_cast<const char16_t*>(aStr), aLength); + } +#endif + + nsTString_CharT(const self_type& aStr) + : substring_type() + { + Assign(aStr); + } + + MOZ_IMPLICIT nsTString_CharT(const substring_tuple_type& aTuple) + : substring_type() + { + Assign(aTuple); + } + + explicit + nsTString_CharT(const substring_type& aReadable) + : substring_type() + { + Assign(aReadable); + } + + + // |operator=| does not inherit, so we must define our own + self_type& operator=(char_type aChar) + { + Assign(aChar); + return *this; + } + self_type& operator=(const char_type* aData) + { + Assign(aData); + return *this; + } + self_type& operator=(const self_type& aStr) + { + Assign(aStr); + return *this; + } +#if defined(CharT_is_PRUnichar) && defined(MOZ_USE_CHAR16_WRAPPER) + self_type& operator=(const char16ptr_t aStr) + { + Assign(static_cast<const char16_t*>(aStr)); + return *this; + } +#endif + self_type& operator=(const substring_type& aStr) + { + Assign(aStr); + return *this; + } + self_type& operator=(const substring_tuple_type& aTuple) + { + Assign(aTuple); + return *this; + } + + /** + * returns the null-terminated string + */ + +#if defined(CharT_is_PRUnichar) && defined(MOZ_USE_CHAR16_WRAPPER) + char16ptr_t get() const +#else + const char_type* get() const +#endif + { + return mData; + } + + + /** + * returns character at specified index. + * + * NOTE: unlike nsTSubstring::CharAt, this function allows you to index + * the null terminator character. 
+ */ + + char_type CharAt(index_type aIndex) const + { + NS_ASSERTION(aIndex <= mLength, "index exceeds allowable range"); + return mData[aIndex]; + } + + char_type operator[](index_type aIndex) const + { + return CharAt(aIndex); + } + + +#if MOZ_STRING_WITH_OBSOLETE_API + + + /** + * Search for the given substring within this string. + * + * @param aString is substring to be sought in this + * @param aIgnoreCase selects case sensitivity + * @param aOffset tells us where in this string to start searching + * @param aCount tells us how far from the offset we are to search. Use + * -1 to search the whole string. + * @return offset in string, or kNotFound + */ + + int32_t Find(const nsCString& aString, bool aIgnoreCase = false, + int32_t aOffset = 0, int32_t aCount = -1) const; + int32_t Find(const char* aString, bool aIgnoreCase = false, + int32_t aOffset = 0, int32_t aCount = -1) const; + +#ifdef CharT_is_PRUnichar + int32_t Find(const nsAFlatString& aString, int32_t aOffset = 0, + int32_t aCount = -1) const; + int32_t Find(const char16_t* aString, int32_t aOffset = 0, + int32_t aCount = -1) const; +#ifdef MOZ_USE_CHAR16_WRAPPER + int32_t Find(char16ptr_t aString, int32_t aOffset = 0, + int32_t aCount = -1) const + { + return Find(static_cast<const char16_t*>(aString), aOffset, aCount); + } +#endif +#endif + + + /** + * This methods scans the string backwards, looking for the given string + * + * @param aString is substring to be sought in this + * @param aIgnoreCase tells us whether or not to do caseless compare + * @param aOffset tells us where in this string to start searching. + * Use -1 to search from the end of the string. + * @param aCount tells us how many iterations to make starting at the + * given offset. 
+ * @return offset in string, or kNotFound + */ + + int32_t RFind(const nsCString& aString, bool aIgnoreCase = false, + int32_t aOffset = -1, int32_t aCount = -1) const; + int32_t RFind(const char* aCString, bool aIgnoreCase = false, + int32_t aOffset = -1, int32_t aCount = -1) const; + +#ifdef CharT_is_PRUnichar + int32_t RFind(const nsAFlatString& aString, int32_t aOffset = -1, + int32_t aCount = -1) const; + int32_t RFind(const char16_t* aString, int32_t aOffset = -1, + int32_t aCount = -1) const; +#endif + + + /** + * Search for given char within this string + * + * @param aChar is the character to search for + * @param aOffset tells us where in this string to start searching + * @param aCount tells us how far from the offset we are to search. + * Use -1 to search the whole string. + * @return offset in string, or kNotFound + */ + + // int32_t FindChar( char16_t aChar, int32_t aOffset=0, int32_t aCount=-1 ) const; + int32_t RFindChar(char16_t aChar, int32_t aOffset = -1, + int32_t aCount = -1) const; + + + /** + * This method searches this string for the first character found in + * the given string. + * + * @param aString contains set of chars to be found + * @param aOffset tells us where in this string to start searching + * (counting from left) + * @return offset in string, or kNotFound + */ + + int32_t FindCharInSet(const char* aString, int32_t aOffset = 0) const; + int32_t FindCharInSet(const self_type& aString, int32_t aOffset = 0) const + { + return FindCharInSet(aString.get(), aOffset); + } + +#ifdef CharT_is_PRUnichar + int32_t FindCharInSet(const char16_t* aString, int32_t aOffset = 0) const; +#endif + + + /** + * This method searches this string for the last character found in + * the given string. 
+ * + * @param aString contains set of chars to be found + * @param aOffset tells us where in this string to start searching + * (counting from left) + * @return offset in string, or kNotFound + */ + + int32_t RFindCharInSet(const char_type* aString, int32_t aOffset = -1) const; + int32_t RFindCharInSet(const self_type& aString, int32_t aOffset = -1) const + { + return RFindCharInSet(aString.get(), aOffset); + } + + + /** + * Compares a given string to this string. + * + * @param aString is the string to be compared + * @param aIgnoreCase tells us how to treat case + * @param aCount tells us how many chars to compare + * @return -1,0,1 + */ + +#ifdef CharT_is_char + int32_t Compare(const char* aString, bool aIgnoreCase = false, + int32_t aCount = -1) const; +#endif + + + /** + * Equality check between given string and this string. + * + * @param aString is the string to check + * @param aIgnoreCase tells us how to treat case + * @param aCount tells us how many chars to compare + * @return boolean + */ +#ifdef CharT_is_char + bool EqualsIgnoreCase(const char* aString, int32_t aCount = -1) const + { + return Compare(aString, true, aCount) == 0; + } +#else + bool EqualsIgnoreCase(const char* aString, int32_t aCount = -1) const; + + +#endif // !CharT_is_PRUnichar + + /** + * Perform string to double-precision float conversion. + * + * @param aErrorCode will contain error if one occurs + * @return double-precision float rep of string value + */ + double ToDouble(nsresult* aErrorCode) const; + + /** + * Perform string to single-precision float conversion. + * + * @param aErrorCode will contain error if one occurs + * @return single-precision float rep of string value + */ + float ToFloat(nsresult* aErrorCode) const + { + return (float)ToDouble(aErrorCode); + } + + + /** + * Perform string to int conversion. + * @param aErrorCode will contain error if one occurs + * @param aRadix tells us which radix to assume; kAutoDetect tells us to determine the radix for you. 
+ * @return int rep of string value, and possible (out) error code + */ + int32_t ToInteger(nsresult* aErrorCode, uint32_t aRadix = kRadix10) const; + + /** + * Perform string to 64-bit int conversion. + * @param aErrorCode will contain error if one occurs + * @param aRadix tells us which radix to assume; kAutoDetect tells us to determine the radix for you. + * @return 64-bit int rep of string value, and possible (out) error code + */ + int64_t ToInteger64(nsresult* aErrorCode, uint32_t aRadix = kRadix10) const; + + + /** + * |Left|, |Mid|, and |Right| are annoying signatures that seem better almost + * any _other_ way than they are now. Consider these alternatives + * + * aWritable = aReadable.Left(17); // ...a member function that returns a |Substring| + * aWritable = Left(aReadable, 17); // ...a global function that returns a |Substring| + * Left(aReadable, 17, aWritable); // ...a global function that does the assignment + * + * as opposed to the current signature + * + * aReadable.Left(aWritable, 17); // ...a member function that does the assignment + * + * or maybe just stamping them out in favor of |Substring|, they are just duplicate functionality + * + * aWritable = Substring(aReadable, 0, 17); + */ + + size_type Mid(self_type& aResult, uint32_t aStartPos, uint32_t aCount) const; + + size_type Left(self_type& aResult, size_type aCount) const + { + return Mid(aResult, 0, aCount); + } + + size_type Right(self_type& aResult, size_type aCount) const + { + aCount = XPCOM_MIN(mLength, aCount); + return Mid(aResult, mLength - aCount, aCount); + } + + + /** + * Set a char inside this string at given index + * + * @param aChar is the char you want to write into this string + * @param anIndex is the ofs where you want to write the given char + * @return TRUE if successful + */ + + bool SetCharAt(char16_t aChar, uint32_t aIndex); + + + /** + * These methods are used to remove all occurrences of the + * characters found in aSet from this string. 
+ * + * @param aSet -- characters to be cut from this + */ + void StripChars(const char* aSet); + bool StripChars(const char* aSet, const fallible_t&); + + + /** + * This method strips whitespace throughout the string. + */ + void StripWhitespace(); + bool StripWhitespace(const fallible_t&); + + + /** + * swaps occurence of 1 string for another + */ + + void ReplaceChar(char_type aOldChar, char_type aNewChar); + void ReplaceChar(const char* aSet, char_type aNewChar); +#ifdef CharT_is_PRUnichar + void ReplaceChar(const char16_t* aSet, char16_t aNewChar); +#endif + /** + * Replace all occurrences of aTarget with aNewValue. + * The complexity of this function is O(n+m), n being the length of the string + * and m being the length of aNewValue. + */ + void ReplaceSubstring(const self_type& aTarget, const self_type& aNewValue); + void ReplaceSubstring(const char_type* aTarget, const char_type* aNewValue); + MOZ_MUST_USE bool ReplaceSubstring(const self_type& aTarget, + const self_type& aNewValue, + const fallible_t&); + MOZ_MUST_USE bool ReplaceSubstring(const char_type* aTarget, + const char_type* aNewValue, + const fallible_t&); + + + /** + * This method trims characters found in aTrimSet from + * either end of the underlying string. + * + * @param aSet -- contains chars to be trimmed from both ends + * @param aEliminateLeading + * @param aEliminateTrailing + * @param aIgnoreQuotes -- if true, causes surrounding quotes to be ignored + * @return this + */ + void Trim(const char* aSet, bool aEliminateLeading = true, + bool aEliminateTrailing = true, bool aIgnoreQuotes = false); + + /** + * This method strips whitespace from string. + * You can control whether whitespace is yanked from start and end of + * string as well. 
+ * + * @param aEliminateLeading controls stripping of leading ws + * @param aEliminateTrailing controls stripping of trailing ws + */ + void CompressWhitespace(bool aEliminateLeading = true, + bool aEliminateTrailing = true); + + + /** + * assign/append/insert with _LOSSY_ conversion + */ + + void AssignWithConversion(const nsTAString_IncompatibleCharT& aString); + void AssignWithConversion(const incompatible_char_type* aData, + int32_t aLength = -1); + +#endif // !MOZ_STRING_WITH_OBSOLETE_API + + /** + * Allow this string to be bound to a character buffer + * until the string is rebound or mutated; the caller + * must ensure that the buffer outlives the string. + */ + void Rebind(const char_type* aData, size_type aLength); + + /** + * verify restrictions for dependent strings + */ + void AssertValidDependentString() + { + NS_ASSERTION(mData, "nsTDependentString must wrap a non-NULL buffer"); + NS_ASSERTION(mLength != size_type(-1), "nsTDependentString has bogus length"); + NS_ASSERTION(mData[mLength] == 0, + "nsTDependentString must wrap only null-terminated strings. 
" + "You are probably looking for nsTDependentSubstring."); + } + + +protected: + + explicit + nsTString_CharT(uint32_t aFlags) + : substring_type(aFlags) + { + } + + // allow subclasses to initialize fields directly + nsTString_CharT(char_type* aData, size_type aLength, uint32_t aFlags) + : substring_type(aData, aLength, aFlags) + { + } + + struct Segment { + uint32_t mBegin, mLength; + Segment(uint32_t aBegin, uint32_t aLength) + : mBegin(aBegin) + , mLength(aLength) + {} + }; +}; + + +class nsTFixedString_CharT : public nsTString_CharT +{ +public: + + typedef nsTFixedString_CharT self_type; + typedef nsTFixedString_CharT fixed_string_type; + +public: + + /** + * @param aData + * fixed-size buffer to be used by the string (the contents of + * this buffer may be modified by the string) + * @param aStorageSize + * the size of the fixed buffer + * @param aLength (optional) + * the length of the string already contained in the buffer + */ + + nsTFixedString_CharT(char_type* aData, size_type aStorageSize) + : string_type(aData, uint32_t(char_traits::length(aData)), + F_TERMINATED | F_FIXED | F_CLASS_FIXED) + , mFixedCapacity(aStorageSize - 1) + , mFixedBuf(aData) + { + } + + nsTFixedString_CharT(char_type* aData, size_type aStorageSize, + size_type aLength) + : string_type(aData, aLength, F_TERMINATED | F_FIXED | F_CLASS_FIXED) + , mFixedCapacity(aStorageSize - 1) + , mFixedBuf(aData) + { + // null-terminate + mFixedBuf[aLength] = char_type(0); + } + + // |operator=| does not inherit, so we must define our own + self_type& operator=(char_type aChar) + { + Assign(aChar); + return *this; + } + self_type& operator=(const char_type* aData) + { + Assign(aData); + return *this; + } + self_type& operator=(const substring_type& aStr) + { + Assign(aStr); + return *this; + } + self_type& operator=(const substring_tuple_type& aTuple) + { + Assign(aTuple); + return *this; + } + +protected: + + friend class nsTSubstring_CharT; + + size_type mFixedCapacity; + char_type* mFixedBuf; 
+}; + + +/** + * nsTAutoString_CharT + * + * Subclass of nsTString_CharT that adds support for stack-based string + * allocation. It is normally not a good idea to use this class on the + * heap, because it will allocate space which may be wasted if the string + * it contains is significantly smaller or any larger than 64 characters. + * + * NAMES: + * nsAutoString for wide characters + * nsAutoCString for narrow characters + */ +class MOZ_NON_MEMMOVABLE nsTAutoString_CharT : public nsTFixedString_CharT +{ +public: + + typedef nsTAutoString_CharT self_type; + +public: + + /** + * constructors + */ + + nsTAutoString_CharT() + : fixed_string_type(mStorage, kDefaultStorageSize, 0) + { + } + + explicit + nsTAutoString_CharT(char_type aChar) + : fixed_string_type(mStorage, kDefaultStorageSize, 0) + { + Assign(aChar); + } + + explicit + nsTAutoString_CharT(const char_type* aData, size_type aLength = size_type(-1)) + : fixed_string_type(mStorage, kDefaultStorageSize, 0) + { + Assign(aData, aLength); + } + +#if defined(CharT_is_PRUnichar) && defined(MOZ_USE_CHAR16_WRAPPER) + explicit + nsTAutoString_CharT(char16ptr_t aData, size_type aLength = size_type(-1)) + : nsTAutoString_CharT(static_cast<const char16_t*>(aData), aLength) + { + } +#endif + + nsTAutoString_CharT(const self_type& aStr) + : fixed_string_type(mStorage, kDefaultStorageSize, 0) + { + Assign(aStr); + } + + explicit + nsTAutoString_CharT(const substring_type& aStr) + : fixed_string_type(mStorage, kDefaultStorageSize, 0) + { + Assign(aStr); + } + + MOZ_IMPLICIT nsTAutoString_CharT(const substring_tuple_type& aTuple) + : fixed_string_type(mStorage, kDefaultStorageSize, 0) + { + Assign(aTuple); + } + + // |operator=| does not inherit, so we must define our own + self_type& operator=(char_type aChar) + { + Assign(aChar); + return *this; + } + self_type& operator=(const char_type* aData) + { + Assign(aData); + return *this; + } +#if defined(CharT_is_PRUnichar) && defined(MOZ_USE_CHAR16_WRAPPER) + self_type& 
operator=(char16ptr_t aStr) + { + Assign(aStr); + return *this; + } +#endif + self_type& operator=(const self_type& aStr) + { + Assign(aStr); + return *this; + } + self_type& operator=(const substring_type& aStr) + { + Assign(aStr); + return *this; + } + self_type& operator=(const substring_tuple_type& aTuple) + { + Assign(aTuple); + return *this; + } + + enum + { + kDefaultStorageSize = 64 + }; + +private: + + char_type mStorage[kDefaultStorageSize]; +}; + + +// +// nsAutoString stores pointers into itself which are invalidated when an +// nsTArray is resized, so nsTArray must not be instantiated with nsAutoString +// elements! +// +template<class E> class nsTArrayElementTraits; +template<> +class nsTArrayElementTraits<nsTAutoString_CharT> +{ +public: + template<class A> struct Dont_Instantiate_nsTArray_of; + template<class A> struct Instead_Use_nsTArray_of; + + static Dont_Instantiate_nsTArray_of<nsTAutoString_CharT>* + Construct(Instead_Use_nsTArray_of<nsTString_CharT>* aE) + { + return 0; + } + template<class A> + static Dont_Instantiate_nsTArray_of<nsTAutoString_CharT>* + Construct(Instead_Use_nsTArray_of<nsTString_CharT>* aE, const A& aArg) + { + return 0; + } + static Dont_Instantiate_nsTArray_of<nsTAutoString_CharT>* + Destruct(Instead_Use_nsTArray_of<nsTString_CharT>* aE) + { + return 0; + } +}; + +/** + * nsTXPIDLString extends nsTString such that: + * + * (1) mData can be null + * (2) objects of this type can be automatically cast to |const CharT*| + * (3) getter_Copies method is supported to adopt data allocated with + * moz_xmalloc, such as "out string" parameters in XPIDL. 
+ * + * NAMES: + * nsXPIDLString for wide characters + * nsXPIDLCString for narrow characters + */ +class nsTXPIDLString_CharT : public nsTString_CharT +{ +public: + + typedef nsTXPIDLString_CharT self_type; + +public: + + nsTXPIDLString_CharT() + : string_type(char_traits::sEmptyBuffer, 0, F_TERMINATED | F_VOIDED) + { + } + + // copy-constructor required to avoid default + nsTXPIDLString_CharT(const self_type& aStr) + : string_type(char_traits::sEmptyBuffer, 0, F_TERMINATED | F_VOIDED) + { + Assign(aStr); + } + + // return nullptr if we are voided +#if defined(CharT_is_PRUnichar) && defined(MOZ_USE_CHAR16_WRAPPER) + char16ptr_t get() const +#else + const char_type* get() const +#endif + { + return (mFlags & F_VOIDED) ? nullptr : mData; + } + + // this case operator is the reason why this class cannot just be a + // typedef for nsTString + operator const char_type*() const + { + return get(); + } + + // need this to diambiguous operator[int] + char_type operator[](int32_t aIndex) const + { + return CharAt(index_type(aIndex)); + } + + // |operator=| does not inherit, so we must define our own + self_type& operator=(char_type aChar) + { + Assign(aChar); + return *this; + } + self_type& operator=(const char_type* aStr) + { + Assign(aStr); + return *this; + } + self_type& operator=(const self_type& aStr) + { + Assign(aStr); + return *this; + } + self_type& operator=(const substring_type& aStr) + { + Assign(aStr); + return *this; + } + self_type& operator=(const substring_tuple_type& aTuple) + { + Assign(aTuple); + return *this; + } +}; + + +/** + * getter_Copies support for use with raw string out params: + * + * NS_IMETHOD GetBlah(char**); + * + * void some_function() + * { + * nsXPIDLCString blah; + * GetBlah(getter_Copies(blah)); + * // ... 
+ * } + */ +class MOZ_STACK_CLASS nsTGetterCopies_CharT +{ +public: + typedef CharT char_type; + + explicit nsTGetterCopies_CharT(nsTSubstring_CharT& aStr) + : mString(aStr) + , mData(nullptr) + { + } + + ~nsTGetterCopies_CharT() + { + mString.Adopt(mData); // OK if mData is null + } + + operator char_type**() + { + return &mData; + } + +private: + nsTSubstring_CharT& mString; + char_type* mData; +}; + +inline nsTGetterCopies_CharT +getter_Copies(nsTSubstring_CharT& aString) +{ + return nsTGetterCopies_CharT(aString); +} + + +/** + * nsTAdoptingString extends nsTXPIDLString such that: + * + * (1) Adopt given string on construction or assignment, i.e. take + * the value of what's given, and make what's given forget its + * value. Note that this class violates constness in a few + * places. Be careful! + */ +class nsTAdoptingString_CharT : public nsTXPIDLString_CharT +{ +public: + + typedef nsTAdoptingString_CharT self_type; + +public: + + explicit nsTAdoptingString_CharT() + { + } + explicit nsTAdoptingString_CharT(char_type* aStr, + size_type aLength = size_type(-1)) + { + Adopt(aStr, aLength); + } + + // copy-constructor required to adopt on copy. Note that this + // will violate the constness of |aStr| in the operator=() + // call. |aStr| will be truncated as a side-effect of this + // constructor. + nsTAdoptingString_CharT(const self_type& aStr) + { + *this = aStr; + } + + // |operator=| does not inherit, so we must define our own + self_type& operator=(const substring_type& aStr) + { + Assign(aStr); + return *this; + } + self_type& operator=(const substring_tuple_type& aTuple) + { + Assign(aTuple); + return *this; + } + + // Adopt(), if possible, when assigning to a self_type&. Note + // that this violates the constness of aStr, aStr is always + // truncated when this operator is called. 
+ self_type& operator=(const self_type& aStr); + +private: + self_type& operator=(const char_type* aData) = delete; + self_type& operator=(char_type* aData) = delete; +}; + diff --git a/xpcom/string/nsTStringComparator.cpp b/xpcom/string/nsTStringComparator.cpp new file mode 100644 index 000000000..3f383c65f --- /dev/null +++ b/xpcom/string/nsTStringComparator.cpp @@ -0,0 +1,50 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +int NS_FASTCALL +Compare(const nsTSubstring_CharT::base_string_type& aLhs, + const nsTSubstring_CharT::base_string_type& aRhs, + const nsTStringComparator_CharT& comp) +{ + typedef nsTSubstring_CharT::size_type size_type; + + if (&aLhs == &aRhs) { + return 0; + } + + nsTSubstring_CharT::const_iterator leftIter, rightIter; + aLhs.BeginReading(leftIter); + aRhs.BeginReading(rightIter); + + size_type lLength = aLhs.Length(); + size_type rLength = aRhs.Length(); + size_type lengthToCompare = XPCOM_MIN(lLength, rLength); + + int result; + if ((result = comp(leftIter.get(), rightIter.get(), + lengthToCompare, lengthToCompare)) == 0) { + if (lLength < rLength) { + result = -1; + } else if (rLength < lLength) { + result = 1; + } else { + result = 0; + } + } + + return result; +} + +int +nsTDefaultStringComparator_CharT::operator()(const char_type* aLhs, + const char_type* aRhs, + uint32_t aLLength, + uint32_t aRLength) const +{ + return + aLLength == aRLength ? nsCharTraits<CharT>::compare(aLhs, aRhs, aLLength) : + (aLLength > aRLength) ? 
1 : -1; +} diff --git a/xpcom/string/nsTStringObsolete.cpp b/xpcom/string/nsTStringObsolete.cpp new file mode 100644 index 000000000..5a47ca310 --- /dev/null +++ b/xpcom/string/nsTStringObsolete.cpp @@ -0,0 +1,700 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsTArray.h" + +/** + * nsTString::Find + * + * aOffset specifies starting index + * aCount specifies number of string compares (iterations) + */ + +int32_t +nsTString_CharT::Find( const nsCString& aString, bool aIgnoreCase, int32_t aOffset, int32_t aCount) const +{ + // this method changes the meaning of aOffset and aCount: + Find_ComputeSearchRange(mLength, aString.Length(), aOffset, aCount); + + int32_t result = FindSubstring(mData + aOffset, aCount, aString.get(), aString.Length(), aIgnoreCase); + if (result != kNotFound) + result += aOffset; + return result; +} + +int32_t +nsTString_CharT::Find( const char* aString, bool aIgnoreCase, int32_t aOffset, int32_t aCount) const +{ + return Find(nsDependentCString(aString), aIgnoreCase, aOffset, aCount); +} + + +/** + * nsTString::RFind + * + * aOffset specifies starting index + * aCount specifies number of string compares (iterations) + */ + +int32_t +nsTString_CharT::RFind( const nsCString& aString, bool aIgnoreCase, int32_t aOffset, int32_t aCount) const +{ + // this method changes the meaning of aOffset and aCount: + RFind_ComputeSearchRange(mLength, aString.Length(), aOffset, aCount); + + int32_t result = RFindSubstring(mData + aOffset, aCount, aString.get(), aString.Length(), aIgnoreCase); + if (result != kNotFound) + result += aOffset; + return result; +} + +int32_t +nsTString_CharT::RFind( const char* aString, bool aIgnoreCase, int32_t aOffset, int32_t aCount) 
const +{ + return RFind(nsDependentCString(aString), aIgnoreCase, aOffset, aCount); +} + + +/** + * nsTString::RFindChar + */ + +int32_t +nsTString_CharT::RFindChar( char16_t aChar, int32_t aOffset, int32_t aCount) const +{ + return nsBufferRoutines<CharT>::rfind_char(mData, mLength, aOffset, aChar, aCount); +} + + +/** + * nsTString::FindCharInSet + */ + +int32_t +nsTString_CharT::FindCharInSet( const char* aSet, int32_t aOffset ) const +{ + if (aOffset < 0) + aOffset = 0; + else if (aOffset >= int32_t(mLength)) + return kNotFound; + + int32_t result = ::FindCharInSet(mData + aOffset, mLength - aOffset, aSet); + if (result != kNotFound) + result += aOffset; + return result; +} + + +/** + * nsTString::RFindCharInSet + */ + +int32_t +nsTString_CharT::RFindCharInSet( const CharT* aSet, int32_t aOffset ) const +{ + // We want to pass a "data length" to ::RFindCharInSet + if (aOffset < 0 || aOffset > int32_t(mLength)) + aOffset = mLength; + else + ++aOffset; + + return ::RFindCharInSet(mData, aOffset, aSet); +} + + +// it's a shame to replicate this code. it was done this way in the past +// to help performance. this function also gets to keep the rickg style +// indentation :-/ +int32_t +nsTString_CharT::ToInteger( nsresult* aErrorCode, uint32_t aRadix ) const +{ + CharT* cp=mData; + int32_t theRadix=10; // base 10 unless base 16 detected, or overriden (aRadix != kAutoDetect) + int32_t result=0; + bool negate=false; + CharT theChar=0; + + //initial value, override if we find an integer + *aErrorCode=NS_ERROR_ILLEGAL_VALUE; + + if(cp) { + + //begin by skipping over leading chars that shouldn't be part of the number... 
+ + CharT* endcp=cp+mLength; + bool done=false; + + while((cp<endcp) && (!done)){ + switch(*cp++) { + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + theRadix=16; + done=true; + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + done=true; + break; + case '-': + negate=true; //fall through... + break; + case 'X': case 'x': + theRadix=16; + break; + default: + break; + } //switch + } + + if (done) { + + //integer found + *aErrorCode = NS_OK; + + if (aRadix!=kAutoDetect) theRadix = aRadix; // override + + //now iterate the numeric chars and build our result + CharT* first=--cp; //in case we have to back up. + bool haveValue = false; + + while(cp<endcp){ + int32_t oldresult = result; + + theChar=*cp++; + if(('0'<=theChar) && (theChar<='9')){ + result = (theRadix * result) + (theChar-'0'); + haveValue = true; + } + else if((theChar>='A') && (theChar<='F')) { + if(10==theRadix) { + if(kAutoDetect==aRadix){ + theRadix=16; + cp=first; //backup + result=0; + haveValue = false; + } + else { + *aErrorCode=NS_ERROR_ILLEGAL_VALUE; + result=0; + break; + } + } + else { + result = (theRadix * result) + ((theChar-'A')+10); + haveValue = true; + } + } + else if((theChar>='a') && (theChar<='f')) { + if(10==theRadix) { + if(kAutoDetect==aRadix){ + theRadix=16; + cp=first; //backup + result=0; + haveValue = false; + } + else { + *aErrorCode=NS_ERROR_ILLEGAL_VALUE; + result=0; + break; + } + } + else { + result = (theRadix * result) + ((theChar-'a')+10); + haveValue = true; + } + } + else if((('X'==theChar) || ('x'==theChar)) && (!haveValue || result == 0)) { + continue; + } + else if((('#'==theChar) || ('+'==theChar)) && !haveValue) { + continue; + } + else { + //we've encountered a char that's not a legal number or sign + break; + } + + if (result < oldresult) { + // overflow! 
+ *aErrorCode = NS_ERROR_ILLEGAL_VALUE; + result = 0; + break; + } + } //while + if(negate) + result=-result; + } //if + } + return result; +} + + +/** + * nsTString::ToInteger64 + */ +int64_t +nsTString_CharT::ToInteger64( nsresult* aErrorCode, uint32_t aRadix ) const +{ + CharT* cp=mData; + int32_t theRadix=10; // base 10 unless base 16 detected, or overriden (aRadix != kAutoDetect) + int64_t result=0; + bool negate=false; + CharT theChar=0; + + //initial value, override if we find an integer + *aErrorCode=NS_ERROR_ILLEGAL_VALUE; + + if(cp) { + + //begin by skipping over leading chars that shouldn't be part of the number... + + CharT* endcp=cp+mLength; + bool done=false; + + while((cp<endcp) && (!done)){ + switch(*cp++) { + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + theRadix=16; + done=true; + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + done=true; + break; + case '-': + negate=true; //fall through... + break; + case 'X': case 'x': + theRadix=16; + break; + default: + break; + } //switch + } + + if (done) { + + //integer found + *aErrorCode = NS_OK; + + if (aRadix!=kAutoDetect) theRadix = aRadix; // override + + //now iterate the numeric chars and build our result + CharT* first=--cp; //in case we have to back up. 
+ bool haveValue = false; + + while(cp<endcp){ + int64_t oldresult = result; + + theChar=*cp++; + if(('0'<=theChar) && (theChar<='9')){ + result = (theRadix * result) + (theChar-'0'); + haveValue = true; + } + else if((theChar>='A') && (theChar<='F')) { + if(10==theRadix) { + if(kAutoDetect==aRadix){ + theRadix=16; + cp=first; //backup + result=0; + haveValue = false; + } + else { + *aErrorCode=NS_ERROR_ILLEGAL_VALUE; + result=0; + break; + } + } + else { + result = (theRadix * result) + ((theChar-'A')+10); + haveValue = true; + } + } + else if((theChar>='a') && (theChar<='f')) { + if(10==theRadix) { + if(kAutoDetect==aRadix){ + theRadix=16; + cp=first; //backup + result=0; + haveValue = false; + } + else { + *aErrorCode=NS_ERROR_ILLEGAL_VALUE; + result=0; + break; + } + } + else { + result = (theRadix * result) + ((theChar-'a')+10); + haveValue = true; + } + } + else if((('X'==theChar) || ('x'==theChar)) && (!haveValue || result == 0)) { + continue; + } + else if((('#'==theChar) || ('+'==theChar)) && !haveValue) { + continue; + } + else { + //we've encountered a char that's not a legal number or sign + break; + } + + if (result < oldresult) { + // overflow! 
+ *aErrorCode = NS_ERROR_ILLEGAL_VALUE; + result = 0; + break; + } + } //while + if(negate) + result=-result; + } //if + } + return result; +} + + +/** + * nsTString::Mid + */ + +uint32_t +nsTString_CharT::Mid( self_type& aResult, index_type aStartPos, size_type aLengthToCopy ) const +{ + if (aStartPos == 0 && aLengthToCopy >= mLength) + aResult = *this; + else + aResult = Substring(*this, aStartPos, aLengthToCopy); + + return aResult.mLength; +} + + +/** + * nsTString::SetCharAt + */ + +bool +nsTString_CharT::SetCharAt( char16_t aChar, uint32_t aIndex ) +{ + if (aIndex >= mLength) + return false; + + if (!EnsureMutable()) + AllocFailed(mLength); + + mData[aIndex] = CharT(aChar); + return true; +} + + +/** + * nsTString::StripChars,StripChar,StripWhitespace + */ + +void +nsTString_CharT::StripChars( const char* aSet ) +{ + if (!EnsureMutable()) + AllocFailed(mLength); + + mLength = nsBufferRoutines<CharT>::strip_chars(mData, mLength, aSet); +} + +bool +nsTString_CharT::StripChars( const char* aSet, const fallible_t& ) +{ + if (!EnsureMutable()) { + return false; + } + + mLength = nsBufferRoutines<CharT>::strip_chars(mData, mLength, aSet); + return true; +} + +void +nsTString_CharT::StripWhitespace() +{ + StripChars(kWhitespace); +} + +bool +nsTString_CharT::StripWhitespace(const fallible_t& aFallible) +{ + return StripChars(kWhitespace, aFallible); +} + +/** + * nsTString::ReplaceChar,ReplaceSubstring + */ + +void +nsTString_CharT::ReplaceChar( char_type aOldChar, char_type aNewChar ) +{ + if (!EnsureMutable()) // XXX do this lazily? + AllocFailed(mLength); + + for (uint32_t i=0; i<mLength; ++i) + { + if (mData[i] == aOldChar) + mData[i] = aNewChar; + } +} + +void +nsTString_CharT::ReplaceChar( const char* aSet, char_type aNewChar ) +{ + if (!EnsureMutable()) // XXX do this lazily? 
+ AllocFailed(mLength); + + char_type* data = mData; + uint32_t lenRemaining = mLength; + + while (lenRemaining) + { + int32_t i = ::FindCharInSet(data, lenRemaining, aSet); + if (i == kNotFound) + break; + + data[i++] = aNewChar; + data += i; + lenRemaining -= i; + } +} + +void ReleaseData(void* aData, uint32_t aFlags); + +void +nsTString_CharT::ReplaceSubstring(const char_type* aTarget, + const char_type* aNewValue) +{ + ReplaceSubstring(nsTDependentString_CharT(aTarget), + nsTDependentString_CharT(aNewValue)); +} + +bool +nsTString_CharT::ReplaceSubstring(const char_type* aTarget, + const char_type* aNewValue, + const fallible_t& aFallible) +{ + return ReplaceSubstring(nsTDependentString_CharT(aTarget), + nsTDependentString_CharT(aNewValue), + aFallible); +} + +void +nsTString_CharT::ReplaceSubstring(const self_type& aTarget, + const self_type& aNewValue) +{ + if (!ReplaceSubstring(aTarget, aNewValue, mozilla::fallible)) { + // Note that this may wildly underestimate the allocation that failed, as + // we could have been replacing multiple copies of aTarget. + AllocFailed(mLength + (aNewValue.Length() - aTarget.Length())); + } +} + +bool +nsTString_CharT::ReplaceSubstring(const self_type& aTarget, + const self_type& aNewValue, + const fallible_t&) +{ + if (aTarget.Length() == 0) + return true; + + // Remember all of the non-matching parts. + AutoTArray<Segment, 16> nonMatching; + uint32_t i = 0; + uint32_t newLength = 0; + while (true) + { + int32_t r = FindSubstring(mData + i, mLength - i, static_cast<const char_type*>(aTarget.Data()), aTarget.Length(), false); + int32_t until = (r == kNotFound) ? mLength - i : r; + nonMatching.AppendElement(Segment(i, until)); + newLength += until; + if (r == kNotFound) { + break; + } + + newLength += aNewValue.Length(); + i += r + aTarget.Length(); + if (i >= mLength) { + // Add an auxiliary entry at the end of the list to help as an edge case + // for the algorithms below. 
+ nonMatching.AppendElement(Segment(mLength, 0)); + break; + } + } + + // If there's only one non-matching segment, then the target string was not + // found, and there's nothing to do. + if (nonMatching.Length() == 1) { + MOZ_ASSERT(nonMatching[0].mBegin == 0 && nonMatching[0].mLength == mLength, + "We should have the correct non-matching segment."); + return true; + } + + // Make sure that we can mutate our buffer. + // Note that we always allocate at least an mLength sized buffer, because the + // rest of the algorithm relies on having access to all of the original + // string. In other words, we over-allocate in the shrinking case. + char_type* oldData; + uint32_t oldFlags; + if (!MutatePrep(XPCOM_MAX(mLength, newLength), &oldData, &oldFlags)) + return false; + if (oldData) { + // Copy all of the old data to the new buffer. + char_traits::copy(mData, oldData, mLength); + ::ReleaseData(oldData, oldFlags); + } + + if (aTarget.Length() >= aNewValue.Length()) { + // In the shrinking case, start filling the buffer from the beginning. + const uint32_t delta = (aTarget.Length() - aNewValue.Length()); + for (i = 1; i < nonMatching.Length(); ++i) { + // When we move the i'th non-matching segment into position, we need to + // account for the characters deleted by the previous |i| replacements by + // subtracting |i * delta|. + const char_type* sourceSegmentPtr = mData + nonMatching[i].mBegin; + char_type* destinationSegmentPtr = mData + nonMatching[i].mBegin - i * delta; + // Write the i'th replacement immediately before the new i'th non-matching + // segment. + char_traits::copy(destinationSegmentPtr - aNewValue.Length(), + aNewValue.Data(), aNewValue.Length()); + char_traits::move(destinationSegmentPtr, sourceSegmentPtr, + nonMatching[i].mLength); + } + } else { + // In the growing case, start filling the buffer from the end. 
+ const uint32_t delta = (aNewValue.Length() - aTarget.Length()); + for (i = nonMatching.Length() - 1; i > 0; --i) { + // When we move the i'th non-matching segment into position, we need to + // account for the characters added by the previous |i| replacements by + // adding |i * delta|. + const char_type* sourceSegmentPtr = mData + nonMatching[i].mBegin; + char_type* destinationSegmentPtr = mData + nonMatching[i].mBegin + i * delta; + char_traits::move(destinationSegmentPtr, sourceSegmentPtr, + nonMatching[i].mLength); + // Write the i'th replacement immediately before the new i'th non-matching + // segment. + char_traits::copy(destinationSegmentPtr - aNewValue.Length(), + aNewValue.Data(), aNewValue.Length()); + } + } + + // Adjust the length and make sure the string is null terminated. + mLength = newLength; + mData[mLength] = char_type(0); + + return true; +} + +/** + * nsTString::Trim + */ + +void +nsTString_CharT::Trim( const char* aSet, bool aTrimLeading, bool aTrimTrailing, bool aIgnoreQuotes ) +{ + // the old implementation worried about aSet being null :-/ + if (!aSet) + return; + + char_type* start = mData; + char_type* end = mData + mLength; + + // skip over quotes if requested + if (aIgnoreQuotes && mLength > 2 && mData[0] == mData[mLength - 1] && + (mData[0] == '\'' || mData[0] == '"')) + { + ++start; + --end; + } + + uint32_t setLen = nsCharTraits<char>::length(aSet); + + if (aTrimLeading) + { + uint32_t cutStart = start - mData; + uint32_t cutLength = 0; + + // walk forward from start to end + for (; start != end; ++start, ++cutLength) + { + int32_t pos = FindChar1(aSet, setLen, 0, *start, setLen); + if (pos == kNotFound) + break; + } + + if (cutLength) + { + Cut(cutStart, cutLength); + + // reset iterators + start = mData + cutStart; + end = mData + mLength - cutStart; + } + } + + if (aTrimTrailing) + { + uint32_t cutEnd = end - mData; + uint32_t cutLength = 0; + + // walk backward from end to start + --end; + for (; end >= start; --end, 
++cutLength) + { + int32_t pos = FindChar1(aSet, setLen, 0, *end, setLen); + if (pos == kNotFound) + break; + } + + if (cutLength) + Cut(cutEnd - cutLength, cutLength); + } +} + + +/** + * nsTString::CompressWhitespace + */ + +void +nsTString_CharT::CompressWhitespace( bool aTrimLeading, bool aTrimTrailing ) +{ + const char* set = kWhitespace; + + ReplaceChar(set, ' '); + Trim(set, aTrimLeading, aTrimTrailing); + + // this one does some questionable fu... just copying the old code! + mLength = nsBufferRoutines<char_type>::compress_chars(mData, mLength, set); +} + + +/** + * nsTString::AssignWithConversion + */ + +void +nsTString_CharT::AssignWithConversion( const incompatible_char_type* aData, int32_t aLength ) +{ + // for compatibility with the old string implementation, we need to allow + // for a nullptr input buffer :-( + if (!aData) + { + Truncate(); + } + else + { + if (aLength < 0) + aLength = nsCharTraits<incompatible_char_type>::length(aData); + + AssignWithConversion(Substring(aData, aLength)); + } +} diff --git a/xpcom/string/nsTSubstring.cpp b/xpcom/string/nsTSubstring.cpp new file mode 100644 index 000000000..a3a830b9d --- /dev/null +++ b/xpcom/string/nsTSubstring.cpp @@ -0,0 +1,1089 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mozilla/CheckedInt.h" +#include "mozilla/double-conversion.h" +#include "mozilla/MemoryReporting.h" + +using double_conversion::DoubleToStringConverter; + +const nsTSubstring_CharT::size_type nsTSubstring_CharT::kMaxCapacity = + (nsTSubstring_CharT::size_type(-1) / + 2 - sizeof(nsStringBuffer)) / + sizeof(nsTSubstring_CharT::char_type) - 2; + +#ifdef XPCOM_STRING_CONSTRUCTOR_OUT_OF_LINE +nsTSubstring_CharT::nsTSubstring_CharT(char_type* aData, size_type aLength, + uint32_t aFlags) + : mData(aData), + mLength(aLength), + mFlags(aFlags) +{ + MOZ_RELEASE_ASSERT(CheckCapacity(aLength), "String is too large."); + + if (aFlags & F_OWNED) { + STRING_STAT_INCREMENT(Adopt); + MOZ_LOG_CTOR(mData, "StringAdopt", 1); + } +} +#endif /* XPCOM_STRING_CONSTRUCTOR_OUT_OF_LINE */ + +/** + * helper function for down-casting a nsTSubstring to a nsTFixedString. + */ +inline const nsTFixedString_CharT* +AsFixedString(const nsTSubstring_CharT* aStr) +{ + return static_cast<const nsTFixedString_CharT*>(aStr); +} + + +/** + * this function is called to prepare mData for writing. the given capacity + * indicates the required minimum storage size for mData, in sizeof(char_type) + * increments. this function returns true if the operation succeeds. it also + * returns the old data and old flags members if mData is newly allocated. + * the old data must be released by the caller. + */ +bool +nsTSubstring_CharT::MutatePrep(size_type aCapacity, char_type** aOldData, + uint32_t* aOldFlags) +{ + // initialize to no old data + *aOldData = nullptr; + *aOldFlags = 0; + + size_type curCapacity = Capacity(); + + // If |aCapacity > kMaxCapacity|, then our doubling algorithm may not be + // able to allocate it. Just bail out in cases like that. We don't want + // to be allocating 2GB+ strings anyway. 
+ static_assert((sizeof(nsStringBuffer) & 0x1) == 0, + "bad size for nsStringBuffer"); + if (!CheckCapacity(aCapacity)) { + return false; + } + + // |curCapacity == 0| means that the buffer is immutable or 0-sized, so we + // need to allocate a new buffer. We cannot use the existing buffer even + // though it might be large enough. + + if (curCapacity != 0) { + if (aCapacity <= curCapacity) { + mFlags &= ~F_VOIDED; // mutation clears voided flag + return true; + } + } + + if (curCapacity < aCapacity) { + // We increase our capacity so that the allocated buffer grows + // exponentially, which gives us amortized O(1) appending. Below the + // threshold, we use powers-of-two. Above the threshold, we grow by at + // least 1.125, rounding up to the nearest MiB. + const size_type slowGrowthThreshold = 8 * 1024 * 1024; + + // nsStringBuffer allocates sizeof(nsStringBuffer) + passed size, and + // storageSize below wants extra 1 * sizeof(char_type). + const size_type neededExtraSpace = + sizeof(nsStringBuffer) / sizeof(char_type) + 1; + + size_type temp; + if (aCapacity >= slowGrowthThreshold) { + size_type minNewCapacity = curCapacity + (curCapacity >> 3); // multiply by 1.125 + temp = XPCOM_MAX(aCapacity, minNewCapacity) + neededExtraSpace; + + // Round up to the next multiple of MiB, but ensure the expected + // capacity doesn't include the extra space required by nsStringBuffer + // and null-termination. + const size_t MiB = 1 << 20; + temp = (MiB * ((temp + MiB - 1) / MiB)) - neededExtraSpace; + } else { + // Round up to the next power of two. 
+ temp = + mozilla::RoundUpPow2(aCapacity + neededExtraSpace) - neededExtraSpace; + } + + MOZ_ASSERT(XPCOM_MIN(temp, kMaxCapacity) >= aCapacity, + "should have hit the early return at the top"); + aCapacity = XPCOM_MIN(temp, kMaxCapacity); + } + + // + // several cases: + // + // (1) we have a shared buffer (mFlags & F_SHARED) + // (2) we have an owned buffer (mFlags & F_OWNED) + // (3) we have a fixed buffer (mFlags & F_FIXED) + // (4) we have a readonly buffer + // + // requiring that we in some cases preserve the data before creating + // a new buffer complicates things just a bit ;-) + // + + size_type storageSize = (aCapacity + 1) * sizeof(char_type); + + // case #1 + if (mFlags & F_SHARED) { + nsStringBuffer* hdr = nsStringBuffer::FromData(mData); + if (!hdr->IsReadonly()) { + nsStringBuffer* newHdr = nsStringBuffer::Realloc(hdr, storageSize); + if (!newHdr) { + return false; // out-of-memory (original header left intact) + } + + hdr = newHdr; + mData = (char_type*)hdr->Data(); + mFlags &= ~F_VOIDED; // mutation clears voided flag + return true; + } + } + + char_type* newData; + uint32_t newDataFlags; + + // if we have a fixed buffer of sufficient size, then use it. this helps + // avoid heap allocations. + if ((mFlags & F_CLASS_FIXED) && + (aCapacity < AsFixedString(this)->mFixedCapacity)) { + newData = AsFixedString(this)->mFixedBuf; + newDataFlags = F_TERMINATED | F_FIXED; + } else { + // if we reach here then, we must allocate a new buffer. we cannot + // make use of our F_OWNED or F_FIXED buffers because they are not + // large enough. 
+ + nsStringBuffer* newHdr = + nsStringBuffer::Alloc(storageSize).take(); + if (!newHdr) { + return false; // we are still in a consistent state + } + + newData = (char_type*)newHdr->Data(); + newDataFlags = F_TERMINATED | F_SHARED; + } + + // save old data and flags + *aOldData = mData; + *aOldFlags = mFlags; + + mData = newData; + SetDataFlags(newDataFlags); + + // mLength does not change + + // though we are not necessarily terminated at the moment, now is probably + // still the best time to set F_TERMINATED. + + return true; +} + +void +nsTSubstring_CharT::Finalize() +{ + ::ReleaseData(mData, mFlags); + // mData, mLength, and mFlags are purposefully left dangling +} + +bool +nsTSubstring_CharT::ReplacePrep(index_type aCutStart, + size_type aCutLength, + size_type aNewLength) +{ + aCutLength = XPCOM_MIN(aCutLength, mLength - aCutStart); + + mozilla::CheckedInt<size_type> newTotalLen = mLength; + newTotalLen += aNewLength; + newTotalLen -= aCutLength; + if (!newTotalLen.isValid()) { + return false; + } + + if (aCutStart == mLength && Capacity() > newTotalLen.value()) { + mFlags &= ~F_VOIDED; + mData[newTotalLen.value()] = char_type(0); + mLength = newTotalLen.value(); + return true; + } + + return ReplacePrepInternal(aCutStart, aCutLength, aNewLength, + newTotalLen.value()); +} + +bool +nsTSubstring_CharT::ReplacePrepInternal(index_type aCutStart, size_type aCutLen, + size_type aFragLen, size_type aNewLen) +{ + char_type* oldData; + uint32_t oldFlags; + if (!MutatePrep(aNewLen, &oldData, &oldFlags)) { + return false; // out-of-memory + } + + if (oldData) { + // determine whether or not we need to copy part of the old string + // over to the new string. 
+ + if (aCutStart > 0) { + // copy prefix from old string + char_traits::copy(mData, oldData, aCutStart); + } + + if (aCutStart + aCutLen < mLength) { + // copy suffix from old string to new offset + size_type from = aCutStart + aCutLen; + size_type fromLen = mLength - from; + uint32_t to = aCutStart + aFragLen; + char_traits::copy(mData + to, oldData + from, fromLen); + } + + ::ReleaseData(oldData, oldFlags); + } else { + // original data remains intact + + // determine whether or not we need to move part of the existing string + // to make room for the requested hole. + if (aFragLen != aCutLen && aCutStart + aCutLen < mLength) { + uint32_t from = aCutStart + aCutLen; + uint32_t fromLen = mLength - from; + uint32_t to = aCutStart + aFragLen; + char_traits::move(mData + to, mData + from, fromLen); + } + } + + // add null terminator (mutable mData always has room for the null- + // terminator). + mData[aNewLen] = char_type(0); + mLength = aNewLen; + + return true; +} + +nsTSubstring_CharT::size_type +nsTSubstring_CharT::Capacity() const +{ + // return 0 to indicate an immutable or 0-sized buffer + + size_type capacity; + if (mFlags & F_SHARED) { + // if the string is readonly, then we pretend that it has no capacity. + nsStringBuffer* hdr = nsStringBuffer::FromData(mData); + if (hdr->IsReadonly()) { + capacity = 0; + } else { + capacity = (hdr->StorageSize() / sizeof(char_type)) - 1; + } + } else if (mFlags & F_FIXED) { + capacity = AsFixedString(this)->mFixedCapacity; + } else if (mFlags & F_OWNED) { + // we don't store the capacity of an adopted buffer because that would + // require an additional member field. the best we can do is base the + // capacity on our length. remains to be seen if this is the right + // trade-off. 
+ capacity = mLength; + } else { + capacity = 0; + } + + return capacity; +} + +bool +nsTSubstring_CharT::EnsureMutable(size_type aNewLen) +{ + if (aNewLen == size_type(-1) || aNewLen == mLength) { + if (mFlags & (F_FIXED | F_OWNED)) { + return true; + } + if ((mFlags & F_SHARED) && + !nsStringBuffer::FromData(mData)->IsReadonly()) { + return true; + } + + aNewLen = mLength; + } + return SetLength(aNewLen, mozilla::fallible); +} + +// --------------------------------------------------------------------------- + +// This version of Assign is optimized for single-character assignment. +void +nsTSubstring_CharT::Assign(char_type aChar) +{ + if (!ReplacePrep(0, mLength, 1)) { + AllocFailed(mLength); + } + + *mData = aChar; +} + +bool +nsTSubstring_CharT::Assign(char_type aChar, const fallible_t&) +{ + if (!ReplacePrep(0, mLength, 1)) { + return false; + } + + *mData = aChar; + return true; +} + +void +nsTSubstring_CharT::Assign(const char_type* aData) +{ + if (!Assign(aData, mozilla::fallible)) { + AllocFailed(char_traits::length(aData)); + } +} + +bool +nsTSubstring_CharT::Assign(const char_type* aData, const fallible_t&) +{ + return Assign(aData, size_type(-1), mozilla::fallible); +} + +void +nsTSubstring_CharT::Assign(const char_type* aData, size_type aLength) +{ + if (!Assign(aData, aLength, mozilla::fallible)) { + AllocFailed(aLength == size_type(-1) ? 
char_traits::length(aData) + : aLength); + } +} + +bool +nsTSubstring_CharT::Assign(const char_type* aData, size_type aLength, + const fallible_t& aFallible) +{ + if (!aData || aLength == 0) { + Truncate(); + return true; + } + + if (aLength == size_type(-1)) { + aLength = char_traits::length(aData); + } + + if (IsDependentOn(aData, aData + aLength)) { + return Assign(string_type(aData, aLength), aFallible); + } + + if (!ReplacePrep(0, mLength, aLength)) { + return false; + } + + char_traits::copy(mData, aData, aLength); + return true; +} + +void +nsTSubstring_CharT::AssignASCII(const char* aData, size_type aLength) +{ + if (!AssignASCII(aData, aLength, mozilla::fallible)) { + AllocFailed(aLength); + } +} + +bool +nsTSubstring_CharT::AssignASCII(const char* aData, size_type aLength, + const fallible_t& aFallible) +{ + // A Unicode string can't depend on an ASCII string buffer, + // so this dependence check only applies to CStrings. +#ifdef CharT_is_char + if (IsDependentOn(aData, aData + aLength)) { + return Assign(string_type(aData, aLength), aFallible); + } +#endif + + if (!ReplacePrep(0, mLength, aLength)) { + return false; + } + + char_traits::copyASCII(mData, aData, aLength); + return true; +} + +void +nsTSubstring_CharT::AssignLiteral(const char_type* aData, size_type aLength) +{ + ::ReleaseData(mData, mFlags); + mData = const_cast<char_type*>(aData); + mLength = aLength; + SetDataFlags(F_TERMINATED | F_LITERAL); +} + +void +nsTSubstring_CharT::Assign(const self_type& aStr) +{ + if (!Assign(aStr, mozilla::fallible)) { + AllocFailed(aStr.Length()); + } +} + +bool +nsTSubstring_CharT::Assign(const self_type& aStr, const fallible_t& aFallible) +{ + // |aStr| could be sharable. We need to check its flags to know how to + // deal with it. + + if (&aStr == this) { + return true; + } + + if (!aStr.mLength) { + Truncate(); + mFlags |= aStr.mFlags & F_VOIDED; + return true; + } + + if (aStr.mFlags & F_SHARED) { + // nice! 
we can avoid a string copy :-) + + // |aStr| should be null-terminated + NS_ASSERTION(aStr.mFlags & F_TERMINATED, "shared, but not terminated"); + + ::ReleaseData(mData, mFlags); + + mData = aStr.mData; + mLength = aStr.mLength; + SetDataFlags(F_TERMINATED | F_SHARED); + + // get an owning reference to the mData + nsStringBuffer::FromData(mData)->AddRef(); + return true; + } else if (aStr.mFlags & F_LITERAL) { + MOZ_ASSERT(aStr.mFlags & F_TERMINATED, "Unterminated literal"); + + AssignLiteral(aStr.mData, aStr.mLength); + return true; + } + + // else, treat this like an ordinary assignment. + return Assign(aStr.Data(), aStr.Length(), aFallible); +} + +void +nsTSubstring_CharT::Assign(const substring_tuple_type& aTuple) +{ + if (!Assign(aTuple, mozilla::fallible)) { + AllocFailed(aTuple.Length()); + } +} + +bool +nsTSubstring_CharT::Assign(const substring_tuple_type& aTuple, + const fallible_t& aFallible) +{ + if (aTuple.IsDependentOn(mData, mData + mLength)) { + // take advantage of sharing here... + return Assign(string_type(aTuple), aFallible); + } + + size_type length = aTuple.Length(); + + // don't use ReplacePrep here because it changes the length + char_type* oldData; + uint32_t oldFlags; + if (!MutatePrep(length, &oldData, &oldFlags)) { + return false; + } + + if (oldData) { + ::ReleaseData(oldData, oldFlags); + } + + aTuple.WriteTo(mData, length); + mData[length] = 0; + mLength = length; + return true; +} + +void +nsTSubstring_CharT::Adopt(char_type* aData, size_type aLength) +{ + if (aData) { + ::ReleaseData(mData, mFlags); + + if (aLength == size_type(-1)) { + aLength = char_traits::length(aData); + } + + MOZ_RELEASE_ASSERT(CheckCapacity(aLength), "adopting a too-long string"); + + mData = aData; + mLength = aLength; + SetDataFlags(F_TERMINATED | F_OWNED); + + STRING_STAT_INCREMENT(Adopt); + // Treat this as construction of a "StringAdopt" object for leak + // tracking purposes. 
+ MOZ_LOG_CTOR(mData, "StringAdopt", 1); + } else { + SetIsVoid(true); + } +} + + +// This version of Replace is optimized for single-character replacement. +void +nsTSubstring_CharT::Replace(index_type aCutStart, size_type aCutLength, + char_type aChar) +{ + aCutStart = XPCOM_MIN(aCutStart, Length()); + + if (ReplacePrep(aCutStart, aCutLength, 1)) { + mData[aCutStart] = aChar; + } +} + +bool +nsTSubstring_CharT::Replace(index_type aCutStart, size_type aCutLength, + char_type aChar, + const fallible_t&) +{ + aCutStart = XPCOM_MIN(aCutStart, Length()); + + if (!ReplacePrep(aCutStart, aCutLength, 1)) { + return false; + } + + mData[aCutStart] = aChar; + + return true; +} + +void +nsTSubstring_CharT::Replace(index_type aCutStart, size_type aCutLength, + const char_type* aData, size_type aLength) +{ + if (!Replace(aCutStart, aCutLength, aData, aLength, + mozilla::fallible)) { + AllocFailed(Length() - aCutLength + 1); + } +} + +bool +nsTSubstring_CharT::Replace(index_type aCutStart, size_type aCutLength, + const char_type* aData, size_type aLength, + const fallible_t& aFallible) +{ + // unfortunately, some callers pass null :-( + if (!aData) { + aLength = 0; + } else { + if (aLength == size_type(-1)) { + aLength = char_traits::length(aData); + } + + if (IsDependentOn(aData, aData + aLength)) { + nsTAutoString_CharT temp(aData, aLength); + return Replace(aCutStart, aCutLength, temp, aFallible); + } + } + + aCutStart = XPCOM_MIN(aCutStart, Length()); + + bool ok = ReplacePrep(aCutStart, aCutLength, aLength); + if (!ok) { + return false; + } + + if (aLength > 0) { + char_traits::copy(mData + aCutStart, aData, aLength); + } + + return true; +} + +void +nsTSubstring_CharT::ReplaceASCII(index_type aCutStart, size_type aCutLength, + const char* aData, size_type aLength) +{ + if (!ReplaceASCII(aCutStart, aCutLength, aData, aLength, mozilla::fallible)) { + AllocFailed(Length() - aCutLength + 1); + } +} + +bool +nsTSubstring_CharT::ReplaceASCII(index_type aCutStart, size_type 
aCutLength, + const char* aData, size_type aLength, + const fallible_t& aFallible) +{ + if (aLength == size_type(-1)) { + aLength = strlen(aData); + } + + // A Unicode string can't depend on an ASCII string buffer, + // so this dependence check only applies to CStrings. +#ifdef CharT_is_char + if (IsDependentOn(aData, aData + aLength)) { + nsTAutoString_CharT temp(aData, aLength); + return Replace(aCutStart, aCutLength, temp, aFallible); + } +#endif + + aCutStart = XPCOM_MIN(aCutStart, Length()); + + bool ok = ReplacePrep(aCutStart, aCutLength, aLength); + if (!ok) { + return false; + } + + if (aLength > 0) { + char_traits::copyASCII(mData + aCutStart, aData, aLength); + } + + return true; +} + +void +nsTSubstring_CharT::Replace(index_type aCutStart, size_type aCutLength, + const substring_tuple_type& aTuple) +{ + if (aTuple.IsDependentOn(mData, mData + mLength)) { + nsTAutoString_CharT temp(aTuple); + Replace(aCutStart, aCutLength, temp); + return; + } + + size_type length = aTuple.Length(); + + aCutStart = XPCOM_MIN(aCutStart, Length()); + + if (ReplacePrep(aCutStart, aCutLength, length) && length > 0) { + aTuple.WriteTo(mData + aCutStart, length); + } +} + +void +nsTSubstring_CharT::ReplaceLiteral(index_type aCutStart, size_type aCutLength, + const char_type* aData, size_type aLength) +{ + aCutStart = XPCOM_MIN(aCutStart, Length()); + + if (!aCutStart && aCutLength == Length()) { + AssignLiteral(aData, aLength); + } else if (ReplacePrep(aCutStart, aCutLength, aLength) && aLength > 0) { + char_traits::copy(mData + aCutStart, aData, aLength); + } +} + +void +nsTSubstring_CharT::SetCapacity(size_type aCapacity) +{ + if (!SetCapacity(aCapacity, mozilla::fallible)) { + AllocFailed(aCapacity); + } +} + +bool +nsTSubstring_CharT::SetCapacity(size_type aCapacity, const fallible_t&) +{ + // capacity does not include room for the terminating null char + + // if our capacity is reduced to zero, then free our buffer. 
+ if (aCapacity == 0) { + ::ReleaseData(mData, mFlags); + mData = char_traits::sEmptyBuffer; + mLength = 0; + SetDataFlags(F_TERMINATED); + return true; + } + + char_type* oldData; + uint32_t oldFlags; + if (!MutatePrep(aCapacity, &oldData, &oldFlags)) { + return false; // out-of-memory + } + + // compute new string length + size_type newLen = XPCOM_MIN(mLength, aCapacity); + + if (oldData) { + // preserve old data + if (mLength > 0) { + char_traits::copy(mData, oldData, newLen); + } + + ::ReleaseData(oldData, oldFlags); + } + + // adjust mLength if our buffer shrunk down in size + if (newLen < mLength) { + mLength = newLen; + } + + // always null-terminate here, even if the buffer got longer. this is + // for backwards compat with the old string implementation. + mData[aCapacity] = char_type(0); + + return true; +} + +void +nsTSubstring_CharT::SetLength(size_type aLength) +{ + SetCapacity(aLength); + mLength = aLength; +} + +bool +nsTSubstring_CharT::SetLength(size_type aLength, const fallible_t& aFallible) +{ + if (!SetCapacity(aLength, aFallible)) { + return false; + } + + mLength = aLength; + return true; +} + +void +nsTSubstring_CharT::SetIsVoid(bool aVal) +{ + if (aVal) { + Truncate(); + mFlags |= F_VOIDED; + } else { + mFlags &= ~F_VOIDED; + } +} + +bool +nsTSubstring_CharT::Equals(const self_type& aStr) const +{ + return mLength == aStr.mLength && + char_traits::compare(mData, aStr.mData, mLength) == 0; +} + +bool +nsTSubstring_CharT::Equals(const self_type& aStr, + const comparator_type& aComp) const +{ + return mLength == aStr.mLength && + aComp(mData, aStr.mData, mLength, aStr.mLength) == 0; +} + +bool +nsTSubstring_CharT::Equals(const char_type* aData) const +{ + // unfortunately, some callers pass null :-( + if (!aData) { + NS_NOTREACHED("null data pointer"); + return mLength == 0; + } + + // XXX avoid length calculation? 
+ size_type length = char_traits::length(aData); + return mLength == length && + char_traits::compare(mData, aData, mLength) == 0; +} + +bool +nsTSubstring_CharT::Equals(const char_type* aData, + const comparator_type& aComp) const +{ + // unfortunately, some callers pass null :-( + if (!aData) { + NS_NOTREACHED("null data pointer"); + return mLength == 0; + } + + // XXX avoid length calculation? + size_type length = char_traits::length(aData); + return mLength == length && aComp(mData, aData, mLength, length) == 0; +} + +bool +nsTSubstring_CharT::EqualsASCII(const char* aData, size_type aLen) const +{ + return mLength == aLen && + char_traits::compareASCII(mData, aData, aLen) == 0; +} + +bool +nsTSubstring_CharT::EqualsASCII(const char* aData) const +{ + return char_traits::compareASCIINullTerminated(mData, mLength, aData) == 0; +} + +bool +nsTSubstring_CharT::LowerCaseEqualsASCII(const char* aData, + size_type aLen) const +{ + return mLength == aLen && + char_traits::compareLowerCaseToASCII(mData, aData, aLen) == 0; +} + +bool +nsTSubstring_CharT::LowerCaseEqualsASCII(const char* aData) const +{ + return char_traits::compareLowerCaseToASCIINullTerminated(mData, + mLength, + aData) == 0; +} + +nsTSubstring_CharT::size_type +nsTSubstring_CharT::CountChar(char_type aChar) const +{ + const char_type* start = mData; + const char_type* end = mData + mLength; + + return NS_COUNT(start, end, aChar); +} + +int32_t +nsTSubstring_CharT::FindChar(char_type aChar, index_type aOffset) const +{ + if (aOffset < mLength) { + const char_type* result = char_traits::find(mData + aOffset, + mLength - aOffset, aChar); + if (result) { + return result - mData; + } + } + return -1; +} + +void +nsTSubstring_CharT::StripChar(char_type aChar, int32_t aOffset) +{ + if (mLength == 0 || aOffset >= int32_t(mLength)) { + return; + } + + if (!EnsureMutable()) { // XXX do this lazily? + AllocFailed(mLength); + } + + // XXX(darin): this code should defer writing until necessary. 
+ + char_type* to = mData + aOffset; + char_type* from = mData + aOffset; + char_type* end = mData + mLength; + + while (from < end) { + char_type theChar = *from++; + if (aChar != theChar) { + *to++ = theChar; + } + } + *to = char_type(0); // add the null + mLength = to - mData; +} + +void +nsTSubstring_CharT::StripChars(const char_type* aChars, uint32_t aOffset) +{ + if (aOffset >= uint32_t(mLength)) { + return; + } + + if (!EnsureMutable()) { // XXX do this lazily? + AllocFailed(mLength); + } + + // XXX(darin): this code should defer writing until necessary. + + char_type* to = mData + aOffset; + char_type* from = mData + aOffset; + char_type* end = mData + mLength; + + while (from < end) { + char_type theChar = *from++; + const char_type* test = aChars; + + for (; *test && *test != theChar; ++test); + + if (!*test) { + // Not stripped, copy this char. + *to++ = theChar; + } + } + *to = char_type(0); // add the null + mLength = to - mData; +} + +int +nsTSubstring_CharT::AppendFunc(void* aArg, const char* aStr, uint32_t aLen) +{ + self_type* self = static_cast<self_type*>(aArg); + + // NSPR sends us the final null terminator even though we don't want it + if (aLen && aStr[aLen - 1] == '\0') { + --aLen; + } + + self->AppendASCII(aStr, aLen); + + return aLen; +} + +void +nsTSubstring_CharT::AppendPrintf(const char* aFormat, ...) +{ + va_list ap; + va_start(ap, aFormat); + uint32_t r = PR_vsxprintf(AppendFunc, this, aFormat, ap); + if (r == (uint32_t)-1) { + NS_RUNTIMEABORT("Allocation or other failure in PR_vsxprintf"); + } + va_end(ap); +} + +void +nsTSubstring_CharT::AppendPrintf(const char* aFormat, va_list aAp) +{ + uint32_t r = PR_vsxprintf(AppendFunc, this, aFormat, aAp); + if (r == (uint32_t)-1) { + NS_RUNTIMEABORT("Allocation or other failure in PR_vsxprintf"); + } +} + +/* hack to make sure we define FormatWithoutTrailingZeros only once */ +#ifdef CharT_is_PRUnichar +// Returns the length of the formatted aDouble in aBuf. 
+static int +FormatWithoutTrailingZeros(char (&aBuf)[40], double aDouble, + int aPrecision) +{ + static const DoubleToStringConverter converter(DoubleToStringConverter::UNIQUE_ZERO | + DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, + "Infinity", + "NaN", + 'e', + -6, 21, + 6, 1); + double_conversion::StringBuilder builder(aBuf, sizeof(aBuf)); + bool exponential_notation = false; + converter.ToPrecision(aDouble, aPrecision, &exponential_notation, &builder); + int length = builder.position(); + char* formattedDouble = builder.Finalize(); + + // If we have a shorter string than aPrecision, it means we have a special + // value (NaN or Infinity). All other numbers will be formatted with at + // least aPrecision digits. + if (length <= aPrecision) { + return length; + } + + char* end = formattedDouble + length; + char* decimalPoint = strchr(aBuf, '.'); + // No trailing zeros to remove. + if (!decimalPoint) { + return length; + } + + if (MOZ_UNLIKELY(exponential_notation)) { + // We need to check for cases like 1.00000e-10 (yes, this is + // disgusting). + char* exponent = end - 1; + for (; ; --exponent) { + if (*exponent == 'e') { + break; + } + } + char* zerosBeforeExponent = exponent - 1; + for (; zerosBeforeExponent != decimalPoint; --zerosBeforeExponent) { + if (*zerosBeforeExponent != '0') { + break; + } + } + if (zerosBeforeExponent == decimalPoint) { + --zerosBeforeExponent; + } + // Slide the exponent to the left over the trailing zeros. Don't + // worry about copying the trailing NUL character. 
+ size_t exponentSize = end - exponent; + memmove(zerosBeforeExponent + 1, exponent, exponentSize); + length -= exponent - (zerosBeforeExponent + 1); + } else { + char* trailingZeros = end - 1; + for (; trailingZeros != decimalPoint; --trailingZeros) { + if (*trailingZeros != '0') { + break; + } + } + if (trailingZeros == decimalPoint) { + --trailingZeros; + } + length -= end - (trailingZeros + 1); + } + + return length; +} +#endif /* CharT_is_PRUnichar */ + +void +nsTSubstring_CharT::AppendFloat(float aFloat) +{ + char buf[40]; + int length = FormatWithoutTrailingZeros(buf, aFloat, 6); + AppendASCII(buf, length); +} + +void +nsTSubstring_CharT::AppendFloat(double aFloat) +{ + char buf[40]; + int length = FormatWithoutTrailingZeros(buf, aFloat, 15); + AppendASCII(buf, length); +} + +size_t +nsTSubstring_CharT::SizeOfExcludingThisIfUnshared( + mozilla::MallocSizeOf aMallocSizeOf) const +{ + if (mFlags & F_SHARED) { + return nsStringBuffer::FromData(mData)-> + SizeOfIncludingThisIfUnshared(aMallocSizeOf); + } + if (mFlags & F_OWNED) { + return aMallocSizeOf(mData); + } + + // If we reach here, exactly one of the following must be true: + // - F_VOIDED is set, and mData points to sEmptyBuffer; + // - F_FIXED is set, and mData points to a buffer within a string + // object (e.g. nsAutoString); + // - None of F_SHARED, F_OWNED, F_FIXED is set, and mData points to a buffer + // owned by something else. + // + // In all three cases, we don't measure it. + return 0; +} + +size_t +nsTSubstring_CharT::SizeOfExcludingThisEvenIfShared( + mozilla::MallocSizeOf aMallocSizeOf) const +{ + // This is identical to SizeOfExcludingThisIfUnshared except for the + // F_SHARED case. 
+ if (mFlags & F_SHARED) { + return nsStringBuffer::FromData(mData)-> + SizeOfIncludingThisEvenIfShared(aMallocSizeOf); + } + if (mFlags & F_OWNED) { + return aMallocSizeOf(mData); + } + return 0; +} + +size_t +nsTSubstring_CharT::SizeOfIncludingThisIfUnshared( + mozilla::MallocSizeOf aMallocSizeOf) const +{ + return aMallocSizeOf(this) + SizeOfExcludingThisIfUnshared(aMallocSizeOf); +} + +size_t +nsTSubstring_CharT::SizeOfIncludingThisEvenIfShared( + mozilla::MallocSizeOf aMallocSizeOf) const +{ + return aMallocSizeOf(this) + SizeOfExcludingThisEvenIfShared(aMallocSizeOf); +} + diff --git a/xpcom/string/nsTSubstring.h b/xpcom/string/nsTSubstring.h new file mode 100644 index 000000000..a08036b1f --- /dev/null +++ b/xpcom/string/nsTSubstring.h @@ -0,0 +1,1186 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +// IWYU pragma: private, include "nsString.h" + +#include "mozilla/Casting.h" +#include "mozilla/MemoryReporting.h" + +#ifndef MOZILLA_INTERNAL_API +#error Cannot use internal string classes without MOZILLA_INTERNAL_API defined. Use the frozen header nsStringAPI.h instead. 
+#endif + +/** + * The base for string comparators; operator() returns 0 on a match + */ +class nsTStringComparator_CharT +{ +public: + typedef CharT char_type; + + nsTStringComparator_CharT() + { + } + + virtual int operator()(const char_type*, const char_type*, + uint32_t, uint32_t) const = 0; +}; + + +/** + * The default string comparator (case-sensitive comparison) + */ +class nsTDefaultStringComparator_CharT + : public nsTStringComparator_CharT +{ +public: + typedef CharT char_type; + + nsTDefaultStringComparator_CharT() + { + } + + virtual int operator()(const char_type*, const char_type*, + uint32_t, uint32_t) const override; +}; + +/** + * nsTSubstring is the most abstract class in the string hierarchy. It + * represents a single contiguous array of characters, which may or may not + * be null-terminated. This type is not instantiated directly. A sub-class + * is instantiated instead. For example, see nsTString. + * + * NAMES: + * nsAString for wide characters + * nsACString for narrow characters + * + * Many of the accessors on nsTSubstring are inlined as an optimization.
+ */ +class nsTSubstring_CharT +{ +public: + typedef mozilla::fallible_t fallible_t; + + typedef CharT char_type; + + typedef nsCharTraits<char_type> char_traits; + typedef char_traits::incompatible_char_type incompatible_char_type; + + typedef nsTSubstring_CharT self_type; + typedef self_type abstract_string_type; + typedef self_type base_string_type; + + typedef self_type substring_type; + typedef nsTSubstringTuple_CharT substring_tuple_type; + typedef nsTString_CharT string_type; + + typedef nsReadingIterator<char_type> const_iterator; + typedef nsWritingIterator<char_type> iterator; + + typedef nsTStringComparator_CharT comparator_type; + + typedef char_type* char_iterator; + typedef const char_type* const_char_iterator; + + typedef uint32_t size_type; + typedef uint32_t index_type; + +public: + + // this acts like a virtual destructor + ~nsTSubstring_CharT() + { + Finalize(); + } + + /** + * reading iterators + */ + + const_char_iterator BeginReading() const + { + return mData; + } + const_char_iterator EndReading() const + { + return mData + mLength; + } + + /** + * deprecated reading iterators + */ + + const_iterator& BeginReading(const_iterator& aIter) const + { + aIter.mStart = mData; + aIter.mEnd = mData + mLength; + aIter.mPosition = aIter.mStart; + return aIter; + } + + const_iterator& EndReading(const_iterator& aIter) const + { + aIter.mStart = mData; + aIter.mEnd = mData + mLength; + aIter.mPosition = aIter.mEnd; + return aIter; + } + + const_char_iterator& BeginReading(const_char_iterator& aIter) const + { + return aIter = mData; + } + + const_char_iterator& EndReading(const_char_iterator& aIter) const + { + return aIter = mData + mLength; + } + + + /** + * writing iterators + */ + + char_iterator BeginWriting() + { + if (!EnsureMutable()) { + AllocFailed(mLength); + } + + return mData; + } + + char_iterator BeginWriting(const fallible_t&) + { + return EnsureMutable() ? 
mData : char_iterator(0); + } + + char_iterator EndWriting() + { + if (!EnsureMutable()) { + AllocFailed(mLength); + } + + return mData + mLength; + } + + char_iterator EndWriting(const fallible_t&) + { + return EnsureMutable() ? (mData + mLength) : char_iterator(0); + } + + char_iterator& BeginWriting(char_iterator& aIter) + { + return aIter = BeginWriting(); + } + + char_iterator& BeginWriting(char_iterator& aIter, const fallible_t& aFallible) + { + return aIter = BeginWriting(aFallible); + } + + char_iterator& EndWriting(char_iterator& aIter) + { + return aIter = EndWriting(); + } + + char_iterator& EndWriting(char_iterator& aIter, const fallible_t& aFallible) + { + return aIter = EndWriting(aFallible); + } + + /** + * deprecated writing iterators + */ + + iterator& BeginWriting(iterator& aIter) + { + char_type* data = BeginWriting(); + aIter.mStart = data; + aIter.mEnd = data + mLength; + aIter.mPosition = aIter.mStart; + return aIter; + } + + iterator& EndWriting(iterator& aIter) + { + char_type* data = BeginWriting(); + aIter.mStart = data; + aIter.mEnd = data + mLength; + aIter.mPosition = aIter.mEnd; + return aIter; + } + + /** + * accessors + */ + + // returns pointer to string data (not necessarily null-terminated) +#if defined(CharT_is_PRUnichar) && defined(MOZ_USE_CHAR16_WRAPPER) + char16ptr_t Data() const +#else + const char_type* Data() const +#endif + { + return mData; + } + + size_type Length() const + { + return mLength; + } + + uint32_t Flags() const + { + return mFlags; + } + + bool IsEmpty() const + { + return mLength == 0; + } + + bool IsLiteral() const + { + return (mFlags & F_LITERAL) != 0; + } + + bool IsVoid() const + { + return (mFlags & F_VOIDED) != 0; + } + + bool IsTerminated() const + { + return (mFlags & F_TERMINATED) != 0; + } + + char_type CharAt(index_type aIndex) const + { + NS_ASSERTION(aIndex < mLength, "index exceeds allowable range"); + return mData[aIndex]; + } + + char_type operator[](index_type aIndex) const + { + return 
CharAt(aIndex); + } + + char_type First() const + { + NS_ASSERTION(mLength > 0, "|First()| called on an empty string"); + return mData[0]; + } + + inline char_type Last() const + { + NS_ASSERTION(mLength > 0, "|Last()| called on an empty string"); + return mData[mLength - 1]; + } + + size_type NS_FASTCALL CountChar(char_type) const; + int32_t NS_FASTCALL FindChar(char_type, index_type aOffset = 0) const; + + inline bool Contains(char_type aChar) const + { + return FindChar(aChar) != kNotFound; + } + + /** + * equality + */ + + bool NS_FASTCALL Equals(const self_type&) const; + bool NS_FASTCALL Equals(const self_type&, const comparator_type&) const; + + bool NS_FASTCALL Equals(const char_type* aData) const; + bool NS_FASTCALL Equals(const char_type* aData, + const comparator_type& aComp) const; + +#if defined(CharT_is_PRUnichar) && defined(MOZ_USE_CHAR16_WRAPPER) + bool NS_FASTCALL Equals(char16ptr_t aData) const + { + return Equals(static_cast<const char16_t*>(aData)); + } + bool NS_FASTCALL Equals(char16ptr_t aData, const comparator_type& aComp) const + { + return Equals(static_cast<const char16_t*>(aData), aComp); + } +#endif + + /** + * An efficient comparison with ASCII that can be used even + * for wide strings. Call this version when you know the + * length of 'data'. + */ + bool NS_FASTCALL EqualsASCII(const char* aData, size_type aLen) const; + /** + * An efficient comparison with ASCII that can be used even + * for wide strings. Call this version when 'data' is + * null-terminated. + */ + bool NS_FASTCALL EqualsASCII(const char* aData) const; + + // EqualsLiteral must ONLY be applied to an actual literal string, or + // a char array *constant* declared without an explicit size. + // Do not attempt to use it with a regular char* pointer, or with a + // non-constant char array variable. Use EqualsASCII for them. + // The template trick to acquire the array length at compile time without + // using a macro is due to Corey Kosak, with much thanks. 
+ template<int N> + inline bool EqualsLiteral(const char (&aStr)[N]) const + { + return EqualsASCII(aStr, N - 1); + } + + // The LowerCaseEquals methods compare the ASCII-lowercase version of + // this string (lowercasing only ASCII uppercase characters) to some + // ASCII/Literal string. The ASCII string is *not* lowercased for + // you. If you compare to an ASCII or literal string that contains an + // uppercase character, it is guaranteed to return false. We will + // throw assertions too. + bool NS_FASTCALL LowerCaseEqualsASCII(const char* aData, + size_type aLen) const; + bool NS_FASTCALL LowerCaseEqualsASCII(const char* aData) const; + + // LowerCaseEqualsLiteral must ONLY be applied to an actual + // literal string, or a char array *constant* declared without an + // explicit size. Do not attempt to use it with a regular char* + // pointer, or with a non-constant char array variable. Use + // LowerCaseEqualsASCII for them. + template<int N> + inline bool LowerCaseEqualsLiteral(const char (&aStr)[N]) const + { + return LowerCaseEqualsASCII(aStr, N - 1); + } + + /** + * assignment + */ + + void NS_FASTCALL Assign(char_type aChar); + MOZ_MUST_USE bool NS_FASTCALL Assign(char_type aChar, const fallible_t&); + + void NS_FASTCALL Assign(const char_type* aData); + MOZ_MUST_USE bool NS_FASTCALL Assign(const char_type* aData, + const fallible_t&); + + void NS_FASTCALL Assign(const char_type* aData, size_type aLength); + MOZ_MUST_USE bool NS_FASTCALL Assign(const char_type* aData, + size_type aLength, const fallible_t&); + + void NS_FASTCALL Assign(const self_type&); + MOZ_MUST_USE bool NS_FASTCALL Assign(const self_type&, const fallible_t&); + + void NS_FASTCALL Assign(const substring_tuple_type&); + MOZ_MUST_USE bool NS_FASTCALL Assign(const substring_tuple_type&, + const fallible_t&); + +#if defined(CharT_is_PRUnichar) && defined(MOZ_USE_CHAR16_WRAPPER) + void Assign(char16ptr_t aData) + { + Assign(static_cast<const char16_t*>(aData)); + } + + void 
Assign(char16ptr_t aData, size_type aLength) + { + Assign(static_cast<const char16_t*>(aData), aLength); + } + + MOZ_MUST_USE bool Assign(char16ptr_t aData, size_type aLength, + const fallible_t& aFallible) + { + return Assign(static_cast<const char16_t*>(aData), aLength, + aFallible); + } +#endif + + void NS_FASTCALL AssignASCII(const char* aData, size_type aLength); + MOZ_MUST_USE bool NS_FASTCALL AssignASCII(const char* aData, + size_type aLength, + const fallible_t&); + + void NS_FASTCALL AssignASCII(const char* aData) + { + AssignASCII(aData, mozilla::AssertedCast<size_type, size_t>(strlen(aData))); + } + MOZ_MUST_USE bool NS_FASTCALL AssignASCII(const char* aData, + const fallible_t& aFallible) + { + return AssignASCII(aData, + mozilla::AssertedCast<size_type, size_t>(strlen(aData)), + aFallible); + } + + // AssignLiteral must ONLY be applied to an actual literal string, or + // a char array *constant* declared without an explicit size. + // Do not attempt to use it with a regular char* pointer, or with a + // non-constant char array variable. Use AssignASCII for those. + // There are no fallible versions of these methods because they only really + // apply to small allocations that we wouldn't want to check anyway.
+ template<int N> + void AssignLiteral(const char_type (&aStr)[N]) + { + AssignLiteral(aStr, N - 1); + } +#ifdef CharT_is_PRUnichar + template<int N> + void AssignLiteral(const char (&aStr)[N]) + { + AssignASCII(aStr, N - 1); + } +#endif + + self_type& operator=(char_type aChar) + { + Assign(aChar); + return *this; + } + self_type& operator=(const char_type* aData) + { + Assign(aData); + return *this; + } +#if defined(CharT_is_PRUnichar) && defined(MOZ_USE_CHAR16_WRAPPER) + self_type& operator=(char16ptr_t aData) + { + Assign(aData); + return *this; + } +#endif + self_type& operator=(const self_type& aStr) + { + Assign(aStr); + return *this; + } + self_type& operator=(const substring_tuple_type& aTuple) + { + Assign(aTuple); + return *this; + } + + void NS_FASTCALL Adopt(char_type* aData, size_type aLength = size_type(-1)); + + + /** + * buffer manipulation + */ + + void NS_FASTCALL Replace(index_type aCutStart, size_type aCutLength, + char_type aChar); + MOZ_MUST_USE bool NS_FASTCALL Replace(index_type aCutStart, + size_type aCutLength, + char_type aChar, + const fallible_t&); + void NS_FASTCALL Replace(index_type aCutStart, size_type aCutLength, + const char_type* aData, + size_type aLength = size_type(-1)); + MOZ_MUST_USE bool NS_FASTCALL Replace(index_type aCutStart, + size_type aCutLength, + const char_type* aData, + size_type aLength, + const fallible_t&); + void Replace(index_type aCutStart, size_type aCutLength, + const self_type& aStr) + { + Replace(aCutStart, aCutLength, aStr.Data(), aStr.Length()); + } + MOZ_MUST_USE bool Replace(index_type aCutStart, + size_type aCutLength, + const self_type& aStr, + const fallible_t& aFallible) + { + return Replace(aCutStart, aCutLength, aStr.Data(), aStr.Length(), + aFallible); + } + void NS_FASTCALL Replace(index_type aCutStart, size_type aCutLength, + const substring_tuple_type& aTuple); + + void NS_FASTCALL ReplaceASCII(index_type aCutStart, size_type aCutLength, + const char* aData, + size_type aLength = 
size_type(-1)); + + MOZ_MUST_USE bool NS_FASTCALL ReplaceASCII(index_type aCutStart, size_type aCutLength, + const char* aData, + size_type aLength, + const fallible_t&); + + // ReplaceLiteral must ONLY be applied to an actual literal string. + // Do not attempt to use it with a regular char* pointer, or with a char + // array variable. Use Replace or ReplaceASCII for those. + template<int N> + void ReplaceLiteral(index_type aCutStart, size_type aCutLength, + const char_type (&aStr)[N]) + { + ReplaceLiteral(aCutStart, aCutLength, aStr, N - 1); + } + + void Append(char_type aChar) + { + Replace(mLength, 0, aChar); + } + MOZ_MUST_USE bool Append(char_type aChar, const fallible_t& aFallible) + { + return Replace(mLength, 0, aChar, aFallible); + } + void Append(const char_type* aData, size_type aLength = size_type(-1)) + { + Replace(mLength, 0, aData, aLength); + } + MOZ_MUST_USE bool Append(const char_type* aData, size_type aLength, + const fallible_t& aFallible) + { + return Replace(mLength, 0, aData, aLength, aFallible); + } + +#if defined(CharT_is_PRUnichar) && defined(MOZ_USE_CHAR16_WRAPPER) + void Append(char16ptr_t aData, size_type aLength = size_type(-1)) + { + Append(static_cast<const char16_t*>(aData), aLength); + } +#endif + + void Append(const self_type& aStr) + { + Replace(mLength, 0, aStr); + } + MOZ_MUST_USE bool Append(const self_type& aStr, const fallible_t& aFallible) + { + return Replace(mLength, 0, aStr, aFallible); + } + void Append(const substring_tuple_type& aTuple) + { + Replace(mLength, 0, aTuple); + } + + void AppendASCII(const char* aData, size_type aLength = size_type(-1)) + { + ReplaceASCII(mLength, 0, aData, aLength); + } + + MOZ_MUST_USE bool AppendASCII(const char* aData, const fallible_t& aFallible) + { + return ReplaceASCII(mLength, 0, aData, size_type(-1), aFallible); + } + + MOZ_MUST_USE bool AppendASCII(const char* aData, size_type aLength, const fallible_t& aFallible) + { + return ReplaceASCII(mLength, 0, aData, aLength, 
aFallible); + } + + /** + * Append a formatted string to the current string. Uses the format + * codes documented in prprf.h + */ + void AppendPrintf(const char* aFormat, ...); + void AppendPrintf(const char* aFormat, va_list aAp); + void AppendInt(int32_t aInteger) + { + AppendPrintf("%d", aInteger); + } + void AppendInt(int32_t aInteger, int aRadix) + { + const char* fmt = aRadix == 10 ? "%d" : aRadix == 8 ? "%o" : "%x"; + AppendPrintf(fmt, aInteger); + } + void AppendInt(uint32_t aInteger) + { + AppendPrintf("%u", aInteger); + } + void AppendInt(uint32_t aInteger, int aRadix) + { + const char* fmt = aRadix == 10 ? "%u" : aRadix == 8 ? "%o" : "%x"; + AppendPrintf(fmt, aInteger); + } + void AppendInt(int64_t aInteger) + { + AppendPrintf("%lld", aInteger); + } + void AppendInt(int64_t aInteger, int aRadix) + { + const char* fmt = aRadix == 10 ? "%lld" : aRadix == 8 ? "%llo" : "%llx"; + AppendPrintf(fmt, aInteger); + } + void AppendInt(uint64_t aInteger) + { + AppendPrintf("%llu", aInteger); + } + void AppendInt(uint64_t aInteger, int aRadix) + { + const char* fmt = aRadix == 10 ? "%llu" : aRadix == 8 ? "%llo" : "%llx"; + AppendPrintf(fmt, aInteger); + } + + /** + * Append the given float to this string + */ + void NS_FASTCALL AppendFloat(float aFloat); + void NS_FASTCALL AppendFloat(double aFloat); +public: + + // AppendLiteral must ONLY be applied to an actual literal string. + // Do not attempt to use it with a regular char* pointer, or with a char + // array variable. Use Append or AppendASCII for those. 
+ template<int N> + void AppendLiteral(const char_type (&aStr)[N]) + { + ReplaceLiteral(mLength, 0, aStr, N - 1); + } +#ifdef CharT_is_PRUnichar + template<int N> + void AppendLiteral(const char (&aStr)[N]) + { + AppendASCII(aStr, N - 1); + } + + template<int N> + MOZ_MUST_USE bool AppendLiteral(const char (&aStr)[N], const fallible_t& aFallible) + { + return AppendASCII(aStr, N - 1, aFallible); + } +#endif + + self_type& operator+=(char_type aChar) + { + Append(aChar); + return *this; + } + self_type& operator+=(const char_type* aData) + { + Append(aData); + return *this; + } +#if defined(CharT_is_PRUnichar) && defined(MOZ_USE_CHAR16_WRAPPER) + self_type& operator+=(char16ptr_t aData) + { + Append(aData); + return *this; + } +#endif + self_type& operator+=(const self_type& aStr) + { + Append(aStr); + return *this; + } + self_type& operator+=(const substring_tuple_type& aTuple) + { + Append(aTuple); + return *this; + } + + void Insert(char_type aChar, index_type aPos) + { + Replace(aPos, 0, aChar); + } + void Insert(const char_type* aData, index_type aPos, + size_type aLength = size_type(-1)) + { + Replace(aPos, 0, aData, aLength); + } +#if defined(CharT_is_PRUnichar) && defined(MOZ_USE_CHAR16_WRAPPER) + void Insert(char16ptr_t aData, index_type aPos, + size_type aLength = size_type(-1)) + { + Insert(static_cast<const char16_t*>(aData), aPos, aLength); + } +#endif + void Insert(const self_type& aStr, index_type aPos) + { + Replace(aPos, 0, aStr); + } + void Insert(const substring_tuple_type& aTuple, index_type aPos) + { + Replace(aPos, 0, aTuple); + } + + // InsertLiteral must ONLY be applied to an actual literal string. + // Do not attempt to use it with a regular char* pointer, or with a char + // array variable. Use Insert for those. 
+ template<int N> + void InsertLiteral(const char_type (&aStr)[N], index_type aPos) + { + ReplaceLiteral(aPos, 0, aStr, N - 1); + } + + void Cut(index_type aCutStart, size_type aCutLength) + { + Replace(aCutStart, aCutLength, char_traits::sEmptyBuffer, 0); + } + + + /** + * buffer sizing + */ + + /** + * Attempts to set the capacity to the given size in number of + * characters, without affecting the length of the string. + * There is no need to include room for the null terminator: it is + * the job of the string class. + * Also ensures that the buffer is mutable. + */ + void NS_FASTCALL SetCapacity(size_type aNewCapacity); + MOZ_MUST_USE bool NS_FASTCALL SetCapacity(size_type aNewCapacity, + const fallible_t&); + + void NS_FASTCALL SetLength(size_type aNewLength); + MOZ_MUST_USE bool NS_FASTCALL SetLength(size_type aNewLength, + const fallible_t&); + + void Truncate(size_type aNewLength = 0) + { + NS_ASSERTION(aNewLength <= mLength, "Truncate cannot make string longer"); + SetLength(aNewLength); + } + + + /** + * buffer access + */ + + + /** + * Get a const pointer to the string's internal buffer. The caller + * MUST NOT modify the characters at the returned address. + * + * @returns The length of the buffer in characters. + */ + inline size_type GetData(const char_type** aData) const + { + *aData = mData; + return mLength; + } + + /** + * Get a pointer to the string's internal buffer, optionally resizing + * the buffer first. If size_type(-1) is passed for newLen, then the + * current length of the string is used. The caller MAY modify the + * characters at the returned address (up to but not exceeding the + * length of the string). + * + * @returns The length of the buffer in characters or 0 if unable to + * satisfy the request due to low-memory conditions. + */ + size_type GetMutableData(char_type** aData, size_type aNewLen = size_type(-1)) + { + if (!EnsureMutable(aNewLen)) { + AllocFailed(aNewLen == size_type(-1) ? 
mLength : aNewLen); + } + + *aData = mData; + return mLength; + } + + size_type GetMutableData(char_type** aData, size_type aNewLen, const fallible_t&) + { + if (!EnsureMutable(aNewLen)) { + *aData = nullptr; + return 0; + } + + *aData = mData; + return mLength; + } + +#if defined(CharT_is_PRUnichar) && defined(MOZ_USE_CHAR16_WRAPPER) + size_type GetMutableData(wchar_t** aData, size_type aNewLen = size_type(-1)) + { + return GetMutableData(reinterpret_cast<char16_t**>(aData), aNewLen); + } + + size_type GetMutableData(wchar_t** aData, size_type aNewLen, + const fallible_t& aFallible) + { + return GetMutableData(reinterpret_cast<char16_t**>(aData), aNewLen, + aFallible); + } +#endif + + + /** + * string data is never null, but can be marked void. if true, the + * string will be truncated. @see nsTSubstring::IsVoid + */ + + void NS_FASTCALL SetIsVoid(bool); + + /** + * This method is used to remove all occurrences of aChar from this + * string. + * + * @param aChar -- char to be stripped + * @param aOffset -- where in this string to start stripping chars + */ + + void StripChar(char_type aChar, int32_t aOffset = 0); + + /** + * This method is used to remove all occurrences of aChars from this + * string. + * + * @param aChars -- chars to be stripped + * @param aOffset -- where in this string to start stripping chars + */ + + void StripChars(const char_type* aChars, uint32_t aOffset = 0); + + /** + * If the string uses a shared buffer, this method + * clears the pointer without releasing the buffer. + */ + void ForgetSharedBuffer() + { + if (mFlags & nsSubstring::F_SHARED) { + mData = char_traits::sEmptyBuffer; + mLength = 0; + mFlags = F_TERMINATED; + } + } + +public: + + /** + * this is public to support automatic conversion of tuple to string + * base type, which helps avoid converting to nsTAString. 
+ */ + MOZ_IMPLICIT nsTSubstring_CharT(const substring_tuple_type& aTuple) + : mData(nullptr) + , mLength(0) + , mFlags(F_NONE) + { + Assign(aTuple); + } + + /** + * allows for direct initialization of a nsTSubstring object. + * + * NOTE: this constructor is declared public _only_ for convenience + * inside the string implementation. + */ + // XXXbz or can I just include nscore.h and use NS_BUILD_REFCNT_LOGGING? +#if defined(DEBUG) || defined(FORCE_BUILD_REFCNT_LOGGING) +#define XPCOM_STRING_CONSTRUCTOR_OUT_OF_LINE + nsTSubstring_CharT(char_type* aData, size_type aLength, uint32_t aFlags); +#else +#undef XPCOM_STRING_CONSTRUCTOR_OUT_OF_LINE + nsTSubstring_CharT(char_type* aData, size_type aLength, uint32_t aFlags) + : mData(aData) + , mLength(aLength) + , mFlags(aFlags) + { + MOZ_RELEASE_ASSERT(CheckCapacity(aLength), "String is too large."); + } +#endif /* DEBUG || FORCE_BUILD_REFCNT_LOGGING */ + + size_t SizeOfExcludingThisIfUnshared(mozilla::MallocSizeOf aMallocSizeOf) + const; + size_t SizeOfIncludingThisIfUnshared(mozilla::MallocSizeOf aMallocSizeOf) + const; + + /** + * WARNING: Only use these functions if you really know what you are + * doing, because they can easily lead to double-counting strings. If + * you do use them, please explain clearly in a comment why it's safe + * and won't lead to double-counting. + */ + size_t SizeOfExcludingThisEvenIfShared(mozilla::MallocSizeOf aMallocSizeOf) + const; + size_t SizeOfIncludingThisEvenIfShared(mozilla::MallocSizeOf aMallocSizeOf) + const; + + template<class T> + void NS_ABORT_OOM(T) + { + struct never {}; // a compiler-friendly way to do static_assert(false) + static_assert(mozilla::IsSame<T, never>::value, + "In string classes, use AllocFailed to account for sizeof(char_type). 
" + "Use the global ::NS_ABORT_OOM if you really have a count of bytes."); + } + + MOZ_ALWAYS_INLINE void AllocFailed(size_t aLength) + { + ::NS_ABORT_OOM(aLength * sizeof(char_type)); + } + +protected: + + friend class nsTObsoleteAStringThunk_CharT; + friend class nsTSubstringTuple_CharT; + + // XXX GCC 3.4 needs this :-( + friend class nsTPromiseFlatString_CharT; + + char_type* mData; + size_type mLength; + uint32_t mFlags; + + // default initialization + nsTSubstring_CharT() + : mData(char_traits::sEmptyBuffer) + , mLength(0) + , mFlags(F_TERMINATED) + { + } + + // version of constructor that leaves mData and mLength uninitialized + explicit + nsTSubstring_CharT(uint32_t aFlags) + : mFlags(aFlags) + { + } + + // copy-constructor, constructs as dependent on given object + // (NOTE: this is for internal use only) + nsTSubstring_CharT(const self_type& aStr) + : mData(aStr.mData) + , mLength(aStr.mLength) + , mFlags(aStr.mFlags & (F_TERMINATED | F_VOIDED)) + { + } + + /** + * this function releases mData and does not change the value of + * any of its member variables. in other words, this function acts + * like a destructor. + */ + void NS_FASTCALL Finalize(); + + /** + * this function prepares mData to be mutated. + * + * @param aCapacity specifies the required capacity of mData + * @param aOldData returns null or the old value of mData + * @param aOldFlags returns 0 or the old value of mFlags + * + * if mData is already mutable and of sufficient capacity, then this + * function will return immediately. otherwise, it will either resize + * mData or allocate a new shared buffer. if it needs to allocate a + * new buffer, then it will return the old buffer and the corresponding + * flags. this allows the caller to decide when to free the old data. + * + * this function returns false if is unable to allocate sufficient + * memory. + * + * XXX we should expose a way for subclasses to free old_data. 
+ */ + bool NS_FASTCALL MutatePrep(size_type aCapacity, + char_type** aOldData, uint32_t* aOldFlags); + + /** + * this function prepares a section of mData to be modified. if + * necessary, this function will reallocate mData and possibly move + * existing data to open up the specified section. + * + * @param aCutStart specifies the starting offset of the section + * @param aCutLength specifies the length of the section to be replaced + * @param aNewLength specifies the length of the new section + * + * for example, suppose mData contains the string "abcdef" then + * + * ReplacePrep(2, 3, 4); + * + * would cause mData to look like "ab____f" where the characters + * indicated by '_' have an unspecified value and can be freely + * modified. this function will null-terminate mData upon return. + * + * this function returns false if it is unable to allocate sufficient + * memory. + */ + MOZ_MUST_USE bool ReplacePrep(index_type aCutStart, + size_type aCutLength, + size_type aNewLength); + + MOZ_MUST_USE bool NS_FASTCALL ReplacePrepInternal( + index_type aCutStart, + size_type aCutLength, + size_type aNewFragLength, + size_type aNewTotalLength); + + /** + * returns the number of writable storage units starting at mData. + * the value does not include space for the null-terminator character. + * + * NOTE: this function returns 0 if mData is immutable (or the buffer + * is 0-sized). + */ + size_type NS_FASTCALL Capacity() const; + + /** + * this helper function can be called prior to directly manipulating + * the contents of mData. see, for example, BeginWriting. + */ + MOZ_MUST_USE bool NS_FASTCALL EnsureMutable( + size_type aNewLen = size_type(-1)); + + /** + * returns true if this string overlaps with the given string fragment.
+ */ + bool IsDependentOn(const char_type* aStart, const char_type* aEnd) const + { + /** + * if it _isn't_ the case that one fragment starts after the other ends, + * or ends before the other starts, then, they conflict: + * + * !(f2.begin >= f1.end || f2.end <= f1.begin) + * + * Simplified, that gives us: + */ + return (aStart < (mData + mLength) && aEnd > mData); + } + + /** + * Checks if the given capacity is valid for this string type. + */ + static MOZ_MUST_USE bool CheckCapacity(size_type aCapacity) { + if (aCapacity > kMaxCapacity) { + // Also assert for |aCapacity| equal to |size_type(-1)|, since we used to + // use that value to flag immutability. + NS_ASSERTION(aCapacity != size_type(-1), "Bogus capacity"); + return false; + } + + return true; + } + + /** + * stores aDataFlags in the low 16 bits of mFlags; the high 16 bits are kept + */ + void SetDataFlags(uint32_t aDataFlags) + { + NS_ASSERTION((aDataFlags & 0xFFFF0000) == 0, "bad flags"); + mFlags = aDataFlags | (mFlags & 0xFFFF0000); + } + + void NS_FASTCALL ReplaceLiteral(index_type aCutStart, size_type aCutLength, + const char_type* aData, size_type aLength); + + static int AppendFunc(void* aArg, const char* aStr, uint32_t aLen); + + static const size_type kMaxCapacity; +public: + + // NOTE: this method is declared public _only_ for convenience for + // callers who don't have access to the original nsLiteralString_CharT. + void NS_FASTCALL AssignLiteral(const char_type* aData, size_type aLength); + + // mFlags is a bitwise combination of the following flags. the meaning + // and interpretation of these flags is an implementation detail. + // + // NOTE: these flags are declared public _only_ for convenience inside + // the string implementation.
+ + enum + { + F_NONE = 0, // no flags + + // data flags are in the lower 16-bits + F_TERMINATED = 1 << 0, // IsTerminated returns true + F_VOIDED = 1 << 1, // IsVoid returns true + F_SHARED = 1 << 2, // mData points to a heap-allocated, shared buffer + F_OWNED = 1 << 3, // mData points to a heap-allocated, raw buffer + F_FIXED = 1 << 4, // mData points to a fixed-size writable, dependent buffer + F_LITERAL = 1 << 5, // mData points to a string literal; F_TERMINATED will also be set + + // class flags are in the upper 16-bits + F_CLASS_FIXED = 1 << 16 // indicates that |this| is of type nsTFixedString + }; + + // + // Some terminology: + // + // "dependent buffer" A dependent buffer is one that the string class + // does not own. The string class relies on some + // external code to ensure the lifetime of the + // dependent buffer. + // + // "shared buffer" A shared buffer is one that the string class + // allocates. When it allocates a shared string + // buffer, it allocates some additional space at + // the beginning of the buffer for additional + // fields, including a reference count and a + // buffer length. See nsStringHeader. + // + // "adopted buffer" An adopted buffer is a raw string buffer + // allocated on the heap (using moz_xmalloc) + // of which the string class subsumes ownership. + // + // Some comments about the string flags: + // + // F_SHARED, F_OWNED, and F_FIXED are all mutually exclusive. They + // indicate the allocation type of mData. If none of these flags + // are set, then the string buffer is dependent. + // + // F_SHARED, F_OWNED, or F_FIXED imply F_TERMINATED. This is because + // the string classes always allocate null-terminated buffers, and + // non-terminated substrings are always dependent. + // + // F_VOIDED implies F_TERMINATED, and moreover it implies that mData + // points to char_traits::sEmptyBuffer. Therefore, F_VOIDED is + // mutually exclusive with F_SHARED, F_OWNED, and F_FIXED. 
+ // +}; + +int NS_FASTCALL +Compare(const nsTSubstring_CharT::base_string_type& aLhs, + const nsTSubstring_CharT::base_string_type& aRhs, + const nsTStringComparator_CharT& = nsTDefaultStringComparator_CharT()); + + +inline bool +operator!=(const nsTSubstring_CharT::base_string_type& aLhs, + const nsTSubstring_CharT::base_string_type& aRhs) +{ + return !aLhs.Equals(aRhs); +} + +inline bool +operator!=(const nsTSubstring_CharT::base_string_type& aLhs, + const nsTSubstring_CharT::char_type* aRhs) +{ + return !aLhs.Equals(aRhs); +} + +inline bool +operator<(const nsTSubstring_CharT::base_string_type& aLhs, + const nsTSubstring_CharT::base_string_type& aRhs) +{ + return Compare(aLhs, aRhs) < 0; +} + +inline bool +operator<=(const nsTSubstring_CharT::base_string_type& aLhs, + const nsTSubstring_CharT::base_string_type& aRhs) +{ + return Compare(aLhs, aRhs) <= 0; +} + +inline bool +operator==(const nsTSubstring_CharT::base_string_type& aLhs, + const nsTSubstring_CharT::base_string_type& aRhs) +{ + return aLhs.Equals(aRhs); +} + +inline bool +operator==(const nsTSubstring_CharT::base_string_type& aLhs, + const nsTSubstring_CharT::char_type* aRhs) +{ + return aLhs.Equals(aRhs); +} + + +inline bool +operator>=(const nsTSubstring_CharT::base_string_type& aLhs, + const nsTSubstring_CharT::base_string_type& aRhs) +{ + return Compare(aLhs, aRhs) >= 0; +} + +inline bool +operator>(const nsTSubstring_CharT::base_string_type& aLhs, + const nsTSubstring_CharT::base_string_type& aRhs) +{ + return Compare(aLhs, aRhs) > 0; +} diff --git a/xpcom/string/nsTSubstringTuple.cpp b/xpcom/string/nsTSubstringTuple.cpp new file mode 100644 index 000000000..2a84a9a4e --- /dev/null +++ b/xpcom/string/nsTSubstringTuple.cpp @@ -0,0 +1,96 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/CheckedInt.h" + +/** + * computes the aggregate string length + */ + +nsTSubstringTuple_CharT::size_type +nsTSubstringTuple_CharT::Length() const +{ + mozilla::CheckedInt<size_type> len; + if (mHead) { + len = mHead->Length(); + } else { + len = TO_SUBSTRING(mFragA).Length(); + } + + len += TO_SUBSTRING(mFragB).Length(); + MOZ_RELEASE_ASSERT(len.isValid(), "Substring tuple length is invalid"); + return len.value(); +} + + +/** + * writes the aggregate string to the given buffer. aBufLen is assumed + * to be equal to or greater than the value returned by the Length() + * method. the string written to |aBuf| is not null-terminated. + */ + +void +nsTSubstringTuple_CharT::WriteTo(char_type* aBuf, uint32_t aBufLen) const +{ + const substring_type& b = TO_SUBSTRING(mFragB); + + MOZ_RELEASE_ASSERT(aBufLen >= b.Length(), "buffer too small"); + uint32_t headLen = aBufLen - b.Length(); + if (mHead) { + mHead->WriteTo(aBuf, headLen); + } else { + const substring_type& a = TO_SUBSTRING(mFragA); + + MOZ_RELEASE_ASSERT(a.Length() == headLen, "buffer incorrectly sized"); + char_traits::copy(aBuf, a.Data(), a.Length()); + } + + char_traits::copy(aBuf + headLen, b.Data(), b.Length()); + +#if 0 + // we need to write out data into |aBuf|, ending at |aBuf + aBufLen|. So our + // data needs to precede |aBuf + aBufLen| exactly. We trust that the buffer + // was properly sized! 
+ + const substring_type& b = TO_SUBSTRING(mFragB); + + NS_ASSERTION(aBufLen >= b.Length(), "buffer is too small"); + char_traits::copy(aBuf + aBufLen - b.Length(), b.Data(), b.Length()); + + aBufLen -= b.Length(); + + if (mHead) { + mHead->WriteTo(aBuf, aBufLen); + } else { + const substring_type& a = TO_SUBSTRING(mFragA); + NS_ASSERTION(aBufLen == a.Length(), "buffer is too small"); + char_traits::copy(aBuf, a.Data(), a.Length()); + } +#endif +} + + +/** + * returns true if this tuple is dependent on (i.e., overlapping with) + * the given char sequence. + */ + +bool +nsTSubstringTuple_CharT::IsDependentOn(const char_type* aStart, + const char_type* aEnd) const +{ + // we start with the right-most fragment since it is faster to check. + + if (TO_SUBSTRING(mFragB).IsDependentOn(aStart, aEnd)) { + return true; + } + + if (mHead) { + return mHead->IsDependentOn(aStart, aEnd); + } + + return TO_SUBSTRING(mFragA).IsDependentOn(aStart, aEnd); +} diff --git a/xpcom/string/nsTSubstringTuple.h b/xpcom/string/nsTSubstringTuple.h new file mode 100644 index 000000000..5d24e2159 --- /dev/null +++ b/xpcom/string/nsTSubstringTuple.h @@ -0,0 +1,84 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +// IWYU pragma: private, include "nsString.h" + +/** + * nsTSubstringTuple_CharT + * + * Represents a tuple of string fragments. Built as a recursive binary tree. + * It is used to implement the concatenation of two or more string objects. + * + * NOTE: This class is a private implementation detail and should never be + * referenced outside the string code. 
+ */ +class nsTSubstringTuple_CharT +{ +public: + + typedef CharT char_type; + typedef nsCharTraits<char_type> char_traits; + + typedef nsTSubstringTuple_CharT self_type; + typedef nsTSubstring_CharT substring_type; + typedef nsTSubstring_CharT base_string_type; + typedef uint32_t size_type; + +public: + + nsTSubstringTuple_CharT(const base_string_type* aStrA, + const base_string_type* aStrB) + : mHead(nullptr) + , mFragA(aStrA) + , mFragB(aStrB) + { + } + + nsTSubstringTuple_CharT(const self_type& aHead, + const base_string_type* aStrB) + : mHead(&aHead) + , mFragA(nullptr) // this fragment is ignored when aHead != nullptr + , mFragB(aStrB) + { + } + + /** + * computes the aggregate string length + */ + size_type Length() const; + + /** + * writes the aggregate string to the given buffer. bufLen is assumed + * to be equal to or greater than the value returned by the Length() + * method. the string written to |buf| is not null-terminated. + */ + void WriteTo(char_type* aBuf, uint32_t aBufLen) const; + + /** + * returns true if this tuple is dependent on (i.e., overlapping with) + * the given char sequence. 
+ */ + bool IsDependentOn(const char_type* aStart, const char_type* aEnd) const; + +private: + + const self_type* mHead; + const base_string_type* mFragA; + const base_string_type* mFragB; +}; + +inline const nsTSubstringTuple_CharT +operator+(const nsTSubstringTuple_CharT::base_string_type& aStrA, + const nsTSubstringTuple_CharT::base_string_type& aStrB) +{ + return nsTSubstringTuple_CharT(&aStrA, &aStrB); +} + +inline const nsTSubstringTuple_CharT +operator+(const nsTSubstringTuple_CharT& aHead, + const nsTSubstringTuple_CharT::base_string_type& aStrB) +{ + return nsTSubstringTuple_CharT(aHead, &aStrB); +} diff --git a/xpcom/string/nsUTF8Utils.h b/xpcom/string/nsUTF8Utils.h new file mode 100644 index 000000000..9f38fa555 --- /dev/null +++ b/xpcom/string/nsUTF8Utils.h @@ -0,0 +1,742 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef nsUTF8Utils_h_ +#define nsUTF8Utils_h_ + +// This file may be used in two ways: if MOZILLA_INTERNAL_API is defined, this +// file will provide signatures for the Mozilla abstract string types. It will +// use XPCOM assertion/debugging macros, etc. 
+ +#include "nscore.h" +#include "mozilla/Assertions.h" +#include "mozilla/SSE.h" +#include "mozilla/TypeTraits.h" + +#include "nsCharTraits.h" + +class UTF8traits +{ +public: + static bool isASCII(char aChar) + { + return (aChar & 0x80) == 0x00; + } + static bool isInSeq(char aChar) + { + return (aChar & 0xC0) == 0x80; + } + static bool is2byte(char aChar) + { + return (aChar & 0xE0) == 0xC0; + } + static bool is3byte(char aChar) + { + return (aChar & 0xF0) == 0xE0; + } + static bool is4byte(char aChar) + { + return (aChar & 0xF8) == 0xF0; + } + static bool is5byte(char aChar) + { + return (aChar & 0xFC) == 0xF8; + } + static bool is6byte(char aChar) + { + return (aChar & 0xFE) == 0xFC; + } +}; + +/** + * Extract the next UCS-4 character from the buffer and return it. The + * pointer passed in is advanced to the start of the next character in the + * buffer. aErr must be non-null: it is set to true (and 0 is returned, + * without advancing the buffer) on truncated or malformed input. Overlong + * sequences and invalid code points yield UCS2_REPLACEMENT_CHAR instead. + */ + +class UTF8CharEnumerator +{ +public: + static uint32_t NextChar(const char** aBuffer, const char* aEnd, bool* aErr) + { + NS_ASSERTION(aBuffer && *aBuffer, "null buffer!"); + + const char* p = *aBuffer; + *aErr = false; + + if (p >= aEnd) { + *aErr = true; + + return 0; + } + + char c = *p++; + + if (UTF8traits::isASCII(c)) { + *aBuffer = p; + return c; + } + + uint32_t ucs4; + uint32_t minUcs4; + int32_t state = 0; + + if (!CalcState(c, ucs4, minUcs4, state)) { + NS_ERROR("Not a UTF-8 string. 
This code should only be used for converting from known UTF-8 strings."); + *aErr = true; + + return 0; + } + + while (state--) { + if (p == aEnd) { + *aErr = true; + + return 0; + } + + c = *p++; + + if (!AddByte(c, state, ucs4)) { + *aErr = true; + + return 0; + } + } + + if (ucs4 < minUcs4) { + // Overlong sequence + ucs4 = UCS2_REPLACEMENT_CHAR; + } else if (ucs4 >= 0xD800 && + (ucs4 <= 0xDFFF || ucs4 >= UCS_END)) { + // Surrogates and code points outside the Unicode range. + ucs4 = UCS2_REPLACEMENT_CHAR; + } + + *aBuffer = p; + return ucs4; + } + +private: + static bool CalcState(char aChar, uint32_t& aUcs4, uint32_t& aMinUcs4, + int32_t& aState) + { + if (UTF8traits::is2byte(aChar)) { + aUcs4 = (uint32_t(aChar) << 6) & 0x000007C0L; + aState = 1; + aMinUcs4 = 0x00000080; + } else if (UTF8traits::is3byte(aChar)) { + aUcs4 = (uint32_t(aChar) << 12) & 0x0000F000L; + aState = 2; + aMinUcs4 = 0x00000800; + } else if (UTF8traits::is4byte(aChar)) { + aUcs4 = (uint32_t(aChar) << 18) & 0x001F0000L; + aState = 3; + aMinUcs4 = 0x00010000; + } else if (UTF8traits::is5byte(aChar)) { + aUcs4 = (uint32_t(aChar) << 24) & 0x03000000L; + aState = 4; + aMinUcs4 = 0x00200000; + } else if (UTF8traits::is6byte(aChar)) { + aUcs4 = (uint32_t(aChar) << 30) & 0x40000000L; + aState = 5; + aMinUcs4 = 0x04000000; + } else { + return false; + } + + return true; + } + + static bool AddByte(char aChar, int32_t aState, uint32_t& aUcs4) + { + if (UTF8traits::isInSeq(aChar)) { + int32_t shift = aState * 6; + aUcs4 |= (uint32_t(aChar) & 0x3F) << shift; + return true; + } + + return false; + } +}; + + +/** + * Extract the next UCS-4 character from the buffer and return it. The + * pointer passed in is advanced to the start of the next character in the + * buffer. If non-null, the err parameter is filled in if an error occurs. + * + * If an error occurs that causes UCS2_REPLACEMENT_CHAR to be returned, then + * the buffer will be updated to move only a single UCS-2 character. 
+ * + * Any other error returns 0 and does not move the buffer position. + */ + + +class UTF16CharEnumerator +{ +public: + static uint32_t NextChar(const char16_t** aBuffer, const char16_t* aEnd, + bool* aErr = nullptr) + { + NS_ASSERTION(aBuffer && *aBuffer, "null buffer!"); + + const char16_t* p = *aBuffer; + + if (p >= aEnd) { + NS_ERROR("No input to work with"); + if (aErr) { + *aErr = true; + } + + return 0; + } + + char16_t c = *p++; + + if (!IS_SURROGATE(c)) { // U+0000 - U+D7FF,U+E000 - U+FFFF + if (aErr) { + *aErr = false; + } + *aBuffer = p; + return c; + } else if (NS_IS_HIGH_SURROGATE(c)) { // U+D800 - U+DBFF + if (p == aEnd) { + // Found a high surrogate at the end of the buffer. Flag this + // as an error and return the Unicode replacement + // character 0xFFFD. + + NS_WARNING("Unexpected end of buffer after high surrogate"); + + if (aErr) { + *aErr = true; + } + *aBuffer = p; + return 0xFFFD; + } + + // D800- DBFF - High Surrogate + char16_t h = c; + + c = *p++; + + if (NS_IS_LOW_SURROGATE(c)) { + // DC00- DFFF - Low Surrogate + // N = (H - D800) *400 + 10000 + (L - DC00) + uint32_t ucs4 = SURROGATE_TO_UCS4(h, c); + if (aErr) { + *aErr = false; + } + *aBuffer = p; + return ucs4; + } else { + // Found a high surrogate followed by something other than + // a low surrogate. Flag this as an error and return the + // Unicode replacement character 0xFFFD. Note that the + // pointer to the next character points to the second 16-bit + // value, not beyond it, as per Unicode 5.0.0 Chapter 3 C10, + // only the first code unit of an illegal sequence must be + // treated as an illegally terminated code unit sequence + // (also Chapter 3 D91, "isolated [not paired and ill-formed] + // UTF-16 code units in the range D800..DFFF are ill-formed"). 
+ NS_WARNING("got a High Surrogate but no low surrogate"); + + if (aErr) { + *aErr = true; + } + *aBuffer = p - 1; + return 0xFFFD; + } + } else { // U+DC00 - U+DFFF + // DC00- DFFF - Low Surrogate + + // Found a low surrogate w/o a preceding high surrogate. Flag + // this as an error and return the Unicode replacement + // character 0xFFFD. + + NS_WARNING("got a low Surrogate but no high surrogate"); + if (aErr) { + *aErr = true; + } + *aBuffer = p; + return 0xFFFD; + } + + MOZ_ASSERT_UNREACHABLE("Impossible UCS-2 character value."); + } +}; + + +/** + * A character sink (see |copy_string| in nsAlgorithm.h) for converting + * UTF-8 to UTF-16 + */ +class ConvertUTF8toUTF16 +{ +public: + typedef char value_type; + typedef char16_t buffer_type; + + explicit ConvertUTF8toUTF16(buffer_type* aBuffer) + : mStart(aBuffer), mBuffer(aBuffer), mErrorEncountered(false) + { + } + + size_t Length() const + { + return mBuffer - mStart; + } + + bool ErrorEncountered() const + { + return mErrorEncountered; + } + + void write(const value_type* aStart, uint32_t aN) + { + if (mErrorEncountered) { + return; + } + + // algorithm assumes utf8 units won't + // be spread across fragments + const value_type* p = aStart; + const value_type* end = aStart + aN; + buffer_type* out = mBuffer; + for (; p != end /* && *p */;) { + bool err; + uint32_t ucs4 = UTF8CharEnumerator::NextChar(&p, end, &err); + + if (err) { + mErrorEncountered = true; + mBuffer = out; + return; + } + + if (ucs4 >= PLANE1_BASE) { + *out++ = (buffer_type)H_SURROGATE(ucs4); + *out++ = (buffer_type)L_SURROGATE(ucs4); + } else { + *out++ = ucs4; + } + } + mBuffer = out; + } + + void write_terminator() + { + *mBuffer = buffer_type(0); + } + +private: + buffer_type* const mStart; + buffer_type* mBuffer; + bool mErrorEncountered; +}; + +/** + * A character sink (see |copy_string| in nsAlgorithm.h) for computing + * the length of the UTF-16 string equivalent to a UTF-8 string. 
+ */ +class CalculateUTF8Length +{ +public: + typedef char value_type; + + CalculateUTF8Length() + : mLength(0), mErrorEncountered(false) + { + } + + size_t Length() const + { + return mLength; + } + + void write(const value_type* aStart, uint32_t aN) + { + // ignore any further requests + if (mErrorEncountered) { + return; + } + + // algorithm assumes utf8 units won't + // be spread across fragments + const value_type* p = aStart; + const value_type* end = aStart + aN; + for (; p < end /* && *p */; ++mLength) { + if (UTF8traits::isASCII(*p)) { + p += 1; + } else if (UTF8traits::is2byte(*p)) { + p += 2; + } else if (UTF8traits::is3byte(*p)) { + p += 3; + } else if (UTF8traits::is4byte(*p)) { + // Because a UTF-8 sequence of 4 bytes represents a codepoint + // greater than 0xFFFF, it will become a surrogate pair in the + // UTF-16 string, so add 1 more to mLength. + // This doesn't happen with is5byte and is6byte because they + // are illegal UTF-8 sequences (greater than 0x10FFFF) so get + // converted to a single replacement character. + + // However, there is one case when a 4 byte UTF-8 sequence will + // only generate 2 UTF-16 bytes. If we have a properly encoded + // sequence, but with an invalid value (too small or too big), + // that will result in a replacement character being written + // This replacement character is encoded as just 1 single + // UTF-16 character, which is 2 bytes. + + // The below code therefore only adds 1 to mLength if the UTF8 + // data will produce a decoded character which is greater than + // or equal to 0x010000 and less than 0x0110000. + + // A 4byte UTF8 character is encoded as + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + // Bit 1-3 on the first byte, and bit 5-6 on the second byte, + // map to bit 17-21 in the final result. If these bits are + // between 0x01 and 0x11, that means that the final result is + // between 0x010000 and 0x110000. 
The below code reads these + // bits out and assigns them to c, but shifted up 4 bits to + // avoid having to shift twice. + + // It doesn't matter what to do in the case where p + 4 > end + // since no UTF16 characters will be written in that case by + // ConvertUTF8toUTF16. Likewise it doesn't matter what we do if + // any of the surrogate bits are wrong since no UTF16 + // characters will be written in that case either. + + if (p + 4 <= end) { + uint32_t c = ((uint32_t)(p[0] & 0x07)) << 6 | + ((uint32_t)(p[1] & 0x30)); + if (c >= 0x010 && c < 0x110) { + ++mLength; + } + } + + p += 4; + } else if (UTF8traits::is5byte(*p)) { + p += 5; + } else if (UTF8traits::is6byte(*p)) { + p += 6; + } else { // error + ++mLength; // to account for the decrement below + break; + } + } + if (p != end) { + NS_ERROR("Not a UTF-8 string. This code should only be used for converting from known UTF-8 strings."); + --mLength; // The last multi-byte char wasn't complete, discard it. + mErrorEncountered = true; + } + } + +private: + size_t mLength; + bool mErrorEncountered; +}; + +/** + * A character sink (see |copy_string| in nsAlgorithm.h) for + * converting UTF-16 to UTF-8. Treats invalid UTF-16 data as 0xFFFD + * (0xEFBFBD in UTF-8). + */ +class ConvertUTF16toUTF8 +{ +public: + typedef char16_t value_type; + typedef char buffer_type; + + // The error handling here is more lenient than that in + // |ConvertUTF8toUTF16|, but it's that way for backwards + // compatibility. + + explicit ConvertUTF16toUTF8(buffer_type* aBuffer) + : mStart(aBuffer), mBuffer(aBuffer) + { + } + + size_t Size() const + { + return mBuffer - mStart; + } + + void write(const value_type* aStart, uint32_t aN) + { + buffer_type* out = mBuffer; // gcc isn't smart enough to do this! 
+ + for (const value_type* p = aStart, *end = aStart + aN; p < end; ++p) { + value_type c = *p; + if (!(c & 0xFF80)) { // U+0000 - U+007F + *out++ = (char)c; + } else if (!(c & 0xF800)) { // U+0080 - U+07FF + *out++ = 0xC0 | (char)(c >> 6); + *out++ = 0x80 | (char)(0x003F & c); + } else if (!IS_SURROGATE(c)) { // U+0800 - U+D7FF,U+E000 - U+FFFF + *out++ = 0xE0 | (char)(c >> 12); + *out++ = 0x80 | (char)(0x003F & (c >> 6)); + *out++ = 0x80 | (char)(0x003F & c); + } else if (NS_IS_HIGH_SURROGATE(c)) { // U+D800 - U+DBFF + // D800- DBFF - High Surrogate + value_type h = c; + + ++p; + if (p == end) { + // Treat broken characters as the Unicode + // replacement character 0xFFFD (0xEFBFBD in + // UTF-8) + *out++ = '\xEF'; + *out++ = '\xBF'; + *out++ = '\xBD'; + + NS_WARNING("String ending in half a surrogate pair!"); + + break; + } + c = *p; + + if (NS_IS_LOW_SURROGATE(c)) { + // DC00- DFFF - Low Surrogate + // N = (H - D800) *400 + 10000 + ( L - DC00 ) + uint32_t ucs4 = SURROGATE_TO_UCS4(h, c); + + // 0001 0000-001F FFFF + *out++ = 0xF0 | (char)(ucs4 >> 18); + *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12)); + *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6)); + *out++ = 0x80 | (char)(0x003F & ucs4); + } else { + // Treat broken characters as the Unicode + // replacement character 0xFFFD (0xEFBFBD in + // UTF-8) + *out++ = '\xEF'; + *out++ = '\xBF'; + *out++ = '\xBD'; + + // The pointer to the next character points to the second + // 16-bit value, not beyond it, as per Unicode 5.0.0 + // Chapter 3 C10, only the first code unit of an illegal + // sequence must be treated as an illegally terminated + // code unit sequence (also Chapter 3 D91, "isolated [not + // paired and ill-formed] UTF-16 code units in the range + // D800..DFFF are ill-formed"). 
+ p--; + + NS_WARNING("got a High Surrogate but no low surrogate"); + } + } else { // U+DC00 - U+DFFF + // Treat broken characters as the Unicode replacement + // character 0xFFFD (0xEFBFBD in UTF-8) + *out++ = '\xEF'; + *out++ = '\xBF'; + *out++ = '\xBD'; + + // DC00- DFFF - Low Surrogate + NS_WARNING("got a low Surrogate but no high surrogate"); + } + } + + mBuffer = out; + } + + void write_terminator() + { + *mBuffer = buffer_type(0); + } + +private: + buffer_type* const mStart; + buffer_type* mBuffer; +}; + +/** + * A character sink (see |copy_string| in nsAlgorithm.h) for computing + * the number of bytes a UTF-16 would occupy in UTF-8. Treats invalid + * UTF-16 data as 0xFFFD (0xEFBFBD in UTF-8). + */ +class CalculateUTF8Size +{ +public: + typedef char16_t value_type; + + CalculateUTF8Size() + : mSize(0) + { + } + + size_t Size() const + { + return mSize; + } + + void write(const value_type* aStart, uint32_t aN) + { + // Assume UCS2 surrogate pairs won't be spread across fragments. 
+ for (const value_type* p = aStart, *end = aStart + aN; p < end; ++p) { + value_type c = *p; + if (!(c & 0xFF80)) { // U+0000 - U+007F + mSize += 1; + } else if (!(c & 0xF800)) { // U+0080 - U+07FF + mSize += 2; + } else if (0xD800 != (0xF800 & c)) { // U+0800 - U+D7FF,U+E000 - U+FFFF + mSize += 3; + } else if (0xD800 == (0xFC00 & c)) { // U+D800 - U+DBFF + ++p; + if (p == end) { + // Treat broken characters as the Unicode + // replacement character 0xFFFD (0xEFBFBD in + // UTF-8) + mSize += 3; + + NS_WARNING("String ending in half a surrogate pair!"); + + break; + } + c = *p; + + if (0xDC00 == (0xFC00 & c)) { + mSize += 4; + } else { + // Treat broken characters as the Unicode + // replacement character 0xFFFD (0xEFBFBD in + // UTF-8) + mSize += 3; + + // The next code unit is the second 16-bit value, not + // the one beyond it, as per Unicode 5.0.0 Chapter 3 C10, + // only the first code unit of an illegal sequence must + // be treated as an illegally terminated code unit + // sequence (also Chapter 3 D91, "isolated [not paired and + // ill-formed] UTF-16 code units in the range D800..DFFF + // are ill-formed"). + p--; + + NS_WARNING("got a high Surrogate but no low surrogate"); + } + } else { // U+DC00 - U+DFFF + // Treat broken characters as the Unicode replacement + // character 0xFFFD (0xEFBFBD in UTF-8) + mSize += 3; + + NS_WARNING("got a low Surrogate but no high surrogate"); + } + } + } + +private: + size_t mSize; +}; + +#ifdef MOZILLA_INTERNAL_API +/** + * A character sink that performs a |reinterpret_cast|-style conversion + * from char to char16_t. 
+ */ +class LossyConvertEncoding8to16 +{ +public: + typedef char value_type; + typedef char input_type; + typedef char16_t output_type; + +public: + explicit LossyConvertEncoding8to16(char16_t* aDestination) : + mDestination(aDestination) + { + } + + void + write(const char* aSource, uint32_t aSourceLength) + { +#ifdef MOZILLA_MAY_SUPPORT_SSE2 + if (mozilla::supports_sse2()) { + write_sse2(aSource, aSourceLength); + return; + } +#endif + const char* done_writing = aSource + aSourceLength; + while (aSource < done_writing) { + *mDestination++ = (char16_t)(unsigned char)(*aSource++); + } + } + + void + write_sse2(const char* aSource, uint32_t aSourceLength); + + void + write_terminator() + { + *mDestination = (char16_t)(0); + } + +private: + char16_t* mDestination; +}; + +/** + * A character sink that performs a |reinterpret_cast|-style conversion + * from char16_t to char. + */ +class LossyConvertEncoding16to8 +{ +public: + typedef char16_t value_type; + typedef char16_t input_type; + typedef char output_type; + + explicit LossyConvertEncoding16to8(char* aDestination) + : mDestination(aDestination) + { + } + + void + write(const char16_t* aSource, uint32_t aSourceLength) + { +#ifdef MOZILLA_MAY_SUPPORT_SSE2 + if (mozilla::supports_sse2()) { + write_sse2(aSource, aSourceLength); + return; + } +#endif + const char16_t* done_writing = aSource + aSourceLength; + while (aSource < done_writing) { + *mDestination++ = (char)(*aSource++); + } + } + +#ifdef MOZILLA_MAY_SUPPORT_SSE2 + void + write_sse2(const char16_t* aSource, uint32_t aSourceLength); +#endif + + void + write_terminator() + { + *mDestination = '\0'; + } + +private: + char* mDestination; +}; +#endif // MOZILLA_INTERNAL_API + + +template<typename Char, typename UnsignedT> +inline UnsignedT +RewindToPriorUTF8Codepoint(const Char* utf8Chars, UnsignedT index) +{ + static_assert(mozilla::IsSame<Char, char>::value || + mozilla::IsSame<Char, unsigned char>::value || + mozilla::IsSame<Char, signed char>::value, + "UTF-8 
data must be in 8-bit units"); + static_assert(mozilla::IsUnsigned<UnsignedT>::value, "index type must be unsigned"); + while (index > 0 && (utf8Chars[index] & 0xC0) == 0x80) + --index; + + return index; +} + +#endif /* !defined(nsUTF8Utils_h_) */ diff --git a/xpcom/string/nsUTF8UtilsSSE2.cpp b/xpcom/string/nsUTF8UtilsSSE2.cpp new file mode 100644 index 000000000..daf2c56b0 --- /dev/null +++ b/xpcom/string/nsUTF8UtilsSSE2.cpp @@ -0,0 +1,105 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nscore.h" +#include "nsAlgorithm.h" +#include <emmintrin.h> +#include <nsUTF8Utils.h> + +void +LossyConvertEncoding16to8::write_sse2(const char16_t* aSource, + uint32_t aSourceLength) +{ + char* dest = mDestination; + + // Align source to a 16-byte boundary. + uint32_t i = 0; + uint32_t alignLen = + XPCOM_MIN<uint32_t>(aSourceLength, + uint32_t(-NS_PTR_TO_INT32(aSource) & 0xf) / sizeof(char16_t)); + for (; i < alignLen; ++i) { + dest[i] = static_cast<unsigned char>(aSource[i]); + } + + // Walk 64 bytes (four XMM registers) at a time. + __m128i vectmask = _mm_set1_epi16(0x00ff); + for (; aSourceLength - i > 31; i += 32) { + __m128i source1 = _mm_load_si128(reinterpret_cast<const __m128i*>(aSource + i)); + source1 = _mm_and_si128(source1, vectmask); + + __m128i source2 = _mm_load_si128(reinterpret_cast<const __m128i*>(aSource + i + 8)); + source2 = _mm_and_si128(source2, vectmask); + + __m128i source3 = _mm_load_si128(reinterpret_cast<const __m128i*>(aSource + i + 16)); + source3 = _mm_and_si128(source3, vectmask); + + __m128i source4 = _mm_load_si128(reinterpret_cast<const __m128i*>(aSource + i + 24)); + source4 = _mm_and_si128(source4, vectmask); + + + // Pack the source data. 
SSE2 views this as a saturating uint16_t to + // uint8_t conversion, but since we masked off the high-order byte of every + // uint16_t, we're really just grabbing the low-order bytes of source1 and + // source2. + __m128i packed1 = _mm_packus_epi16(source1, source2); + __m128i packed2 = _mm_packus_epi16(source3, source4); + + // This store needs to be unaligned since there's no guarantee that the + // alignment we did above for the source will align the destination. + _mm_storeu_si128(reinterpret_cast<__m128i*>(dest + i), packed1); + _mm_storeu_si128(reinterpret_cast<__m128i*>(dest + i + 16), packed2); + } + + // Finish up the rest. + for (; i < aSourceLength; ++i) { + dest[i] = static_cast<unsigned char>(aSource[i]); + } + + mDestination += i; +} + +void +LossyConvertEncoding8to16::write_sse2(const char* aSource, + uint32_t aSourceLength) +{ + char16_t* dest = mDestination; + + // Align source to a 16-byte boundary. We choose to align source rather than + // dest because we'd rather have our loads than our stores be fast. You have + // to wait for a load to complete, but you can keep on moving after issuing a + // store. + uint32_t i = 0; + uint32_t alignLen = XPCOM_MIN(aSourceLength, + uint32_t(-NS_PTR_TO_INT32(aSource) & 0xf)); + for (; i < alignLen; ++i) { + dest[i] = static_cast<unsigned char>(aSource[i]); + } + + // Walk 32 bytes (two XMM registers) at a time. + for (; aSourceLength - i > 31; i += 32) { + __m128i source1 = _mm_load_si128(reinterpret_cast<const __m128i*>(aSource + i)); + __m128i source2 = _mm_load_si128(reinterpret_cast<const __m128i*>(aSource + i + 16)); + + // Interleave 0s in with the bytes of source to create lo and hi. + __m128i lo1 = _mm_unpacklo_epi8(source1, _mm_setzero_si128()); + __m128i hi1 = _mm_unpackhi_epi8(source1, _mm_setzero_si128()); + __m128i lo2 = _mm_unpacklo_epi8(source2, _mm_setzero_si128()); + __m128i hi2 = _mm_unpackhi_epi8(source2, _mm_setzero_si128()); + + // store lo and hi into dest. 
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(dest + i), lo1); + _mm_storeu_si128(reinterpret_cast<__m128i*>(dest + i + 8), hi1); + _mm_storeu_si128(reinterpret_cast<__m128i*>(dest + i + 16), lo2); + _mm_storeu_si128(reinterpret_cast<__m128i*>(dest + i + 24), hi2); + } + + // Finish up whatever's left. + for (; i < aSourceLength; ++i) { + dest[i] = static_cast<unsigned char>(aSource[i]); + } + + mDestination += i; +} diff --git a/xpcom/string/nsXPCOMStrings.h b/xpcom/string/nsXPCOMStrings.h new file mode 100644 index 000000000..493e092d6 --- /dev/null +++ b/xpcom/string/nsXPCOMStrings.h @@ -0,0 +1,748 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsXPCOMStrings_h__ +#define nsXPCOMStrings_h__ + +#include <string.h> +#include "nscore.h" +#include <limits> + +/** + * nsXPCOMStrings.h + * + * This file describes a minimal API for working with XPCOM's abstract + * string classes. It divorces the consumer from having any run-time + * dependency on the implementation details of the abstract string types. + */ + +#include "nscore.h" + +/* The base string types */ +class nsAString; +class nsACString; + +/* ------------------------------------------------------------------------- */ + +/** + * nsStringContainer + * + * This is an opaque data type that is large enough to hold the canonical + * implementation of nsAString. The binary structure of this class is an + * implementation detail. + * + * The string data stored in a string container is always single fragment + * and may be null-terminated depending on how it is initialized. + * + * Typically, string containers are allocated on the stack for temporary + * use. However, they can also be malloc'd if necessary. 
In either case, + * a string container is not useful until it has been initialized with a + * call to NS_StringContainerInit. The following example shows how to use + * a string container to call a function that takes a |nsAString &| out-param. + * + * nsresult GetBlah(nsAString &aBlah); + * + * nsresult MyCode() + * { + * nsresult rv; + * + * nsStringContainer sc; + * rv = NS_StringContainerInit(sc); + * if (NS_FAILED(rv)) + * return rv; + * + * rv = GetBlah(sc); + * if (NS_SUCCEEDED(rv)) + * { + * const char16_t *data; + * NS_StringGetData(sc, &data); + * // + * // |data| now points to the result of the GetBlah function + * // + * } + * + * NS_StringContainerFinish(sc); + * return rv; + * } + * + * The following example shows how to use a string container to pass a string + * parameter to a function taking a |const nsAString &| in-param. + * + * nsresult SetBlah(const nsAString &aBlah); + * + * nsresult MyCode() + * { + * nsresult rv; + * + * nsStringContainer sc; + * rv = NS_StringContainerInit(sc); + * if (NS_FAILED(rv)) + * return rv; + * + * const char16_t kData[] = {'x','y','z','\0'}; + * rv = NS_StringSetData(sc, kData, sizeof(kData)/2 - 1); + * if (NS_SUCCEEDED(rv)) + * rv = SetBlah(sc); + * + * NS_StringContainerFinish(sc); + * return rv; + * } + */ +class nsStringContainer; + + +/** + * This struct is never used directly. It is designed to have the same + * size as nsString. It can be stack and heap allocated and the internal + * functions cast it to nsString. + * While this practice is a strict aliasing violation, it doesn't seem to + * cause problems since the struct is only accessed via the casts to + * nsString. + * We use protected instead of private to avoid compiler warnings about + * the members being unused. 
+ */ +struct nsStringContainer_base +{ +protected: + void* d1; + uint32_t d2; + uint32_t d3; +}; + +/** + * Flags that may be OR'd together to pass to NS_StringContainerInit2: + */ +enum +{ + /* Data passed into NS_StringContainerInit2 is not copied; instead, the + * string references the passed in data pointer directly. The caller must + * ensure that the data is valid for the lifetime of the string container. + * This flag should not be combined with NS_STRING_CONTAINER_INIT_ADOPT. */ + NS_STRING_CONTAINER_INIT_DEPEND = (1 << 1), + + /* Data passed into NS_StringContainerInit2 is not copied; instead, the + * string takes ownership over the data pointer. The caller must have + * allocated the data array using the XPCOM memory allocator (nsMemory). + * This flag should not be combined with NS_STRING_CONTAINER_INIT_DEPEND. */ + NS_STRING_CONTAINER_INIT_ADOPT = (1 << 2), + + /* Data passed into NS_StringContainerInit2 is a substring that is not + * null-terminated. */ + NS_STRING_CONTAINER_INIT_SUBSTRING = (1 << 3) +}; + +/** + * NS_StringContainerInit + * + * @param aContainer string container reference + * @return NS_OK if string container successfully initialized + * + * This function may allocate additional memory for aContainer. When + * aContainer is no longer needed, NS_StringContainerFinish should be called. + */ +XPCOM_API(nsresult) NS_StringContainerInit(nsStringContainer& aContainer); + +/** + * NS_StringContainerInit2 + * + * @param aContainer string container reference + * @param aData character buffer (may be null) + * @param aDataLength number of characters stored at aData (may pass + * UINT32_MAX if aData is null-terminated) + * @param aFlags flags affecting how the string container is + * initialized. this parameter is ignored when aData + * is null. otherwise, if this parameter is 0, then + * aData is copied into the string. 
+ * + * This function resembles NS_StringContainerInit but provides further + * options that permit more efficient memory usage. When aContainer is + * no longer needed, NS_StringContainerFinish should be called. + * + * NOTE: NS_StringContainerInit2(container, nullptr, 0, 0) is equivalent to + * NS_StringContainerInit(container). + */ +XPCOM_API(nsresult) NS_StringContainerInit2(nsStringContainer& aContainer, + const char16_t* aData = nullptr, + uint32_t aDataLength = UINT32_MAX, + uint32_t aFlags = 0); + +/** + * NS_StringContainerFinish + * + * @param aContainer string container reference + * + * This function frees any memory owned by aContainer. + */ +XPCOM_API(void) NS_StringContainerFinish(nsStringContainer& aContainer); + +/* ------------------------------------------------------------------------- */ + +/** + * NS_StringGetData + * + * This function returns a const character pointer to the string's internal + * buffer, the length of the string, and a boolean value indicating whether + * or not the buffer is null-terminated. + * + * @param aStr abstract string reference + * @param aData out param that will hold the address of aStr's + * internal buffer + * @param aTerminated if non-null, this out param will be set to indicate + * whether or not aStr's internal buffer is null- + * terminated + * @return length of aStr's internal buffer + */ +XPCOM_API(uint32_t) NS_StringGetData(const nsAString& aStr, + const char16_t** aData, + bool* aTerminated = nullptr); + +/** + * NS_StringGetMutableData + * + * This function provides mutable access to a string's internal buffer. It + * returns a pointer to an array of characters that may be modified. The + * returned pointer remains valid until the string object is passed to some + * other string function. + * + * Optionally, this function may be used to resize the string's internal + * buffer. The aDataLength parameter specifies the requested length of the + * string's internal buffer. 
By passing some value other than UINT32_MAX, + * the caller can request that the buffer be resized to the specified number of + * characters before returning. The caller is not responsible for writing a + * null-terminator. + * + * @param aStr abstract string reference + * @param aDataLength number of characters to resize the string's internal + * buffer to or UINT32_MAX if no resizing is needed + * @param aData out param that upon return holds the address of aStr's + * internal buffer or null if the function failed + * @return number of characters or zero if the function failed + * + * This function does not necessarily null-terminate aStr after resizing its + * internal buffer. The behavior depends on the implementation of the abstract + * string, aStr. If aStr is a reference to a nsStringContainer, then its data + * will be null-terminated by this function. + */ +XPCOM_API(uint32_t) NS_StringGetMutableData(nsAString& aStr, + uint32_t aDataLength, + char16_t** aData); + +/** + * NS_StringCloneData + * + * This function returns a null-terminated copy of the string's + * internal buffer. + * + * @param aStr abstract string reference + * @return null-terminated copy of the string's internal buffer + * (it must be free'd using free) + */ +XPCOM_API(char16_t*) NS_StringCloneData(const nsAString& aStr); + +/** + * NS_StringSetData + * + * This function copies aData into aStr. + * + * @param aStr abstract string reference + * @param aData character buffer + * @param aDataLength number of characters to copy from source string (pass + * UINT32_MAX to copy until end of aData, designated by + * a null character) + * @return NS_OK if function succeeded + * + * This function does not necessarily null-terminate aStr after copying data + * from aData. The behavior depends on the implementation of the abstract + * string, aStr. If aStr is a reference to a nsStringContainer, then its data + * will be null-terminated by this function. 
+ */ +XPCOM_API(nsresult) NS_StringSetData(nsAString& aStr, const char16_t* aData, + uint32_t aDataLength = UINT32_MAX); + +/** + * NS_StringSetDataRange + * + * This function copies aData into a section of aStr. As a result it can be + * used to insert new characters into the string. + * + * @param aStr abstract string reference + * @param aCutOffset starting index where the string's existing data + * is to be overwritten (pass UINT32_MAX to cause + * aData to be appended to the end of aStr, in which + * case the value of aCutLength is ignored). + * @param aCutLength number of characters to overwrite starting at + * aCutOffset (pass UINT32_MAX to overwrite until the + * end of aStr). + * @param aData character buffer (pass null to cause this function + * to simply remove the "cut" range) + * @param aDataLength number of characters to copy from source string (pass + * UINT32_MAX to copy until end of aData, designated by + * a null character) + * @return NS_OK if function succeeded + * + * This function does not necessarily null-terminate aStr after copying data + * from aData. The behavior depends on the implementation of the abstract + * string, aStr. If aStr is a reference to a nsStringContainer, then its data + * will be null-terminated by this function. + */ +XPCOM_API(nsresult) NS_StringSetDataRange(nsAString& aStr, + uint32_t aCutOffset, uint32_t aCutLength, + const char16_t* aData, + uint32_t aDataLength = UINT32_MAX); + +/** + * NS_StringCopy + * + * This function makes aDestStr have the same value as aSrcStr. It is + * provided as an optimization. + * + * @param aDestStr abstract string reference to be modified + * @param aSrcStr abstract string reference containing source string + * @return NS_OK if function succeeded + * + * This function does not necessarily null-terminate aDestStr after copying + * data from aSrcStr. The behavior depends on the implementation of the + * abstract string, aDestStr. 
If aDestStr is a reference to a + * nsStringContainer, then its data will be null-terminated by this function. + */ +XPCOM_API(nsresult) NS_StringCopy(nsAString& aDestStr, + const nsAString& aSrcStr); + +/** + * NS_StringAppendData + * + * This function appends data to the existing value of aStr. + * + * @param aStr abstract string reference to be modified + * @param aData character buffer + * @param aDataLength number of characters to append (pass UINT32_MAX to + * append until a null-character is encountered) + * @return NS_OK if function succeeded + * + * This function does not necessarily null-terminate aStr upon completion. + * The behavior depends on the implementation of the abstract string, aStr. + * If aStr is a reference to a nsStringContainer, then its data will be null- + * terminated by this function. + */ +inline NS_HIDDEN_(nsresult) +NS_StringAppendData(nsAString& aStr, const char16_t* aData, + uint32_t aDataLength = UINT32_MAX) +{ + return NS_StringSetDataRange(aStr, UINT32_MAX, 0, aData, aDataLength); +} + +/** + * NS_StringInsertData + * + * This function inserts data into the existing value of aStr at the specified + * offset. + * + * @param aStr abstract string reference to be modified + * @param aOffset specifies where in the string to insert aData + * @param aData character buffer + * @param aDataLength number of characters to insert (pass UINT32_MAX to + * insert until a null-character is encountered) + * @return NS_OK if function succeeded + * + * This function does not necessarily null-terminate aStr upon completion. + * The behavior depends on the implementation of the abstract string, aStr. + * If aStr is a reference to a nsStringContainer, then its data will be null- + * terminated by this function. 
+ */ +inline NS_HIDDEN_(nsresult) +NS_StringInsertData(nsAString& aStr, uint32_t aOffset, const char16_t* aData, + uint32_t aDataLength = UINT32_MAX) +{ + return NS_StringSetDataRange(aStr, aOffset, 0, aData, aDataLength); +} + +/** + * NS_StringCutData + * + * This function shortens the existing value of aStr, by removing characters + * at the specified offset. + * + * @param aStr abstract string reference to be modified + * @param aCutOffset specifies the index of the first character to remove + * @param aCutLength number of characters to remove + * @return NS_OK if function succeeded + */ +inline NS_HIDDEN_(nsresult) +NS_StringCutData(nsAString& aStr, uint32_t aCutOffset, uint32_t aCutLength) +{ + return NS_StringSetDataRange(aStr, aCutOffset, aCutLength, nullptr, 0); +} + +/** + * NS_StringSetIsVoid + * + * This function marks a string as being a "void string". Any data in the + * string will be lost. + */ +XPCOM_API(void) NS_StringSetIsVoid(nsAString& aStr, const bool aIsVoid); + +/** + * NS_StringGetIsVoid + * + * This function provides a way to test if a string is a "void string", as + * marked by NS_StringSetIsVoid. + */ +XPCOM_API(bool) NS_StringGetIsVoid(const nsAString& aStr); + +/* ------------------------------------------------------------------------- */ + +/** + * nsCStringContainer + * + * This is an opaque data type that is large enough to hold the canonical + * implementation of nsACString. The binary structure of this class is an + * implementation detail. + * + * The string data stored in a string container is always single fragment + * and may be null-terminated depending on how it is initialized. + * + * @see nsStringContainer for use cases and further documentation. + */ +class nsCStringContainer; + +/** + * Flags that may be OR'd together to pass to NS_CStringContainerInit2: + */ +enum +{ + /* Data passed into NS_CStringContainerInit2 is not copied; instead, the + * string references the passed in data pointer directly. 
The caller must + * ensure that the data is valid for the lifetime of the string container. + * This flag should not be combined with NS_CSTRING_CONTAINER_INIT_ADOPT. */ + NS_CSTRING_CONTAINER_INIT_DEPEND = (1 << 1), + + /* Data passed into NS_CStringContainerInit2 is not copied; instead, the + * string takes ownership over the data pointer. The caller must have + * allocated the data array using the XPCOM memory allocator (nsMemory). + * This flag should not be combined with NS_CSTRING_CONTAINER_INIT_DEPEND. */ + NS_CSTRING_CONTAINER_INIT_ADOPT = (1 << 2), + + /* Data passed into NS_CStringContainerInit2 is a substring that is not + * null-terminated. */ + NS_CSTRING_CONTAINER_INIT_SUBSTRING = (1 << 3) +}; + +/** + * NS_CStringContainerInit + * + * @param aContainer string container reference + * @return NS_OK if string container successfully initialized + * + * This function may allocate additional memory for aContainer. When + * aContainer is no longer needed, NS_CStringContainerFinish should be called. + */ +XPCOM_API(nsresult) NS_CStringContainerInit(nsCStringContainer& aContainer); + +/** + * NS_CStringContainerInit2 + * + * @param aContainer string container reference + * @param aData character buffer (may be null) + * @param aDataLength number of characters stored at aData (may pass + * UINT32_MAX if aData is null-terminated) + * @param aFlags flags affecting how the string container is + * initialized. this parameter is ignored when aData + * is null. otherwise, if this parameter is 0, then + * aData is copied into the string. + * + * This function resembles NS_CStringContainerInit but provides further + * options that permit more efficient memory usage. When aContainer is + * no longer needed, NS_CStringContainerFinish should be called. + * + * NOTE: NS_CStringContainerInit2(container, nullptr, 0, 0) is equivalent to + * NS_CStringContainerInit(container). 
+ */ +XPCOM_API(nsresult) NS_CStringContainerInit2(nsCStringContainer& aContainer, + const char* aData = nullptr, + uint32_t aDataLength = UINT32_MAX, + uint32_t aFlags = 0); + +/** + * NS_CStringContainerFinish + * + * @param aContainer string container reference + * + * This function frees any memory owned by aContainer. + */ +XPCOM_API(void) NS_CStringContainerFinish(nsCStringContainer& aContainer); + +/* ------------------------------------------------------------------------- */ + +/** + * NS_CStringGetData + * + * This function returns a const character pointer to the string's internal + * buffer, the length of the string, and a boolean value indicating whether + * or not the buffer is null-terminated. + * + * @param aStr abstract string reference + * @param aData out param that will hold the address of aStr's + * internal buffer + * @param aTerminated if non-null, this out param will be set to indicate + * whether or not aStr's internal buffer is null- + * terminated + * @return length of aStr's internal buffer + */ +XPCOM_API(uint32_t) NS_CStringGetData(const nsACString& aStr, + const char** aData, + bool* aTerminated = nullptr); + +/** + * NS_CStringGetMutableData + * + * This function provides mutable access to a string's internal buffer. It + * returns a pointer to an array of characters that may be modified. The + * returned pointer remains valid until the string object is passed to some + * other string function. + * + * Optionally, this function may be used to resize the string's internal + * buffer. The aDataLength parameter specifies the requested length of the + * string's internal buffer. By passing some value other than UINT32_MAX, + * the caller can request that the buffer be resized to the specified number of + * characters before returning. The caller is not responsible for writing a + * null-terminator. 
+ * + * @param aStr abstract string reference + * @param aDataLength number of characters to resize the string's internal + * buffer to or UINT32_MAX if no resizing is needed + * @param aData out param that upon return holds the address of aStr's + * internal buffer or null if the function failed + * @return number of characters or zero if the function failed + * + * This function does not necessarily null-terminate aStr after resizing its + * internal buffer. The behavior depends on the implementation of the abstract + * string, aStr. If aStr is a reference to a nsCStringContainer, then its data + * will be null-terminated by this function. + */ +XPCOM_API(uint32_t) NS_CStringGetMutableData(nsACString& aStr, + uint32_t aDataLength, + char** aData); + +/** + * NS_CStringCloneData + * + * This function returns a null-terminated copy of the string's + * internal buffer. + * + * @param aStr abstract string reference + * @return null-terminated copy of the string's internal buffer + * (it must be free'd using free) + */ +XPCOM_API(char*) NS_CStringCloneData(const nsACString& aStr); + +/** + * NS_CStringSetData + * + * This function copies aData into aStr. + * + * @param aStr abstract string reference + * @param aData character buffer + * @param aDataLength number of characters to copy from source string (pass + * UINT32_MAX to copy until end of aData, designated by + * a null character) + * @return NS_OK if function succeeded + * + * This function does not necessarily null-terminate aStr after copying data + * from aData. The behavior depends on the implementation of the abstract + * string, aStr. If aStr is a reference to a nsCStringContainer, then its data + * will be null-terminated by this function. + */ +XPCOM_API(nsresult) NS_CStringSetData(nsACString& aStr, const char* aData, + uint32_t aDataLength = UINT32_MAX); + +/** + * NS_CStringSetDataRange + * + * This function copies aData into a section of aStr. 
As a result it can be + * used to insert new characters into the string. + * + * @param aStr abstract string reference + * @param aCutOffset starting index where the string's existing data + * is to be overwritten (pass UINT32_MAX to cause + * aData to be appended to the end of aStr, in which + * case the value of aCutLength is ignored). + * @param aCutLength number of characters to overwrite starting at + * aCutOffset (pass UINT32_MAX to overwrite until the + * end of aStr). + * @param aData character buffer (pass null to cause this function + * to simply remove the "cut" range) + * @param aDataLength number of characters to copy from source string (pass + * UINT32_MAX to copy until end of aData, designated by + * a null character) + * @return NS_OK if function succeeded + * + * This function does not necessarily null-terminate aStr after copying data + * from aData. The behavior depends on the implementation of the abstract + * string, aStr. If aStr is a reference to a nsCStringContainer, then its data + * will be null-terminated by this function. + */ +XPCOM_API(nsresult) NS_CStringSetDataRange(nsACString& aStr, + uint32_t aCutOffset, + uint32_t aCutLength, + const char* aData, + uint32_t aDataLength = UINT32_MAX); + +/** + * NS_CStringCopy + * + * This function makes aDestStr have the same value as aSrcStr. It is + * provided as an optimization. + * + * @param aDestStr abstract string reference to be modified + * @param aSrcStr abstract string reference containing source string + * @return NS_OK if function succeeded + * + * This function does not necessarily null-terminate aDestStr after copying + * data from aSrcStr. The behavior depends on the implementation of the + * abstract string, aDestStr. If aDestStr is a reference to a + * nsCStringContainer, then its data will be null-terminated by this function. 
+ */ +XPCOM_API(nsresult) NS_CStringCopy(nsACString& aDestStr, + const nsACString& aSrcStr); + +/** + * NS_CStringAppendData + * + * This function appends data to the existing value of aStr. + * + * @param aStr abstract string reference to be modified + * @param aData character buffer + * @param aDataLength number of characters to append (pass UINT32_MAX to + * append until a null-character is encountered) + * @return NS_OK if function succeeded + * + * This function does not necessarily null-terminate aStr upon completion. + * The behavior depends on the implementation of the abstract string, aStr. + * If aStr is a reference to a nsCStringContainer, then its data will be null- + * terminated by this function. + */ +inline NS_HIDDEN_(nsresult) +NS_CStringAppendData(nsACString& aStr, const char* aData, + uint32_t aDataLength = UINT32_MAX) +{ + return NS_CStringSetDataRange(aStr, UINT32_MAX, 0, aData, aDataLength); +} + +/** + * NS_CStringInsertData + * + * This function inserts data into the existing value of aStr at the specified + * offset. + * + * @param aStr abstract string reference to be modified + * @param aOffset specifies where in the string to insert aData + * @param aData character buffer + * @param aDataLength number of characters to insert (pass UINT32_MAX to + * insert until a null-character is encountered) + * @return NS_OK if function succeeded + * + * This function does not necessarily null-terminate aStr upon completion. + * The behavior depends on the implementation of the abstract string, aStr. + * If aStr is a reference to a nsCStringContainer, then its data will be null- + * terminated by this function. 
+ */ +inline NS_HIDDEN_(nsresult) +NS_CStringInsertData(nsACString& aStr, uint32_t aOffset, const char* aData, + uint32_t aDataLength = UINT32_MAX) +{ + return NS_CStringSetDataRange(aStr, aOffset, 0, aData, aDataLength); +} + +/** + * NS_CStringCutData + * + * This function shortens the existing value of aStr, by removing characters + * at the specified offset. + * + * @param aStr abstract string reference to be modified + * @param aCutOffset specifies the index of the first character to remove + * @param aCutLength number of characters to remove + * @return NS_OK if function succeeded + */ +inline NS_HIDDEN_(nsresult) +NS_CStringCutData(nsACString& aStr, uint32_t aCutOffset, uint32_t aCutLength) +{ + return NS_CStringSetDataRange(aStr, aCutOffset, aCutLength, nullptr, 0); +} + +/** + * NS_CStringSetIsVoid + * + * This function marks a string as being a "void string". Any data in the + * string will be lost. + */ +XPCOM_API(void) NS_CStringSetIsVoid(nsACString& aStr, const bool aIsVoid); + +/** + * NS_CStringGetIsVoid + * + * This function provides a way to test if a string is a "void string", as + * marked by NS_CStringSetIsVoid. + */ +XPCOM_API(bool) NS_CStringGetIsVoid(const nsACString& aStr); + +/* ------------------------------------------------------------------------- */ + +/** + * Encodings that can be used with the following conversion routines. + */ +enum nsCStringEncoding +{ + /* Conversion between ASCII and UTF-16 assumes that all bytes in the source + * string are 7-bit ASCII and can be inflated to UTF-16 by inserting null + * bytes. Reverse conversion is done by truncating every other byte. The + * conversion may result in loss and/or corruption of information if the + * strings do not strictly contain ASCII data. */ + NS_CSTRING_ENCODING_ASCII = 0, + + /* Conversion between UTF-8 and UTF-16 is non-lossy. */ + NS_CSTRING_ENCODING_UTF8 = 1, + + /* Conversion from UTF-16 to the native filesystem charset may result in a + * loss of information. 
No attempt is made to protect against data loss in + * this case. The native filesystem charset applies to strings passed to + * the "Native" method variants on nsIFile. */ + NS_CSTRING_ENCODING_NATIVE_FILESYSTEM = 2 +}; + +/** + * NS_CStringToUTF16 + * + * This function converts the characters in a nsACString to an array of UTF-16 + * characters, in the platform endianness. The result is stored in a nsAString + * object. + * + * @param aSource abstract string reference containing source string + * @param aSrcEncoding character encoding of the source string + * @param aDest abstract string reference to hold the result + */ +XPCOM_API(nsresult) NS_CStringToUTF16(const nsACString& aSource, + nsCStringEncoding aSrcEncoding, + nsAString& aDest); + +/** + * NS_UTF16ToCString + * + * This function converts the UTF-16 characters in a nsAString to a single-byte + * encoding. The result is stored in a nsACString object. In some cases this + * conversion may be lossy. In such cases, the conversion may succeed with a + * return code indicating loss of information. The exact behavior is not + * specified at this time. + * + * @param aSource abstract string reference containing source string + * @param aDestEncoding character encoding of the resulting string + * @param aDest abstract string reference to hold the result + */ +XPCOM_API(nsresult) NS_UTF16ToCString(const nsAString& aSource, + nsCStringEncoding aDestEncoding, + nsACString& aDest); + +#endif // nsXPCOMStrings_h__ diff --git a/xpcom/string/nsXPIDLString.h b/xpcom/string/nsXPIDLString.h new file mode 100644 index 000000000..f5821cdb0 --- /dev/null +++ b/xpcom/string/nsXPIDLString.h @@ -0,0 +1,12 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsXPIDLString_h___ +#define nsXPIDLString_h___ + +#include "nsString.h" + +#endif /* !defined(nsXPIDLString_h___) */ diff --git a/xpcom/string/string-template-def-char.h b/xpcom/string/string-template-def-char.h new file mode 100644 index 000000000..82f70d0fb --- /dev/null +++ b/xpcom/string/string-template-def-char.h @@ -0,0 +1,25 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +// IWYU pragma: private, include "nsString.h" + +#define CharT char +#define CharT_is_char 1 +#define nsTAString_IncompatibleCharT nsAString +#define nsTString_CharT nsCString +#define nsTFixedString_CharT nsFixedCString +#define nsTAutoString_CharT nsAutoCString +#define nsTSubstring_CharT nsACString +#define nsTSubstringTuple_CharT nsCSubstringTuple +#define nsTStringComparator_CharT nsCStringComparator +#define nsTDefaultStringComparator_CharT nsDefaultCStringComparator +#define nsTDependentString_CharT nsDependentCString +#define nsTDependentSubstring_CharT nsDependentCSubstring +#define nsTLiteralString_CharT nsLiteralCString +#define nsTXPIDLString_CharT nsXPIDLCString +#define nsTGetterCopies_CharT nsCGetterCopies +#define nsTAdoptingString_CharT nsAdoptingCString +#define nsTPromiseFlatString_CharT nsPromiseFlatCString +#define TPromiseFlatString_CharT PromiseFlatCString diff --git a/xpcom/string/string-template-def-unichar.h b/xpcom/string/string-template-def-unichar.h new file mode 100644 index 000000000..a21e16d09 --- /dev/null +++ b/xpcom/string/string-template-def-unichar.h @@ -0,0 +1,25 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +// IWYU pragma: private, include "nsString.h" + +#define CharT char16_t +#define CharT_is_PRUnichar 1 +#define nsTAString_IncompatibleCharT nsACString +#define nsTString_CharT nsString +#define nsTFixedString_CharT nsFixedString +#define nsTAutoString_CharT nsAutoString +#define nsTSubstring_CharT nsAString +#define nsTSubstringTuple_CharT nsSubstringTuple +#define nsTStringComparator_CharT nsStringComparator +#define nsTDefaultStringComparator_CharT nsDefaultStringComparator +#define nsTDependentString_CharT nsDependentString +#define nsTDependentSubstring_CharT nsDependentSubstring +#define nsTLiteralString_CharT nsLiteralString +#define nsTXPIDLString_CharT nsXPIDLString +#define nsTGetterCopies_CharT nsGetterCopies +#define nsTAdoptingString_CharT nsAdoptingString +#define nsTPromiseFlatString_CharT nsPromiseFlatString +#define TPromiseFlatString_CharT PromiseFlatString diff --git a/xpcom/string/string-template-undef.h b/xpcom/string/string-template-undef.h new file mode 100644 index 000000000..d62cd2278 --- /dev/null +++ b/xpcom/string/string-template-undef.h @@ -0,0 +1,26 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +// IWYU pragma: private, include "nsString.h" + +#undef CharT +#undef CharT_is_PRUnichar +#undef CharT_is_char +#undef nsTAString_IncompatibleCharT +#undef nsTString_CharT +#undef nsTFixedString_CharT +#undef nsTAutoString_CharT +#undef nsTSubstring_CharT +#undef nsTSubstringTuple_CharT +#undef nsTStringComparator_CharT +#undef nsTDefaultStringComparator_CharT +#undef nsTDependentString_CharT +#undef nsTDependentSubstring_CharT +#undef nsTLiteralString_CharT +#undef nsTXPIDLString_CharT +#undef nsTGetterCopies_CharT +#undef nsTAdoptingString_CharT +#undef nsTPromiseFlatString_CharT +#undef TPromiseFlatString_CharT |