summaryrefslogtreecommitdiffstats
path: root/xpcom/string/nsReadableUtils.cpp
diff options
context:
space:
mode:
authorMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
committerMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
commit5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree10027f336435511475e392454359edea8e25895d /xpcom/string/nsReadableUtils.cpp
parent49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
downloadUXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip
Add m-esr52 at 52.6.0
Diffstat (limited to 'xpcom/string/nsReadableUtils.cpp')
-rw-r--r--xpcom/string/nsReadableUtils.cpp1383
1 files changed, 1383 insertions, 0 deletions
diff --git a/xpcom/string/nsReadableUtils.cpp b/xpcom/string/nsReadableUtils.cpp
new file mode 100644
index 000000000..524b1d7fe
--- /dev/null
+++ b/xpcom/string/nsReadableUtils.cpp
@@ -0,0 +1,1383 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsReadableUtils.h"
+#include "nsReadableUtilsImpl.h"
+
+#include <algorithm>
+
+#include "mozilla/CheckedInt.h"
+
+#include "nscore.h"
+#include "nsMemory.h"
+#include "nsString.h"
+#include "nsTArray.h"
+#include "nsUTF8Utils.h"
+
+using mozilla::IsASCII;
+
+/**
+ * Fallback (non-SIMD) scan for the first non-ASCII code unit in a UTF-16
+ * range.
+ *
+ * The range is walked in three phases: code units are checked one at a time
+ * until the cursor is word aligned, then whole machine words are tested
+ * against a mask, and finally the remaining tail is checked one code unit at
+ * a time.
+ *
+ * @param aBegin pointer to the first code unit of the range
+ * @param aEnd   pointer one past the last code unit of the range
+ * @return -1 if every code unit in [aBegin, aEnd) is ASCII.  Otherwise an
+ *         index that is less than or equal to the index of the first
+ *         non-ASCII code unit: a hit in the word-at-a-time phase reports the
+ *         start of the offending word rather than the exact code unit.
+ */
+static inline int32_t
+FirstNonASCIIUnvectorized(const char16_t* aBegin, const char16_t* aEnd)
+{
+  typedef mozilla::NonASCIIParameters<sizeof(size_t)> p;
+  const size_t kMask = p::mask();
+  const uintptr_t kAlignMask = p::alignMask();
+  const size_t kNumUnicharsPerWord = p::numUnicharsPerWord();
+
+  const char16_t* idx = aBegin;
+
+  // Align ourselves to a word boundary.
+  for (; idx != aEnd && ((uintptr_t(idx) & kAlignMask) != 0); idx++) {
+    if (!IsASCII(*idx)) {
+      return idx - aBegin;
+    }
+  }
+
+  // Check one word at a time.
+  //
+  // The loop condition is '<' rather than '!='.  If the range ends before
+  // the first word boundary, the alignment loop above stops at the
+  // (unaligned) aEnd, while wordWalkEnd -- presumably aEnd rounded down to a
+  // word boundary -- already lies behind idx.  With '!=' the loop would then
+  // read past aEnd and never terminate.
+  const char16_t* wordWalkEnd = mozilla::aligned(aEnd, kAlignMask);
+  for (; idx < wordWalkEnd; idx += kNumUnicharsPerWord) {
+    const size_t word = *reinterpret_cast<const size_t*>(idx);
+    if (word & kMask) {
+      return idx - aBegin;
+    }
+  }
+
+  // Take care of the remainder one character at a time.
+  for (; idx != aEnd; idx++) {
+    if (!IsASCII(*idx)) {
+      return idx - aBegin;
+    }
+  }
+
+  return -1;
+}
+
+/*
+ * Locates the first non-ASCII character of a UTF-16 range.
+ *
+ * Returns -1 if all characters in [aBegin, aEnd) are ASCII.  Otherwise it
+ * returns a value less than or equal to the index of the first non-ASCII
+ * character.  For example, if the first non-ASCII character is at position
+ * 25, the result may be 25, 24, or 16.  The only guarantee is that every
+ * character before the returned index is ASCII.
+ */
+static inline int32_t
+FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd)
+{
+#ifdef MOZILLA_MAY_SUPPORT_SSE2
+  // Use the SSE2 implementation whenever the support check succeeds.
+  const bool haveSSE2 = mozilla::supports_sse2();
+  if (haveSSE2) {
+    return mozilla::SSE2::FirstNonASCII(aBegin, aEnd);
+  }
+#endif
+
+  // Otherwise fall back to the unvectorized scan.
+  return FirstNonASCIIUnvectorized(aBegin, aEnd);
+}
+
+/**
+ * Replaces the contents of aDest with the lossy 8-bit conversion of aSource.
+ * The destination is emptied first; the conversion itself is delegated to
+ * LossyAppendUTF16toASCII.
+ */
+void
+LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest)
+{
+  // Starting from an empty destination turns the append into an assignment.
+  aDest.Truncate();
+  LossyAppendUTF16toASCII(aSource, aDest);
+}
+
+/**
+ * Replaces the contents of aDest with the 16-bit widening of aSource.
+ * The destination is emptied first; the conversion itself is delegated to
+ * AppendASCIItoUTF16.
+ */
+void
+CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest)
+{
+  // Starting from an empty destination turns the append into an assignment.
+  aDest.Truncate();
+  AppendASCIItoUTF16(aSource, aDest);
+}
+
+/**
+ * Pointer overload: replaces the contents of aDest with the lossy 8-bit
+ * conversion of the string aSource points to.  A null aSource simply leaves
+ * aDest empty.
+ */
+void
+LossyCopyUTF16toASCII(const char16ptr_t aSource, nsACString& aDest)
+{
+  aDest.Truncate();
+  if (!aSource) {
+    // Nothing to convert; the destination stays empty.
+    return;
+  }
+
+  LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
+}
+
+/**
+ * Pointer overload: replaces the contents of aDest with the 16-bit widening
+ * of the string aSource points to.  A null aSource simply leaves aDest
+ * empty.
+ */
+void
+CopyASCIItoUTF16(const char* aSource, nsAString& aDest)
+{
+  aDest.Truncate();
+  if (!aSource) {
+    // Nothing to convert; the destination stays empty.
+    return;
+  }
+
+  AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
+}
+
+/**
+ * Replaces the contents of aDest with the UTF-8 encoding of aSource.
+ * Runs the fallible overload and reports a failure through
+ * aDest.AllocFailed().
+ */
+void
+CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest)
+{
+  const bool converted = CopyUTF16toUTF8(aSource, aDest, mozilla::fallible);
+  if (converted) {
+    return;
+  }
+
+  // Note that this may wildly underestimate the allocation that failed, as
+  // we report the length of aSource as UTF-16 instead of UTF-8.
+  aDest.AllocFailed(aDest.Length() + aSource.Length());
+}
+
+/**
+ * Fallible variant: replaces the contents of aDest with the UTF-8 encoding
+ * of aSource.
+ *
+ * @return true on success, false if the fallible append reported a failure.
+ *         aDest has already been emptied in either case.
+ */
+bool
+CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
+                const mozilla::fallible_t& aFallible)
+{
+  aDest.Truncate();
+  return AppendUTF16toUTF8(aSource, aDest, aFallible);
+}
+
+/**
+ * Replaces the contents of aDest with the UTF-16 form of the UTF-8 string
+ * aSource.  The destination is emptied first; the conversion itself is
+ * delegated to AppendUTF8toUTF16.
+ */
+void
+CopyUTF8toUTF16(const nsACString& aSource, nsAString& aDest)
+{
+  // Starting from an empty destination turns the append into an assignment.
+  aDest.Truncate();
+  AppendUTF8toUTF16(aSource, aDest);
+}
+
+/**
+ * Pointer overload: replaces the contents of aDest with the UTF-8 encoding
+ * of the string aSource points to.  The destination is emptied first and
+ * aSource is then handed to AppendUTF16toUTF8.
+ */
+void
+CopyUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest)
+{
+  // Starting from an empty destination turns the append into an assignment.
+  aDest.Truncate();
+  AppendUTF16toUTF8(aSource, aDest);
+}
+
+/**
+ * Pointer overload: replaces the contents of aDest with the UTF-16 form of
+ * the UTF-8 string aSource points to.  The destination is emptied first and
+ * aSource is then handed to AppendUTF8toUTF16.
+ */
+void
+CopyUTF8toUTF16(const char* aSource, nsAString& aDest)
+{
+  // Starting from an empty destination turns the append into an assignment.
+  aDest.Truncate();
+  AppendUTF8toUTF16(aSource, aDest);
+}
+
+/**
+ * Appends aSource to aDest through the LossyConvertEncoding16to8 sink.
+ * aDest grows by exactly aSource.Length() characters.
+ */
+void
+LossyAppendUTF16toASCII(const nsAString& aSource, nsACString& aDest)
+{
+  // Make room for the converted characters behind the existing contents.
+  const uint32_t lengthBefore = aDest.Length();
+  aDest.SetLength(lengthBefore + aSource.Length());
+
+  // Position a writing iterator at the first newly added character.
+  nsACString::iterator writeCursor;
+  aDest.BeginWriting(writeCursor);
+  writeCursor.advance(lengthBefore);
+
+  // right now, this won't work on multi-fragment destinations
+  LossyConvertEncoding16to8 converter(writeCursor.get());
+
+  nsAString::const_iterator sourceBegin, sourceEnd;
+  aSource.BeginReading(sourceBegin);
+  aSource.EndReading(sourceEnd);
+  copy_string(sourceBegin, sourceEnd, converter);
+}
+
+/**
+ * Appends the 16-bit widening of aSource to aDest.  Runs the fallible
+ * overload and reports a failure through aDest.AllocFailed().
+ */
+void
+AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest)
+{
+  const bool appended = AppendASCIItoUTF16(aSource, aDest, mozilla::fallible);
+  if (appended) {
+    return;
+  }
+
+  // Report the total length the append was trying to reach.
+  aDest.AllocFailed(aDest.Length() + aSource.Length());
+}
+
+/**
+ * Fallible variant: appends aSource to aDest through the
+ * LossyConvertEncoding8to16 sink.
+ *
+ * @param aSource   the 8-bit string to append
+ * @param aDest     the 16-bit string that receives the characters
+ * @param aFallible tag forwarded to the fallible SetLength overload
+ * @return true on success; false if the combined length is not
+ *         representable or aDest could not be resized, in which case aDest
+ *         is left untouched.
+ */
+bool
+AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest,
+                   const mozilla::fallible_t& aFallible)
+{
+  const uint32_t old_dest_length = aDest.Length();
+
+  // Compute the new length with overflow checking, in the same way
+  // AppendUTF16toUTF8 does, instead of relying on a plain addition that
+  // could wrap around.
+  mozilla::CheckedInt<nsAString::size_type> new_length(aSource.Length());
+  new_length += old_dest_length;
+
+  if (!new_length.isValid() ||
+      !aDest.SetLength(new_length.value(), aFallible)) {
+    return false;
+  }
+
+  nsACString::const_iterator fromBegin, fromEnd;
+
+  // Position a writing iterator at the first newly added character.
+  nsAString::iterator dest;
+  aDest.BeginWriting(dest);
+
+  dest.advance(old_dest_length);
+
+  // right now, this won't work on multi-fragment destinations
+  LossyConvertEncoding8to16 converter(dest.get());
+
+  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
+              converter);
+  return true;
+}
+
+/**
+ * Pointer overload: appends the lossy 8-bit conversion of the string
+ * aSource points to.  A null aSource is a no-op.
+ */
+void
+LossyAppendUTF16toASCII(const char16ptr_t aSource, nsACString& aDest)
+{
+  if (!aSource) {
+    return;
+  }
+
+  LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
+}
+
+/**
+ * Fallible pointer overload: appends the 16-bit widening of the string
+ * aSource points to.
+ *
+ * @return the result of the fallible string overload, or true when aSource
+ *         is null (nothing to append).
+ */
+bool
+AppendASCIItoUTF16(const char* aSource, nsAString& aDest,
+                   const mozilla::fallible_t& aFallible)
+{
+  if (!aSource) {
+    // Appending nothing trivially succeeds.
+    return true;
+  }
+
+  return AppendASCIItoUTF16(nsDependentCString(aSource), aDest, aFallible);
+}
+
+/**
+ * Pointer overload: appends the 16-bit widening of the string aSource
+ * points to.  A null aSource is a no-op.
+ */
+void
+AppendASCIItoUTF16(const char* aSource, nsAString& aDest)
+{
+  if (!aSource) {
+    return;
+  }
+
+  AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
+}
+
+/**
+ * Appends the UTF-8 encoding of aSource to aDest.  Runs the fallible
+ * overload and reports a failure through aDest.AllocFailed().
+ */
+void
+AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest)
+{
+  const bool appended = AppendUTF16toUTF8(aSource, aDest, mozilla::fallible);
+  if (appended) {
+    return;
+  }
+
+  // Note that this may wildly underestimate the allocation that failed, as
+  // we report the length of aSource as UTF-16 instead of UTF-8.
+  aDest.AllocFailed(aDest.Length() + aSource.Length());
+}
+
+/**
+ * Fallible variant: appends the UTF-8 encoding of aSource to aDest.
+ *
+ * Sources of at least kFastPathMinLength code units are first scanned with
+ * FirstNonASCII.  An all-ASCII source is appended with the lossy 16-to-8
+ * converter.  Otherwise the required UTF-8 size is calculated, aDest is
+ * grown once, and the source is converted; a sufficiently long leading
+ * ASCII run is still copied with the lossy converter.
+ *
+ * @return true on success; false if the new length is not representable or
+ *         aDest could not be grown.
+ */
+bool
+AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
+ const mozilla::fallible_t& aFallible)
+{
+ // At 16 characters analysis showed better performance of both the all ASCII
+ // and non-ASCII cases, so we limit calling |FirstNonASCII| to strings of
+ // that length.
+ const nsAString::size_type kFastPathMinLength = 16;
+
+ // 0 means "no known ASCII prefix"; it is what short sources, which skip the
+ // scan, are left with.
+ int32_t firstNonASCII = 0;
+ if (aSource.Length() >= kFastPathMinLength) {
+ firstNonASCII = FirstNonASCII(aSource.BeginReading(), aSource.EndReading());
+ }
+
+ if (firstNonASCII == -1) {
+ // This is all ASCII, we can use the more efficient lossy append.
+ mozilla::CheckedInt<nsACString::size_type> new_length(aSource.Length());
+ new_length += aDest.Length();
+
+ // Reserve the storage fallibly up front, before the append below.
+ if (!new_length.isValid() ||
+ !aDest.SetCapacity(new_length.value(), aFallible)) {
+ return false;
+ }
+
+ LossyAppendUTF16toASCII(aSource, aDest);
+ return true;
+ }
+
+ nsAString::const_iterator source_start, source_end;
+ CalculateUTF8Size calculator;
+ aSource.BeginReading(source_start);
+ aSource.EndReading(source_end);
+
+ // Skip the characters that we know are single byte.
+ source_start.advance(firstNonASCII);
+
+ copy_string(source_start,
+ source_end, calculator);
+
+ // Include the ASCII characters that were skipped in the count.
+ size_t count = calculator.Size() + firstNonASCII;
+
+ if (count) {
+ auto old_dest_length = aDest.Length();
+ // Grow the buffer if we need to.
+ mozilla::CheckedInt<nsACString::size_type> new_length(count);
+ new_length += old_dest_length;
+
+ if (!new_length.isValid() ||
+ !aDest.SetLength(new_length.value(), aFallible)) {
+ return false;
+ }
+
+ // All ready? Time to convert
+
+ // ascii_end starts at the beginning of the source and is only advanced
+ // when the ASCII prefix is copied by the lossy converter below.
+ nsAString::const_iterator ascii_end;
+ aSource.BeginReading(ascii_end);
+
+ if (firstNonASCII >= static_cast<int32_t>(kFastPathMinLength)) {
+ // Use the more efficient lossy converter for the ASCII portion.
+ LossyConvertEncoding16to8 lossy_converter(
+ aDest.BeginWriting() + old_dest_length);
+ nsAString::const_iterator ascii_start;
+ aSource.BeginReading(ascii_start);
+ ascii_end.advance(firstNonASCII);
+
+ copy_string(ascii_start, ascii_end, lossy_converter);
+ } else {
+ // Not using the lossy shortcut, we need to include the leading ASCII
+ // chars.
+ firstNonASCII = 0;
+ }
+
+ // Convert everything from ascii_end onwards, writing directly behind the
+ // bytes the lossy converter produced (if any).
+ ConvertUTF16toUTF8 converter(
+ aDest.BeginWriting() + old_dest_length + firstNonASCII);
+ copy_string(ascii_end,
+ aSource.EndReading(source_end), converter);
+
+ NS_ASSERTION(converter.Size() == count - firstNonASCII,
+ "Unexpected disparity between CalculateUTF8Size and "
+ "ConvertUTF16toUTF8");
+ }
+
+ return true;
+}
+
+/**
+ * Appends the UTF-16 form of the UTF-8 string aSource to aDest.  Runs the
+ * fallible overload and reports a failure through aDest.AllocFailed().
+ */
+void
+AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest)
+{
+  const bool appended = AppendUTF8toUTF16(aSource, aDest, mozilla::fallible);
+  if (appended) {
+    return;
+  }
+
+  // Report the total length the append was trying to reach.
+  aDest.AllocFailed(aDest.Length() + aSource.Length());
+}
+
+/**
+ * Fallible variant: appends the UTF-16 form of the UTF-8 string aSource to
+ * aDest.
+ *
+ * The required number of UTF-16 code units is calculated first, aDest is
+ * grown once, and the source is then converted into the new space.  If the
+ * converter reports an error, aDest is restored to its previous length and
+ * the function still returns true.
+ *
+ * @param aSource   the UTF-8 string to append
+ * @param aDest     the UTF-16 string that receives the converted text
+ * @param aFallible tag forwarded to the fallible SetLength overload
+ * @return false only if the new length is not representable or aDest could
+ *         not be grown; true otherwise.
+ */
+bool
+AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest,
+                  const mozilla::fallible_t& aFallible)
+{
+  nsACString::const_iterator source_start, source_end;
+  CalculateUTF8Length calculator;
+  copy_string(aSource.BeginReading(source_start),
+              aSource.EndReading(source_end), calculator);
+
+  const uint32_t count = calculator.Length();
+
+  // Avoid making the string mutable if we're appending an empty string
+  if (count) {
+    const uint32_t old_dest_length = aDest.Length();
+
+    // Grow the buffer if we need to.  The new length is computed with
+    // overflow checking, in the same way AppendUTF16toUTF8 does, instead of
+    // relying on a plain addition that could wrap around.
+    mozilla::CheckedInt<nsAString::size_type> new_length(count);
+    new_length += old_dest_length;
+
+    if (!new_length.isValid() ||
+        !aDest.SetLength(new_length.value(), aFallible)) {
+      return false;
+    }
+
+    // All ready? Time to convert
+
+    ConvertUTF8toUTF16 converter(aDest.BeginWriting() + old_dest_length);
+    copy_string(aSource.BeginReading(source_start),
+                aSource.EndReading(source_end), converter);
+
+    NS_ASSERTION(converter.ErrorEncountered() ||
+                 converter.Length() == count,
+                 "CalculateUTF8Length produced the wrong length");
+
+    if (converter.ErrorEncountered()) {
+      NS_ERROR("Input wasn't UTF8 or incorrect length was calculated");
+      // Drop whatever was written and restore the original contents.
+      aDest.SetLength(old_dest_length);
+    }
+  }
+
+  return true;
+}
+
+/**
+ * Pointer overload: appends the UTF-8 encoding of the string aSource points
+ * to.  A null aSource is a no-op.
+ */
+void
+AppendUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest)
+{
+  if (!aSource) {
+    return;
+  }
+
+  AppendUTF16toUTF8(nsDependentString(aSource), aDest);
+}
+
+/**
+ * Pointer overload: appends the UTF-16 form of the UTF-8 string aSource
+ * points to.  A null aSource is a no-op.
+ */
+void
+AppendUTF8toUTF16(const char* aSource, nsAString& aDest)
+{
+  if (!aSource) {
+    return;
+  }
+
+  AppendUTF8toUTF16(nsDependentCString(aSource), aDest);
+}
+
+
+/**
+ * Allocates a buffer of the desired character type that is big enough to
+ * hold a copy of the supplied string plus a zero terminator.  Nothing is
+ * written to the buffer.
+ *
+ * @param aSource the string you will eventually be making a copy of
+ * @param (unnamed) a typed pointer whose only purpose is to select the
+ *        character type of the result; its value is ignored
+ * @return a new buffer (of the type specified by the second parameter)
+ *         which you must free with |free|.
+ */
+template <class FromStringT, class ToCharT>
+inline
+ToCharT*
+AllocateStringCopy(const FromStringT& aSource, ToCharT*)
+{
+  // One extra element for the zero terminator.
+  const size_t byteCount = (aSource.Length() + 1) * sizeof(ToCharT);
+  return static_cast<ToCharT*>(moz_xmalloc(byteCount));
+}
+
+
+/**
+ * Returns a newly allocated, zero-terminated 8-bit copy of aSource, produced
+ * by the lossy 16-to-8 converter.  The buffer comes from AllocateStringCopy,
+ * so the caller must release it with |free|.
+ *
+ * @return the new buffer, or nullptr if the allocation returned null.
+ */
+char*
+ToNewCString(const nsAString& aSource)
+{
+  char* buffer = AllocateStringCopy(aSource, static_cast<char*>(nullptr));
+  if (!buffer) {
+    return nullptr;
+  }
+
+  nsAString::const_iterator sourceBegin, sourceEnd;
+  aSource.BeginReading(sourceBegin);
+  aSource.EndReading(sourceEnd);
+
+  LossyConvertEncoding16to8 converter(buffer);
+  copy_string(sourceBegin, sourceEnd, converter).write_terminator();
+  return buffer;
+}
+
+/**
+ * Returns a newly allocated, zero-terminated UTF-8 copy of aSource.
+ *
+ * @param aSource    the UTF-16 string to convert
+ * @param aUTF8Count if non-null, receives the calculated UTF-8 size (the
+ *                   terminator is not counted); it is written before the
+ *                   buffer is allocated
+ * @return the new buffer (allocated with moz_xmalloc), or nullptr if the
+ *         allocation returned null.
+ */
+char*
+ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count)
+{
+  nsAString::const_iterator sourceBegin, sourceEnd;
+
+  // First pass: work out how many bytes the UTF-8 form needs.
+  CalculateUTF8Size sizer;
+  copy_string(aSource.BeginReading(sourceBegin),
+              aSource.EndReading(sourceEnd), sizer);
+
+  if (aUTF8Count) {
+    *aUTF8Count = sizer.Size();
+  }
+
+  // One extra byte for the zero terminator.
+  char* buffer = static_cast<char*>(moz_xmalloc(sizer.Size() + 1));
+  if (!buffer) {
+    return nullptr;
+  }
+
+  // Second pass: perform the actual conversion into the new buffer.
+  ConvertUTF16toUTF8 converter(buffer);
+  copy_string(aSource.BeginReading(sourceBegin),
+              aSource.EndReading(sourceEnd), converter).write_terminator();
+  NS_ASSERTION(sizer.Size() == converter.Size(), "length mismatch");
+
+  return buffer;
+}
+
+/**
+ * Returns a newly allocated, zero-terminated copy of the 8-bit string
+ * aSource.  The buffer comes from AllocateStringCopy, so the caller must
+ * release it with |free|.
+ *
+ * @return the new buffer, or nullptr if the allocation returned null.
+ */
+char*
+ToNewCString(const nsACString& aSource)
+{
+  // No conversion is needed: allocate a buffer of the right length and copy
+  // the characters straight into it.
+  char* buffer = AllocateStringCopy(aSource, static_cast<char*>(nullptr));
+  if (!buffer) {
+    return nullptr;
+  }
+
+  nsACString::const_iterator sourceBegin, sourceEnd;
+  aSource.BeginReading(sourceBegin);
+  aSource.EndReading(sourceEnd);
+
+  // copy_string hands back the output position; terminate the copy there.
+  char* writeCursor = buffer;
+  *copy_string(sourceBegin, sourceEnd, writeCursor) = char(0);
+  return buffer;
+}
+
+/**
+ * Returns a newly allocated, zero-terminated copy of the 16-bit string
+ * aSource.  The buffer comes from AllocateStringCopy, so the caller must
+ * release it with |free|.
+ *
+ * @return the new buffer, or nullptr if the allocation returned null.
+ */
+char16_t*
+ToNewUnicode(const nsAString& aSource)
+{
+  // No conversion is needed: allocate a buffer of the right length and copy
+  // the characters straight into it.
+  char16_t* buffer =
+    AllocateStringCopy(aSource, static_cast<char16_t*>(nullptr));
+  if (!buffer) {
+    return nullptr;
+  }
+
+  nsAString::const_iterator sourceBegin, sourceEnd;
+  aSource.BeginReading(sourceBegin);
+  aSource.EndReading(sourceEnd);
+
+  // copy_string hands back the output position; terminate the copy there.
+  char16_t* writeCursor = buffer;
+  *copy_string(sourceBegin, sourceEnd, writeCursor) = char16_t(0);
+  return buffer;
+}
+
+/**
+ * Returns a newly allocated, zero-terminated 16-bit copy of the 8-bit string
+ * aSource, produced by the lossy 8-to-16 converter.  The buffer comes from
+ * AllocateStringCopy, so the caller must release it with |free|.
+ *
+ * @return the new buffer, or nullptr if the allocation returned null.
+ */
+char16_t*
+ToNewUnicode(const nsACString& aSource)
+{
+  char16_t* buffer =
+    AllocateStringCopy(aSource, static_cast<char16_t*>(nullptr));
+  if (!buffer) {
+    return nullptr;
+  }
+
+  nsACString::const_iterator sourceBegin, sourceEnd;
+  aSource.BeginReading(sourceBegin);
+  aSource.EndReading(sourceEnd);
+
+  LossyConvertEncoding8to16 converter(buffer);
+  copy_string(sourceBegin, sourceEnd, converter).write_terminator();
+  return buffer;
+}
+
+/**
+ * Runs aSource through CalculateUTF8Length and returns the length it
+ * reports, i.e. the number of UTF-16 code units a conversion of aSource is
+ * expected to produce.
+ */
+uint32_t
+CalcUTF8ToUnicodeLength(const nsACString& aSource)
+{
+  nsACString::const_iterator sourceBegin, sourceEnd;
+  aSource.BeginReading(sourceBegin);
+  aSource.EndReading(sourceEnd);
+
+  CalculateUTF8Length lengthCounter;
+  copy_string(sourceBegin, sourceEnd, lengthCounter);
+  return lengthCounter.Length();
+}
+
+/**
+ * Converts the UTF-8 string aSource into the caller-supplied buffer and
+ * zero-terminates the result.
+ *
+ * @param aSource     the UTF-8 string to convert
+ * @param aBuffer     destination buffer; no size is passed in, so the caller
+ *                    is responsible for making it large enough for the
+ *                    converted text plus the terminator
+ * @param aUTF16Count if non-null, receives the length reported by the
+ *                    converter
+ * @return aBuffer
+ */
+char16_t*
+UTF8ToUnicodeBuffer(const nsACString& aSource, char16_t* aBuffer,
+                    uint32_t* aUTF16Count)
+{
+  nsACString::const_iterator sourceBegin, sourceEnd;
+  aSource.BeginReading(sourceBegin);
+  aSource.EndReading(sourceEnd);
+
+  ConvertUTF8toUTF16 converter(aBuffer);
+  copy_string(sourceBegin, sourceEnd, converter).write_terminator();
+
+  if (aUTF16Count) {
+    *aUTF16Count = converter.Length();
+  }
+  return aBuffer;
+}
+
+/**
+ * Returns a newly allocated, zero-terminated UTF-16 conversion of the UTF-8
+ * string aSource.
+ *
+ * @param aSource     the UTF-8 string to convert
+ * @param aUTF16Count if non-null, receives the number of code units written
+ *                    (the terminator is not counted)
+ * @return the new buffer (allocated with moz_xmalloc), or nullptr if the
+ *         allocation returned null.
+ */
+char16_t*
+UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count)
+{
+  // Size the buffer from the calculated length plus the terminator.
+  const uint32_t expectedLength = CalcUTF8ToUnicodeLength(aSource);
+  const size_t byteCount = (expectedLength + 1) * sizeof(char16_t);
+
+  char16_t* result = static_cast<char16_t*>(moz_xmalloc(byteCount));
+  if (!result) {
+    return nullptr;
+  }
+
+  uint32_t written;
+  UTF8ToUnicodeBuffer(aSource, result, &written);
+  NS_ASSERTION(expectedLength == written, "length mismatch");
+
+  if (aUTF16Count) {
+    *aUTF16Count = written;
+  }
+  return result;
+}
+
+/**
+ * Copies aLength code units of aSource, starting at aSrcOffset, into aDest.
+ * No terminator is written and no bounds are checked here: the caller must
+ * ensure the requested range lies inside aSource and that aDest is large
+ * enough.
+ *
+ * @return aDest
+ */
+char16_t*
+CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset, char16_t* aDest,
+              uint32_t aLength)
+{
+  // Both iterators start at the beginning of the source and are then moved
+  // to the two ends of the requested range.
+  nsAString::const_iterator rangeBegin, rangeEnd;
+  aSource.BeginReading(rangeBegin).advance(int32_t(aSrcOffset));
+  aSource.BeginReading(rangeEnd).advance(int32_t(aSrcOffset + aLength));
+
+  // copy_string works on its own cursor so that aDest can be returned as is.
+  char16_t* writeCursor = aDest;
+  copy_string(rangeBegin, rangeEnd, writeCursor);
+  return aDest;
+}
+
+/**
+ * Replaces the contents of aDest with the characters in the range
+ * [aSrcStart, aSrcEnd).
+ */
+void
+CopyUnicodeTo(const nsAString::const_iterator& aSrcStart,
+              const nsAString::const_iterator& aSrcEnd,
+              nsAString& aDest)
+{
+  // Size the destination to exactly the length of the range.
+  aDest.SetLength(Distance(aSrcStart, aSrcEnd));
+
+  // Work on a copy of the start iterator; the parameter itself is const.
+  nsAString::const_iterator readCursor(aSrcStart);
+  nsAString::char_iterator writeCursor = aDest.BeginWriting();
+
+  copy_string(readCursor, aSrcEnd, writeCursor);
+}
+
+/**
+ * Appends the characters in the range [aSrcStart, aSrcEnd) to aDest.
+ */
+void
+AppendUnicodeTo(const nsAString::const_iterator& aSrcStart,
+                const nsAString::const_iterator& aSrcEnd,
+                nsAString& aDest)
+{
+  // Grow the destination by the length of the range.
+  const uint32_t lengthBefore = aDest.Length();
+  aDest.SetLength(lengthBefore + Distance(aSrcStart, aSrcEnd));
+
+  // Work on a copy of the start iterator; the parameter itself is const.
+  nsAString::const_iterator readCursor(aSrcStart);
+  // Write behind the characters that were already there.
+  nsAString::char_iterator writeCursor = aDest.BeginWriting() + lengthBefore;
+
+  copy_string(readCursor, aSrcEnd, writeCursor);
+}
+
+/**
+ * Returns true if every code unit of aString is in the ASCII range
+ * (no bit above 0x007F set).  An empty string is ASCII.
+ */
+bool
+IsASCII(const nsAString& aString)
+{
+  // Any bit outside the low seven marks a non-ASCII code unit.
+  static const char16_t NOT_ASCII = char16_t(~0x007F);
+
+  nsAString::const_iterator first, last;
+  aString.BeginReading(first);
+  aString.EndReading(last);
+
+  // Deliberately not |copy_string|: a plain loop lets us stop at the first
+  // non-ASCII character.
+  const char16_t* const end = last.get();
+  for (const char16_t* cur = first.get(); cur < end; ++cur) {
+    if (*cur & NOT_ASCII) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/**
+ * Returns true if every character of aString is in the ASCII range
+ * (high bit clear).  An empty string is ASCII.
+ */
+bool
+IsASCII(const nsACString& aString)
+{
+  // Any bit outside the low seven marks a non-ASCII character.
+  static const char NOT_ASCII = char(~0x7F);
+
+  nsACString::const_iterator first, last;
+  aString.BeginReading(first);
+  aString.EndReading(last);
+
+  // Deliberately not |copy_string|: a plain loop lets us stop at the first
+  // non-ASCII character.
+  const char* const end = last.get();
+  for (const char* cur = first.get(); cur < end; ++cur) {
+    if (*cur & NOT_ASCII) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/**
+ * Checks whether aString is well-formed UTF-8.
+ *
+ * The string is rejected for: continuation bytes where a lead byte is
+ * expected, overlong encodings, encoded surrogate code points, code points
+ * beyond 0x10FFFF, lead bytes above 0xF4, and a multi-byte sequence that is
+ * cut short by the end of the string.
+ *
+ * @param aString        the byte string to validate
+ * @param aRejectNonChar when true, sequences matching the non-character
+ *                       patterns noted in the comments below are rejected
+ *                       as well
+ * @return true if the whole string passed validation (an empty string
+ *         does), false otherwise.
+ */
+bool
+IsUTF8(const nsACString& aString, bool aRejectNonChar)
+{
+ nsReadingIterator<char> done_reading;
+ aString.EndReading(done_reading);
+
+ // state is the number of continuation bytes still expected for the
+ // sequence being decoded; 0 means a lead byte is expected next.
+ int32_t state = 0;
+ // overlong/surrogate constrain only the first continuation byte; both are
+ // cleared once that byte has been checked.
+ bool overlong = false;
+ bool surrogate = false;
+ bool nonchar = false;
+ uint16_t olupper = 0; // overlong byte upper bound.
+ uint16_t slower = 0; // surrogate byte lower bound.
+
+ nsReadingIterator<char> iter;
+ aString.BeginReading(iter);
+
+ const char* ptr = iter.get();
+ const char* end = done_reading.get();
+ while (ptr < end) {
+ uint8_t c;
+
+ if (0 == state) {
+ c = *ptr++;
+
+ if (UTF8traits::isASCII(c)) {
+ continue;
+ }
+
+ if (c <= 0xC1) { // [80-BF] where not expected, [C0-C1] for overlong.
+ return false;
+ } else if (UTF8traits::is2byte(c)) {
+ state = 1;
+ } else if (UTF8traits::is3byte(c)) {
+ state = 2;
+ if (c == 0xE0) { // to exclude E0[80-9F][80-BF]
+ overlong = true;
+ olupper = 0x9F;
+ } else if (c == 0xED) { // ED[A0-BF][80-BF] : surrogate codepoint
+ surrogate = true;
+ slower = 0xA0;
+ } else if (c == 0xEF) { // EF BF [BE-BF] : non-character
+ nonchar = true;
+ }
+ } else if (c <= 0xF4) { // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090)
+ state = 3;
+ nonchar = true;
+ if (c == 0xF0) { // to exclude F0[80-8F][80-BF]{2}
+ overlong = true;
+ olupper = 0x8F;
+ } else if (c == 0xF4) { // to exclude F4[90-BF][80-BF]
+ // actually not surrogates but codepoints beyond 0x10FFFF
+ surrogate = true;
+ slower = 0x90;
+ }
+ } else {
+ return false; // Not UTF-8 string
+ }
+ }
+
+ // Non-characters are only tracked when the caller asked to reject them.
+ if (nonchar && !aRejectNonChar) {
+ nonchar = false;
+ }
+
+ // Consume the continuation bytes of the current sequence.
+ while (ptr < end && state) {
+ c = *ptr++;
+ --state;
+
+ // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
+ if (nonchar &&
+ ((!state && c < 0xBE) ||
+ (state == 1 && c != 0xBF) ||
+ (state == 2 && 0x0F != (0x0F & c)))) {
+ nonchar = false;
+ }
+
+ if (!UTF8traits::isInSeq(c) || (overlong && c <= olupper) ||
+ (surrogate && slower <= c) || (nonchar && !state)) {
+ return false; // Not UTF-8 string
+ }
+
+ overlong = surrogate = false;
+ }
+ }
+ return !state; // state != 0 at the end indicates an invalid UTF-8 seq.
+}
+
+/**
+ * A character sink for in-place case conversion: upper-cases the ASCII
+ * letters 'a'..'z' directly inside the buffer it is handed.
+ */
+class ConvertToUpperCase
+{
+public:
+  typedef char value_type;
+
+  /**
+   * Upper-cases the first aSourceLength characters of aSource in place.
+   * The buffer is modified despite the const-qualified parameter type, so it
+   * must refer to writable storage.
+   *
+   * @return aSourceLength
+   */
+  uint32_t
+  write(const char* aSource, uint32_t aSourceLength)
+  {
+    char* const buffer = const_cast<char*>(aSource);
+    for (uint32_t i = 0; i < aSourceLength; ++i) {
+      const char ch = buffer[i];
+      if ('a' <= ch && ch <= 'z') {
+        buffer[i] = ch - ('a' - 'A');
+      }
+    }
+    return aSourceLength;
+  }
+};
+
+/**
+ * Upper-cases the ASCII letters of aCString in place.
+ */
+void
+ToUpperCase(nsCSubstring& aCString)
+{
+  char* writableData;
+  ConvertToUpperCase upperCaser;
+  // The sink rewrites the string's own buffer.
+  upperCaser.write(aCString.BeginWriting(writableData), aCString.Length());
+}
+
+/**
+ * A character sink for copying with case conversion: characters written to
+ * it are stored at the destination iterator with the ASCII letters 'a'..'z'
+ * upper-cased.
+ */
+class CopyToUpperCase
+{
+public:
+  typedef char value_type;
+
+  /**
+   * @param aDestIter iterator at the position to write to; it is held by
+   *                  reference and advanced as characters are written
+   * @param aEndIter  iterator marking the end of the writable range
+   */
+  explicit CopyToUpperCase(nsACString::iterator& aDestIter,
+                           const nsACString::iterator& aEndIter)
+    : mIter(aDestIter)
+    , mEnd(aEndIter)
+  {
+  }
+
+  /**
+   * Copies up to aSourceLength characters from aSource, upper-casing them on
+   * the way.  The count is clamped to the space left before the end
+   * iterator.
+   *
+   * @return the number of characters actually written
+   */
+  uint32_t
+  write(const char* aSource, uint32_t aSourceLength)
+  {
+    // Never write past the end of the destination.
+    const uint32_t len = XPCOM_MIN(uint32_t(mEnd - mIter), aSourceLength);
+    char* out = mIter.get();
+    for (uint32_t i = 0; i < len; ++i) {
+      const char ch = aSource[i];
+      if ('a' <= ch && ch <= 'z') {
+        out[i] = ch - ('a' - 'A');
+      } else {
+        out[i] = ch;
+      }
+    }
+    mIter.advance(len);
+    return len;
+  }
+
+protected:
+  nsACString::iterator& mIter;
+  const nsACString::iterator& mEnd;
+};
+
+/**
+ * Sets aDest to a copy of aSource in which the ASCII letters are
+ * upper-cased.
+ */
+void
+ToUpperCase(const nsACString& aSource, nsACString& aDest)
+{
+  // The destination gets exactly the length of the source.
+  aDest.SetLength(aSource.Length());
+
+  nsACString::iterator writeBegin, writeEnd;
+  CopyToUpperCase converter(aDest.BeginWriting(writeBegin),
+                            aDest.EndWriting(writeEnd));
+
+  nsACString::const_iterator readBegin, readEnd;
+  copy_string(aSource.BeginReading(readBegin), aSource.EndReading(readEnd),
+              converter);
+}
+
+/**
+ * A character sink for in-place case conversion: lower-cases the ASCII
+ * letters 'A'..'Z' directly inside the buffer it is handed.
+ */
+class ConvertToLowerCase
+{
+public:
+  typedef char value_type;
+
+  /**
+   * Lower-cases the first aSourceLength characters of aSource in place.
+   * The buffer is modified despite the const-qualified parameter type, so it
+   * must refer to writable storage.
+   *
+   * @return aSourceLength
+   */
+  uint32_t
+  write(const char* aSource, uint32_t aSourceLength)
+  {
+    char* const buffer = const_cast<char*>(aSource);
+    for (uint32_t i = 0; i < aSourceLength; ++i) {
+      const char ch = buffer[i];
+      if ('A' <= ch && ch <= 'Z') {
+        buffer[i] = ch + ('a' - 'A');
+      }
+    }
+    return aSourceLength;
+  }
+};
+
+/**
+ * Lower-cases the ASCII letters of aCString in place.
+ */
+void
+ToLowerCase(nsCSubstring& aCString)
+{
+  char* writableData;
+  ConvertToLowerCase lowerCaser;
+  // The sink rewrites the string's own buffer.
+  lowerCaser.write(aCString.BeginWriting(writableData), aCString.Length());
+}
+
+/**
+ * A character sink for copying with case conversion: characters written to
+ * it are stored at the destination iterator with the ASCII letters 'A'..'Z'
+ * lower-cased.
+ */
+class CopyToLowerCase
+{
+public:
+  typedef char value_type;
+
+  /**
+   * @param aDestIter iterator at the position to write to; it is held by
+   *                  reference and advanced as characters are written
+   * @param aEndIter  iterator marking the end of the writable range
+   */
+  explicit CopyToLowerCase(nsACString::iterator& aDestIter,
+                           const nsACString::iterator& aEndIter)
+    : mIter(aDestIter)
+    , mEnd(aEndIter)
+  {
+  }
+
+  /**
+   * Copies up to aSourceLength characters from aSource, lower-casing them on
+   * the way.  The count is clamped to the space left before the end
+   * iterator.
+   *
+   * @return the number of characters actually written
+   */
+  uint32_t
+  write(const char* aSource, uint32_t aSourceLength)
+  {
+    // Never write past the end of the destination.
+    const uint32_t len = XPCOM_MIN(uint32_t(mEnd - mIter), aSourceLength);
+    char* out = mIter.get();
+    for (uint32_t i = 0; i < len; ++i) {
+      const char ch = aSource[i];
+      if ('A' <= ch && ch <= 'Z') {
+        out[i] = ch + ('a' - 'A');
+      } else {
+        out[i] = ch;
+      }
+    }
+    mIter.advance(len);
+    return len;
+  }
+
+protected:
+  nsACString::iterator& mIter;
+  const nsACString::iterator& mEnd;
+};
+
+/**
+ * Sets aDest to a copy of aSource in which the ASCII letters are
+ * lower-cased.
+ */
+void
+ToLowerCase(const nsACString& aSource, nsACString& aDest)
+{
+  // The destination gets exactly the length of the source.
+  aDest.SetLength(aSource.Length());
+
+  nsACString::iterator writeBegin, writeEnd;
+  CopyToLowerCase converter(aDest.BeginWriting(writeBegin),
+                            aDest.EndWriting(writeEnd));
+
+  nsACString::const_iterator readBegin, readEnd;
+  copy_string(aSource.BeginReading(readBegin), aSource.EndReading(readEnd),
+              converter);
+}
+
+/**
+ * Splits aSource at every occurrence of aDelimiter and appends the non-empty
+ * pieces to aArray.  Empty pieces (leading, trailing or consecutive
+ * delimiters) are skipped.
+ *
+ * @return true on success.  If appending an element fails, every element
+ *         added by this call is removed again and false is returned.
+ */
+bool
+ParseString(const nsACString& aSource, char aDelimiter,
+            nsTArray<nsCString>& aArray)
+{
+  nsACString::const_iterator tokenStart, sourceEnd;
+  aSource.BeginReading(tokenStart);
+  aSource.EndReading(sourceEnd);
+
+  // Remembered so that a failed call can be rolled back.
+  const uint32_t lengthBefore = aArray.Length();
+
+  while (true) {
+    // Find the end of the current token: the next delimiter or the end of
+    // the source.
+    nsACString::const_iterator tokenEnd = tokenStart;
+    FindCharInReadable(aDelimiter, tokenEnd, sourceEnd);
+
+    if (tokenEnd != tokenStart &&
+        !aArray.AppendElement(Substring(tokenStart, tokenEnd))) {
+      aArray.RemoveElementsAt(lengthBefore, aArray.Length() - lengthBefore);
+      return false;
+    }
+
+    if (tokenEnd == sourceEnd) {
+      break;
+    }
+
+    // Step over the delimiter; stop if it was the last character.
+    ++tokenEnd;
+    tokenStart = tokenEnd;
+    if (tokenStart == sourceEnd) {
+      break;
+    }
+  }
+
+  return true;
+}
+
+/**
+ * Searches the range [aSearchStart, aSearchEnd) from left to right for the
+ * first occurrence of aPattern.
+ *
+ * aCompare is called on single characters and is treated as a mismatch test:
+ * a non-zero result means the two characters differ.
+ *
+ * On success the function returns true and the two iterators bracket the
+ * match: aSearchStart is at its first character and aSearchEnd just past its
+ * last.  On failure it returns false; if the range was non-empty,
+ * aSearchStart has been advanced to aSearchEnd.
+ *
+ * NOTE(review): the first character of aPattern is read without checking
+ * that the pattern is non-empty -- confirm that callers never pass an empty
+ * pattern.
+ */
+template <class StringT, class IteratorT, class Comparator>
+bool
+FindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart,
+ IteratorT& aSearchEnd, const Comparator& aCompare)
+{
+ bool found_it = false;
+
+ // only bother searching at all if we're given a non-empty range to search
+ if (aSearchStart != aSearchEnd) {
+ IteratorT aPatternStart, aPatternEnd;
+ aPattern.BeginReading(aPatternStart);
+ aPattern.EndReading(aPatternEnd);
+
+ // outer loop keeps searching till we find it or run out of string to search
+ while (!found_it) {
+ // fast inner loop (that's what it's called, not what it is) looks for a potential match
+ while (aSearchStart != aSearchEnd &&
+ aCompare(aPatternStart.get(), aSearchStart.get(), 1, 1)) {
+ ++aSearchStart;
+ }
+
+ // if we broke out of the `fast' loop because we're out of string ... we're done: no match
+ if (aSearchStart == aSearchEnd) {
+ break;
+ }
+
+ // otherwise, we're at a potential match, let's see if we really hit one
+ IteratorT testPattern(aPatternStart);
+ IteratorT testSearch(aSearchStart);
+
+ // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
+ for (;;) {
+ // we already compared the first character in the outer loop,
+ // so we'll advance before the next comparison
+ ++testPattern;
+ ++testSearch;
+
+ // if we verified all the way to the end of the pattern, then we found it!
+ if (testPattern == aPatternEnd) {
+ found_it = true;
+ aSearchEnd = testSearch; // return the exact found range through the parameters
+ break;
+ }
+
+ // if we got to end of the string we're searching before we hit the end of the
+ // pattern, we'll never find what we're looking for
+ if (testSearch == aSearchEnd) {
+ aSearchStart = aSearchEnd;
+ break;
+ }
+
+ // else if we mismatched ... it's time to advance to the next search position
+ // and get back into the `fast' loop
+ if (aCompare(testPattern.get(), testSearch.get(), 1, 1)) {
+ ++aSearchStart;
+ break;
+ }
+ }
+ }
+ }
+
+ return found_it;
+}
+
+/**
+ * This searches the entire string from right to left, and returns the first match found, if any.
+ *
+ * aCompare is called on single characters; a result of 0 means the two
+ * characters match.
+ *
+ * On success the function returns true and the two iterators bracket the
+ * match: aSearchStart is at its first character and aSearchEnd just past its
+ * last.  On failure it returns false and aSearchStart is set to aSearchEnd.
+ *
+ * NOTE(review): the pattern's end iterator is decremented without checking
+ * that the pattern is non-empty -- confirm that callers never pass an empty
+ * pattern.
+ */
+template <class StringT, class IteratorT, class Comparator>
+bool
+RFindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart,
+ IteratorT& aSearchEnd, const Comparator& aCompare)
+{
+ IteratorT patternStart, patternEnd, searchEnd = aSearchEnd;
+ aPattern.BeginReading(patternStart);
+ aPattern.EndReading(patternEnd);
+
+ // Point to the last character in the pattern
+ --patternEnd;
+ // outer loop keeps searching till we run out of string to search
+ while (aSearchStart != searchEnd) {
+ // Point to the end position of the next possible match
+ --searchEnd;
+
+ // Check last character, if a match, explore further from here
+ if (aCompare(patternEnd.get(), searchEnd.get(), 1, 1) == 0) {
+ // We're at a potential match, let's see if we really hit one
+ IteratorT testPattern(patternEnd);
+ IteratorT testSearch(searchEnd);
+
+ // inner loop verifies the potential match at the current position
+ do {
+ // if we verified all the way to the end of the pattern, then we found it!
+ if (testPattern == patternStart) {
+ aSearchStart = testSearch; // point to start of match
+ aSearchEnd = ++searchEnd; // point to end of match
+ return true;
+ }
+
+ // if we got to end of the string we're searching before we hit the end of the
+ // pattern, we'll never find what we're looking for
+ if (testSearch == aSearchStart) {
+ aSearchStart = aSearchEnd;
+ return false;
+ }
+
+ // test previous character for a match
+ --testPattern;
+ --testSearch;
+ } while (aCompare(testPattern.get(), testSearch.get(), 1, 1) == 0);
+ }
+ }
+
+ // Ran out of string without finding a match.
+ aSearchStart = aSearchEnd;
+ return false;
+}
+
+/**
+ * Forward search for aPattern in the 16-bit range
+ * [aSearchStart, aSearchEnd), using aComparator to compare characters.
+ * The iterators are updated, and the result is produced, by
+ * FindInReadable_Impl.
+ */
+bool
+FindInReadable(const nsAString& aPattern,
+               nsAString::const_iterator& aSearchStart,
+               nsAString::const_iterator& aSearchEnd,
+               const nsStringComparator& aComparator)
+{
+  // All of the work happens in the shared template.
+  return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
+}
+
+/**
+ * Forward search for aPattern in the 8-bit range
+ * [aSearchStart, aSearchEnd), using aComparator to compare characters.
+ * The iterators are updated, and the result is produced, by
+ * FindInReadable_Impl.
+ */
+bool
+FindInReadable(const nsACString& aPattern,
+               nsACString::const_iterator& aSearchStart,
+               nsACString::const_iterator& aSearchEnd,
+               const nsCStringComparator& aComparator)
+{
+  // All of the work happens in the shared template.
+  return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
+}
+
+/**
+ * Forward search for aPattern in the 8-bit range
+ * [aSearchStart, aSearchEnd), comparing characters with
+ * nsCaseInsensitiveCStringComparator.  The iterators are updated, and the
+ * result is produced, by FindInReadable_Impl.
+ */
+bool
+CaseInsensitiveFindInReadable(const nsACString& aPattern,
+                              nsACString::const_iterator& aSearchStart,
+                              nsACString::const_iterator& aSearchEnd)
+{
+  // Same search as FindInReadable, with the comparator fixed.
+  return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd,
+                             nsCaseInsensitiveCStringComparator());
+}
+
+/**
+ * Right-to-left search for aPattern in the 16-bit range
+ * [aSearchStart, aSearchEnd), using aComparator to compare characters.
+ * The iterators are updated, and the result is produced, by
+ * RFindInReadable_Impl.
+ */
+bool
+RFindInReadable(const nsAString& aPattern,
+                nsAString::const_iterator& aSearchStart,
+                nsAString::const_iterator& aSearchEnd,
+                const nsStringComparator& aComparator)
+{
+  // All of the work happens in the shared template.
+  return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
+}
+
+/**
+ * Right-to-left search for aPattern in the 8-bit range
+ * [aSearchStart, aSearchEnd), using aComparator to compare characters.
+ * The iterators are updated, and the result is produced, by
+ * RFindInReadable_Impl.
+ */
+bool
+RFindInReadable(const nsACString& aPattern,
+                nsACString::const_iterator& aSearchStart,
+                nsACString::const_iterator& aSearchEnd,
+                const nsCStringComparator& aComparator)
+{
+  // All of the work happens in the shared template.
+  return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
+}
+
+/**
+ * Searches [aSearchStart, aSearchEnd) for the first occurrence of aChar.
+ *
+ * @return true if aChar was found, with aSearchStart advanced to it;
+ *         false otherwise, with aSearchStart advanced to aSearchEnd.
+ */
+bool
+FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart,
+                   const nsAString::const_iterator& aSearchEnd)
+{
+  const char16_t* const rangeBegin = aSearchStart.get();
+  const int32_t rangeLength = aSearchEnd.get() - rangeBegin;
+
+  const char16_t* const hit =
+    nsCharTraits<char16_t>::find(rangeBegin, rangeLength, aChar);
+  if (!hit) {
+    // Not found: leave the iterator at the end of the range.
+    aSearchStart.advance(rangeLength);
+    return false;
+  }
+
+  aSearchStart.advance(hit - rangeBegin);
+  return true;
+}
+
+/**
+ * Searches [aSearchStart, aSearchEnd) for the first occurrence of aChar.
+ *
+ * @return true if aChar was found, with aSearchStart advanced to it;
+ *         false otherwise, with aSearchStart advanced to aSearchEnd.
+ */
+bool
+FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart,
+                   const nsACString::const_iterator& aSearchEnd)
+{
+  const char* const rangeBegin = aSearchStart.get();
+  const int32_t rangeLength = aSearchEnd.get() - rangeBegin;
+
+  const char* const hit =
+    nsCharTraits<char>::find(rangeBegin, rangeLength, aChar);
+  if (!hit) {
+    // Not found: leave the iterator at the end of the range.
+    aSearchStart.advance(rangeLength);
+    return false;
+  }
+
+  aSearchStart.advance(hit - rangeBegin);
+  return true;
+}
+
+/**
+ * Returns the number of code units in aStr that are equal to aChar.
+ */
+uint32_t
+CountCharInReadable(const nsAString& aStr, char16_t aChar)
+{
+  nsAString::const_iterator cursor, last;
+  aStr.BeginReading(cursor);
+  aStr.EndReading(last);
+
+  uint32_t matches = 0;
+  for (; cursor != last; ++cursor) {
+    if (*cursor == aChar) {
+      ++matches;
+    }
+  }
+
+  return matches;
+}
+
+/**
+ * Returns the number of characters in aStr that are equal to aChar.
+ */
+uint32_t
+CountCharInReadable(const nsACString& aStr, char aChar)
+{
+  nsACString::const_iterator cursor, last;
+  aStr.BeginReading(cursor);
+  aStr.EndReading(last);
+
+  uint32_t matches = 0;
+  for (; cursor != last; ++cursor) {
+    if (*cursor == aChar) {
+      ++matches;
+    }
+  }
+
+  return matches;
+}
+
+/**
+ * Returns true if aSource starts with aSubstring.  A substring longer than
+ * the source never matches.
+ */
+bool
+StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring)
+{
+  const nsAString::size_type sourceLength = aSource.Length();
+  const nsAString::size_type prefixLength = aSubstring.Length();
+
+  // Only compare when the source is long enough to contain the prefix.
+  return prefixLength <= sourceLength &&
+         Substring(aSource, 0, prefixLength).Equals(aSubstring);
+}
+
+/**
+ * Returns true if aSource starts with aSubstring, comparing with
+ * aComparator.  A substring longer than the source never matches.
+ */
+bool
+StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring,
+                 const nsStringComparator& aComparator)
+{
+  const nsAString::size_type sourceLength = aSource.Length();
+  const nsAString::size_type prefixLength = aSubstring.Length();
+
+  // Only compare when the source is long enough to contain the prefix.
+  return prefixLength <= sourceLength &&
+         Substring(aSource, 0, prefixLength).Equals(aSubstring, aComparator);
+}
+
+/**
+ * Returns true if aSource starts with aSubstring.  A substring longer than
+ * the source never matches.
+ */
+bool
+StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring)
+{
+  const nsACString::size_type sourceLength = aSource.Length();
+  const nsACString::size_type prefixLength = aSubstring.Length();
+
+  // Only compare when the source is long enough to contain the prefix.
+  return prefixLength <= sourceLength &&
+         Substring(aSource, 0, prefixLength).Equals(aSubstring);
+}
+
+/**
+ * Returns true if aSource starts with aSubstring, comparing with
+ * aComparator.  A substring longer than the source never matches.
+ */
+bool
+StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring,
+                 const nsCStringComparator& aComparator)
+{
+  const nsACString::size_type sourceLength = aSource.Length();
+  const nsACString::size_type prefixLength = aSubstring.Length();
+
+  // Only compare when the source is long enough to contain the prefix.
+  return prefixLength <= sourceLength &&
+         Substring(aSource, 0, prefixLength).Equals(aSubstring, aComparator);
+}
+
+/**
+ * Returns true if aSource ends with aSubstring.  A substring longer than the
+ * source never matches.
+ */
+bool
+StringEndsWith(const nsAString& aSource, const nsAString& aSubstring)
+{
+  const nsAString::size_type sourceLength = aSource.Length();
+  const nsAString::size_type suffixLength = aSubstring.Length();
+
+  // Only compare when the source is long enough to contain the suffix; the
+  // check also keeps the start offset below from underflowing.
+  return suffixLength <= sourceLength &&
+         Substring(aSource, sourceLength - suffixLength,
+                   suffixLength).Equals(aSubstring);
+}
+
+/**
+ * Returns true if aSource ends with aSubstring, comparing with aComparator.
+ * A substring longer than the source never matches.
+ */
+bool
+StringEndsWith(const nsAString& aSource, const nsAString& aSubstring,
+               const nsStringComparator& aComparator)
+{
+  const nsAString::size_type sourceLength = aSource.Length();
+  const nsAString::size_type suffixLength = aSubstring.Length();
+
+  // Only compare when the source is long enough to contain the suffix; the
+  // check also keeps the start offset below from underflowing.
+  return suffixLength <= sourceLength &&
+         Substring(aSource, sourceLength - suffixLength,
+                   suffixLength).Equals(aSubstring, aComparator);
+}
+
+/**
+ * Returns true if aSource ends with aSubstring.  A substring longer than the
+ * source never matches.
+ */
+bool
+StringEndsWith(const nsACString& aSource, const nsACString& aSubstring)
+{
+  const nsACString::size_type sourceLength = aSource.Length();
+  const nsACString::size_type suffixLength = aSubstring.Length();
+
+  // Only compare when the source is long enough to contain the suffix; the
+  // check also keeps the start offset below from underflowing.
+  return suffixLength <= sourceLength &&
+         Substring(aSource, sourceLength - suffixLength,
+                   suffixLength).Equals(aSubstring);
+}
+
+/**
+ * Returns true if aSource ends with aSubstring, comparing with aComparator.
+ * A substring longer than the source never matches.
+ */
+bool
+StringEndsWith(const nsACString& aSource, const nsACString& aSubstring,
+               const nsCStringComparator& aComparator)
+{
+  const nsACString::size_type sourceLength = aSource.Length();
+  const nsACString::size_type suffixLength = aSubstring.Length();
+
+  // Only compare when the source is long enough to contain the suffix; the
+  // check also keeps the start offset below from underflowing.
+  return suffixLength <= sourceLength &&
+         Substring(aSource, sourceLength - suffixLength,
+                   suffixLength).Equals(aSubstring, aComparator);
+}
+
+
+
+// A single zero code unit that backs both empty-string singletons below:
+// EmptyString() uses it as is, EmptyCString() views it as a char buffer.
+static const char16_t empty_buffer[1] = { '\0' };
+
+/**
+ * Returns a reference to a shared empty 16-bit string.  The string is a
+ * function-local static that depends on empty_buffer, so it is created on
+ * first use.
+ */
+const nsAFlatString&
+EmptyString()
+{
+  // Constructed once, on the first call.
+  static const nsDependentString sEmpty(empty_buffer);
+
+  return sEmpty;
+}
+
+/**
+ * Returns a reference to a shared empty 8-bit string.  The string is a
+ * function-local static that depends on empty_buffer, so it is created on
+ * first use.
+ */
+const nsAFlatCString&
+EmptyCString()
+{
+  // empty_buffer holds a zero char16_t; all of its bytes are zero, so it
+  // also serves as an empty zero-terminated char string.
+  static const nsDependentCString sEmpty(
+    reinterpret_cast<const char*>(empty_buffer));
+
+  return sEmpty;
+}
+
+/**
+ * Returns a reference to a shared, default-constructed nsXPIDLString.  The
+ * string is a function-local static, so it is created on first use.
+ */
+const nsAFlatString&
+NullString()
+{
+  // Constructed once, on the first call.
+  static const nsXPIDLString sNull;
+
+  return sNull;
+}
+
+/**
+ * Returns a reference to a shared, default-constructed nsXPIDLCString.  The
+ * string is a function-local static, so it is created on first use.
+ */
+const nsAFlatCString&
+NullCString()
+{
+  // Constructed once, on the first call.
+  static const nsXPIDLCString sNull;
+
+  return sNull;
+}
+
+/**
+ * Compares a UTF-8 string with a UTF-16 string, character by character,
+ * without converting either of them.
+ *
+ * @param aUTF8String  the UTF-8 side of the comparison
+ * @param aUTF16String the UTF-16 side of the comparison
+ * @return 0 if the strings match; -1 or 1 if, at the first difference, the
+ *         UTF-8 character is smaller or larger than the UTF-16 one, or if
+ *         the UTF-8 string is respectively the shorter or the longer of the
+ *         two; INT32_MIN if decoding the UTF-8 string reports an error.
+ */
+int32_t
+CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
+ const nsASingleFragmentString& aUTF16String)
+{
+ static const uint32_t NOT_ASCII = uint32_t(~0x7F);
+
+ const char* u8;
+ const char* u8end;
+ aUTF8String.BeginReading(u8);
+ aUTF8String.EndReading(u8end);
+
+ const char16_t* u16;
+ const char16_t* u16end;
+ aUTF16String.BeginReading(u16);
+ aUTF16String.EndReading(u16end);
+
+ while (u8 != u8end && u16 != u16end) {
+ // Cast away the signedness of *u8 to prevent sign extension when
+ // converting to uint32_t
+ uint32_t c8_32 = (uint8_t)*u8;
+
+ if (c8_32 & NOT_ASCII) {
+ // Multi-byte UTF-8 sequence: decode a full character on both sides.
+ bool err;
+ c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err);
+ if (err) {
+ return INT32_MIN;
+ }
+
+ uint32_t c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end);
+ // The above UTF16CharEnumerator::NextChar() call can fail, but
+ // if it does so for anything other than having no data to look
+ // at (which can't happen here), it returns the Unicode
+ // replacement character 0xFFFD for the invalid data it was
+ // fed. Ignore that error and treat invalid UTF16 as 0xFFFD.
+ //
+ // This matches what our UTF16 to UTF8 conversion code
+ // does, and thus a UTF8 string that came from an invalid
+ // UTF16 string will compare equal to the invalid UTF16
+ // string it came from. The same is true for any other UTF16
+ // string that differs only in the invalid part of the string.
+
+ if (c8_32 != c16_32) {
+ return c8_32 < c16_32 ? -1 : 1;
+ }
+ } else {
+ // ASCII on the UTF-8 side: compare directly with the UTF-16 code unit.
+ if (c8_32 != *u16) {
+ return c8_32 > *u16 ? 1 : -1;
+ }
+
+ ++u8;
+ ++u16;
+ }
+ }
+
+ if (u8 != u8end) {
+ // We got to the end of the UTF16 string, but not to the end of
+ // the UTF8 string. The UTF8 string is longer than the UTF16
+ // string
+
+ return 1;
+ }
+
+ if (u16 != u16end) {
+ // We got to the end of the UTF8 string, but not to the end of
+ // the UTF16 string. The UTF16 string is longer than the UTF8
+ // string
+
+ return -1;
+ }
+
+ // The two strings match.
+
+ return 0;
+}
+
+/**
+ * Appends the UCS4 character aSource to aDest as UTF-16: a single code unit
+ * if it lies in the BMP, otherwise a high/low surrogate pair.  aSource is
+ * asserted to be a valid character.
+ */
+void
+AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest)
+{
+  NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");
+
+  if (IS_IN_BMP(aSource)) {
+    aDest.Append(char16_t(aSource));
+    return;
+  }
+
+  // Outside the BMP: emit the high surrogate followed by the low one.
+  aDest.Append(H_SURROGATE(aSource));
+  aDest.Append(L_SURROGATE(aSource));
+}
+
+// Entry points with C linkage that forward to the C++ conversion functions
+// above.  Both pointers are dereferenced without a null check.
+extern "C" {
+
+/**
+ * Appends the UTF-8 encoding of *aOther to *aThis.
+ */
+void Gecko_AppendUTF16toCString(nsACString* aThis, const nsAString* aOther)
+{
+  const nsAString& source = *aOther;
+  AppendUTF16toUTF8(source, *aThis);
+}
+
+/**
+ * Appends the UTF-16 form of the UTF-8 string *aOther to *aThis.
+ */
+void Gecko_AppendUTF8toString(nsAString* aThis, const nsACString* aOther)
+{
+  const nsACString& source = *aOther;
+  AppendUTF8toUTF16(source, *aThis);
+}
+
+}