diff options
Diffstat (limited to 'xpcom/string/nsReadableUtils.cpp')
-rw-r--r-- | xpcom/string/nsReadableUtils.cpp | 1383 |
1 files changed, 1383 insertions, 0 deletions
diff --git a/xpcom/string/nsReadableUtils.cpp b/xpcom/string/nsReadableUtils.cpp new file mode 100644 index 000000000..524b1d7fe --- /dev/null +++ b/xpcom/string/nsReadableUtils.cpp @@ -0,0 +1,1383 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsReadableUtils.h" +#include "nsReadableUtilsImpl.h" + +#include <algorithm> + +#include "mozilla/CheckedInt.h" + +#include "nscore.h" +#include "nsMemory.h" +#include "nsString.h" +#include "nsTArray.h" +#include "nsUTF8Utils.h" + +using mozilla::IsASCII; + +/** + * Fallback implementation for finding the first non-ASCII character in a + * UTF-16 string. + */ +static inline int32_t +FirstNonASCIIUnvectorized(const char16_t* aBegin, const char16_t* aEnd) +{ + typedef mozilla::NonASCIIParameters<sizeof(size_t)> p; + const size_t kMask = p::mask(); + const uintptr_t kAlignMask = p::alignMask(); + const size_t kNumUnicharsPerWord = p::numUnicharsPerWord(); + + const char16_t* idx = aBegin; + + // Align ourselves to a word boundary. + for (; idx != aEnd && ((uintptr_t(idx) & kAlignMask) != 0); idx++) { + if (!IsASCII(*idx)) { + return idx - aBegin; + } + } + + // Check one word at a time. + const char16_t* wordWalkEnd = mozilla::aligned(aEnd, kAlignMask); + for (; idx != wordWalkEnd; idx += kNumUnicharsPerWord) { + const size_t word = *reinterpret_cast<const size_t*>(idx); + if (word & kMask) { + return idx - aBegin; + } + } + + // Take care of the remainder one character at a time. + for (; idx != aEnd; idx++) { + if (!IsASCII(*idx)) { + return idx - aBegin; + } + } + + return -1; +} + +/* + * This function returns -1 if all characters in str are ASCII characters. 
+ * Otherwise, it returns a value less than or equal to the index of the first + * ASCII character in str. For example, if first non-ASCII character is at + * position 25, it may return 25, 24, or 16. But it guarantees + * there are only ASCII characters before returned value. + */ +static inline int32_t +FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd) +{ +#ifdef MOZILLA_MAY_SUPPORT_SSE2 + if (mozilla::supports_sse2()) { + return mozilla::SSE2::FirstNonASCII(aBegin, aEnd); + } +#endif + + return FirstNonASCIIUnvectorized(aBegin, aEnd); +} + +void +LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest) +{ + aDest.Truncate(); + LossyAppendUTF16toASCII(aSource, aDest); +} + +void +CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest) +{ + aDest.Truncate(); + AppendASCIItoUTF16(aSource, aDest); +} + +void +LossyCopyUTF16toASCII(const char16ptr_t aSource, nsACString& aDest) +{ + aDest.Truncate(); + if (aSource) { + LossyAppendUTF16toASCII(nsDependentString(aSource), aDest); + } +} + +void +CopyASCIItoUTF16(const char* aSource, nsAString& aDest) +{ + aDest.Truncate(); + if (aSource) { + AppendASCIItoUTF16(nsDependentCString(aSource), aDest); + } +} + +void +CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest) +{ + if (!CopyUTF16toUTF8(aSource, aDest, mozilla::fallible)) { + // Note that this may wildly underestimate the allocation that failed, as + // we report the length of aSource as UTF-16 instead of UTF-8. 
+ aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +bool +CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest, + const mozilla::fallible_t& aFallible) +{ + aDest.Truncate(); + if (!AppendUTF16toUTF8(aSource, aDest, aFallible)) { + return false; + } + return true; +} + +void +CopyUTF8toUTF16(const nsACString& aSource, nsAString& aDest) +{ + aDest.Truncate(); + AppendUTF8toUTF16(aSource, aDest); +} + +void +CopyUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest) +{ + aDest.Truncate(); + AppendUTF16toUTF8(aSource, aDest); +} + +void +CopyUTF8toUTF16(const char* aSource, nsAString& aDest) +{ + aDest.Truncate(); + AppendUTF8toUTF16(aSource, aDest); +} + +void +LossyAppendUTF16toASCII(const nsAString& aSource, nsACString& aDest) +{ + uint32_t old_dest_length = aDest.Length(); + aDest.SetLength(old_dest_length + aSource.Length()); + + nsAString::const_iterator fromBegin, fromEnd; + + nsACString::iterator dest; + aDest.BeginWriting(dest); + + dest.advance(old_dest_length); + + // right now, this won't work on multi-fragment destinations + LossyConvertEncoding16to8 converter(dest.get()); + + copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), + converter); +} + +void +AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest) +{ + if (!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible)) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +bool +AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest, + const mozilla::fallible_t& aFallible) +{ + uint32_t old_dest_length = aDest.Length(); + if (!aDest.SetLength(old_dest_length + aSource.Length(), + aFallible)) { + return false; + } + + nsACString::const_iterator fromBegin, fromEnd; + + nsAString::iterator dest; + aDest.BeginWriting(dest); + + dest.advance(old_dest_length); + + // right now, this won't work on multi-fragment destinations + LossyConvertEncoding8to16 converter(dest.get()); + + copy_string(aSource.BeginReading(fromBegin), 
aSource.EndReading(fromEnd), + converter); + return true; +} + +void +LossyAppendUTF16toASCII(const char16ptr_t aSource, nsACString& aDest) +{ + if (aSource) { + LossyAppendUTF16toASCII(nsDependentString(aSource), aDest); + } +} + +bool +AppendASCIItoUTF16(const char* aSource, nsAString& aDest, const mozilla::fallible_t& aFallible) +{ + if (aSource) { + return AppendASCIItoUTF16(nsDependentCString(aSource), aDest, aFallible); + } + + return true; +} + +void +AppendASCIItoUTF16(const char* aSource, nsAString& aDest) +{ + if (aSource) { + AppendASCIItoUTF16(nsDependentCString(aSource), aDest); + } +} + +void +AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest) +{ + if (!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible)) { + // Note that this may wildly underestimate the allocation that failed, as + // we report the length of aSource as UTF-16 instead of UTF-8. + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +bool +AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest, + const mozilla::fallible_t& aFallible) +{ + // At 16 characters analysis showed better performance of both the all ASCII + // and non-ASCII cases, so we limit calling |FirstNonASCII| to strings of + // that length. + const nsAString::size_type kFastPathMinLength = 16; + + int32_t firstNonASCII = 0; + if (aSource.Length() >= kFastPathMinLength) { + firstNonASCII = FirstNonASCII(aSource.BeginReading(), aSource.EndReading()); + } + + if (firstNonASCII == -1) { + // This is all ASCII, we can use the more efficient lossy append. 
+ mozilla::CheckedInt<nsACString::size_type> new_length(aSource.Length()); + new_length += aDest.Length(); + + if (!new_length.isValid() || + !aDest.SetCapacity(new_length.value(), aFallible)) { + return false; + } + + LossyAppendUTF16toASCII(aSource, aDest); + return true; + } + + nsAString::const_iterator source_start, source_end; + CalculateUTF8Size calculator; + aSource.BeginReading(source_start); + aSource.EndReading(source_end); + + // Skip the characters that we know are single byte. + source_start.advance(firstNonASCII); + + copy_string(source_start, + source_end, calculator); + + // Include the ASCII characters that were skipped in the count. + size_t count = calculator.Size() + firstNonASCII; + + if (count) { + auto old_dest_length = aDest.Length(); + // Grow the buffer if we need to. + mozilla::CheckedInt<nsACString::size_type> new_length(count); + new_length += old_dest_length; + + if (!new_length.isValid() || + !aDest.SetLength(new_length.value(), aFallible)) { + return false; + } + + // All ready? Time to convert + + nsAString::const_iterator ascii_end; + aSource.BeginReading(ascii_end); + + if (firstNonASCII >= static_cast<int32_t>(kFastPathMinLength)) { + // Use the more efficient lossy converter for the ASCII portion. + LossyConvertEncoding16to8 lossy_converter( + aDest.BeginWriting() + old_dest_length); + nsAString::const_iterator ascii_start; + aSource.BeginReading(ascii_start); + ascii_end.advance(firstNonASCII); + + copy_string(ascii_start, ascii_end, lossy_converter); + } else { + // Not using the lossy shortcut, we need to include the leading ASCII + // chars. 
+ firstNonASCII = 0; + } + + ConvertUTF16toUTF8 converter( + aDest.BeginWriting() + old_dest_length + firstNonASCII); + copy_string(ascii_end, + aSource.EndReading(source_end), converter); + + NS_ASSERTION(converter.Size() == count - firstNonASCII, + "Unexpected disparity between CalculateUTF8Size and " + "ConvertUTF16toUTF8"); + } + + return true; +} + +void +AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest) +{ + if (!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible)) { + aDest.AllocFailed(aDest.Length() + aSource.Length()); + } +} + +bool +AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest, + const mozilla::fallible_t& aFallible) +{ + nsACString::const_iterator source_start, source_end; + CalculateUTF8Length calculator; + copy_string(aSource.BeginReading(source_start), + aSource.EndReading(source_end), calculator); + + uint32_t count = calculator.Length(); + + // Avoid making the string mutable if we're appending an empty string + if (count) { + uint32_t old_dest_length = aDest.Length(); + + // Grow the buffer if we need to. + if (!aDest.SetLength(old_dest_length + count, aFallible)) { + return false; + } + + // All ready? 
Time to convert + + ConvertUTF8toUTF16 converter(aDest.BeginWriting() + old_dest_length); + copy_string(aSource.BeginReading(source_start), + aSource.EndReading(source_end), converter); + + NS_ASSERTION(converter.ErrorEncountered() || + converter.Length() == count, + "CalculateUTF8Length produced the wrong length"); + + if (converter.ErrorEncountered()) { + NS_ERROR("Input wasn't UTF8 or incorrect length was calculated"); + aDest.SetLength(old_dest_length); + } + } + + return true; +} + +void +AppendUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest) +{ + if (aSource) { + AppendUTF16toUTF8(nsDependentString(aSource), aDest); + } +} + +void +AppendUTF8toUTF16(const char* aSource, nsAString& aDest) +{ + if (aSource) { + AppendUTF8toUTF16(nsDependentCString(aSource), aDest); + } +} + + +/** + * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator). + * + * @param aSource an string you will eventually be making a copy of + * @return a new buffer (of the type specified by the second parameter) which you must free with |free|. 
 *
 * The second parameter is unnamed; it is only used to select ToCharT.
 */
template <class FromStringT, class ToCharT>
inline
ToCharT*
AllocateStringCopy(const FromStringT& aSource, ToCharT*)
{
  return static_cast<ToCharT*>(moz_xmalloc(
    (aSource.Length() + 1) * sizeof(ToCharT)));
}


/**
 * Returns a newly allocated, zero-terminated char buffer holding aSource
 * converted through LossyConvertEncoding16to8.
 */
char*
ToNewCString(const nsAString& aSource)
{
  char* result = AllocateStringCopy(aSource, (char*)0);
  if (!result) {
    return nullptr;
  }

  nsAString::const_iterator fromBegin, fromEnd;
  LossyConvertEncoding16to8 converter(result);
  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
              converter).write_terminator();
  return result;
}

/**
 * Returns a newly allocated, zero-terminated UTF-8 copy of aSource.
 *
 * @param aUTF8Count if non-null, receives the computed UTF-8 size (without
 *                   the terminator); it is written before the allocation.
 */
char*
ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count)
{
  nsAString::const_iterator start, end;
  // First pass: measure how many bytes the UTF-8 form needs.
  CalculateUTF8Size calculator;
  copy_string(aSource.BeginReading(start), aSource.EndReading(end),
              calculator);

  if (aUTF8Count) {
    *aUTF8Count = calculator.Size();
  }

  char* result = static_cast<char*>
    (moz_xmalloc(calculator.Size() + 1));
  if (!result) {
    return nullptr;
  }

  // Second pass: perform the conversion into the sized buffer.
  ConvertUTF16toUTF8 converter(result);
  copy_string(aSource.BeginReading(start), aSource.EndReading(end),
              converter).write_terminator();
  NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");

  return result;
}

/**
 * Returns a newly allocated, zero-terminated copy of aSource.
 */
char*
ToNewCString(const nsACString& aSource)
{
  // no conversion needed, just allocate a buffer of the correct length and copy into it

  char* result = AllocateStringCopy(aSource, (char*)0);
  if (!result) {
    return nullptr;
  }

  nsACString::const_iterator fromBegin, fromEnd;
  char* toBegin = result;
  // copy_string's result is dereferenced to write the terminator after the
  // copied characters.
  *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
               toBegin) = char(0);
  return result;
}

/**
 * Returns a newly allocated, zero-terminated copy of aSource.
 */
char16_t*
ToNewUnicode(const nsAString& aSource)
{
  // no conversion needed, just allocate a buffer of the correct length and copy into it

  char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
  if (!result) {
    return nullptr;
  }

  nsAString::const_iterator fromBegin, fromEnd;
  char16_t* toBegin = result;
  *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
               toBegin) = char16_t(0);
  return result;
}

/**
 * Returns a newly allocated, zero-terminated char16_t buffer holding
 * aSource converted through LossyConvertEncoding8to16.
 */
char16_t*
ToNewUnicode(const nsACString& aSource)
{
  char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
  if (!result) {
    return nullptr;
  }

  nsACString::const_iterator fromBegin, fromEnd;
  LossyConvertEncoding8to16 converter(result);
  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
              converter).write_terminator();
  return result;
}

/**
 * Returns the length reported by CalculateUTF8Length for aSource.
 */
uint32_t
CalcUTF8ToUnicodeLength(const nsACString& aSource)
{
  nsACString::const_iterator start, end;
  CalculateUTF8Length calculator;
  copy_string(aSource.BeginReading(start), aSource.EndReading(end),
              calculator);
  return calculator.Length();
}

/**
 * Converts aSource into the caller-supplied aBuffer and writes a
 * terminator. No size for aBuffer is passed in.
 * NOTE(review): presumably the caller sizes aBuffer with
 * CalcUTF8ToUnicodeLength() + 1, as UTF8ToNewUnicode does -- confirm.
 *
 * @param aUTF16Count if non-null, receives converter.Length().
 * @return aBuffer.
 */
char16_t*
UTF8ToUnicodeBuffer(const nsACString& aSource, char16_t* aBuffer,
                    uint32_t* aUTF16Count)
{
  nsACString::const_iterator start, end;
  ConvertUTF8toUTF16 converter(aBuffer);
  copy_string(aSource.BeginReading(start),
              aSource.EndReading(end),
              converter).write_terminator();
  if (aUTF16Count) {
    *aUTF16Count = converter.Length();
  }
  return aBuffer;
}

/**
 * Returns a newly allocated, zero-terminated UTF-16 conversion of aSource.
 *
 * @param aUTF16Count if non-null, receives the number of code units
 *                    reported by the conversion.
 */
char16_t*
UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count)
{
  const uint32_t length = CalcUTF8ToUnicodeLength(aSource);
  const size_t buffer_size = (length + 1) * sizeof(char16_t);
  char16_t* buffer = static_cast<char16_t*>(moz_xmalloc(buffer_size));
  if (!buffer) {
    return nullptr;
  }

  // |copied| is always assigned by UTF8ToUnicodeBuffer because a non-null
  // pointer is passed.
  uint32_t copied;
  UTF8ToUnicodeBuffer(aSource, buffer, &copied);
  NS_ASSERTION(length == copied, "length mismatch");

  if (aUTF16Count) {
    *aUTF16Count = copied;
  }
  return buffer;
}

/**
 * Copies the range [aSrcOffset, aSrcOffset + aLength) of aSource into
 * aDest. No terminator is written and no bounds are checked here.
 *
 * @return aDest.
 */
char16_t*
CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset, char16_t* aDest,
              uint32_t aLength)
{
  nsAString::const_iterator fromBegin, fromEnd;
  char16_t* toBegin = aDest;
  copy_string(aSource.BeginReading(fromBegin).advance(int32_t(aSrcOffset)),
              aSource.BeginReading(fromEnd).advance(int32_t(aSrcOffset + aLength)),
              toBegin);
  return aDest;
}

/**
 * Resizes aDest to the length of [aSrcStart, aSrcEnd) and copies that range
 * into it.
 */
void
CopyUnicodeTo(const nsAString::const_iterator& aSrcStart,
              const nsAString::const_iterator& aSrcEnd,
              nsAString& aDest)
{
  aDest.SetLength(Distance(aSrcStart, aSrcEnd));

  nsAString::char_iterator dest = aDest.BeginWriting();
  nsAString::const_iterator fromBegin(aSrcStart);

  copy_string(fromBegin, aSrcEnd, dest);
}

/**
 * Grows aDest by the length of [aSrcStart, aSrcEnd) and copies that range
 * after the existing contents.
 */
void
AppendUnicodeTo(const nsAString::const_iterator& aSrcStart,
                const nsAString::const_iterator& aSrcEnd,
                nsAString& aDest)
{
  uint32_t oldLength = aDest.Length();
  aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd));

  nsAString::char_iterator dest = aDest.BeginWriting() + oldLength;
  nsAString::const_iterator fromBegin(aSrcStart);

  copy_string(fromBegin, aSrcEnd, dest);
}

/**
 * Returns true if no code unit of aString has a bit set outside 0x007F.
 */
bool
IsASCII(const nsAString& aString)
{
  static const char16_t NOT_ASCII = char16_t(~0x007F);


  // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character

  nsAString::const_iterator iter, done_reading;
  aString.BeginReading(iter);
  aString.EndReading(done_reading);

  const char16_t* c = iter.get();
  const char16_t* end = done_reading.get();

  while (c < end) {
    if (*c++ & NOT_ASCII) {
      return false;
    }
  }

  return true;
}

/**
 * Returns true if no char of aString has a bit set outside 0x7F.
 */
bool
IsASCII(const nsACString& aString)
{
  static const char NOT_ASCII = char(~0x7F);


  // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character

  nsACString::const_iterator iter, done_reading;
  aString.BeginReading(iter);
  aString.EndReading(done_reading);

  const char* c = iter.get();
  const char* end = done_reading.get();

  while (c < end) {
    if (*c++ & NOT_ASCII) {
      return false;
    }
  }

  return true;
}

/**
 * Validates aString as UTF-8 with a small state machine.
 *
 * |state| is the number of continuation bytes still expected for the
 * current sequence. The overlong/surrogate/nonchar flags are armed by the
 * lead byte and checked against the continuation bytes that follow.
 *
 * @param aRejectNonChar when false, the non-character check is disabled.
 * @return true if every sequence is accepted and the string does not end
 *         in the middle of a sequence.
 */
bool
IsUTF8(const nsACString& aString, bool aRejectNonChar)
{
  nsReadingIterator<char> done_reading;
  aString.EndReading(done_reading);

  int32_t state = 0;
  bool overlong = false;
  bool surrogate = false;
  bool nonchar = false;
  uint16_t olupper = 0; // overlong byte upper bound.
  uint16_t slower = 0;  // surrogate byte lower bound.

  nsReadingIterator<char> iter;
  aString.BeginReading(iter);

  const char* ptr = iter.get();
  const char* end = done_reading.get();
  while (ptr < end) {
    uint8_t c;

    if (0 == state) {
      c = *ptr++;

      if (UTF8traits::isASCII(c)) {
        continue;
      }

      if (c <= 0xC1) { // [80-BF] where not expected, [C0-C1] for overlong.
        return false;
      } else if (UTF8traits::is2byte(c)) {
        state = 1;
      } else if (UTF8traits::is3byte(c)) {
        state = 2;
        if (c == 0xE0) { // to exclude E0[80-9F][80-BF]
          overlong = true;
          olupper = 0x9F;
        } else if (c == 0xED) { // ED[A0-BF][80-BF] : surrogate codepoint
          surrogate = true;
          slower = 0xA0;
        } else if (c == 0xEF) { // EF BF [BE-BF] : non-character
          nonchar = true;
        }
      } else if (c <= 0xF4) { // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090)
        state = 3;
        nonchar = true;
        if (c == 0xF0) { // to exclude F0[80-8F][80-BF]{2}
          overlong = true;
          olupper = 0x8F;
        } else if (c == 0xF4) { // to exclude F4[90-BF][80-BF]
          // actually not surrogates but codepoints beyond 0x10FFFF
          surrogate = true;
          slower = 0x90;
        }
      } else {
        return false; // Not UTF-8 string
      }
    }

    if (nonchar && !aRejectNonChar) {
      nonchar = false;
    }

    // Consume the continuation bytes of the current sequence.
    while (ptr < end && state) {
      c = *ptr++;
      --state;

      // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
      if (nonchar &&
          ((!state && c < 0xBE) ||
           (state == 1 && c != 0xBF) ||
           (state == 2 && 0x0F != (0x0F & c)))) {
        nonchar = false;
      }

      if (!UTF8traits::isInSeq(c) || (overlong && c <= olupper) ||
          (surrogate && slower <= c) || (nonchar && !state)) {
        return false; // Not UTF-8 string
      }

      // The overlong/surrogate bounds only apply to the first continuation
      // byte after the lead byte.
      overlong = surrogate = false;
    }
  }
  return !state; // state != 0 at the end indicates an invalid UTF-8 seq.
}

/**
 * A character sink for in-place case conversion.
+ */ +class ConvertToUpperCase +{ +public: + typedef char value_type; + + uint32_t + write(const char* aSource, uint32_t aSourceLength) + { + char* cp = const_cast<char*>(aSource); + const char* end = aSource + aSourceLength; + while (cp != end) { + char ch = *cp; + if (ch >= 'a' && ch <= 'z') { + *cp = ch - ('a' - 'A'); + } + ++cp; + } + return aSourceLength; + } +}; + +void +ToUpperCase(nsCSubstring& aCString) +{ + ConvertToUpperCase converter; + char* start; + converter.write(aCString.BeginWriting(start), aCString.Length()); +} + +/** + * A character sink for copying with case conversion. + */ +class CopyToUpperCase +{ +public: + typedef char value_type; + + explicit CopyToUpperCase(nsACString::iterator& aDestIter, + const nsACString::iterator& aEndIter) + : mIter(aDestIter) + , mEnd(aEndIter) + { + } + + uint32_t + write(const char* aSource, uint32_t aSourceLength) + { + uint32_t len = XPCOM_MIN(uint32_t(mEnd - mIter), aSourceLength); + char* cp = mIter.get(); + const char* end = aSource + len; + while (aSource != end) { + char ch = *aSource; + if ((ch >= 'a') && (ch <= 'z')) { + *cp = ch - ('a' - 'A'); + } else { + *cp = ch; + } + ++aSource; + ++cp; + } + mIter.advance(len); + return len; + } + +protected: + nsACString::iterator& mIter; + const nsACString::iterator& mEnd; +}; + +void +ToUpperCase(const nsACString& aSource, nsACString& aDest) +{ + nsACString::const_iterator fromBegin, fromEnd; + nsACString::iterator toBegin, toEnd; + aDest.SetLength(aSource.Length()); + + CopyToUpperCase converter(aDest.BeginWriting(toBegin), aDest.EndWriting(toEnd)); + copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), + converter); +} + +/** + * A character sink for case conversion. 
+ */ +class ConvertToLowerCase +{ +public: + typedef char value_type; + + uint32_t + write(const char* aSource, uint32_t aSourceLength) + { + char* cp = const_cast<char*>(aSource); + const char* end = aSource + aSourceLength; + while (cp != end) { + char ch = *cp; + if ((ch >= 'A') && (ch <= 'Z')) { + *cp = ch + ('a' - 'A'); + } + ++cp; + } + return aSourceLength; + } +}; + +void +ToLowerCase(nsCSubstring& aCString) +{ + ConvertToLowerCase converter; + char* start; + converter.write(aCString.BeginWriting(start), aCString.Length()); +} + +/** + * A character sink for copying with case conversion. + */ +class CopyToLowerCase +{ +public: + typedef char value_type; + + explicit CopyToLowerCase(nsACString::iterator& aDestIter, + const nsACString::iterator& aEndIter) + : mIter(aDestIter) + , mEnd(aEndIter) + { + } + + uint32_t + write(const char* aSource, uint32_t aSourceLength) + { + uint32_t len = XPCOM_MIN(uint32_t(mEnd - mIter), aSourceLength); + char* cp = mIter.get(); + const char* end = aSource + len; + while (aSource != end) { + char ch = *aSource; + if ((ch >= 'A') && (ch <= 'Z')) { + *cp = ch + ('a' - 'A'); + } else { + *cp = ch; + } + ++aSource; + ++cp; + } + mIter.advance(len); + return len; + } + +protected: + nsACString::iterator& mIter; + const nsACString::iterator& mEnd; +}; + +void +ToLowerCase(const nsACString& aSource, nsACString& aDest) +{ + nsACString::const_iterator fromBegin, fromEnd; + nsACString::iterator toBegin, toEnd; + aDest.SetLength(aSource.Length()); + + CopyToLowerCase converter(aDest.BeginWriting(toBegin), aDest.EndWriting(toEnd)); + copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), + converter); +} + +bool +ParseString(const nsACString& aSource, char aDelimiter, + nsTArray<nsCString>& aArray) +{ + nsACString::const_iterator start, end; + aSource.BeginReading(start); + aSource.EndReading(end); + + uint32_t oldLength = aArray.Length(); + + for (;;) { + nsACString::const_iterator delimiter = start; + 
FindCharInReadable(aDelimiter, delimiter, end); + + if (delimiter != start) { + if (!aArray.AppendElement(Substring(start, delimiter))) { + aArray.RemoveElementsAt(oldLength, aArray.Length() - oldLength); + return false; + } + } + + if (delimiter == end) { + break; + } + start = ++delimiter; + if (start == end) { + break; + } + } + + return true; +} + +template <class StringT, class IteratorT, class Comparator> +bool +FindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart, + IteratorT& aSearchEnd, const Comparator& aCompare) +{ + bool found_it = false; + + // only bother searching at all if we're given a non-empty range to search + if (aSearchStart != aSearchEnd) { + IteratorT aPatternStart, aPatternEnd; + aPattern.BeginReading(aPatternStart); + aPattern.EndReading(aPatternEnd); + + // outer loop keeps searching till we find it or run out of string to search + while (!found_it) { + // fast inner loop (that's what it's called, not what it is) looks for a potential match + while (aSearchStart != aSearchEnd && + aCompare(aPatternStart.get(), aSearchStart.get(), 1, 1)) { + ++aSearchStart; + } + + // if we broke out of the `fast' loop because we're out of string ... we're done: no match + if (aSearchStart == aSearchEnd) { + break; + } + + // otherwise, we're at a potential match, let's see if we really hit one + IteratorT testPattern(aPatternStart); + IteratorT testSearch(aSearchStart); + + // slow inner loop verifies the potential match (found by the `fast' loop) at the current position + for (;;) { + // we already compared the first character in the outer loop, + // so we'll advance before the next comparison + ++testPattern; + ++testSearch; + + // if we verified all the way to the end of the pattern, then we found it! 
+ if (testPattern == aPatternEnd) { + found_it = true; + aSearchEnd = testSearch; // return the exact found range through the parameters + break; + } + + // if we got to end of the string we're searching before we hit the end of the + // pattern, we'll never find what we're looking for + if (testSearch == aSearchEnd) { + aSearchStart = aSearchEnd; + break; + } + + // else if we mismatched ... it's time to advance to the next search position + // and get back into the `fast' loop + if (aCompare(testPattern.get(), testSearch.get(), 1, 1)) { + ++aSearchStart; + break; + } + } + } + } + + return found_it; +} + +/** + * This searches the entire string from right to left, and returns the first match found, if any. + */ +template <class StringT, class IteratorT, class Comparator> +bool +RFindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart, + IteratorT& aSearchEnd, const Comparator& aCompare) +{ + IteratorT patternStart, patternEnd, searchEnd = aSearchEnd; + aPattern.BeginReading(patternStart); + aPattern.EndReading(patternEnd); + + // Point to the last character in the pattern + --patternEnd; + // outer loop keeps searching till we run out of string to search + while (aSearchStart != searchEnd) { + // Point to the end position of the next possible match + --searchEnd; + + // Check last character, if a match, explore further from here + if (aCompare(patternEnd.get(), searchEnd.get(), 1, 1) == 0) { + // We're at a potential match, let's see if we really hit one + IteratorT testPattern(patternEnd); + IteratorT testSearch(searchEnd); + + // inner loop verifies the potential match at the current position + do { + // if we verified all the way to the end of the pattern, then we found it! 
+ if (testPattern == patternStart) { + aSearchStart = testSearch; // point to start of match + aSearchEnd = ++searchEnd; // point to end of match + return true; + } + + // if we got to end of the string we're searching before we hit the end of the + // pattern, we'll never find what we're looking for + if (testSearch == aSearchStart) { + aSearchStart = aSearchEnd; + return false; + } + + // test previous character for a match + --testPattern; + --testSearch; + } while (aCompare(testPattern.get(), testSearch.get(), 1, 1) == 0); + } + } + + aSearchStart = aSearchEnd; + return false; +} + +bool +FindInReadable(const nsAString& aPattern, + nsAString::const_iterator& aSearchStart, + nsAString::const_iterator& aSearchEnd, + const nsStringComparator& aComparator) +{ + return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); +} + +bool +FindInReadable(const nsACString& aPattern, + nsACString::const_iterator& aSearchStart, + nsACString::const_iterator& aSearchEnd, + const nsCStringComparator& aComparator) +{ + return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); +} + +bool +CaseInsensitiveFindInReadable(const nsACString& aPattern, + nsACString::const_iterator& aSearchStart, + nsACString::const_iterator& aSearchEnd) +{ + return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, + nsCaseInsensitiveCStringComparator()); +} + +bool +RFindInReadable(const nsAString& aPattern, + nsAString::const_iterator& aSearchStart, + nsAString::const_iterator& aSearchEnd, + const nsStringComparator& aComparator) +{ + return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); +} + +bool +RFindInReadable(const nsACString& aPattern, + nsACString::const_iterator& aSearchStart, + nsACString::const_iterator& aSearchEnd, + const nsCStringComparator& aComparator) +{ + return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); +} + +bool +FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart, + 
const nsAString::const_iterator& aSearchEnd) +{ + int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); + + const char16_t* charFoundAt = + nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar); + if (charFoundAt) { + aSearchStart.advance(charFoundAt - aSearchStart.get()); + return true; + } + + aSearchStart.advance(fragmentLength); + return false; +} + +bool +FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart, + const nsACString::const_iterator& aSearchEnd) +{ + int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); + + const char* charFoundAt = + nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar); + if (charFoundAt) { + aSearchStart.advance(charFoundAt - aSearchStart.get()); + return true; + } + + aSearchStart.advance(fragmentLength); + return false; +} + +uint32_t +CountCharInReadable(const nsAString& aStr, char16_t aChar) +{ + uint32_t count = 0; + nsAString::const_iterator begin, end; + + aStr.BeginReading(begin); + aStr.EndReading(end); + + while (begin != end) { + if (*begin == aChar) { + ++count; + } + ++begin; + } + + return count; +} + +uint32_t +CountCharInReadable(const nsACString& aStr, char aChar) +{ + uint32_t count = 0; + nsACString::const_iterator begin, end; + + aStr.BeginReading(begin); + aStr.EndReading(end); + + while (begin != end) { + if (*begin == aChar) { + ++count; + } + ++begin; + } + + return count; +} + +bool +StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring) +{ + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, sub_len).Equals(aSubstring); +} + +bool +StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring, + const nsStringComparator& aComparator) +{ + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, 
sub_len).Equals(aSubstring, aComparator); +} + +bool +StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring) +{ + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, sub_len).Equals(aSubstring); +} + +bool +StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring, + const nsCStringComparator& aComparator) +{ + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator); +} + +bool +StringEndsWith(const nsAString& aSource, const nsAString& aSubstring) +{ + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring); +} + +bool +StringEndsWith(const nsAString& aSource, const nsAString& aSubstring, + const nsStringComparator& aComparator) +{ + nsAString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring, + aComparator); +} + +bool +StringEndsWith(const nsACString& aSource, const nsACString& aSubstring) +{ + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring); +} + +bool +StringEndsWith(const nsACString& aSource, const nsACString& aSubstring, + const nsCStringComparator& aComparator) +{ + nsACString::size_type src_len = aSource.Length(), + sub_len = aSubstring.Length(); + if (sub_len > src_len) { + return false; + } + return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring, + aComparator); +} + + + +static const char16_t empty_buffer[1] = { 
'\0' }; + +const nsAFlatString& +EmptyString() +{ + static const nsDependentString sEmpty(empty_buffer); + + return sEmpty; +} + +const nsAFlatCString& +EmptyCString() +{ + static const nsDependentCString sEmpty((const char*)empty_buffer); + + return sEmpty; +} + +const nsAFlatString& +NullString() +{ + static const nsXPIDLString sNull; + + return sNull; +} + +const nsAFlatCString& +NullCString() +{ + static const nsXPIDLCString sNull; + + return sNull; +} + +int32_t +CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String, + const nsASingleFragmentString& aUTF16String) +{ + static const uint32_t NOT_ASCII = uint32_t(~0x7F); + + const char* u8; + const char* u8end; + aUTF8String.BeginReading(u8); + aUTF8String.EndReading(u8end); + + const char16_t* u16; + const char16_t* u16end; + aUTF16String.BeginReading(u16); + aUTF16String.EndReading(u16end); + + while (u8 != u8end && u16 != u16end) { + // Cast away the signedness of *u8 to prevent signextension when + // converting to uint32_t + uint32_t c8_32 = (uint8_t)*u8; + + if (c8_32 & NOT_ASCII) { + bool err; + c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err); + if (err) { + return INT32_MIN; + } + + uint32_t c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end); + // The above UTF16CharEnumerator::NextChar() calls can + // fail, but if it does for anything other than no data to + // look at (which can't happen here), it returns the + // Unicode replacement character 0xFFFD for the invalid + // data they were fed. Ignore that error and treat invalid + // UTF16 as 0xFFFD. + // + // This matches what our UTF16 to UTF8 conversion code + // does, and thus a UTF8 string that came from an invalid + // UTF16 string will compare equal to the invalid UTF16 + // string it came from. Same is true for any other UTF16 + // string differs only in the invalid part of the string. + + if (c8_32 != c16_32) { + return c8_32 < c16_32 ? -1 : 1; + } + } else { + if (c8_32 != *u16) { + return c8_32 > *u16 ? 
1 : -1; + } + + ++u8; + ++u16; + } + } + + if (u8 != u8end) { + // We get to the end of the UTF16 string, but no to the end of + // the UTF8 string. The UTF8 string is longer than the UTF16 + // string + + return 1; + } + + if (u16 != u16end) { + // We get to the end of the UTF8 string, but no to the end of + // the UTF16 string. The UTF16 string is longer than the UTF8 + // string + + return -1; + } + + // The two strings match. + + return 0; +} + +void +AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest) +{ + NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char"); + if (IS_IN_BMP(aSource)) { + aDest.Append(char16_t(aSource)); + } else { + aDest.Append(H_SURROGATE(aSource)); + aDest.Append(L_SURROGATE(aSource)); + } +} + +extern "C" { + +void Gecko_AppendUTF16toCString(nsACString* aThis, const nsAString* aOther) +{ + AppendUTF16toUTF8(*aOther, *aThis); +} + +void Gecko_AppendUTF8toString(nsAString* aThis, const nsACString* aOther) +{ + AppendUTF8toUTF16(*aOther, *aThis); +} + +} |