diff options
Diffstat (limited to 'intl/uconv/nsNCRFallbackEncoderWrapper.cpp')
-rw-r--r-- | intl/uconv/nsNCRFallbackEncoderWrapper.cpp | 156 |
1 files changed, 156 insertions, 0 deletions
diff --git a/intl/uconv/nsNCRFallbackEncoderWrapper.cpp b/intl/uconv/nsNCRFallbackEncoderWrapper.cpp new file mode 100644 index 000000000..7b83b72e8 --- /dev/null +++ b/intl/uconv/nsNCRFallbackEncoderWrapper.cpp @@ -0,0 +1,156 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsNCRFallbackEncoderWrapper.h" + +#include "mozilla/dom/EncodingUtils.h" + +nsNCRFallbackEncoderWrapper::nsNCRFallbackEncoderWrapper(const nsACString& aEncoding) + : mEncoder(mozilla::dom::EncodingUtils::EncoderForEncoding(aEncoding)) +{ +} + +nsNCRFallbackEncoderWrapper::~nsNCRFallbackEncoderWrapper() +{ +} + +bool +nsNCRFallbackEncoderWrapper::WriteNCR(nsACString& aBytes, + uint32_t& aDstWritten, + int32_t aUnmappable) +{ + // To avoid potentially shrinking aBytes and then growing it back, use + // another string for number formatting. + nsAutoCString ncr("&#"); + ncr.AppendInt(aUnmappable); + ncr.Append(';'); + uint32_t ncrLen = ncr.Length(); + uint32_t needed = aDstWritten + ncrLen; + if (needed > INT32_MAX) { + return false; + } + if (needed > aBytes.Length() && !aBytes.SetLength(needed, + mozilla::fallible_t())) { + return false; + } + memcpy(aBytes.BeginWriting() + aDstWritten, + ncr.BeginReading(), + ncrLen); + aDstWritten += ncrLen; + return true; +} + +bool +nsNCRFallbackEncoderWrapper::Encode(const nsAString& aUtf16, + nsACString& aBytes) +{ + // mozilla::dom::EncodingUtils::EncoderForEncoding fails during shutdown + if (!mEncoder) { + return false; + } + // nsIUnicodeEncoder uses int32_t for sizes :-( + if (aUtf16.Length() > INT32_MAX) { + return false; + } + const char16_t* src = aUtf16.BeginReading(); + const char16_t* srcEnd = aUtf16.EndReading(); + uint32_t dstWritten = 0; + for (;;) { + int32_t srcLen = srcEnd - src; + int32_t dstLen = 0; + nsresult rv = mEncoder->GetMaxLength(src, srcLen, &dstLen); + if (NS_FAILED(rv)) { + return false; + } + uint32_t needed = dstWritten + dstLen; + if (needed > INT32_MAX) { + return false; + } + // Behind the scenes SetLength() makes the underlying allocation not have + // slop, so we don't need to round up here. + if (needed > aBytes.Length() && !aBytes.SetLength(needed, + mozilla::fallible_t())) { + return false; + } + // We need to re-obtain the destination pointer on every iteration, because + // SetLength() invalidates it. + char* dst = aBytes.BeginWriting() + dstWritten; + dstLen = aBytes.Length() - dstWritten; + mEncoder->Reset(); + rv = mEncoder->Convert(src, &srcLen, dst, &dstLen); + // Update state tracking + src += srcLen; + dstWritten += dstLen; + if (rv == NS_OK_UENC_MOREOUTPUT) { + MOZ_ASSERT_UNREACHABLE("GetMaxLength must have returned a bogus length."); + return false; + } + if (rv == NS_ERROR_UENC_NOMAPPING) { + int32_t unmappable; + // The unmappable code unit or the first half of an unmappable surrogate + // pair is consumed by the encoder. + MOZ_ASSERT(srcLen > 0, "Encoder should have consumed some input."); + char16_t codeUnit = src[-1]; + // Let's see if it is a surrogate + size_t highBits = (codeUnit & 0xFC00); + if (highBits == 0xD800) { + // high surrogate + // Let's see if we actually have a surrogate pair. + char16_t next; + if (src < srcEnd && NS_IS_LOW_SURROGATE((next = *src))) { + src++; // consume the low surrogate + unmappable = SURROGATE_TO_UCS4(codeUnit, next); + } else { + // unpaired surrogate. + unmappable = 0xFFFD; + } + } else if (highBits == 0xDC00) { + // low surrogate + // This must be an unpaired surrogate. + unmappable = 0xFFFD; + } else { + // not a surrogate + unmappable = codeUnit; + } + // If we are encoding to ISO-2022-JP, we need to let the encoder to + // generate a transition to the ASCII state if not already there. + dst = aBytes.BeginWriting() + dstWritten; + dstLen = aBytes.Length() - dstWritten; + rv = mEncoder->Finish(dst, &dstLen); + dstWritten += dstLen; + if (rv != NS_OK) { + // Failures should be impossible if GetMaxLength works. Big5 is the + // only case where Finish() may return NS_ERROR_UENC_NOMAPPING but + // that should never happen right after Convert() has returned it. + MOZ_ASSERT_UNREACHABLE("Broken encoder."); + return false; + } + if (!WriteNCR(aBytes, dstWritten, unmappable)) { + return false; + } + continue; + } + if (!(rv == NS_OK || rv == NS_OK_UENC_MOREINPUT)) { + return false; + } + MOZ_ASSERT(src == srcEnd, "Converter did not consume all input."); + dst = aBytes.BeginWriting() + dstWritten; + dstLen = aBytes.Length() - dstWritten; + rv = mEncoder->Finish(dst, &dstLen); + dstWritten += dstLen; + if (rv == NS_OK_UENC_MOREOUTPUT) { + MOZ_ASSERT_UNREACHABLE("GetMaxLength must have returned a bogus length."); + return false; + } + if (rv == NS_ERROR_UENC_NOMAPPING) { + // Big5 + if (!WriteNCR(aBytes, dstWritten, 0xFFFD)) { + return false; + } + } + return aBytes.SetLength(dstWritten, mozilla::fallible_t()); + } +} + |