diff options
Diffstat (limited to 'intl/uconv/nsUnicodeToUTF8.cpp')
-rw-r--r-- | intl/uconv/nsUnicodeToUTF8.cpp | 165 |
1 files changed, 165 insertions, 0 deletions
diff --git a/intl/uconv/nsUnicodeToUTF8.cpp b/intl/uconv/nsUnicodeToUTF8.cpp new file mode 100644 index 000000000..aff52d176 --- /dev/null +++ b/intl/uconv/nsUnicodeToUTF8.cpp @@ -0,0 +1,165 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//---------------------------------------------------------------------- +// Global functions and data [declaration] +#include "nsUnicodeToUTF8.h" +#include "mozilla/CheckedInt.h" + +NS_IMPL_ISUPPORTS(nsUnicodeToUTF8, nsIUnicodeEncoder) + +//---------------------------------------------------------------------- +// nsUnicodeToUTF8 class [implementation] + +NS_IMETHODIMP nsUnicodeToUTF8::GetMaxLength(const char16_t* aSrc, + int32_t aSrcLength, + int32_t* aDestLength) +{ + MOZ_ASSERT(aDestLength); + + // aSrc is interpreted as UTF16, 3 is normally enough. + // But when previous buffer only contains part of the surrogate pair, we + // need to complete it here. If the first word in following buffer is not + // in valid surrogate range, we need to convert the remaining of last buffer + // to 3 bytes. + mozilla::CheckedInt32 length = aSrcLength; + length *= 3; + length += 3; + + if (!length.isValid()) { + return NS_ERROR_OUT_OF_MEMORY; + } + + *aDestLength = length.value(); + return NS_OK; +} + +NS_IMETHODIMP nsUnicodeToUTF8::Convert(const char16_t* aSrc, + int32_t* aSrcLength, + char* aDest, + int32_t* aDestLength) +{ + const char16_t* src = aSrc; + const char16_t* srcEnd = aSrc + *aSrcLength; + char* dest = aDest; + int32_t destLen = *aDestLength; + uint32_t n; + + //complete remaining of last conversion + if (mHighSurrogate) { + if (src < srcEnd) { + *aDestLength = 0; + return NS_OK_UENC_MOREINPUT; + } + if (*aDestLength < 4) { + *aSrcLength = 0; + *aDestLength = 0; + return NS_OK_UENC_MOREOUTPUT; + } + if (*src < (char16_t)0xdc00 || *src > (char16_t)0xdfff) { //not a pair + *dest++ = (char)0xef; //replacement character + *dest++ = (char)0xbf; + *dest++ = (char)0xbd; + destLen -= 3; + } else { + n = ((mHighSurrogate - (char16_t)0xd800) << 10) + + (*src - (char16_t)0xdc00) + 0x10000; + *dest++ = (char)0xf0 | (n >> 18); + *dest++ = (char)0x80 | ((n >> 12) & 0x3f); + *dest++ = (char)0x80 | ((n >> 6) & 0x3f); + *dest++ = (char)0x80 | (n & 0x3f); + ++src; + destLen -= 4; + } + mHighSurrogate = 0; + } + + while (src < srcEnd) { + if ( *src <= 0x007f) { + if (destLen < 1) + goto error_more_output; + *dest++ = (char)*src; + --destLen; + } else if (*src <= 0x07ff) { + if (destLen < 2) + goto error_more_output; + *dest++ = (char)0xc0 | (*src >> 6); + *dest++ = (char)0x80 | (*src & 0x003f); + destLen -= 2; + } else if (*src >= (char16_t)0xd800 && *src <= (char16_t)0xdfff) { + if (*src >= (char16_t)0xdc00) { //not a pair + if (destLen < 3) + goto error_more_output; + *dest++ = (char)0xef; //replacement character + *dest++ = (char)0xbf; + *dest++ = (char)0xbd; + destLen -= 3; + ++src; + continue; + } + if ((src+1) >= srcEnd) { + //we need another surrogate to complete this unicode char + mHighSurrogate = *src; + *aDestLength = dest - aDest; + return NS_OK_UENC_MOREINPUT; + } + //handle surrogate + if (destLen < 4) + goto error_more_output; + if (*(src+1) < (char16_t)0xdc00 || *(src+1) > 0xdfff) { //not a pair + *dest++ = (char)0xef; //replacement character + *dest++ = (char)0xbf; + *dest++ = (char)0xbd; + destLen -= 3; + } else { + n = ((*src - (char16_t)0xd800) << 10) + (*(src+1) - (char16_t)0xdc00) + (uint32_t)0x10000; + *dest++ = (char)0xf0 | (n >> 18); + *dest++ = (char)0x80 | ((n >> 12) & 0x3f); + *dest++ = (char)0x80 | ((n >> 6) & 0x3f); + *dest++ = (char)0x80 | (n & 0x3f); + destLen -= 4; + ++src; + } + } else { + if (destLen < 3) + goto error_more_output; + //treat rest of the character as BMP + *dest++ = (char)0xe0 | (*src >> 12); + *dest++ = (char)0x80 | ((*src >> 6) & 0x003f); + *dest++ = (char)0x80 | (*src & 0x003f); + destLen -= 3; + } + ++src; + } + + *aDestLength = dest - aDest; + return NS_OK; + +error_more_output: + *aSrcLength = src - aSrc; + *aDestLength = dest - aDest; + return NS_OK_UENC_MOREOUTPUT; +} + +NS_IMETHODIMP nsUnicodeToUTF8::Finish(char * aDest, int32_t * aDestLength) +{ + char * dest = aDest; + + if (mHighSurrogate) { + if (*aDestLength < 3) { + *aDestLength = 0; + return NS_OK_UENC_MOREOUTPUT; + } + *dest++ = (char)0xef; //replacement character + *dest++ = (char)0xbf; + *dest++ = (char)0xbd; + mHighSurrogate = 0; + *aDestLength = 3; + return NS_OK; + } + + *aDestLength = 0; + return NS_OK; +} |