diff options
Diffstat (limited to 'intl/uconv/util')
-rw-r--r-- | intl/uconv/util/nsUCConstructors.cpp | 134 | ||||
-rw-r--r-- | intl/uconv/util/nsUCConstructors.h | 70 | ||||
-rw-r--r-- | intl/uconv/util/nsUCSupport.cpp | 621 | ||||
-rw-r--r-- | intl/uconv/util/nsUnicodeDecodeHelper.cpp | 234 | ||||
-rw-r--r-- | intl/uconv/util/nsUnicodeDecodeHelper.h | 55 | ||||
-rw-r--r-- | intl/uconv/util/nsUnicodeEncodeHelper.cpp | 121 | ||||
-rw-r--r-- | intl/uconv/util/nsUnicodeEncodeHelper.h | 42 | ||||
-rw-r--r-- | intl/uconv/util/ugen.c | 712 | ||||
-rw-r--r-- | intl/uconv/util/umap.c | 175 | ||||
-rw-r--r-- | intl/uconv/util/umap.h | 53 | ||||
-rw-r--r-- | intl/uconv/util/unicpriv.h | 52 | ||||
-rw-r--r-- | intl/uconv/util/uscan.c | 759 |
12 files changed, 3028 insertions, 0 deletions
diff --git a/intl/uconv/util/nsUCConstructors.cpp b/intl/uconv/util/nsUCConstructors.cpp new file mode 100644 index 000000000..e8ab5f89d --- /dev/null +++ b/intl/uconv/util/nsUCConstructors.cpp @@ -0,0 +1,134 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsUCSupport.h" +#include "nsUCConstructors.h" + +template<class T> +inline nsresult StabilizedQueryInterface(T* aNewObject, + REFNSIID aIID, + void **aResult) +{ + NS_ADDREF(aNewObject); + nsresult rv = aNewObject->QueryInterface(aIID, aResult); + NS_RELEASE(aNewObject); + return rv; +} + +nsresult +CreateMultiTableDecoder(int32_t aTableCount, const uRange * aRangeArray, + uScanClassID * aScanClassArray, + uMappingTable ** aMappingTable, + uint32_t aMaxLengthFactor, + nsISupports* aOuter, + REFNSIID aIID, + void** aResult) +{ + + if (aOuter) + return NS_ERROR_NO_AGGREGATION; + + nsMultiTableDecoderSupport* decoder = + new nsMultiTableDecoderSupport(aTableCount, aRangeArray, + aScanClassArray, aMappingTable, + aMaxLengthFactor); + if (!decoder) + return NS_ERROR_OUT_OF_MEMORY; + + return StabilizedQueryInterface(decoder, aIID, aResult); +} + +nsresult +CreateMultiTableEncoder(int32_t aTableCount, + uScanClassID * aScanClassArray, + uShiftOutTable ** aShiftOutTable, + uMappingTable ** aMappingTable, + uint32_t aMaxLengthFactor, + nsISupports* aOuter, + REFNSIID aIID, + void** aResult) +{ + + if (aOuter) + return NS_ERROR_NO_AGGREGATION; + + nsMultiTableEncoderSupport* encoder = + new nsMultiTableEncoderSupport(aTableCount, + aScanClassArray, + aShiftOutTable, + aMappingTable, + aMaxLengthFactor); + if (!encoder) + return NS_ERROR_OUT_OF_MEMORY; + + return StabilizedQueryInterface(encoder, aIID, aResult); +} + +nsresult +CreateMultiTableEncoder(int32_t 
aTableCount, + uScanClassID * aScanClassArray, + uMappingTable ** aMappingTable, + uint32_t aMaxLengthFactor, + nsISupports* aOuter, + REFNSIID aIID, + void** aResult) +{ + return CreateMultiTableEncoder(aTableCount, aScanClassArray, + nullptr, + aMappingTable, aMaxLengthFactor, + aOuter, aIID, aResult); +} + +nsresult +CreateTableEncoder(uScanClassID aScanClass, + uShiftOutTable * aShiftOutTable, + uMappingTable * aMappingTable, + uint32_t aMaxLengthFactor, + nsISupports* aOuter, + REFNSIID aIID, + void** aResult) +{ + if (aOuter) + return NS_ERROR_NO_AGGREGATION; + + nsTableEncoderSupport* encoder = + new nsTableEncoderSupport(aScanClass, + aShiftOutTable, aMappingTable, + aMaxLengthFactor); + if (!encoder) + return NS_ERROR_OUT_OF_MEMORY; + + return StabilizedQueryInterface(encoder, aIID, aResult); +} + +nsresult +CreateTableEncoder(uScanClassID aScanClass, + uMappingTable * aMappingTable, + uint32_t aMaxLengthFactor, + nsISupports* aOuter, + REFNSIID aIID, + void** aResult) +{ + return CreateTableEncoder(aScanClass, nullptr, + aMappingTable, aMaxLengthFactor, + aOuter, aIID, aResult); +} + +nsresult +CreateOneByteDecoder(uMappingTable * aMappingTable, + nsISupports* aOuter, + REFNSIID aIID, + void** aResult) +{ + if (aOuter) return NS_ERROR_NO_AGGREGATION; + + nsOneByteDecoderSupport* decoder = + new nsOneByteDecoderSupport(aMappingTable); + + if (!decoder) + return NS_ERROR_OUT_OF_MEMORY; + + return StabilizedQueryInterface(decoder, aIID, aResult); +} diff --git a/intl/uconv/util/nsUCConstructors.h b/intl/uconv/util/nsUCConstructors.h new file mode 100644 index 000000000..360bf0305 --- /dev/null +++ b/intl/uconv/util/nsUCConstructors.h @@ -0,0 +1,70 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef __nsUCConstructors_h +#define __nsUCConstructors_h + +#include <stdint.h> +#include "nscore.h" +#include "nsID.h" +#include "uconvutil.h" + +class nsISupports; + +// all the useful constructors +nsresult +CreateMultiTableDecoder(int32_t aTableCount, + const uRange * aRangeArray, + uScanClassID * aScanClassArray, + uMappingTable ** aMappingTable, + uint32_t aMaxLengthFactor, + nsISupports* aOuter, + REFNSIID aIID, + void** aResult); + +nsresult +CreateMultiTableEncoder(int32_t aTableCount, + uScanClassID * aScanClassArray, + uShiftOutTable ** aShiftOutTable, + uMappingTable ** aMappingTable, + uint32_t aMaxLengthFactor, + nsISupports* aOuter, + REFNSIID aIID, + void** aResult); + +nsresult +CreateTableEncoder(uScanClassID aScanClass, + uShiftOutTable * aShiftOutTable, + uMappingTable * aMappingTable, + uint32_t aMaxLengthFactor, + nsISupports* aOuter, + REFNSIID aIID, + void** aResult); + +nsresult +CreateMultiTableEncoder(int32_t aTableCount, + uScanClassID * aScanClassArray, + uMappingTable ** aMappingTable, + uint32_t aMaxLengthFactor, + nsISupports* aOuter, + REFNSIID aIID, + void** aResult); + +nsresult +CreateTableEncoder(uScanClassID aScanClass, + uMappingTable * aMappingTable, + uint32_t aMaxLengthFactor, + nsISupports* aOuter, + REFNSIID aIID, + void** aResult); + +nsresult +CreateOneByteDecoder(uMappingTable * aMappingTable, + nsISupports* aOuter, + REFNSIID aIID, + void** aResult); + + +#endif diff --git a/intl/uconv/util/nsUCSupport.cpp b/intl/uconv/util/nsUCSupport.cpp new file mode 100644 index 000000000..d6893f442 --- /dev/null +++ b/intl/uconv/util/nsUCSupport.cpp @@ -0,0 +1,621 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsUCSupport.h" +#include "nsUnicodeDecodeHelper.h" +#include "nsUnicodeEncodeHelper.h" +#include "mozilla/CheckedInt.h" +#include <algorithm> + +#define DEFAULT_BUFFER_CAPACITY 16 + +// XXX review the buffer growth limitation code + +//---------------------------------------------------------------------- +// Class nsBasicDecoderSupport [implementation] + +nsBasicDecoderSupport::nsBasicDecoderSupport() + : mErrBehavior(kOnError_Recover) +{ +} + +nsBasicDecoderSupport::~nsBasicDecoderSupport() +{ +} + +//---------------------------------------------------------------------- +// Interface nsISupports [implementation] + +#ifdef DEBUG +NS_IMPL_ISUPPORTS(nsBasicDecoderSupport, + nsIUnicodeDecoder, + nsIBasicDecoder) +#else +NS_IMPL_ISUPPORTS(nsBasicDecoderSupport, nsIUnicodeDecoder) +#endif + +//---------------------------------------------------------------------- +// Interface nsIUnicodeDecoder [implementation] + +void +nsBasicDecoderSupport::SetInputErrorBehavior(int32_t aBehavior) +{ + MOZ_ASSERT(aBehavior == kOnError_Recover || aBehavior == kOnError_Signal, + "Unknown behavior for SetInputErrorBehavior"); + mErrBehavior = aBehavior; +} + +char16_t +nsBasicDecoderSupport::GetCharacterForUnMapped() +{ + return char16_t(0xfffd); // Unicode REPLACEMENT CHARACTER +} + +//---------------------------------------------------------------------- +// Class nsBufferDecoderSupport [implementation] + +nsBufferDecoderSupport::nsBufferDecoderSupport(uint32_t aMaxLengthFactor) + : nsBasicDecoderSupport(), + mMaxLengthFactor(aMaxLengthFactor) +{ + mBufferCapacity = DEFAULT_BUFFER_CAPACITY; + mBuffer = new char[mBufferCapacity]; + + Reset(); +} + +nsBufferDecoderSupport::~nsBufferDecoderSupport() +{ + delete [] mBuffer; +} + +void nsBufferDecoderSupport::FillBuffer(const char ** aSrc, int32_t aSrcLength) +{ + int32_t bcr = std::min(mBufferCapacity - mBufferLength, aSrcLength); + memcpy(mBuffer + mBufferLength, *aSrc, bcr); + mBufferLength += bcr; + (*aSrc) += bcr; +} 
+ +//---------------------------------------------------------------------- +// Subclassing of nsBasicDecoderSupport class [implementation] + +NS_IMETHODIMP nsBufferDecoderSupport::Convert(const char* aSrc, + int32_t* aSrcLength, + char16_t* aDest, + int32_t* aDestLength) +{ + // we do all operations using pointers internally + const char* src = aSrc; + const char* srcEnd = aSrc + *aSrcLength; + char16_t* dest = aDest; + char16_t* destEnd = aDest + *aDestLength; + + int32_t bcr, bcw; // byte counts for read & write; + nsresult res = NS_OK; + + // do we have some residual data from the last conversion? + if (mBufferLength > 0) { + if (dest == destEnd) { + res = NS_OK_UDEC_MOREOUTPUT; + } else { + for (;;) { + // we need new data to add to the buffer + if (src == srcEnd) { + res = NS_OK_UDEC_MOREINPUT; + break; + } + + // fill that buffer + int32_t buffLen = mBufferLength; // initial buffer length + FillBuffer(&src, srcEnd - src); + + // convert that buffer + bcr = mBufferLength; + bcw = destEnd - dest; + res = ConvertNoBuff(mBuffer, &bcr, dest, &bcw); + dest += bcw; + + // Detect invalid input character + if (res == NS_ERROR_ILLEGAL_INPUT && mErrBehavior == kOnError_Signal) { + break; + } + + if ((res == NS_OK_UDEC_MOREINPUT) && (bcw == 0)) { + res = NS_ERROR_UNEXPECTED; +#if defined(DEBUG_yokoyama) || defined(DEBUG_ftang) + NS_ERROR("This should not happen. Internal buffer may be corrupted."); +#endif + break; + } else { + if (bcr < buffLen) { + // we didn't convert that residual data - unfill the buffer + src -= mBufferLength - buffLen; + mBufferLength = buffLen; +#if defined(DEBUG_yokoyama) || defined(DEBUG_ftang) + NS_ERROR("This should not happen. 
Internal buffer may be corrupted."); +#endif + } else { + // the buffer and some extra data was converted - unget the rest + src -= mBufferLength - bcr; + mBufferLength = 0; + res = NS_OK; + } + break; + } + } + } + } + + if (res == NS_OK) { + bcr = srcEnd - src; + bcw = destEnd - dest; + res = ConvertNoBuff(src, &bcr, dest, &bcw); + src += bcr; + dest += bcw; + + // if we have partial input, store it in our internal buffer. + if (res == NS_OK_UDEC_MOREINPUT) { + bcr = srcEnd - src; + // make sure buffer is large enough + if (bcr > mBufferCapacity) { + // somehow we got into an error state and the buffer is growing out + // of control + res = NS_ERROR_UNEXPECTED; + } else { + FillBuffer(&src, bcr); + } + } + } + + *aSrcLength -= srcEnd - src; + *aDestLength -= destEnd - dest; + return res; +} + +NS_IMETHODIMP nsBufferDecoderSupport::Reset() +{ + mBufferLength = 0; + return NS_OK; +} + +NS_IMETHODIMP nsBufferDecoderSupport::GetMaxLength(const char* aSrc, + int32_t aSrcLength, + int32_t* aDestLength) +{ + NS_ASSERTION(mMaxLengthFactor != 0, "Must override GetMaxLength!"); + + mozilla::CheckedInt32 length = aSrcLength; + length *= mMaxLengthFactor; + + if (!length.isValid()) { + return NS_ERROR_OUT_OF_MEMORY; + } + + *aDestLength = length.value(); + return NS_OK; +} + +//---------------------------------------------------------------------- +// Class nsMultiTableDecoderSupport [implementation] + +nsMultiTableDecoderSupport::nsMultiTableDecoderSupport( + int32_t aTableCount, + const uRange* aRangeArray, + uScanClassID* aScanClassArray, + uMappingTable** aMappingTable, + uint32_t aMaxLengthFactor) +: nsBufferDecoderSupport(aMaxLengthFactor) +{ + mTableCount = aTableCount; + mRangeArray = aRangeArray; + mScanClassArray = aScanClassArray; + mMappingTable = aMappingTable; +} + +nsMultiTableDecoderSupport::~nsMultiTableDecoderSupport() +{ +} + +//---------------------------------------------------------------------- +// Subclassing of nsBufferDecoderSupport class 
[implementation] + +NS_IMETHODIMP nsMultiTableDecoderSupport::ConvertNoBuff(const char* aSrc, + int32_t* aSrcLength, + char16_t* aDest, + int32_t* aDestLength) +{ + return nsUnicodeDecodeHelper::ConvertByMultiTable(aSrc, aSrcLength, + aDest, aDestLength, + mTableCount, mRangeArray, + mScanClassArray, + mMappingTable, + mErrBehavior == kOnError_Signal); +} + +//---------------------------------------------------------------------- +// Class nsOneByteDecoderSupport [implementation] + +nsOneByteDecoderSupport::nsOneByteDecoderSupport( + uMappingTable* aMappingTable) + : nsBasicDecoderSupport() + , mMappingTable(aMappingTable) + , mFastTableCreated(false) + , mFastTableMutex("nsOneByteDecoderSupport mFastTableMutex") +{ +} + +nsOneByteDecoderSupport::~nsOneByteDecoderSupport() +{ +} + +//---------------------------------------------------------------------- +// Subclassing of nsBasicDecoderSupport class [implementation] + +NS_IMETHODIMP nsOneByteDecoderSupport::Convert(const char* aSrc, + int32_t* aSrcLength, + char16_t* aDest, + int32_t* aDestLength) +{ + if (!mFastTableCreated) { + // Probably better to make this non-lazy and get rid of the mutex + mozilla::MutexAutoLock autoLock(mFastTableMutex); + if (!mFastTableCreated) { + nsresult res = nsUnicodeDecodeHelper::CreateFastTable( + mMappingTable, mFastTable, ONE_BYTE_TABLE_SIZE); + if (NS_FAILED(res)) return res; + mFastTableCreated = true; + } + } + + return nsUnicodeDecodeHelper::ConvertByFastTable(aSrc, aSrcLength, + aDest, aDestLength, + mFastTable, + ONE_BYTE_TABLE_SIZE, + mErrBehavior == kOnError_Signal); +} + +NS_IMETHODIMP nsOneByteDecoderSupport::GetMaxLength(const char* aSrc, + int32_t aSrcLength, + int32_t* aDestLength) +{ + // single byte to Unicode converter + *aDestLength = aSrcLength; + return NS_OK_UDEC_EXACTLENGTH; +} + +NS_IMETHODIMP nsOneByteDecoderSupport::Reset() +{ + // nothing to reset, no internal state in this case + return NS_OK; +} + 
+//---------------------------------------------------------------------- +// Class nsBasicEncoder [implementation] +nsBasicEncoder::nsBasicEncoder() +{ +} + +nsBasicEncoder::~nsBasicEncoder() +{ +} + +//---------------------------------------------------------------------- +// Interface nsISupports [implementation] + +NS_IMPL_ADDREF(nsBasicEncoder) +NS_IMPL_RELEASE(nsBasicEncoder) +#ifdef DEBUG +NS_IMPL_QUERY_INTERFACE(nsBasicEncoder, + nsIUnicodeEncoder, + nsIBasicEncoder) +#else +NS_IMPL_QUERY_INTERFACE(nsBasicEncoder, + nsIUnicodeEncoder) +#endif +//---------------------------------------------------------------------- +// Class nsEncoderSupport [implementation] + +nsEncoderSupport::nsEncoderSupport(uint32_t aMaxLengthFactor) : + mMaxLengthFactor(aMaxLengthFactor) +{ + mBufferCapacity = DEFAULT_BUFFER_CAPACITY; + mBuffer = new char[mBufferCapacity]; + + mErrBehavior = kOnError_Signal; + mErrChar = 0; + + Reset(); +} + +nsEncoderSupport::~nsEncoderSupport() +{ + delete [] mBuffer; +} + +NS_IMETHODIMP nsEncoderSupport::ConvertNoBuff(const char16_t* aSrc, + int32_t* aSrcLength, + char* aDest, + int32_t* aDestLength) +{ + // we do all operations using pointers internally + const char16_t* src = aSrc; + const char16_t* srcEnd = aSrc + *aSrcLength; + char* dest = aDest; + char* destEnd = aDest + *aDestLength; + + int32_t bcr, bcw; // byte counts for read & write; + nsresult res; + + for (;;) { + bcr = srcEnd - src; + bcw = destEnd - dest; + res = ConvertNoBuffNoErr(src, &bcr, dest, &bcw); + src += bcr; + dest += bcw; + + if (res == NS_ERROR_UENC_NOMAPPING) { + if (mErrBehavior == kOnError_Replace) { + const char16_t buff[] = {mErrChar}; + bcr = 1; + bcw = destEnd - dest; + src--; // back up the input: maybe the callee won't consume anything.
+ res = ConvertNoBuffNoErr(buff, &bcr, dest, &bcw); + src += bcr; + dest += bcw; + if (res != NS_OK) break; + } else if (mErrBehavior == kOnError_CallBack) { + bcw = destEnd - dest; + src--; + res = mErrEncoder->Convert(*src, dest, &bcw); + dest += bcw; + // if there was enough output space then the last char was used + if (res != NS_OK_UENC_MOREOUTPUT) src++; + if (res != NS_OK) break; + } else break; + } + else break; + } + + *aSrcLength -= srcEnd - src; + *aDestLength -= destEnd - dest; + return res; +} + +NS_IMETHODIMP nsEncoderSupport::FinishNoBuff(char* aDest, + int32_t* aDestLength) +{ + *aDestLength = 0; + return NS_OK; +} + +nsresult nsEncoderSupport::FlushBuffer(char** aDest, const char* aDestEnd) +{ + int32_t bcr, bcw; // byte counts for read & write; + nsresult res = NS_OK; + char* dest = *aDest; + + if (mBufferStart < mBufferEnd) { + bcr = mBufferEnd - mBufferStart; + bcw = aDestEnd - dest; + if (bcw < bcr) bcr = bcw; + memcpy(dest, mBufferStart, bcr); + dest += bcr; + mBufferStart += bcr; + + if (mBufferStart < mBufferEnd) res = NS_OK_UENC_MOREOUTPUT; + } + + *aDest = dest; + return res; +} + + +//---------------------------------------------------------------------- +// Interface nsIUnicodeEncoder [implementation] + +NS_IMETHODIMP nsEncoderSupport::Convert(const char16_t* aSrc, + int32_t* aSrcLength, + char* aDest, + int32_t* aDestLength) +{ + // we do all operations using pointers internally + const char16_t* src = aSrc; + const char16_t* srcEnd = aSrc + *aSrcLength; + char* dest = aDest; + char* destEnd = aDest + *aDestLength; + + int32_t bcr, bcw; // byte counts for read & write; + nsresult res; + + res = FlushBuffer(&dest, destEnd); + if (res == NS_OK_UENC_MOREOUTPUT) goto final; + + bcr = srcEnd - src; + bcw = destEnd - dest; + res = ConvertNoBuff(src, &bcr, dest, &bcw); + src += bcr; + dest += bcw; + if ((res == NS_OK_UENC_MOREOUTPUT) && (dest < destEnd)) { + // convert exactly one character into the internal buffer + // at this point, there should be
at least a char in the input + for (;;) { + bcr = 1; + bcw = mBufferCapacity; + res = ConvertNoBuff(src, &bcr, mBuffer, &bcw); + + if (res == NS_OK_UENC_MOREOUTPUT) { + delete [] mBuffer; + mBufferCapacity *= 2; + mBuffer = new char [mBufferCapacity]; + } else { + src += bcr; + mBufferStart = mBufferEnd = mBuffer; + mBufferEnd += bcw; + break; + } + } + + res = FlushBuffer(&dest, destEnd); + } + +final: + *aSrcLength -= srcEnd - src; + *aDestLength -= destEnd - dest; + return res; +} + +NS_IMETHODIMP nsEncoderSupport::Finish(char* aDest, int32_t* aDestLength) +{ + // we do all operations using pointers internally + char* dest = aDest; + char* destEnd = aDest + *aDestLength; + + int32_t bcw; // byte count for write; + nsresult res; + + res = FlushBuffer(&dest, destEnd); + if (res == NS_OK_UENC_MOREOUTPUT) goto final; + + // do the finish into the internal buffer. + for (;;) { + bcw = mBufferCapacity; + res = FinishNoBuff(mBuffer, &bcw); + + if (res == NS_OK_UENC_MOREOUTPUT) { + delete [] mBuffer; + mBufferCapacity *= 2; + mBuffer = new char [mBufferCapacity]; + } else { + mBufferStart = mBufferEnd = mBuffer; + mBufferEnd += bcw; + break; + } + } + + res = FlushBuffer(&dest, destEnd); + +final: + *aDestLength -= destEnd - dest; + return res; +} + +NS_IMETHODIMP nsEncoderSupport::Reset() +{ + mBufferStart = mBufferEnd = mBuffer; + return NS_OK; +} + +NS_IMETHODIMP nsEncoderSupport::SetOutputErrorBehavior( + int32_t aBehavior, + nsIUnicharEncoder* aEncoder, + char16_t aChar) +{ + if (aBehavior == kOnError_CallBack && !aEncoder) + return NS_ERROR_NULL_POINTER; + + mErrEncoder = aEncoder; + mErrBehavior = aBehavior; + mErrChar = aChar; + return NS_OK; +} + +NS_IMETHODIMP +nsEncoderSupport::GetMaxLength(const char16_t* aSrc, + int32_t aSrcLength, + int32_t* aDestLength) +{ + mozilla::CheckedInt32 length = aSrcLength; + length *= mMaxLengthFactor; + + if (!length.isValid()) { + return NS_ERROR_OUT_OF_MEMORY; + } + + *aDestLength = length.value(); + return NS_OK; +} + + 
+//---------------------------------------------------------------------- +// Class nsTableEncoderSupport [implementation] + +nsTableEncoderSupport::nsTableEncoderSupport(uScanClassID aScanClass, + uShiftOutTable* aShiftOutTable, + uMappingTable* aMappingTable, + uint32_t aMaxLengthFactor) +: nsEncoderSupport(aMaxLengthFactor) +{ + mScanClass = aScanClass; + mShiftOutTable = aShiftOutTable; + mMappingTable = aMappingTable; +} + +nsTableEncoderSupport::nsTableEncoderSupport(uScanClassID aScanClass, + uMappingTable* aMappingTable, + uint32_t aMaxLengthFactor) +: nsEncoderSupport(aMaxLengthFactor) +{ + mScanClass = aScanClass; + mShiftOutTable = nullptr; + mMappingTable = aMappingTable; +} + +nsTableEncoderSupport::~nsTableEncoderSupport() +{ +} + +//---------------------------------------------------------------------- +// Subclassing of nsEncoderSupport class [implementation] + +NS_IMETHODIMP nsTableEncoderSupport::ConvertNoBuffNoErr( + const char16_t* aSrc, + int32_t* aSrcLength, + char* aDest, + int32_t* aDestLength) +{ + return nsUnicodeEncodeHelper::ConvertByTable(aSrc, aSrcLength, + aDest, aDestLength, + mScanClass, + mShiftOutTable, mMappingTable); +} + +//---------------------------------------------------------------------- +// Class nsMultiTableEncoderSupport [implementation] + +nsMultiTableEncoderSupport::nsMultiTableEncoderSupport( + int32_t aTableCount, + uScanClassID* aScanClassArray, + uShiftOutTable** aShiftOutTable, + uMappingTable** aMappingTable, + uint32_t aMaxLengthFactor) +: nsEncoderSupport(aMaxLengthFactor) +{ + mTableCount = aTableCount; + mScanClassArray = aScanClassArray; + mShiftOutTable = aShiftOutTable; + mMappingTable = aMappingTable; +} + +nsMultiTableEncoderSupport::~nsMultiTableEncoderSupport() +{ +} + +//---------------------------------------------------------------------- +// Subclassing of nsEncoderSupport class [implementation] + +NS_IMETHODIMP nsMultiTableEncoderSupport::ConvertNoBuffNoErr( + const char16_t* aSrc, + int32_t*
aSrcLength, + char* aDest, + int32_t* aDestLength) +{ + return nsUnicodeEncodeHelper::ConvertByMultiTable(aSrc, aSrcLength, + aDest, aDestLength, + mTableCount, + mScanClassArray, + mShiftOutTable, + mMappingTable); +} diff --git a/intl/uconv/util/nsUnicodeDecodeHelper.cpp b/intl/uconv/util/nsUnicodeDecodeHelper.cpp new file mode 100644 index 000000000..9d3491d86 --- /dev/null +++ b/intl/uconv/util/nsUnicodeDecodeHelper.cpp @@ -0,0 +1,234 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "unicpriv.h" +#include "nsUnicodeDecodeHelper.h" +#include "mozilla/UniquePtr.h" + +//---------------------------------------------------------------------- +// Class nsUnicodeDecodeHelper [implementation] +nsresult nsUnicodeDecodeHelper::ConvertByTable( + const char * aSrc, + int32_t * aSrcLength, + char16_t * aDest, + int32_t * aDestLength, + uScanClassID aScanClass, + uShiftInTable * aShiftInTable, + uMappingTable * aMappingTable, + bool aErrorSignal) +{ + const char * src = aSrc; + int32_t srcLen = *aSrcLength; + char16_t * dest = aDest; + char16_t * destEnd = aDest + *aDestLength; + + char16_t med; + int32_t bcr; // byte count for read + nsresult res = NS_OK; + + while ((srcLen > 0) && (dest < destEnd)) { + bool charFound; + if (aScanClass == uMultibytesCharset) { + NS_ASSERTION(aShiftInTable, "shift table missing"); + charFound = uScanShift(aShiftInTable, nullptr, (uint8_t *)src, + reinterpret_cast<uint16_t*>(&med), srcLen, + (uint32_t *)&bcr); + } else { + charFound = uScan(aScanClass, nullptr, (uint8_t *)src, + reinterpret_cast<uint16_t*>(&med), + srcLen, (uint32_t *)&bcr); + } + if (!charFound) { + res = NS_OK_UDEC_MOREINPUT; + break; + } + + if (!uMapCode((uTable*) aMappingTable, static_cast<uint16_t>(med), 
reinterpret_cast<uint16_t*>(dest))) { + if (med < 0x20) { + // somehow some table miss the 0x00 - 0x20 part + *dest = med; + } else { + if (aErrorSignal) { + res = NS_ERROR_ILLEGAL_INPUT; + break; + } + // Unicode replacement value for unmappable chars + *dest = 0xfffd; + } + } + + src += bcr; + srcLen -= bcr; + dest++; + } + + if ((srcLen > 0) && (res == NS_OK)) res = NS_OK_UDEC_MOREOUTPUT; + + *aSrcLength = src - aSrc; + *aDestLength = dest - aDest; + return res; +} + +nsresult nsUnicodeDecodeHelper::ConvertByMultiTable( + const char * aSrc, + int32_t * aSrcLength, + char16_t * aDest, + int32_t * aDestLength, + int32_t aTableCount, + const uRange * aRangeArray, + uScanClassID * aScanClassArray, + uMappingTable ** aMappingTable, + bool aErrorSignal) +{ + uint8_t * src = (uint8_t *)aSrc; + int32_t srcLen = *aSrcLength; + char16_t * dest = aDest; + char16_t * destEnd = aDest + *aDestLength; + + char16_t med; + int32_t bcr; // byte count for read + nsresult res = NS_OK; + int32_t i; + + while ((srcLen > 0) && (dest < destEnd)) + { + bool done= false; + bool passRangeCheck = false; + bool passScan = false; + for (i=0; (!done) && (i<aTableCount); i++) + { + if ((aRangeArray[i].min <= *src) && (*src <= aRangeArray[i].max)) + { + passRangeCheck = true; + if (uScan(aScanClassArray[i], nullptr, src, + reinterpret_cast<uint16_t*>(&med), srcLen, + (uint32_t *)&bcr)) + { + passScan = true; + done = uMapCode((uTable*) aMappingTable[i], + static_cast<uint16_t>(med), + reinterpret_cast<uint16_t*>(dest)); + } // if (uScan ... ) + } // if Range + } // for loop + + if(passRangeCheck && (! passScan)) + { + if (res != NS_ERROR_ILLEGAL_INPUT) + res = NS_OK_UDEC_MOREINPUT; + break; + } + if(! done) + { + bcr = 1; + if ((uint8_t)*src < 0x20) { + // somehow some table miss the 0x00 - 0x20 part + *dest = *src; + } else if(*src == (uint8_t) 0xa0) { + // handle nbsp + *dest = 0x00a0; + } else { + // we need to decide how many byte we skip. 
We can use uScan to do this + for (i=0; i<aTableCount; i++) + { + if ((aRangeArray[i].min <= *src) && (*src <= aRangeArray[i].max)) + { + if (uScan(aScanClassArray[i], nullptr, src, + reinterpret_cast<uint16_t*>(&med), srcLen, + (uint32_t*)&bcr)) + { + // match the pattern + + int32_t k; + for(k = 1; k < bcr; k++) + { + if(0 == (src[k] & 0x80)) + { // only skip if all bytes have the high bit set (>= 0x80) + // if we hit a byte < 0x80, skip only one byte + bcr = 1; + break; + } + } + break; + } + } + } + // treat it as NBSP if bcr == 1 and it is 0xa0 + if ((1==bcr)&&(*src == (uint8_t)0xa0 )) { + *dest = 0x00a0; + } else { + if (aErrorSignal) { + res = NS_ERROR_ILLEGAL_INPUT; + break; + } + *dest = 0xfffd; + } + } + } + + src += bcr; + srcLen -= bcr; + dest++; + } // while + + if ((srcLen > 0) && (res == NS_OK)) res = NS_OK_UDEC_MOREOUTPUT; + + *aSrcLength = src - (uint8_t *)aSrc; + *aDestLength = dest - aDest; + return res; +} + +nsresult nsUnicodeDecodeHelper::ConvertByFastTable( + const char * aSrc, + int32_t * aSrcLength, + char16_t * aDest, + int32_t * aDestLength, + const char16_t * aFastTable, + int32_t aTableSize, + bool aErrorSignal) +{ + uint8_t * src = (uint8_t *)aSrc; + uint8_t * srcEnd = src; + char16_t * dest = aDest; + + nsresult res; + if (*aSrcLength > *aDestLength) { + srcEnd += (*aDestLength); + res = NS_PARTIAL_MORE_OUTPUT; + } else { + srcEnd += (*aSrcLength); + res = NS_OK; + } + + for (; src<srcEnd;) { + *dest = aFastTable[*src]; + if (*dest == 0xfffd && aErrorSignal) { + res = NS_ERROR_ILLEGAL_INPUT; + break; + } + src++; + dest++; + } + + *aSrcLength = src - (uint8_t *)aSrc; + *aDestLength = dest - aDest; + return res; +} + +nsresult nsUnicodeDecodeHelper::CreateFastTable( + uMappingTable * aMappingTable, + char16_t * aFastTable, + int32_t aTableSize) +{ + int32_t tableSize = aTableSize; + int32_t buffSize = aTableSize; + auto buff = mozilla::MakeUnique<char[]>(buffSize); + + char * p = buff.get(); + for (int32_t i=0; i<aTableSize; i++) *(p++) = i; + return
ConvertByTable(buff.get(), &buffSize, aFastTable, &tableSize, + u1ByteCharset, nullptr, aMappingTable); +} + diff --git a/intl/uconv/util/nsUnicodeDecodeHelper.h b/intl/uconv/util/nsUnicodeDecodeHelper.h new file mode 100644 index 000000000..f1ce56b19 --- /dev/null +++ b/intl/uconv/util/nsUnicodeDecodeHelper.h @@ -0,0 +1,55 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef nsUnicodeDecodeHelper_h__ +#define nsUnicodeDecodeHelper_h__ + +#include "nsError.h" +#include "uconvutil.h" +//---------------------------------------------------------------------- +// Class nsUnicodeDecodeHelper [declaration] + +/** + * + * @created 18/Mar/1998 + * @author Catalin Rotaru [CATA] + */ +class nsUnicodeDecodeHelper +{ +public: + /** + * Converts data using a lookup table and optional shift table + */ + static nsresult ConvertByTable(const char * aSrc, int32_t * aSrcLength, + char16_t * aDest, int32_t * aDestLength, + uScanClassID aScanClass, + uShiftInTable * aShiftInTable, + uMappingTable * aMappingTable, + bool aErrorSignal = false); + + /** + * Converts data using a set of lookup tables. + */ + static nsresult ConvertByMultiTable(const char * aSrc, int32_t * aSrcLength, + char16_t * aDest, int32_t * aDestLength, int32_t aTableCount, + const uRange * aRangeArray, uScanClassID * aScanClassArray, + uMappingTable ** aMappingTable, bool aErrorSignal = false); + + /** + * Converts data using a fast lookup table. + */ + static nsresult ConvertByFastTable(const char * aSrc, int32_t * aSrcLength, + char16_t * aDest, int32_t * aDestLength, const char16_t * aFastTable, + int32_t aTableSize, bool aErrorSignal); + + /** + * Create a cache-like fast lookup table from a normal one. 
+ */ + static nsresult CreateFastTable(uMappingTable * aMappingTable, + char16_t * aFastTable, int32_t aTableSize); +}; + +#endif // nsUnicodeDecodeHelper_h__ + + diff --git a/intl/uconv/util/nsUnicodeEncodeHelper.cpp b/intl/uconv/util/nsUnicodeEncodeHelper.cpp new file mode 100644 index 000000000..f801e8166 --- /dev/null +++ b/intl/uconv/util/nsUnicodeEncodeHelper.cpp @@ -0,0 +1,121 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "unicpriv.h" +#include "nsUnicodeEncodeHelper.h" +#include "nsDebug.h" + +//---------------------------------------------------------------------- +// Class nsUnicodeEncodeHelper [implementation] +nsresult nsUnicodeEncodeHelper::ConvertByTable( + const char16_t * aSrc, + int32_t * aSrcLength, + char * aDest, + int32_t * aDestLength, + uScanClassID aScanClass, + uShiftOutTable * aShiftOutTable, + uMappingTable * aMappingTable) +{ + const char16_t * src = aSrc; + const char16_t * srcEnd = aSrc + *aSrcLength; + char * dest = aDest; + int32_t destLen = *aDestLength; + + char16_t med; + int32_t bcw; // byte count for write; + nsresult res = NS_OK; + + while (src < srcEnd) { + if (!uMapCode((uTable*) aMappingTable, static_cast<char16_t>(*(src++)), reinterpret_cast<uint16_t*>(&med))) { + if (aScanClass == u1ByteCharset && *(src - 1) < 0x20) { + // some tables are missing the 0x00 - 0x20 part + med = *(src - 1); + } else { + res = NS_ERROR_UENC_NOMAPPING; + break; + } + } + + bool charFound; + if (aScanClass == uMultibytesCharset) { + NS_ASSERTION(aShiftOutTable, "shift table missing"); + charFound = uGenerateShift(aShiftOutTable, 0, med, + (uint8_t *)dest, destLen, + (uint32_t *)&bcw); + } else { + charFound = uGenerate(aScanClass, 0, med, + (uint8_t *)dest, destLen, + (uint32_t *)&bcw); + } + 
if (!charFound) { + src--; + res = NS_OK_UENC_MOREOUTPUT; + break; + } + + dest += bcw; + destLen -= bcw; + } + + *aSrcLength = src - aSrc; + *aDestLength = dest - aDest; + return res; +} + +nsresult nsUnicodeEncodeHelper::ConvertByMultiTable( + const char16_t * aSrc, + int32_t * aSrcLength, + char * aDest, + int32_t * aDestLength, + int32_t aTableCount, + uScanClassID * aScanClassArray, + uShiftOutTable ** aShiftOutTable, + uMappingTable ** aMappingTable) +{ + const char16_t * src = aSrc; + const char16_t * srcEnd = aSrc + *aSrcLength; + char * dest = aDest; + int32_t destLen = *aDestLength; + + char16_t med; + int32_t bcw; // byte count for write; + nsresult res = NS_OK; + int32_t i; + + while (src < srcEnd) { + for (i=0; i<aTableCount; i++) + if (uMapCode((uTable*) aMappingTable[i], static_cast<uint16_t>(*src), reinterpret_cast<uint16_t*>(&med))) break; + + src++; + if (i == aTableCount) { + res = NS_ERROR_UENC_NOMAPPING; + break; + } + + bool charFound; + if (aScanClassArray[i] == uMultibytesCharset) { + NS_ASSERTION(aShiftOutTable[i], "shift table missing"); + charFound = uGenerateShift(aShiftOutTable[i], 0, med, + (uint8_t *)dest, destLen, + (uint32_t *)&bcw); + } + else + charFound = uGenerate(aScanClassArray[i], 0, med, + (uint8_t *)dest, destLen, + (uint32_t *)&bcw); + if (!charFound) { + src--; + res = NS_OK_UENC_MOREOUTPUT; + break; + } + + dest += bcw; + destLen -= bcw; + } + + *aSrcLength = src - aSrc; + *aDestLength = dest - aDest; + return res; +} diff --git a/intl/uconv/util/nsUnicodeEncodeHelper.h b/intl/uconv/util/nsUnicodeEncodeHelper.h new file mode 100644 index 000000000..456c277ba --- /dev/null +++ b/intl/uconv/util/nsUnicodeEncodeHelper.h @@ -0,0 +1,42 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#ifndef nsUnicodeEncodeHelper_h__ +#define nsUnicodeEncodeHelper_h__ + +#include "nsError.h" +#include "uconvutil.h" +//---------------------------------------------------------------------- +// Class nsUnicodeEncodeHelper [declaration] + +/** + * + * @created 22/Nov/1998 + * @author Catalin Rotaru [CATA] + */ +class nsUnicodeEncodeHelper +{ + +public: + //-------------------------------------------------------------------- + + /** + * Converts data using a lookup table and optional shift table. + */ + static nsresult ConvertByTable(const char16_t * aSrc, int32_t * aSrcLength, + char * aDest, int32_t * aDestLength, uScanClassID aScanClass, + uShiftOutTable * aShiftOutTable, uMappingTable * aMappingTable); + + /** + * Converts data using a set of lookup tables and optional shift tables. + */ + static nsresult ConvertByMultiTable(const char16_t * aSrc, int32_t * aSrcLength, + char * aDest, int32_t * aDestLength, int32_t aTableCount, + uScanClassID * aScanClassArray, + uShiftOutTable ** aShiftOutTable, uMappingTable ** aMappingTable); +}; + +#endif // nsUnicodeEncodeHelper_h__ + + diff --git a/intl/uconv/util/ugen.c b/intl/uconv/util/ugen.c new file mode 100644 index 000000000..9a11b9f39 --- /dev/null +++ b/intl/uconv/util/ugen.c @@ -0,0 +1,712 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#include "unicpriv.h" +/*================================================================================= + +=================================================================================*/ +typedef int (*uSubGeneratorFunc) (uint16_t in, unsigned char* out); +/*================================================================================= + +=================================================================================*/ + +typedef int (*uGeneratorFunc) ( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +int uGenerate( + uScanClassID scanClass, + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +#define uSubGenerator(sub,in,out) (* m_subgenerator[sub])((in),(out)) + +int uCheckAndGenAlways1Byte( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); +int uCheckAndGenAlways2Byte( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); +int uCheckAndGenAlways2ByteShiftGR( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); +int uGenerateShift( + uShiftOutTable *shift, + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); +int uCheckAndGen2ByteGRPrefix8F( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); +int uCheckAndGen2ByteGRPrefix8EA2( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +int uCheckAndGen2ByteGRPrefix8EA3( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +int uCheckAndGen2ByteGRPrefix8EA4( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +int uCheckAndGen2ByteGRPrefix8EA5( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t 
outbuflen, + uint32_t* outlen + ); + +int uCheckAndGen2ByteGRPrefix8EA6( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +int uCheckAndGen2ByteGRPrefix8EA7( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); +int uCnGAlways8BytesDecomposedHangul( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +int uCheckAndGenJohabHangul( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +int uCheckAndGenJohabSymbol( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + + +int uCheckAndGen4BytesGB18030( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +int uGenAlways2Byte( + uint16_t in, + unsigned char* out + ); +int uGenAlways2ByteShiftGR( + uint16_t in, + unsigned char* out + ); +int uGenAlways1Byte( + uint16_t in, + unsigned char* out + ); +int uGenAlways1BytePrefix8E( + uint16_t in, + unsigned char* out + ); +/*================================================================================= + +=================================================================================*/ +const uGeneratorFunc m_generator[uNumOfCharsetType] = +{ + uCheckAndGenAlways1Byte, + uCheckAndGenAlways2Byte, + uCheckAndGenAlways2ByteShiftGR, + uCheckAndGen2ByteGRPrefix8F, + uCheckAndGen2ByteGRPrefix8EA2, + uCheckAndGen2ByteGRPrefix8EA3, + uCheckAndGen2ByteGRPrefix8EA4, + uCheckAndGen2ByteGRPrefix8EA5, + uCheckAndGen2ByteGRPrefix8EA6, + uCheckAndGen2ByteGRPrefix8EA7, + uCnGAlways8BytesDecomposedHangul, + uCheckAndGenJohabHangul, + uCheckAndGenJohabSymbol, + uCheckAndGen4BytesGB18030, + uCheckAndGenAlways2Byte /* place-holder for GR128 */ +}; + +/*================================================================================= + 
+=================================================================================*/
+
+/* Sub-generators, indexed through the uSubGenerator() macro with the classID
+   stored in a shift-out cell (see uGenerateShift()). */
+const uSubGeneratorFunc m_subgenerator[uNumOfCharType] =
+{
+  uGenAlways1Byte,
+  uGenAlways2Byte,
+  uGenAlways2ByteShiftGR,
+  uGenAlways1BytePrefix8E
+};
+/*=================================================================================
+  uGenerate: serialize the charset code `in` with the generator registered for
+  `scanClass` in m_generator[].
+
+  state, in, out, outbuflen and outlen are forwarded unchanged to the selected
+  generator.  Returns that generator's result (1 when bytes were written and
+  *outlen was set, 0 when nothing was written).  Also returns 0, without
+  touching out or outlen, when scanClass is not a valid m_generator[] index.
+=================================================================================*/
+int uGenerate(
+              uScanClassID scanClass,
+              int32_t*    state,
+              uint16_t    in,
+              unsigned char*  out,
+              uint32_t    outbuflen,
+              uint32_t*   outlen
+              )
+{
+  /* m_generator[] has exactly uNumOfCharsetType entries; an out-of-range id
+     would call through a garbage function pointer. */
+  if ((uint32_t) scanClass >= (uint32_t) uNumOfCharsetType)
+    return 0;
+  return (* m_generator[scanClass]) (state,in,out,outbuflen,outlen);
+}
+/*=================================================================================
+  uGenAlways1Byte: store the low 8 bits of `in` in out[0].  Always returns 1.
+  No length check is done here; the caller must have reserved the space.
+=================================================================================*/
+int uGenAlways1Byte(
+                    uint16_t    in,
+                    unsigned char*  out
+                    )
+{
+  out[0] = (unsigned char)in;
+  return 1;
+}
+
+/*=================================================================================
+  uGenAlways2Byte: store `in` as two bytes, high byte first, in out[0..1].
+  Always returns 1.  No length check is done here.
+=================================================================================*/
+int uGenAlways2Byte(
+                    uint16_t    in,
+                    unsigned char*  out
+                    )
+{
+  out[0] = (unsigned char)((in >> 8) & 0xff);
+  out[1] = (unsigned char)(in & 0xff);
+  return 1;
+}
+/*=================================================================================
+  uGenAlways2ByteShiftGR: like uGenAlways2Byte, but with bit 7 set in both
+  output bytes.  Always returns 1.  No length check is done here.
+=================================================================================*/
+int uGenAlways2ByteShiftGR(
+                           uint16_t    in,
+                           unsigned char*  out
+                           )
+{
+  out[0] = (unsigned char)(((in >> 8) & 0xff) | 0x80);
+  out[1] = (unsigned char)((in & 0xff) | 0x80);
+  return 1;
+}
+/*=================================================================================
+  uGenAlways1BytePrefix8E: write the prefix byte 0x8E followed by the low
+  8 bits of `in` (two bytes in total).  Always returns 1.  No length check is
+  done here.
+=================================================================================*/
+int uGenAlways1BytePrefix8E(
+                            uint16_t    in,
+                            unsigned char*  out
+                            )
+{
+  out[0] = 0x8E;
+  out[1] = (unsigned char)(in & 0xff);
+  return 1;
+}
+/*=================================================================================
+  uCheckAndGenAlways1Byte: write the low 8 bits of `in` to out[0] and set
+  *outlen to 1.  Returns 1 on success, 0 (nothing written) when
+  outbuflen < 1.  `state` is not used.
+=================================================================================*/
+int uCheckAndGenAlways1Byte(
+                            int32_t*    state,
+                            uint16_t    in,
+                            unsigned char*  out,
+                            uint32_t    outbuflen,
+                            uint32_t*   outlen
+                            )
+{
+  /* Don't check inlen. The caller should ensure it is larger than 0 */
+  /*  Oops, I don't agree. Code changed to check every time. [CATA] */
+  if(outbuflen < 1)
+    return 0;
+  else
+  {
+    *outlen = 1;
+    out[0] = in & 0xff;
+    return 1;
+  }
+}
+
+/*=================================================================================
+  uCheckAndGenAlways2Byte: write `in` as two bytes, high byte first, and set
+  *outlen to 2.  Returns 1 on success, 0 (nothing written) when
+  outbuflen < 2.  `state` is not used.
+=================================================================================*/
+int uCheckAndGenAlways2Byte(
+                            int32_t*    state,
+                            uint16_t    in,
+                            unsigned char*  out,
+                            uint32_t    outbuflen,
+                            uint32_t*   outlen
+                            )
+{
+  if(outbuflen < 2)
+    return 0;
+  else
+  {
+    *outlen = 2;
+    out[0] = ((in >> 8 ) & 0xff);
+    out[1] = in  & 0xff;
+    return 1;
+  }
+}
+/*=================================================================================
+  uCheckAndGenAlways2ByteShiftGR: like uCheckAndGenAlways2Byte, but with
+  bit 7 set in both output bytes.  Returns 1 on success, 0 (nothing written)
+  when outbuflen < 2.  `state` is not used.
+=================================================================================*/
+int uCheckAndGenAlways2ByteShiftGR(
+                                   int32_t*    state,
+                                   uint16_t    in,
+                                   unsigned char*  out,
+                                   uint32_t    outbuflen,
+                                   uint32_t*   outlen
+                                   )
+{
+  if(outbuflen < 2)
+    return 0;
+  else
+  {
+    *outlen = 2;
+    out[0] = ((in >> 8 ) & 0xff) | 0x80;
+    out[1] = (in  & 0xff)  | 0x80;
+    return 1;
+  }
+}
+/*=================================================================================
+  uGenerateShift: table-driven generator.  Walks shift->shiftcell[] (numOfItem
+  entries) and uses the first cell whose low-byte and high-byte ranges both
+  contain `in`.  For that cell it sets *outlen to the cell's reserveLen and
+  calls the sub-generator selected by the cell's classID.  Returns 0 when the
+  output buffer is shorter than reserveLen or when no cell matches.
+  `state` is not used.
+=================================================================================*/
+int uGenerateShift(
+                   uShiftOutTable    *shift,
+                   int32_t*    state,
+                   uint16_t    in,
+                   unsigned char*  out,
+                   uint32_t    outbuflen,
+                   uint32_t*   outlen
+                   )
+{
+  int16_t i;
+  const uShiftOutCell* cell = &(shift->shiftcell[0]);
+  int16_t itemnum = shift->numOfItem;
+  unsigned char inH, inL;
+  inH = (in >> 8) & 0xff; /* high byte of the code being generated */
+  inL = (in & 0xff ); /* low byte of the code being generated */
+  for(i=0;i<itemnum;i++)
+  {
+    if( ( inL >=
cell[i].shiftout_MinLB) &&
+      ( inL <=  cell[i].shiftout_MaxLB) &&
+      ( inH >=  cell[i].shiftout_MinHB) &&
+      ( inH <=  cell[i].shiftout_MaxHB) )
+    {
+      if(outbuflen < cell[i].reserveLen) /* not enough room for this cell's output */
+      {
+        return 0;
+      }
+      else
+      {
+        *outlen = cell[i].reserveLen;
+        return (uSubGenerator(cell[i].classID,in,out)); /* sub-generator writes without further length checks */
+      }
+    }
+  }
+  return 0; /* no shift cell covers `in` */
+}
+/*=================================================================================
+  uCheckAndGen2ByteGRPrefix8F: write 0x8F followed by both bytes of `in`
+  (high byte first) with bit 7 set, and set *outlen to 3.  Returns 1 on
+  success, 0 (nothing written) when outbuflen < 3.  `state` is not used.
+=================================================================================*/
+int uCheckAndGen2ByteGRPrefix8F(int32_t*    state,
+                                uint16_t    in,
+                                unsigned char*  out,
+                                uint32_t    outbuflen,
+                                uint32_t*   outlen
+                                )
+{
+  if(outbuflen < 3)
+    return 0;
+  else
+  {
+    *outlen = 3;
+    out[0] = 0x8F;
+    out[1] = ((in >> 8 ) & 0xff) | 0x80;
+    out[2] = (in  & 0xff)  | 0x80;
+    return 1;
+  }
+}
+/*=================================================================================
+  uCheckAndGen2ByteGRPrefix8EA2: write the prefix 0x8E 0xA2 followed by both
+  bytes of `in` (high byte first) with bit 7 set, and set *outlen to 4.
+  Returns 1 on success, 0 (nothing written) when outbuflen < 4.
+  `state` is not used.
+=================================================================================*/
+int uCheckAndGen2ByteGRPrefix8EA2(int32_t*    state,
+                                  uint16_t    in,
+                                  unsigned char*  out,
+                                  uint32_t    outbuflen,
+                                  uint32_t*   outlen
+                                  )
+{
+  if(outbuflen < 4)
+    return 0;
+  else
+  {
+    *outlen = 4;
+    out[0] = 0x8E;
+    out[1] = 0xA2;
+    out[2] = ((in >> 8 ) & 0xff) | 0x80;
+    out[3] = (in  & 0xff)  | 0x80;
+    return 1;
+  }
+}
+
+
+/*=================================================================================
+  uCheckAndGen2ByteGRPrefix8EA3: same as uCheckAndGen2ByteGRPrefix8EA2 but
+  with the prefix 0x8E 0xA3.
+=================================================================================*/
+int uCheckAndGen2ByteGRPrefix8EA3(int32_t*    state,
+                                  uint16_t    in,
+                                  unsigned char*  out,
+                                  uint32_t    outbuflen,
+                                  uint32_t*   outlen
+                                  )
+{
+  if(outbuflen < 4)
+    return 0;
+  else
+  {
+    *outlen = 4;
+    out[0] = 0x8E;
+    out[1] = 0xA3;
+    out[2] = ((in >> 8 ) & 0xff) | 0x80;
+    out[3] = (in  & 0xff)  | 0x80;
+    return 1;
+  }
+}
+/*=================================================================================
+  uCheckAndGen2ByteGRPrefix8EA4: write the prefix 0x8E 0xA4 followed by both
+  bytes of `in` (high byte first) with bit 7 set, and set *outlen to 4.
+  Returns 1 on success, 0 (nothing written) when outbuflen < 4.
+  `state` is not used.
+=================================================================================*/
+int uCheckAndGen2ByteGRPrefix8EA4(int32_t*    state,
+
uint16_t    in,
+                                  unsigned char*  out,
+                                  uint32_t    outbuflen,
+                                  uint32_t*   outlen
+                                  )
+{
+  if(outbuflen < 4)
+    return 0;
+  else
+  {
+    *outlen = 4;
+    out[0] = 0x8E;
+    out[1] = 0xA4;
+    out[2] = ((in >> 8 ) & 0xff) | 0x80;
+    out[3] = (in  & 0xff)  | 0x80;
+    return 1;
+  }
+}
+/*=================================================================================
+  uCheckAndGen2ByteGRPrefix8EA5: write the prefix 0x8E 0xA5 followed by both
+  bytes of `in` (high byte first) with bit 7 set, and set *outlen to 4.
+  Returns 1 on success, 0 (nothing written) when outbuflen < 4.
+  `state` is not used.
+=================================================================================*/
+int uCheckAndGen2ByteGRPrefix8EA5(int32_t*    state,
+                                  uint16_t    in,
+                                  unsigned char*  out,
+                                  uint32_t    outbuflen,
+                                  uint32_t*   outlen
+                                  )
+{
+  if(outbuflen < 4)
+    return 0;
+  else
+  {
+    *outlen = 4;
+    out[0] = 0x8E;
+    out[1] = 0xA5;
+    out[2] = ((in >> 8 ) & 0xff) | 0x80;
+    out[3] = (in  & 0xff)  | 0x80;
+    return 1;
+  }
+}
+/*=================================================================================
+  uCheckAndGen2ByteGRPrefix8EA6: same as uCheckAndGen2ByteGRPrefix8EA5 but
+  with the prefix 0x8E 0xA6.
+=================================================================================*/
+int uCheckAndGen2ByteGRPrefix8EA6(int32_t*    state,
+                                  uint16_t    in,
+                                  unsigned char*  out,
+                                  uint32_t    outbuflen,
+                                  uint32_t*   outlen
+                                  )
+{
+  if(outbuflen < 4)
+    return 0;
+  else
+  {
+    *outlen = 4;
+    out[0] = 0x8E;
+    out[1] = 0xA6;
+    out[2] = ((in >> 8 ) & 0xff) | 0x80;
+    out[3] = (in  & 0xff)  | 0x80;
+    return 1;
+  }
+}
+/*=================================================================================
+  uCheckAndGen2ByteGRPrefix8EA7: same as uCheckAndGen2ByteGRPrefix8EA5 but
+  with the prefix 0x8E 0xA7.
+=================================================================================*/
+int uCheckAndGen2ByteGRPrefix8EA7(int32_t*    state,
+                                  uint16_t    in,
+                                  unsigned char*  out,
+                                  uint32_t    outbuflen,
+                                  uint32_t*   outlen
+                                  )
+{
+  if(outbuflen < 4)
+    return 0;
+  else
+  {
+    *outlen = 4;
+    out[0] = 0x8E;
+    out[1] = 0xA7;
+    out[2] = ((in >> 8 ) & 0xff) | 0x80;
+    out[3] = (in  & 0xff)  | 0x80;
+    return 1;
+  }
+}
+/*=================================================================================
+  Constants for the Hangul syllable decomposition used by the Hangul
+  generators that follow: SBase is subtracted from the input code to get a
+  syllable index, which is then split with NCount (= VCount * TCount) and
+  TCount into L / V / T indices.  LCount, VCount and TCount also size the
+  lookup tables in those generators.
+=================================================================================*/
+#define SBase 0xAC00
+#define LCount 19
+#define VCount 21
+#define TCount 28
+#define NCount
(VCount * TCount) +/*================================================================================= + +=================================================================================*/ +int uCnGAlways8BytesDecomposedHangul( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ) +{ + static const uint8_t lMap[LCount] = { + 0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2, 0xb3, 0xb5, + 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe + }; + + static const uint8_t tMap[TCount] = { + 0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa9, 0xaa, + 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb4, 0xb5, + 0xb6, 0xb7, 0xb8, 0xba, 0xbb, 0xbc, 0xbd, 0xbe + }; + + uint16_t SIndex, LIndex, VIndex, TIndex; + + if(outbuflen < 8) + return 0; + + /* the following line are copy from Unicode 2.0 page 3-13 */ + /* item 1 of Hangul Syllabel Decomposition */ + SIndex = in - SBase; + + /* the following lines are copy from Unicode 2.0 page 3-14 */ + /* item 2 of Hangul Syllabel Decomposition w/ modification */ + LIndex = SIndex / NCount; + VIndex = (SIndex % NCount) / TCount; + TIndex = SIndex % TCount; + + /* + * A Hangul syllable not enumerated in KS X 1001 is represented + * by a sequence of 8 bytes beginning with Hangul-filler + * (0xA4D4 in EUC-KR and 0x2454 in ISO-2022-KR) followed by three + * Jamos (2 bytes each the first of which is 0xA4 in EUC-KR) making + * up the syllable. ref. KS X 1001:1998 Annex 3 + */ + *outlen = 8; + out[0] = out[2] = out[4] = out[6] = 0xa4; + out[1] = 0xd4; + out[3] = lMap[LIndex] ; + out[5] = (VIndex + 0xbf); + out[7] = tMap[TIndex]; + + return 1; +} + +int uCheckAndGenJohabHangul( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ) +{ + if(outbuflen < 2) + return 0; + else + { + /* + See Table 4-45 (page 183) of CJKV Information Processing + for detail explanation of the following table. 
+    */
+    /*
+    static const uint8_t lMap[LCount] = {
+      2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
+    };
+    Therefore lMap[i] == i+2;
+    */
+
+    static const uint8_t vMap[VCount] = {
+      /* no 0,1,2 */
+      3,4,5,6,7,            /* no 8,9   */
+      10,11,12,13,14,15,    /* no 16,17 */
+      18,19,20,21,22,23,    /* no 24,25 */
+      26,27,28,29
+    };
+    static const uint8_t tMap[TCount] = {
+      1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17, /* no 18 */
+      19,20,21,22,23,24,25,26,27,28,29
+    };
+    uint16_t SIndex, LIndex, VIndex, TIndex, ch;
+    /* the following line is copied from Unicode 2.0 page 3-13 */
+    /* item 1 of Hangul Syllable Decomposition */
+    SIndex =  in - SBase; /* NOTE(review): `in` is not range-checked here; presumably the mapping table only yields codes >= SBase - confirm */
+
+    /* the following lines are copied from Unicode 2.0 page 3-14 */
+    /* item 2 of Hangul Syllable Decomposition w/ modification */
+    LIndex = SIndex / NCount;
+    VIndex = (SIndex % NCount) / TCount;
+    TIndex = SIndex % TCount;
+
+    *outlen = 2;
+    ch = 0x8000 | /* bit 15 is always set; L, V and T codes follow in 5-bit fields */
+      ((LIndex+2)<<10) | /* bits 10-14: leading consonant code (LIndex + 2) */
+      (vMap[VIndex]<<5)| /* bits 5-9: vowel code */
+      tMap[TIndex]; /* bits 0-4: trailing consonant code */
+    out[0] = (ch >> 8); /* high byte first */
+    out[1] = ch & 0x00FF;
+#if 0
+    printf("Johab Hangul %x %x in=%x L=%d V=%d T=%d\n", out[0], out[1], in, LIndex, VIndex, TIndex);
+#endif
+    return 1;
+  }
+}
+int uCheckAndGenJohabSymbol(
+                            int32_t*    state,
+                            uint16_t    in,
+                            unsigned char*  out,
+                            uint32_t    outbuflen,
+                            uint32_t*   outlen
+                            )
+{ /* Converts the two 7-bit halves of `in` to a 2-byte code following the
+     Perl reference quoted below; sets *outlen to 2 and returns 1.  Returns 0
+     (nothing written) when outbuflen < 2.  `state` is not used. */
+  if(outbuflen < 2)
+    return 0;
+  else
+  {
+    /* The following code is based on the Perl code listed under
+     * "ISO-2022-KR or EUC-KR to Johab Conversion" (page 1013)
+     * in the book "CJKV Information Processing" by
+     * Ken Lunde <lunde@adobe.com>
+     *
+     * sub convert2johab($) { # Convert ISO-2022-KR or EUC-KR to Johab
+     *  my @euc = unpack("C*", $_[0]);
+     *  my ($fe_off, $hi_off, $lo_off) = (0,0,1);
+     *  my @out = ();
+     *  while(($hi, $lo) = splice(@euc, 0, 2)) {
+     *    $hi &= 127; $lo &= 127;
+     *    $fe_off = 21 if $hi == 73;
+     *    $fe_off = 34 if $hi == 126;
+     *    ($hi_off, $lo_off) = ($lo_off, $hi_off) if ($hi <74 or $hi >125);
+     *    push(@out, ((($hi+$hi_off) >> 1)+ ($hi <74 ? 200:187)- $fe_off),
+     *      $lo + ((($hi+$lo_off) & 1) ?
($lo > 110 ? 34:16):128));
+     *  }
+     *  return pack("C*", @out);
+     */
+
+    unsigned char fe_off = 0;
+    unsigned char hi_off = 0;
+    unsigned char lo_off = 1;
+    unsigned char hi = (in >> 8) & 0x7F; /* $hi &= 127 in the Perl reference */
+    unsigned char lo = in & 0x7F; /* $lo &= 127 in the Perl reference */
+    if(73 == hi)
+      fe_off = 21;
+    if(126 == hi)
+      fe_off = 34;
+    if( (hi < 74) || ( hi > 125) )
+    {
+      hi_off = 1; /* swap of ($hi_off, $lo_off) from their initial (0, 1) */
+      lo_off = 0;
+    }
+    *outlen = 2;
+    out[0] =  ((hi+hi_off) >> 1) + ((hi<74) ? 200 : 187 ) - fe_off;
+    out[1] =  lo + (((hi+lo_off) & 1) ? ((lo > 110) ? 34 : 16) :
+                    128);
+#if 0
+    printf("Johab Symbol %x %x in=%x\n", out[0], out[1], in);
+#endif
+    return 1;
+  }
+}
+int uCheckAndGen4BytesGB18030(
+                              int32_t*    state,
+                              uint16_t    in,
+                              unsigned char*  out,
+                              uint32_t    outbuflen,
+                              uint32_t*   outlen
+                              )
+{ /* Treats `in` as a linear index and writes it as four mixed-radix digits:
+     out[0] = in / 12600 + 0x81, out[1] = (next / 1260) + 0x30,
+     out[2] = (next / 10) + 0x81, out[3] = (last % 10) + 0x30.
+     Sets *outlen to 4 and returns 1; returns 0 (nothing written) when
+     outbuflen < 4.  `state` is not used. */
+  if(outbuflen < 4)
+    return 0;
+  out[0] = (in / (10*126*10)) + 0x81;
+  in %= (10*126*10); /* `in` is a by-value parameter; reused as the running remainder */
+  out[1] = (in / (10*126)) + 0x30;
+  in %= (10*126);
+  out[2] = (in / (10)) + 0x81;
+  out[3] = (in % 10) + 0x30;
+  *outlen = 4;
+  return 1;
+}
diff --git a/intl/uconv/util/umap.c b/intl/uconv/util/umap.c
new file mode 100644
index 000000000..43a41e591
--- /dev/null
+++ b/intl/uconv/util/umap.c
@@ -0,0 +1,175 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ +/* #include "PRIntlpriv.h" */ +#include "unicpriv.h" + + +typedef uint16_t (* MapFormatFunc)(uint16_t in,const uTable *uT,const uMapCell *cell); +typedef int (* HitFormateFunc)(uint16_t in,const uMapCell *cell); +typedef void (* FillInfoFormateFunc)(const uTable *uT, const uMapCell *cell, uint32_t* info); + + +int uHitFormate0(uint16_t in,const uMapCell *cell); +int uHitFormate2(uint16_t in,const uMapCell *cell); +uint16_t uMapFormate0(uint16_t in,const uTable *uT,const uMapCell *cell); +uint16_t uMapFormate1(uint16_t in,const uTable *uT,const uMapCell *cell); +uint16_t uMapFormate2(uint16_t in,const uTable *uT,const uMapCell *cell); +void uFillInfoFormate0(const uTable *uT,const uMapCell *cell,uint32_t* aInfo); +void uFillInfoFormate1(const uTable *uT,const uMapCell *cell,uint32_t* aInfo); +void uFillInfoFormate2(const uTable *uT,const uMapCell *cell,uint32_t* aInfo); + + +const uMapCell *uGetMapCell(const uTable *uT, int16_t item); +char uGetFormat(const uTable *uT, int16_t item); + + +/*================================================================================= + +=================================================================================*/ +const MapFormatFunc m_map[uNumFormatTag] = +{ + uMapFormate0, + uMapFormate1, + uMapFormate2, +}; + +/*================================================================================= + +=================================================================================*/ +const FillInfoFormateFunc m_fillinfo[uNumFormatTag] = +{ + uFillInfoFormate0, + uFillInfoFormate1, + uFillInfoFormate2, +}; + +/*================================================================================= + +=================================================================================*/ +const HitFormateFunc m_hit[uNumFormatTag] = +{ + uHitFormate0, + uHitFormate0, + uHitFormate2, +}; + +#define uHit(format,in,cell) (* m_hit[(format)])((in),(cell)) +#define uMap(format,in,uT,cell) (* m_map[(format)])((in),(uT),(cell)) +#define 
uGetMapCell(uT, item) ((uMapCell *)(((uint16_t *)uT) + (uT)->offsetToMapCellArray + (item)*(UMAPCELL_SIZE/sizeof(uint16_t))))
+#define uGetFormat(uT, item) (((((uint16_t *)uT) + (uT)->offsetToFormatArray)[(item)>> 2 ] >> (((item)% 4 ) << 2)) & 0x0f)
+
+/*=================================================================================
+  uMapCode: map `in` through table uT.
+
+  The table holds uT->itemOfList cells.  Cells are tested in order with the
+  "hit" function for their format; the first cell that hits supplies the
+  result through the "map" function for that format.
+
+  *out receives the mapped value, or NOMAPPING when no cell hits.
+  Returns non-zero only when a cell hit AND the mapped value is not
+  NOMAPPING; returns 0 otherwise.
+=================================================================================*/
+int uMapCode(const uTable *uT, uint16_t in, uint16_t* out)
+{
+  int done = 0;
+  uint16_t itemOfList = uT->itemOfList;
+  uint16_t i;
+  *out = NOMAPPING;
+  for(i=0;i<itemOfList;i++)
+  {
+    const uMapCell* uCell;
+    /* The format tag is a 4-bit field (0..15) but m_hit[] / m_map[] only have
+       uNumFormatTag entries.  Skip cells with an unknown tag instead of
+       calling through an out-of-bounds function pointer. */
+    unsigned int format = uGetFormat(uT,i);
+    if(format >= (unsigned int) uNumFormatTag)
+      continue;
+    uCell = uGetMapCell(uT,i);
+    if(uHit(format, in, uCell))
+    {
+      *out = uMap(format, in, uT,uCell);
+      done = 1;
+      break;
+    }
+  }
+  return ( done && (*out != NOMAPPING));
+}
+
+
+/*
+member function
+*/
+/*=================================================================================
+  uHitFormate0: non-zero when `in` lies in the inclusive range
+  [srcBegin, srcEnd] of a format-0 cell.  m_hit[] also uses this function for
+  format-1 cells.
+=================================================================================*/
+int uHitFormate0(uint16_t in,const uMapCell *cell)
+{
+  return ( (in >= cell->fmt.format0.srcBegin) &&
+    (in <= cell->fmt.format0.srcEnd) ) ;
+}
+/*=================================================================================
+  uHitFormate2: non-zero when `in` equals the single source code (srcBegin)
+  of a format-2 cell.
+=================================================================================*/
+int uHitFormate2(uint16_t in,const uMapCell *cell)
+{
+  return (in == cell->fmt.format2.srcBegin);
+}
+/*=================================================================================
+  uMapFormate0: format-0 cells map a source range onto a destination range
+  with the same offset: result = destBegin + (in - srcBegin).  uT is not used.
+=================================================================================*/
+uint16_t uMapFormate0(uint16_t in,const uTable *uT,const uMapCell *cell)
+{
+  return ((in - cell->fmt.format0.srcBegin) + cell->fmt.format0.destBegin);
+}
+/*=================================================================================
+  uMapFormate1: format-1 cells look the result up in the table's mapping
+  array at index mappingOffset + (in - srcBegin).
+=================================================================================*/
+uint16_t uMapFormate1(uint16_t
in,const uTable *uT,const uMapCell *cell) +{ + return (*(((uint16_t *)uT) + uT->offsetToMappingTable + + cell->fmt.format1.mappingOffset + in - cell->fmt.format1.srcBegin)); +} +/*================================================================================= + +=================================================================================*/ +uint16_t uMapFormate2(uint16_t in,const uTable *uT,const uMapCell *cell) +{ + return (cell->fmt.format2.destBegin); +} + +#define SET_REPRESENTABLE(info, c) (info)[(c) >> 5] |= (1L << ((c) & 0x1f)) +/*================================================================================= + +=================================================================================*/ +void uFillInfoFormate0(const uTable *uT,const uMapCell *cell,uint32_t* info) +{ + uint16_t begin, end, i; + begin = cell->fmt.format0.srcBegin; + end = cell->fmt.format0.srcEnd; + if( (begin >> 5) == (end >> 5)) /* High 17 bits are the same */ + { + for(i = begin; i <= end; i++) + SET_REPRESENTABLE(info, i); + } + else { + uint32_t b = begin >> 5; + uint32_t e = end >> 5; + info[ b ] |= (0xFFFFFFFFL << ((begin) & 0x1f)); + info[ e ] |= (0xFFFFFFFFL >> (31 - ((end) & 0x1f))); + for(b++ ; b < e ; b++) + info[b] |= 0xFFFFFFFFL; + } +} +/*================================================================================= + +=================================================================================*/ +void uFillInfoFormate1(const uTable *uT,const uMapCell *cell,uint32_t* info) +{ + uint16_t begin, end, i; + uint16_t *base; + begin = cell->fmt.format0.srcBegin; + end = cell->fmt.format0.srcEnd; + base = (((uint16_t *)uT) + uT->offsetToMappingTable + cell->fmt.format1.mappingOffset); + for(i = begin; i <= end; i++) + { + if(0xFFFD != base[i - begin]) /* check every item */ + SET_REPRESENTABLE(info, i); + } +} +/*================================================================================= + 
+=================================================================================*/ +void uFillInfoFormate2(const uTable *uT,const uMapCell *cell,uint32_t* info) +{ + SET_REPRESENTABLE(info, cell->fmt.format2.srcBegin); +} + diff --git a/intl/uconv/util/umap.h b/intl/uconv/util/umap.h new file mode 100644 index 000000000..24266e7bf --- /dev/null +++ b/intl/uconv/util/umap.h @@ -0,0 +1,53 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef __UMAP__ +#define __UMAP__ + +#define NOMAPPING 0xfffd + +enum { + uFormat0Tag = 0, + uFormat1Tag, + uFormat2Tag, + uNumFormatTag +}; + +typedef struct { + uint16_t srcBegin; /* 2 byte */ + uint16_t srcEnd; /* 2 byte */ + uint16_t destBegin; /* 2 byte */ +} uFormat0; + +typedef struct { + uint16_t srcBegin; /* 2 byte */ + uint16_t srcEnd; /* 2 byte */ + uint16_t mappingOffset; /* 2 byte */ +} uFormat1; + +typedef struct { + uint16_t srcBegin; /* 2 byte */ + uint16_t srcEnd; /* 2 byte -waste */ + uint16_t destBegin; /* 2 byte */ +} uFormat2; + +typedef struct { + union { + uFormat0 format0; + uFormat1 format1; + uFormat2 format2; + } fmt; +} uMapCell; + +#define UMAPCELL_SIZE (3*sizeof(uint16_t)) + +typedef struct { + uint16_t itemOfList; + uint16_t offsetToFormatArray; + uint16_t offsetToMapCellArray; + uint16_t offsetToMappingTable; + uint16_t data[1]; +} uTable; + +#endif diff --git a/intl/uconv/util/unicpriv.h b/intl/uconv/util/unicpriv.h new file mode 100644 index 000000000..b14f10c35 --- /dev/null +++ b/intl/uconv/util/unicpriv.h @@ -0,0 +1,52 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef __UNIPRIV__ +#define __UNIPRIV__ + +#include <stdint.h> +#include "umap.h" +#include "uconvutil.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int uMapCode(const uTable *uT, + uint16_t in, + uint16_t* out); + +int uGenerate(uScanClassID scanClass, + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen); + +int uScan(uScanClassID scanClass, + int32_t *state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen); + +int uGenerateShift(uShiftOutTable *shift, + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen); + +int uScanShift(uShiftInTable *shift, + int32_t *state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen); + +#ifdef __cplusplus +} +#endif + +#endif /* __UNIPRIV__ */ diff --git a/intl/uconv/util/uscan.c b/intl/uconv/util/uscan.c new file mode 100644 index 000000000..0abdd0c00 --- /dev/null +++ b/intl/uconv/util/uscan.c @@ -0,0 +1,759 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#include "unicpriv.h" +#define CHK_GR94(b) ( (uint8_t) 0xa0 < (uint8_t) (b) && (uint8_t) (b) < (uint8_t) 0xff ) +#define CHK_GR94_2Byte(b1,b2) (CHK_GR94(b1) && CHK_GR94(b2)) +/*================================================================================= + +=================================================================================*/ +typedef int (*uSubScannerFunc) (unsigned char* in, uint16_t* out); +/*================================================================================= + +=================================================================================*/ + +typedef int (*uScannerFunc) ( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ); + +int uScan( + uScanClassID scanClass, + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ); + +#define uSubScanner(sub,in,out) (* m_subscanner[sub])((in),(out)) + +int uCheckAndScanAlways1Byte( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ); +int uCheckAndScanAlways2Byte( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ); +int uCheckAndScanAlways2ByteShiftGR( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ); +int uCheckAndScanAlways2ByteGR128( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ); +int uScanShift( + uShiftInTable *shift, + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ); + +int uCheckAndScan2ByteGRPrefix8F( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ); +int uCheckAndScan2ByteGRPrefix8EA2( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ); +int uCheckAndScan2ByteGRPrefix8EA3( + int32_t* state, + unsigned char *in, + 
uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ); +int uCheckAndScan2ByteGRPrefix8EA4( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ); +int uCheckAndScan2ByteGRPrefix8EA5( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ); +int uCheckAndScan2ByteGRPrefix8EA6( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ); +int uCheckAndScan2ByteGRPrefix8EA7( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ); +int uCnSAlways8BytesDecomposedHangul( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ); +int uCheckAndScanJohabHangul( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ); +int uCheckAndScanJohabSymbol( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ); + +int uCheckAndScan4BytesGB18030( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ); + +int uScanAlways2Byte( + unsigned char* in, + uint16_t* out + ); +int uScanAlways2ByteShiftGR( + unsigned char* in, + uint16_t* out + ); +int uScanAlways1Byte( + unsigned char* in, + uint16_t* out + ); +int uScanAlways1BytePrefix8E( + unsigned char* in, + uint16_t* out + ); +/*================================================================================= + +=================================================================================*/ +const uScannerFunc m_scanner[uNumOfCharsetType] = +{ + uCheckAndScanAlways1Byte, + uCheckAndScanAlways2Byte, + uCheckAndScanAlways2ByteShiftGR, + uCheckAndScan2ByteGRPrefix8F, + uCheckAndScan2ByteGRPrefix8EA2, + uCheckAndScan2ByteGRPrefix8EA3, + uCheckAndScan2ByteGRPrefix8EA4, + uCheckAndScan2ByteGRPrefix8EA5, + uCheckAndScan2ByteGRPrefix8EA6, + 
uCheckAndScan2ByteGRPrefix8EA7, + uCnSAlways8BytesDecomposedHangul, + uCheckAndScanJohabHangul, + uCheckAndScanJohabSymbol, + uCheckAndScan4BytesGB18030, + uCheckAndScanAlways2ByteGR128 +}; + +/*================================================================================= + +=================================================================================*/ + +const uSubScannerFunc m_subscanner[uNumOfCharType] = +{ + uScanAlways1Byte, + uScanAlways2Byte, + uScanAlways2ByteShiftGR, + uScanAlways1BytePrefix8E +}; +/*================================================================================= + +=================================================================================*/ +int uScan( + uScanClassID scanClass, + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ) +{ + return (* m_scanner[scanClass]) (state,in,out,inbuflen,inscanlen); +} +/*================================================================================= + +=================================================================================*/ +int uScanAlways1Byte( + unsigned char* in, + uint16_t* out + ) +{ + *out = (uint16_t) in[0]; + return 1; +} + +/*================================================================================= + +=================================================================================*/ +int uScanAlways2Byte( + unsigned char* in, + uint16_t* out + ) +{ + *out = (uint16_t) (( in[0] << 8) | (in[1])); + return 1; +} +/*================================================================================= + +=================================================================================*/ +int uScanAlways2ByteShiftGR( + unsigned char* in, + uint16_t* out + ) +{ + *out = (uint16_t) ((( in[0] << 8) | (in[1])) & 0x7F7F); + return 1; +} + +/*================================================================================= + +=================================================================================*/ +int 
uScanAlways1BytePrefix8E( + unsigned char* in, + uint16_t* out + ) +{ + *out = (uint16_t) in[1]; + return 1; +} +/*================================================================================= + +=================================================================================*/ +int uCheckAndScanAlways1Byte( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ) +{ + /* Don't check inlen. The caller should ensure it is larger than 0 */ + *inscanlen = 1; + *out = (uint16_t) in[0]; + + return 1; +} + +/*================================================================================= + +=================================================================================*/ +int uCheckAndScanAlways2Byte( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ) +{ + if(inbuflen < 2) + return 0; + else + { + *inscanlen = 2; + *out = ((in[0] << 8) | ( in[1])) ; + return 1; + } +} +/*================================================================================= + +=================================================================================*/ +int uCheckAndScanAlways2ByteShiftGR( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ) +{ + /* + * Both bytes should be in the range of [0xa1,0xfe] for 94x94 character sets + * invoked on GR. No encoding implemented in Mozilla uses 96x96 char. sets. + * Only 2nd byte range needs to be checked because + * 1st byte is checked before calling this in nsUnicodeDecoerHelper.cpp + */ + if(inbuflen < 2) /* will lead to NS_OK_UDEC_MOREINPUT */ + return 0; + else if (! CHK_GR94(in[1])) + { + *inscanlen = 2; + *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. 
*/ + return 1; + } + else + { + *inscanlen = 2; + *out = (((in[0] << 8) | ( in[1])) & 0x7F7F); + return 1; + } +} +/*================================================================================= + +=================================================================================*/ +int uCheckAndScanAlways2ByteGR128( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ) +{ + /* + * The first byte should be in [0xa1,0xfe] + * and the second byte in [0x41,0xfe] + * Used by CP949 -> Unicode converter. + * Only 2nd byte range needs to be checked because + * 1st byte is checked before calling this in nsUnicodeDecoderHelper.cpp + */ + if(inbuflen < 2) /* will lead to NS_OK_UDEC_MOREINPUT */ + return 0; + else if (in[1] < 0x41) /* 2nd byte range check */ + { + *inscanlen = 2; + *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */ + return 1; + } + else + { + *inscanlen = 2; + *out = (in[0] << 8) | in[1]; + return 1; + } +} +/*================================================================================= + +=================================================================================*/ +int uScanShift( + uShiftInTable *shift, + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ) +{ + int16_t i; + const uShiftInCell* cell = &(shift->shiftcell[0]); + int16_t itemnum = shift->numOfItem; + for(i=0;i<itemnum;i++) + { + if( ( in[0] >= cell[i].shiftin_Min) && + ( in[0] <= cell[i].shiftin_Max)) + { + if(inbuflen < cell[i].reserveLen) + return 0; + else + { + *inscanlen = cell[i].reserveLen; + return (uSubScanner(cell[i].classID,in,out)); + } + } + } + return 0; +} +/*================================================================================= + +=================================================================================*/ +int uCheckAndScan2ByteGRPrefix8F( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + 
uint32_t* inscanlen + ) +{ + if((inbuflen < 3) ||(in[0] != 0x8F)) + return 0; + else if (! CHK_GR94(in[1])) /* 2nd byte range check */ + { + *inscanlen = 2; + *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */ + return 1; + } + else if (! CHK_GR94(in[2])) /* 3rd byte range check */ + { + *inscanlen = 3; + *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */ + return 1; + } + else + { + *inscanlen = 3; + *out = (((in[1] << 8) | ( in[2])) & 0x7F7F); + return 1; + } +} +/*================================================================================= + +=================================================================================*/ + +/* Macro definition to use for uCheckAndScan2ByteGRPrefix8EAX() + * where X is 2,3,4,5,6,7 + */ +#define CNS_8EAX_4BYTE(PREFIX) \ + if((inbuflen < 4) || (in[0] != 0x8E)) \ + return 0; \ + else if((in[1] != (PREFIX))) \ + { \ + *inscanlen = 2; \ + *out = 0xFF; \ + return 1; \ + } \ + else if(! CHK_GR94(in[2])) \ + { \ + *inscanlen = 3; \ + *out = 0xFF; \ + return 1; \ + } \ + else if(! 
CHK_GR94(in[3])) \ + { \ + *inscanlen = 4; \ + *out = 0xFF; \ + return 1; \ + } \ + else \ + { \ + *inscanlen = 4; \ + *out = (((in[2] << 8) | ( in[3])) & 0x7F7F); \ + return 1; \ + } + +int uCheckAndScan2ByteGRPrefix8EA2( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ) +{ + CNS_8EAX_4BYTE(0xA2) +} + +/*================================================================================= + +=================================================================================*/ +int uCheckAndScan2ByteGRPrefix8EA3( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ) +{ + CNS_8EAX_4BYTE(0xA3) +} +/*================================================================================= + +=================================================================================*/ +int uCheckAndScan2ByteGRPrefix8EA4( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ) +{ + CNS_8EAX_4BYTE(0xA4) +} +/*================================================================================= + +=================================================================================*/ +int uCheckAndScan2ByteGRPrefix8EA5( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ) +{ + CNS_8EAX_4BYTE(0xA5) +} +/*================================================================================= + +=================================================================================*/ +int uCheckAndScan2ByteGRPrefix8EA6( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ) +{ + CNS_8EAX_4BYTE(0xA6) +} +/*================================================================================= + +=================================================================================*/ +int uCheckAndScan2ByteGRPrefix8EA7( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t 
inbuflen, + uint32_t* inscanlen + ) +{ + CNS_8EAX_4BYTE(0xA7) +} +/*================================================================================= + +=================================================================================*/ +#define SBase 0xAC00 +#define SCount 11172 +#define LCount 19 +#define VCount 21 +#define TCount 28 +#define NCount (VCount * TCount) + +int uCnSAlways8BytesDecomposedHangul( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ) +{ + + uint16_t LIndex, VIndex, TIndex; + /* no 8 bytes, not in a4 range, or the first 2 byte are not a4d4 */ + if((inbuflen < 8) || (0xa4 != in[0]) || (0xd4 != in[1]) || + (0xa4 != in[2] ) || (0xa4 != in[4]) || (0xa4 != in[6])) + return 0; + + /* Compute LIndex */ + if((in[3] < 0xa1) || (in[3] > 0xbe)) { /* illegal leading consonant */ + return 0; + } + else { + static const uint8_t lMap[] = { + /* A1 A2 A3 A4 A5 A6 A7 */ + 0, 1,0xff, 2,0xff,0xff, 3, + /* A8 A9 AA AB AC AD AE AF */ + 4, 5,0xff,0xff,0xff,0xff,0xff,0xff, + /* B0 B1 B2 B3 B4 B5 B6 B7 */ + 0xff, 6, 7, 8,0xff, 9, 10, 11, + /* B8 B9 BA BB BC BD BE */ + 12, 13, 14, 15, 16, 17, 18 + }; + + LIndex = lMap[in[3] - 0xa1]; + if(0xff == (0xff & LIndex)) + return 0; + } + + /* Compute VIndex */ + if((in[5] < 0xbf) || (in[5] > 0xd3)) { /* illegal medial vowel */ + return 0; + } + else { + VIndex = in[5] - 0xbf; + } + + /* Compute TIndex */ + if(0xd4 == in[7]) + { + TIndex = 0; + } + else if((in[7] < 0xa1) || (in[7] > 0xbe)) {/* illegal trailing consonant */ + return 0; + } + else { + static const uint8_t tMap[] = { + /* A1 A2 A3 A4 A5 A6 A7 */ + 1, 2, 3, 4, 5, 6, 7, + /* A8 A9 AA AB AC AD AE AF */ + 0xff, 8, 9, 10, 11, 12, 13, 14, + /* B0 B1 B2 B3 B4 B5 B6 B7 */ + 15, 16, 17,0xff, 18, 19, 20, 21, + /* B8 B9 BA BB BC BD BE */ + 22,0xff, 23, 24, 25, 26, 27 + }; + TIndex = tMap[in[7] - 0xa1]; + if(0xff == (0xff & TIndex)) + return 0; + } + + *inscanlen = 8; + /* the following line is from Unicode 2.0 page 3-13 
item 5 */ + *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase; + + return 1; +} +/*================================================================================= + +=================================================================================*/ + +int uCheckAndScanJohabHangul( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ) +{ +/* since we don't have code to convert Johab to Unicode right now * + * make this part of code #if 0 to save space until we fully test it */ + if(inbuflen < 2) + return 0; + else { + /* + * See Table 4-45 Johab Encoding's Five-Bit Binary Patterns in page 183 + * of "CJKV Information Processing" for details + */ + static const uint8_t lMap[32]={ /* totaly 19 */ + 0xff,0xff,0, 1, 2, 3, 4, 5, /* 0-7 */ + 6, 7, 8, 9, 10, 11, 12, 13, /* 8-15 */ + 14, 15, 16, 17, 18, 0xff,0xff,0xff, /* 16-23 */ + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff /* 24-31 */ + }; + static const uint8_t vMap[32]={ /* totaly 21 */ + 0xff,0xff,0xff,0, 1, 2, 3, 4, /* 0-7 */ + 0xff,0xff,5, 6, 7, 8, 9, 10, /* 8-15 */ + 0xff,0xff,11, 12, 13, 14, 15, 16, /* 16-23 */ + 0xff,0xff,17, 18, 19, 20, 0xff,0xff /* 24-31 */ + }; + static const uint8_t tMap[32]={ /* totaly 29 */ + 0xff,0, 1, 2, 3, 4, 5, 6, /* 0-7 */ + 7, 8, 9, 10, 11, 12, 13, 14, /* 8-15 */ + 15, 16, 0xff,17, 18, 19, 20, 21, /* 16-23 */ + 22, 23, 24, 25, 26, 27, 0xff,0xff /* 24-31 */ + }; + uint16_t ch = (in[0] << 8) | in[1]; + uint16_t LIndex, VIndex, TIndex; + if(0 == (0x8000 & ch)) + return 0; + LIndex=lMap[(ch>>10)& 0x1F]; + VIndex=vMap[(ch>>5) & 0x1F]; + TIndex=tMap[(ch>>0) & 0x1F]; + if((0xff==(LIndex)) || + (0xff==(VIndex)) || + (0xff==(TIndex))) + return 0; + /* the following line is from Unicode 2.0 page 3-13 item 5 */ + *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase; + *inscanlen = 2; + return 1; + } +} +int uCheckAndScanJohabSymbol( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen 
+ ) +{ + if(inbuflen < 2) + return 0; + else { + /* + * The following code are based on the Perl code lised under + * "Johab to ISO-2022-KR or EUC-KR Conversion" in page 1014 of + * "CJKV Information Processing" by Ken Lunde <lunde@adobe.com> + * + * sub johab2ks ($) { # Convert Johab to ISO-2022-KR + * my @johab = unpack("C*", $_[0]); + * my ($offset, $d8_off) = (0,0); + * my @out = (); + * while(($hi, $lo) = splice($johab, 0, 2)) { + * $offset = 1 if ($hi > 223 and $hi < 250); + * $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42)); + * push (@out, (((($hi - ($hi < 223 ? 200 : 187)) << 1) - + * ($lo < 161 ? 1 : 0) + $offset) + $d8_off), + * $lo - ($lo < 161 ? ($lo > 126 ? 34 : 16) : 128 )); + * } + * return pack ("C*", @out); + * } + * additional comments from Ken Lunde + * $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42)); + * has three possible return values: + * 0 if $hi is not equal to 216 + * 94 if $hi is euqal to 216 and if $lo is greater than 160 + * 42 if $hi is euqal to 216 and if $lo is not greater than 160 + */ + unsigned char hi = in[0]; + unsigned char lo = in[1]; + uint16_t offset = (( hi > 223 ) && ( hi < 250)) ? 1 : 0; + uint16_t d8_off = 0; + if(216 == hi) { + if( lo > 160) + d8_off = 94; + else + d8_off = 42; + } + + *out = (((((hi - ((hi < 223) ? 200 : 187)) << 1) - + (lo < 161 ? 1 : 0) + offset) + d8_off) << 8 ) | + (lo - ((lo < 161) ? ((lo > 126) ? 34 : 16) : + 128)); + *inscanlen = 2; + return 1; + } +} +int uCheckAndScan4BytesGB18030( + int32_t* state, + unsigned char *in, + uint16_t *out, + uint32_t inbuflen, + uint32_t* inscanlen + ) +{ + uint32_t data; + if(inbuflen < 4) + return 0; + + if((in[0] < 0x81 ) || (0xfe < in[0])) + return 0; + if((in[1] < 0x30 ) || (0x39 < in[1])) + return 0; + if((in[2] < 0x81 ) || (0xfe < in[2])) + return 0; + if((in[3] < 0x30 ) || (0x39 < in[3])) + return 0; + + data = (((((in[0] - 0x81) * 10 + (in[1] - 0x30)) * 126) + + (in[2] - 0x81)) * 10 ) + (in[3] - 0x30); + + *inscanlen = 4; + *out = (data < 0x00010000) ? 
data : 0xFFFD; + return 1; +} |