summaryrefslogtreecommitdiffstats
path: root/intl/uconv/util
diff options
context:
space:
mode:
Diffstat (limited to 'intl/uconv/util')
-rw-r--r--intl/uconv/util/nsUCConstructors.cpp134
-rw-r--r--intl/uconv/util/nsUCConstructors.h70
-rw-r--r--intl/uconv/util/nsUCSupport.cpp621
-rw-r--r--intl/uconv/util/nsUnicodeDecodeHelper.cpp234
-rw-r--r--intl/uconv/util/nsUnicodeDecodeHelper.h55
-rw-r--r--intl/uconv/util/nsUnicodeEncodeHelper.cpp121
-rw-r--r--intl/uconv/util/nsUnicodeEncodeHelper.h42
-rw-r--r--intl/uconv/util/ugen.c712
-rw-r--r--intl/uconv/util/umap.c175
-rw-r--r--intl/uconv/util/umap.h53
-rw-r--r--intl/uconv/util/unicpriv.h52
-rw-r--r--intl/uconv/util/uscan.c759
12 files changed, 3028 insertions, 0 deletions
diff --git a/intl/uconv/util/nsUCConstructors.cpp b/intl/uconv/util/nsUCConstructors.cpp
new file mode 100644
index 000000000..e8ab5f89d
--- /dev/null
+++ b/intl/uconv/util/nsUCConstructors.cpp
@@ -0,0 +1,134 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsUCSupport.h"
+#include "nsUCConstructors.h"
+
+template<class T>
+inline nsresult StabilizedQueryInterface(T* aNewObject,
+ REFNSIID aIID,
+ void **aResult)
+{ // Runs QueryInterface on aNewObject while holding a temporary reference to it.
+ NS_ADDREF(aNewObject); // keep the object referenced for the duration of QueryInterface
+ nsresult rv = aNewObject->QueryInterface(aIID, aResult);
+ NS_RELEASE(aNewObject); // drop the temporary reference; presumably this destroys the object when QueryInterface handed none out - confirm
+ return rv;
+}
+
+nsresult
+CreateMultiTableDecoder(int32_t aTableCount, const uRange * aRangeArray,
+ uScanClassID * aScanClassArray,
+ uMappingTable ** aMappingTable,
+ uint32_t aMaxLengthFactor,
+ nsISupports* aOuter,
+ REFNSIID aIID,
+ void** aResult)
+{ // Builds an nsMultiTableDecoderSupport from the given tables and returns its aIID interface through aResult.
+
+ if (aOuter) // a non-null outer object is rejected: no aggregation
+ return NS_ERROR_NO_AGGREGATION;
+
+ nsMultiTableDecoderSupport* decoder =
+ new nsMultiTableDecoderSupport(aTableCount, aRangeArray,
+ aScanClassArray, aMappingTable,
+ aMaxLengthFactor);
+ if (!decoder)
+ return NS_ERROR_OUT_OF_MEMORY;
+
+ return StabilizedQueryInterface(decoder, aIID, aResult); // result of QueryInterface is returned unchanged
+}
+
+nsresult
+CreateMultiTableEncoder(int32_t aTableCount,
+ uScanClassID * aScanClassArray,
+ uShiftOutTable ** aShiftOutTable,
+ uMappingTable ** aMappingTable,
+ uint32_t aMaxLengthFactor,
+ nsISupports* aOuter,
+ REFNSIID aIID,
+ void** aResult)
+{ // Builds an nsMultiTableEncoderSupport from the given tables and returns its aIID interface through aResult.
+
+ if (aOuter) // a non-null outer object is rejected: no aggregation
+ return NS_ERROR_NO_AGGREGATION;
+
+ nsMultiTableEncoderSupport* encoder =
+ new nsMultiTableEncoderSupport(aTableCount,
+ aScanClassArray,
+ aShiftOutTable,
+ aMappingTable,
+ aMaxLengthFactor);
+ if (!encoder)
+ return NS_ERROR_OUT_OF_MEMORY;
+
+ return StabilizedQueryInterface(encoder, aIID, aResult); // result of QueryInterface is returned unchanged
+}
+
+nsresult
+CreateMultiTableEncoder(int32_t aTableCount,
+ uScanClassID * aScanClassArray,
+ uMappingTable ** aMappingTable,
+ uint32_t aMaxLengthFactor,
+ nsISupports* aOuter,
+ REFNSIID aIID,
+ void** aResult)
+{ // Convenience overload: forwards to the overload taking shift-out tables, passing nullptr for them.
+ return CreateMultiTableEncoder(aTableCount, aScanClassArray,
+ nullptr,
+ aMappingTable, aMaxLengthFactor,
+ aOuter, aIID, aResult);
+}
+
+nsresult
+CreateTableEncoder(uScanClassID aScanClass,
+ uShiftOutTable * aShiftOutTable,
+ uMappingTable * aMappingTable,
+ uint32_t aMaxLengthFactor,
+ nsISupports* aOuter,
+ REFNSIID aIID,
+ void** aResult)
+{ // Builds an nsTableEncoderSupport for one table and returns its aIID interface through aResult.
+ if (aOuter) // a non-null outer object is rejected: no aggregation
+ return NS_ERROR_NO_AGGREGATION;
+
+ nsTableEncoderSupport* encoder =
+ new nsTableEncoderSupport(aScanClass,
+ aShiftOutTable, aMappingTable,
+ aMaxLengthFactor);
+ if (!encoder)
+ return NS_ERROR_OUT_OF_MEMORY;
+
+ return StabilizedQueryInterface(encoder, aIID, aResult); // result of QueryInterface is returned unchanged
+}
+
+nsresult
+CreateTableEncoder(uScanClassID aScanClass,
+ uMappingTable * aMappingTable,
+ uint32_t aMaxLengthFactor,
+ nsISupports* aOuter,
+ REFNSIID aIID,
+ void** aResult)
+{ // Convenience overload: forwards to the overload taking a shift-out table, passing nullptr for it.
+ return CreateTableEncoder(aScanClass, nullptr,
+ aMappingTable, aMaxLengthFactor,
+ aOuter, aIID, aResult);
+}
+
+nsresult
+CreateOneByteDecoder(uMappingTable * aMappingTable,
+ nsISupports* aOuter,
+ REFNSIID aIID,
+ void** aResult)
+{ // Builds an nsOneByteDecoderSupport over aMappingTable and returns its aIID interface through aResult.
+ if (aOuter) return NS_ERROR_NO_AGGREGATION; // a non-null outer object is rejected: no aggregation
+
+ nsOneByteDecoderSupport* decoder =
+ new nsOneByteDecoderSupport(aMappingTable);
+
+ if (!decoder)
+ return NS_ERROR_OUT_OF_MEMORY;
+
+ return StabilizedQueryInterface(decoder, aIID, aResult); // result of QueryInterface is returned unchanged
+}
diff --git a/intl/uconv/util/nsUCConstructors.h b/intl/uconv/util/nsUCConstructors.h
new file mode 100644
index 000000000..360bf0305
--- /dev/null
+++ b/intl/uconv/util/nsUCConstructors.h
@@ -0,0 +1,70 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __nsUCConstructors_h
+#define __nsUCConstructors_h
+
+#include <stdint.h>
+#include "nscore.h"
+#include "nsID.h"
+#include "uconvutil.h"
+
+class nsISupports;
+
+// all the useful constructors
+nsresult
+CreateMultiTableDecoder(int32_t aTableCount,
+ const uRange * aRangeArray,
+ uScanClassID * aScanClassArray,
+ uMappingTable ** aMappingTable,
+ uint32_t aMaxLengthFactor,
+ nsISupports* aOuter,
+ REFNSIID aIID,
+ void** aResult);
+
+nsresult
+CreateMultiTableEncoder(int32_t aTableCount,
+ uScanClassID * aScanClassArray,
+ uShiftOutTable ** aShiftOutTable,
+ uMappingTable ** aMappingTable,
+ uint32_t aMaxLengthFactor,
+ nsISupports* aOuter,
+ REFNSIID aIID,
+ void** aResult);
+
+nsresult
+CreateTableEncoder(uScanClassID aScanClass,
+ uShiftOutTable * aShiftOutTable,
+ uMappingTable * aMappingTable,
+ uint32_t aMaxLengthFactor,
+ nsISupports* aOuter,
+ REFNSIID aIID,
+ void** aResult);
+
+nsresult
+CreateMultiTableEncoder(int32_t aTableCount,
+ uScanClassID * aScanClassArray,
+ uMappingTable ** aMappingTable,
+ uint32_t aMaxLengthFactor,
+ nsISupports* aOuter,
+ REFNSIID aIID,
+ void** aResult);
+
+nsresult
+CreateTableEncoder(uScanClassID aScanClass,
+ uMappingTable * aMappingTable,
+ uint32_t aMaxLengthFactor,
+ nsISupports* aOuter,
+ REFNSIID aIID,
+ void** aResult);
+
+nsresult
+CreateOneByteDecoder(uMappingTable * aMappingTable,
+ nsISupports* aOuter,
+ REFNSIID aIID,
+ void** aResult);
+
+
+#endif
diff --git a/intl/uconv/util/nsUCSupport.cpp b/intl/uconv/util/nsUCSupport.cpp
new file mode 100644
index 000000000..d6893f442
--- /dev/null
+++ b/intl/uconv/util/nsUCSupport.cpp
@@ -0,0 +1,621 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsUCSupport.h"
+#include "nsUnicodeDecodeHelper.h"
+#include "nsUnicodeEncodeHelper.h"
+#include "mozilla/CheckedInt.h"
+#include <algorithm>
+
+#define DEFAULT_BUFFER_CAPACITY 16
+
+// XXX review the buffer growth limitation code
+
+//----------------------------------------------------------------------
+// Class nsBasicDecoderSupport [implementation]
+
+nsBasicDecoderSupport::nsBasicDecoderSupport()
+ : mErrBehavior(kOnError_Recover)
+{ // the error behavior starts out as kOnError_Recover
+}
+
+nsBasicDecoderSupport::~nsBasicDecoderSupport()
+{ // empty: no explicit cleanup is performed here
+}
+
+//----------------------------------------------------------------------
+// Interface nsISupports [implementation]
+
+#ifdef DEBUG
+NS_IMPL_ISUPPORTS(nsBasicDecoderSupport,
+ nsIUnicodeDecoder,
+ nsIBasicDecoder)
+#else
+NS_IMPL_ISUPPORTS(nsBasicDecoderSupport, nsIUnicodeDecoder)
+#endif
+
+//----------------------------------------------------------------------
+// Interface nsIUnicodeDecoder [implementation]
+
+void
+nsBasicDecoderSupport::SetInputErrorBehavior(int32_t aBehavior)
+{ // Stores aBehavior in mErrBehavior; only kOnError_Recover and kOnError_Signal pass the assertion.
+ MOZ_ASSERT(aBehavior == kOnError_Recover || aBehavior == kOnError_Signal,
+ "Unknown behavior for SetInputErrorBehavior");
+ mErrBehavior = aBehavior; // stored unconditionally, whatever the assertion above did
+}
+
+char16_t
+nsBasicDecoderSupport::GetCharacterForUnMapped()
+{ // Always returns the same constant; no member state is consulted.
+ return char16_t(0xfffd); // Unicode REPLACEMENT CHARACTER
+}
+
+//----------------------------------------------------------------------
+// Class nsBufferDecoderSupport [implementation]
+
+nsBufferDecoderSupport::nsBufferDecoderSupport(uint32_t aMaxLengthFactor)
+ : nsBasicDecoderSupport(),
+ mMaxLengthFactor(aMaxLengthFactor)
+{ // Allocates the carry-over buffer at its default capacity and starts with it empty.
+ mBufferCapacity = DEFAULT_BUFFER_CAPACITY;
+ mBuffer = new char[mBufferCapacity];
+
+ Reset(); // sets mBufferLength to 0
+}
+
+nsBufferDecoderSupport::~nsBufferDecoderSupport()
+{ // releases the array allocated in the constructor
+ delete [] mBuffer;
+}
+
+void nsBufferDecoderSupport::FillBuffer(const char ** aSrc, int32_t aSrcLength)
+{ // Appends as many bytes from *aSrc as fit into mBuffer and advances *aSrc past them.
+ int32_t bcr = std::min(mBufferCapacity - mBufferLength, aSrcLength); // limited by free space and by available input
+ memcpy(mBuffer + mBufferLength, *aSrc, bcr);
+ mBufferLength += bcr;
+ (*aSrc) += bcr; // the caller's pointer now addresses the first byte not copied
+}
+
+//----------------------------------------------------------------------
+// Subclassing of nsBasicDecoderSupport class [implementation]
+
+NS_IMETHODIMP nsBufferDecoderSupport::Convert(const char* aSrc,
+ int32_t* aSrcLength,
+ char16_t* aDest,
+ int32_t* aDestLength)
+{ // Decodes aSrc into aDest through ConvertNoBuff, keeping an incomplete trailing sequence in mBuffer for the next call.
+ // we do all operations using pointers internally
+ const char* src = aSrc;
+ const char* srcEnd = aSrc + *aSrcLength;
+ char16_t* dest = aDest;
+ char16_t* destEnd = aDest + *aDestLength;
+
+ int32_t bcr, bcw; // counts for read (source bytes) & write (char16_t units)
+ nsresult res = NS_OK;
+
+ // do we have some residual data from the last conversion?
+ if (mBufferLength > 0) {
+ if (dest == destEnd) {
+ res = NS_OK_UDEC_MOREOUTPUT;
+ } else {
+ for (;;) { // every path through the body ends in break, so this runs at most once
+ // we need new data to add to the buffer
+ if (src == srcEnd) {
+ res = NS_OK_UDEC_MOREINPUT;
+ break;
+ }
+
+ // fill that buffer
+ int32_t buffLen = mBufferLength; // initial buffer length
+ FillBuffer(&src, srcEnd - src); // advances src by the number of bytes copied
+
+ // convert that buffer
+ bcr = mBufferLength;
+ bcw = destEnd - dest;
+ res = ConvertNoBuff(mBuffer, &bcr, dest, &bcw);
+ dest += bcw;
+
+ // Detect invalid input character
+ if (res == NS_ERROR_ILLEGAL_INPUT && mErrBehavior == kOnError_Signal) {
+ break;
+ }
+
+ if ((res == NS_OK_UDEC_MOREINPUT) && (bcw == 0)) {
+ res = NS_ERROR_UNEXPECTED;
+#if defined(DEBUG_yokoyama) || defined(DEBUG_ftang)
+ NS_ERROR("This should not happen. Internal buffer may be corrupted.");
+#endif
+ break;
+ } else {
+ if (bcr < buffLen) {
+ // we didn't convert that residual data - unfill the buffer
+ src -= mBufferLength - buffLen; // give back exactly the bytes FillBuffer just took
+ mBufferLength = buffLen;
+#if defined(DEBUG_yokoyama) || defined(DEBUG_ftang)
+ NS_ERROR("This should not happen. Internal buffer may be corrupted.");
+#endif
+ } else {
+ // the buffer and some extra data was converted - unget the rest
+ src -= mBufferLength - bcr; // bytes copied into the buffer but not consumed return to the input
+ mBufferLength = 0;
+ res = NS_OK; // lets the direct conversion below continue from src
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ if (res == NS_OK) { // convert straight from the caller's input, bypassing the buffer
+ bcr = srcEnd - src;
+ bcw = destEnd - dest;
+ res = ConvertNoBuff(src, &bcr, dest, &bcw);
+ src += bcr;
+ dest += bcw;
+
+ // if we have partial input, store it in our internal buffer.
+ if (res == NS_OK_UDEC_MOREINPUT) {
+ bcr = srcEnd - src;
+ // make sure buffer is large enough
+ if (bcr > mBufferCapacity) {
+ // somehow we got into an error state and the buffer is growing out
+ // of control
+ res = NS_ERROR_UNEXPECTED;
+ } else {
+ FillBuffer(&src, bcr);
+ }
+ }
+ }
+
+ *aSrcLength -= srcEnd - src; // in/out: now the number of source bytes taken (src - aSrc)
+ *aDestLength -= destEnd - dest; // in/out: now the number of char16_t units written (dest - aDest)
+ return res;
+}
+
+NS_IMETHODIMP nsBufferDecoderSupport::Reset()
+{ // Discards any carried-over input; always returns NS_OK.
+ mBufferLength = 0;
+ return NS_OK;
+}
+
+NS_IMETHODIMP nsBufferDecoderSupport::GetMaxLength(const char* aSrc,
+ int32_t aSrcLength,
+ int32_t* aDestLength)
+{ // Reports aSrcLength * mMaxLengthFactor through aDestLength; aSrc itself is not read.
+ NS_ASSERTION(mMaxLengthFactor != 0, "Must override GetMaxLength!");
+
+ mozilla::CheckedInt32 length = aSrcLength;
+ length *= mMaxLengthFactor;
+
+ if (!length.isValid()) { // the checked multiplication did not yield a valid int32_t
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ *aDestLength = length.value();
+ return NS_OK;
+}
+
+//----------------------------------------------------------------------
+// Class nsMultiTableDecoderSupport [implementation]
+
+nsMultiTableDecoderSupport::nsMultiTableDecoderSupport(
+ int32_t aTableCount,
+ const uRange* aRangeArray,
+ uScanClassID* aScanClassArray,
+ uMappingTable** aMappingTable,
+ uint32_t aMaxLengthFactor)
+: nsBufferDecoderSupport(aMaxLengthFactor)
+{ // Stores the table count and the three array pointers as given; the arrays are not copied.
+ mTableCount = aTableCount;
+ mRangeArray = aRangeArray;
+ mScanClassArray = aScanClassArray;
+ mMappingTable = aMappingTable;
+}
+
+nsMultiTableDecoderSupport::~nsMultiTableDecoderSupport()
+{ // empty: the stored array pointers are not freed here
+}
+
+//----------------------------------------------------------------------
+// Subclassing of nsBufferDecoderSupport class [implementation]
+
+NS_IMETHODIMP nsMultiTableDecoderSupport::ConvertNoBuff(const char* aSrc,
+ int32_t* aSrcLength,
+ char16_t* aDest,
+ int32_t* aDestLength)
+{ // Delegates to nsUnicodeDecodeHelper::ConvertByMultiTable with the tables stored at construction.
+ return nsUnicodeDecodeHelper::ConvertByMultiTable(aSrc, aSrcLength,
+ aDest, aDestLength,
+ mTableCount, mRangeArray,
+ mScanClassArray,
+ mMappingTable,
+ mErrBehavior == kOnError_Signal); // error signalling is on only in kOnError_Signal mode
+}
+
+//----------------------------------------------------------------------
+// Class nsOneByteDecoderSupport [implementation]
+
+nsOneByteDecoderSupport::nsOneByteDecoderSupport(
+ uMappingTable* aMappingTable)
+ : nsBasicDecoderSupport()
+ , mMappingTable(aMappingTable)
+ , mFastTableCreated(false)
+ , mFastTableMutex("nsOneByteDecoderSupport mFastTableMutex")
+{ // the fast table is not built here; Convert() builds it on first use
+}
+
+nsOneByteDecoderSupport::~nsOneByteDecoderSupport()
+{ // empty: no explicit cleanup is performed here
+}
+
+//----------------------------------------------------------------------
+// Subclassing of nsBasicDecoderSupport class [implementation]
+
+NS_IMETHODIMP nsOneByteDecoderSupport::Convert(const char* aSrc,
+ int32_t* aSrcLength,
+ char16_t* aDest,
+ int32_t* aDestLength)
+{ // Builds mFastTable on first use, then decodes through ConvertByFastTable.
+ if (!mFastTableCreated) { // NOTE(review): this read happens without the mutex; safe only if the member is atomic - confirm in the header
+ // Probably better to make this non-lazy and get rid of the mutex
+ mozilla::MutexAutoLock autoLock(mFastTableMutex);
+ if (!mFastTableCreated) { // re-checked now that the mutex is held
+ nsresult res = nsUnicodeDecodeHelper::CreateFastTable(
+ mMappingTable, mFastTable, ONE_BYTE_TABLE_SIZE);
+ if (NS_FAILED(res)) return res; // flag stays false, so a later call tries again
+ mFastTableCreated = true;
+ }
+ }
+
+ return nsUnicodeDecodeHelper::ConvertByFastTable(aSrc, aSrcLength,
+ aDest, aDestLength,
+ mFastTable,
+ ONE_BYTE_TABLE_SIZE,
+ mErrBehavior == kOnError_Signal);
+}
+
+NS_IMETHODIMP nsOneByteDecoderSupport::GetMaxLength(const char* aSrc,
+ int32_t aSrcLength,
+ int32_t* aDestLength)
+{ // Reports aSrcLength unchanged as the output length; aSrc itself is not read.
+ // single byte to Unicode converter
+ *aDestLength = aSrcLength;
+ return NS_OK_UDEC_EXACTLENGTH;
+}
+
+NS_IMETHODIMP nsOneByteDecoderSupport::Reset()
+{ // Does nothing and returns NS_OK; the fast table, if already built, is kept.
+ // nothing to reset, no internal state in this case
+ return NS_OK;
+}
+
+//----------------------------------------------------------------------
+// Class nsBasicEncoder [implementation]
+nsBasicEncoder::nsBasicEncoder()
+{ // empty: no members are initialized explicitly here
+}
+
+nsBasicEncoder::~nsBasicEncoder()
+{ // empty: no explicit cleanup is performed here
+}
+
+//----------------------------------------------------------------------
+// Interface nsISupports [implementation]
+
+NS_IMPL_ADDREF(nsBasicEncoder)
+NS_IMPL_RELEASE(nsBasicEncoder)
+#ifdef DEBUG
+NS_IMPL_QUERY_INTERFACE(nsBasicEncoder,
+ nsIUnicodeEncoder,
+ nsIBasicEncoder)
+#else
+NS_IMPL_QUERY_INTERFACE(nsBasicEncoder,
+ nsIUnicodeEncoder)
+#endif
+//----------------------------------------------------------------------
+// Class nsEncoderSupport [implementation]
+
+nsEncoderSupport::nsEncoderSupport(uint32_t aMaxLengthFactor) :
+ mMaxLengthFactor(aMaxLengthFactor)
+{ // Allocates the output staging buffer and selects the default error behavior.
+ mBufferCapacity = DEFAULT_BUFFER_CAPACITY;
+ mBuffer = new char[mBufferCapacity];
+
+ mErrBehavior = kOnError_Signal; // default until SetOutputErrorBehavior() is called
+ mErrChar = 0;
+
+ Reset(); // points mBufferStart and mBufferEnd at mBuffer (nothing pending)
+}
+
+nsEncoderSupport::~nsEncoderSupport()
+{ // releases the array currently held in mBuffer
+ delete [] mBuffer;
+}
+
+NS_IMETHODIMP nsEncoderSupport::ConvertNoBuff(const char16_t* aSrc,
+ int32_t* aSrcLength,
+ char* aDest,
+ int32_t* aDestLength)
+{ // Encodes through ConvertNoBuffNoErr and applies mErrBehavior whenever a character has no mapping.
+ // we do all operations using pointers internally
+ const char16_t* src = aSrc;
+ const char16_t* srcEnd = aSrc + *aSrcLength;
+ char* dest = aDest;
+ char* destEnd = aDest + *aDestLength;
+
+ int32_t bcr, bcw; // counts for read (char16_t units) & write (bytes)
+ nsresult res;
+
+ for (;;) { // repeats only after an unmappable character was handled with NS_OK
+ bcr = srcEnd - src;
+ bcw = destEnd - dest;
+ res = ConvertNoBuffNoErr(src, &bcr, dest, &bcw);
+ src += bcr;
+ dest += bcw;
+
+ if (res == NS_ERROR_UENC_NOMAPPING) {
+ if (mErrBehavior == kOnError_Replace) {
+ const char16_t buff[] = {mErrChar}; // encode the replacement character instead
+ bcr = 1;
+ bcw = destEnd - dest;
+ src--; // back up the input: the call below may not consume anything.
+ res = ConvertNoBuffNoErr(buff, &bcr, dest, &bcw);
+ src += bcr; // steps past the bad character only if the replacement was consumed
+ dest += bcw;
+ if (res != NS_OK) break;
+ } else if (mErrBehavior == kOnError_CallBack) {
+ bcw = destEnd - dest;
+ src--; // point back at the character that had no mapping
+ res = mErrEncoder->Convert(*src, dest, &bcw);
+ dest += bcw;
+ // if there was enough output space then the last char was used
+ if (res != NS_OK_UENC_MOREOUTPUT) src++;
+ if (res != NS_OK) break;
+ } else break; // any other behavior: NS_ERROR_UENC_NOMAPPING is returned to the caller
+ }
+ else break;
+ }
+
+ *aSrcLength -= srcEnd - src; // in/out: now the number of char16_t units taken (src - aSrc)
+ *aDestLength -= destEnd - dest; // in/out: now the number of bytes written (dest - aDest)
+ return res;
+}
+
+NS_IMETHODIMP nsEncoderSupport::FinishNoBuff(char* aDest,
+ int32_t* aDestLength)
+{ // This implementation writes nothing: it reports zero bytes and returns NS_OK.
+ *aDestLength = 0;
+ return NS_OK;
+}
+
+nsresult nsEncoderSupport::FlushBuffer(char** aDest, const char* aDestEnd)
+{ // Copies pending bytes [mBufferStart, mBufferEnd) to *aDest, limited by the room up to aDestEnd, and advances *aDest.
+ int32_t bcr, bcw; // bytes pending in the buffer & bytes of room in the destination
+ nsresult res = NS_OK;
+ char* dest = *aDest;
+
+ if (mBufferStart < mBufferEnd) {
+ bcr = mBufferEnd - mBufferStart;
+ bcw = aDestEnd - dest;
+ if (bcw < bcr) bcr = bcw; // copy no more than fits
+ memcpy(dest, mBufferStart, bcr);
+ dest += bcr;
+ mBufferStart += bcr;
+
+ if (mBufferStart < mBufferEnd) res = NS_OK_UENC_MOREOUTPUT; // bytes are still pending
+ }
+
+ *aDest = dest;
+ return res;
+}
+
+
+//----------------------------------------------------------------------
+// Interface nsIUnicodeEncoder [implementation]
+
+NS_IMETHODIMP nsEncoderSupport::Convert(const char16_t* aSrc,
+ int32_t* aSrcLength,
+ char* aDest,
+ int32_t* aDestLength)
+{ // Flushes pending bytes, encodes directly into aDest, and stages one character in mBuffer when it does not fit whole.
+ // we do all operations using pointers internally
+ const char16_t* src = aSrc;
+ const char16_t* srcEnd = aSrc + *aSrcLength;
+ char* dest = aDest;
+ char* destEnd = aDest + *aDestLength;
+
+ int32_t bcr, bcw; // counts for read (char16_t units) & write (bytes)
+ nsresult res;
+
+ res = FlushBuffer(&dest, destEnd); // bytes left over from an earlier call go out first
+ if (res == NS_OK_UENC_MOREOUTPUT) goto final;
+
+ bcr = srcEnd - src;
+ bcw = destEnd - dest;
+ res = ConvertNoBuff(src, &bcr, dest, &bcw);
+ src += bcr;
+ dest += bcw;
+ if ((res == NS_OK_UENC_MOREOUTPUT) && (dest < destEnd)) {
+ // convert exactly one character into the internal buffer
+ // at this point, there should be at least a char in the input
+ for (;;) {
+ bcr = 1;
+ bcw = mBufferCapacity;
+ res = ConvertNoBuff(src, &bcr, mBuffer, &bcw);
+
+ if (res == NS_OK_UENC_MOREOUTPUT) { // buffer too small for this character: double it and retry
+ delete [] mBuffer;
+ mBufferCapacity *= 2;
+ mBuffer = new char [mBufferCapacity];
+ } else {
+ src += bcr;
+ mBufferStart = mBufferEnd = mBuffer;
+ mBufferEnd += bcw; // the staged bytes are [mBuffer, mBuffer + bcw)
+ break;
+ }
+ }
+
+ res = FlushBuffer(&dest, destEnd); // emit what fits; the remainder stays pending
+ }
+
+final:
+ *aSrcLength -= srcEnd - src; // in/out: now the number of char16_t units taken (src - aSrc)
+ *aDestLength -= destEnd - dest; // in/out: now the number of bytes written (dest - aDest)
+ return res;
+}
+
+NS_IMETHODIMP nsEncoderSupport::Finish(char* aDest, int32_t* aDestLength)
+{ // Flushes pending bytes, then stages the FinishNoBuff output in mBuffer and flushes that too.
+ // we do all operations using pointers internally
+ char* dest = aDest;
+ char* destEnd = aDest + *aDestLength;
+
+ int32_t bcw; // byte count for write;
+ nsresult res;
+
+ res = FlushBuffer(&dest, destEnd); // bytes left over from an earlier call go out first
+ if (res == NS_OK_UENC_MOREOUTPUT) goto final;
+
+ // do the finish into the internal buffer.
+ for (;;) {
+ bcw = mBufferCapacity;
+ res = FinishNoBuff(mBuffer, &bcw);
+
+ if (res == NS_OK_UENC_MOREOUTPUT) { // buffer too small: double it and retry
+ delete [] mBuffer;
+ mBufferCapacity *= 2;
+ mBuffer = new char [mBufferCapacity];
+ } else {
+ mBufferStart = mBufferEnd = mBuffer;
+ mBufferEnd += bcw; // the staged bytes are [mBuffer, mBuffer + bcw)
+ break;
+ }
+ }
+
+ res = FlushBuffer(&dest, destEnd); // this overwrites the result of FinishNoBuff
+
+final:
+ *aDestLength -= destEnd - dest; // in/out: now the number of bytes written (dest - aDest)
+ return res;
+}
+
+NS_IMETHODIMP nsEncoderSupport::Reset()
+{ // Drops any staged output by making the pending range empty; always returns NS_OK.
+ mBufferStart = mBufferEnd = mBuffer;
+ return NS_OK;
+}
+
+NS_IMETHODIMP nsEncoderSupport::SetOutputErrorBehavior(
+ int32_t aBehavior,
+ nsIUnicharEncoder* aEncoder,
+ char16_t aChar)
+{ // Records the behavior, callback encoder and replacement character used by ConvertNoBuff.
+ if (aBehavior == kOnError_CallBack && !aEncoder) // callback mode requires an encoder
+ return NS_ERROR_NULL_POINTER;
+
+ mErrEncoder = aEncoder;
+ mErrBehavior = aBehavior;
+ mErrChar = aChar;
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsEncoderSupport::GetMaxLength(const char16_t* aSrc,
+ int32_t aSrcLength,
+ int32_t* aDestLength)
+{ // Reports aSrcLength * mMaxLengthFactor through aDestLength; aSrc itself is not read.
+ mozilla::CheckedInt32 length = aSrcLength;
+ length *= mMaxLengthFactor;
+
+ if (!length.isValid()) { // the checked multiplication did not yield a valid int32_t
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ *aDestLength = length.value();
+ return NS_OK;
+}
+
+
+//----------------------------------------------------------------------
+// Class nsTableEncoderSupport [implementation]
+
+nsTableEncoderSupport::nsTableEncoderSupport(uScanClassID aScanClass,
+ uShiftOutTable* aShiftOutTable,
+ uMappingTable* aMappingTable,
+ uint32_t aMaxLengthFactor)
+: nsEncoderSupport(aMaxLengthFactor)
+{ // Stores the scan class and both table pointers as given; the tables are not copied.
+ mScanClass = aScanClass;
+ mShiftOutTable = aShiftOutTable; // its own statement, not chained to the next assignment by a comma
+ mMappingTable = aMappingTable;
+}
+
+nsTableEncoderSupport::nsTableEncoderSupport(uScanClassID aScanClass,
+ uMappingTable* aMappingTable,
+ uint32_t aMaxLengthFactor)
+: nsEncoderSupport(aMaxLengthFactor)
+{ // Variant without a shift-out table: mShiftOutTable is set to nullptr.
+ mScanClass = aScanClass;
+ mShiftOutTable = nullptr;
+ mMappingTable = aMappingTable;
+}
+
+nsTableEncoderSupport::~nsTableEncoderSupport()
+{ // empty: the stored table pointers are not freed here
+}
+
+//----------------------------------------------------------------------
+// Subclassing of nsEncoderSupport class [implementation]
+
+NS_IMETHODIMP nsTableEncoderSupport::ConvertNoBuffNoErr(
+ const char16_t* aSrc,
+ int32_t* aSrcLength,
+ char* aDest,
+ int32_t* aDestLength)
+{ // Delegates to nsUnicodeEncodeHelper::ConvertByTable with the scan class and tables stored at construction.
+ return nsUnicodeEncodeHelper::ConvertByTable(aSrc, aSrcLength,
+ aDest, aDestLength,
+ mScanClass,
+ mShiftOutTable, mMappingTable);
+}
+
+//----------------------------------------------------------------------
+// Class nsMultiTableEncoderSupport [implementation]
+
+nsMultiTableEncoderSupport::nsMultiTableEncoderSupport(
+ int32_t aTableCount,
+ uScanClassID* aScanClassArray,
+ uShiftOutTable** aShiftOutTable,
+ uMappingTable** aMappingTable,
+ uint32_t aMaxLengthFactor)
+: nsEncoderSupport(aMaxLengthFactor)
+{ // Stores the table count and the three array pointers as given; the arrays are not copied.
+ mTableCount = aTableCount;
+ mScanClassArray = aScanClassArray;
+ mShiftOutTable = aShiftOutTable;
+ mMappingTable = aMappingTable;
+}
+
+nsMultiTableEncoderSupport::~nsMultiTableEncoderSupport()
+{ // empty: the stored array pointers are not freed here
+}
+
+//----------------------------------------------------------------------
+// Subclassing of nsEncoderSupport class [implementation]
+
+NS_IMETHODIMP nsMultiTableEncoderSupport::ConvertNoBuffNoErr(
+ const char16_t* aSrc,
+ int32_t* aSrcLength,
+ char* aDest,
+ int32_t* aDestLength)
+{ // Delegates to nsUnicodeEncodeHelper::ConvertByMultiTable with the tables stored at construction.
+ return nsUnicodeEncodeHelper::ConvertByMultiTable(aSrc, aSrcLength,
+ aDest, aDestLength,
+ mTableCount,
+ mScanClassArray,
+ mShiftOutTable,
+ mMappingTable);
+}
diff --git a/intl/uconv/util/nsUnicodeDecodeHelper.cpp b/intl/uconv/util/nsUnicodeDecodeHelper.cpp
new file mode 100644
index 000000000..9d3491d86
--- /dev/null
+++ b/intl/uconv/util/nsUnicodeDecodeHelper.cpp
@@ -0,0 +1,234 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "unicpriv.h"
+#include "nsUnicodeDecodeHelper.h"
+#include "mozilla/UniquePtr.h"
+
+//----------------------------------------------------------------------
+// Class nsUnicodeDecodeHelper [implementation]
+nsresult nsUnicodeDecodeHelper::ConvertByTable(
+ const char * aSrc,
+ int32_t * aSrcLength,
+ char16_t * aDest,
+ int32_t * aDestLength,
+ uScanClassID aScanClass,
+ uShiftInTable * aShiftInTable,
+ uMappingTable * aMappingTable,
+ bool aErrorSignal)
+{ // Decodes one character per iteration: scan the input bytes into a code, then map that code to a char16_t.
+ const char * src = aSrc;
+ int32_t srcLen = *aSrcLength;
+ char16_t * dest = aDest;
+ char16_t * destEnd = aDest + *aDestLength;
+
+ char16_t med; // intermediate code produced by the scan step
+ int32_t bcr; // byte count for read
+ nsresult res = NS_OK;
+
+ while ((srcLen > 0) && (dest < destEnd)) {
+ bool charFound;
+ if (aScanClass == uMultibytesCharset) {
+ NS_ASSERTION(aShiftInTable, "shift table missing");
+ charFound = uScanShift(aShiftInTable, nullptr, (uint8_t *)src,
+ reinterpret_cast<uint16_t*>(&med), srcLen,
+ (uint32_t *)&bcr);
+ } else {
+ charFound = uScan(aScanClass, nullptr, (uint8_t *)src,
+ reinterpret_cast<uint16_t*>(&med),
+ srcLen, (uint32_t *)&bcr);
+ }
+ if (!charFound) { // scan failure is reported as a need for more input
+ res = NS_OK_UDEC_MOREINPUT;
+ break;
+ }
+
+ if (!uMapCode((uTable*) aMappingTable, static_cast<uint16_t>(med), reinterpret_cast<uint16_t*>(dest))) {
+ if (med < 0x20) {
+ // some tables are missing the part below 0x20; pass such codes through unchanged
+ *dest = med;
+ } else {
+ if (aErrorSignal) { // stop without consuming the offending input
+ res = NS_ERROR_ILLEGAL_INPUT;
+ break;
+ }
+ // Unicode replacement value for unmappable chars
+ *dest = 0xfffd;
+ }
+ }
+
+ src += bcr;
+ srcLen -= bcr;
+ dest++;
+ }
+
+ if ((srcLen > 0) && (res == NS_OK)) res = NS_OK_UDEC_MOREOUTPUT; // input remains only because the output filled up
+
+ *aSrcLength = src - aSrc; // out: source bytes consumed
+ *aDestLength = dest - aDest; // out: char16_t units written
+ return res;
+}
+
+nsresult nsUnicodeDecodeHelper::ConvertByMultiTable(
+ const char * aSrc,
+ int32_t * aSrcLength,
+ char16_t * aDest,
+ int32_t * aDestLength,
+ int32_t aTableCount,
+ const uRange * aRangeArray,
+ uScanClassID * aScanClassArray,
+ uMappingTable ** aMappingTable,
+ bool aErrorSignal)
+{ // Decodes one character per iteration, trying each table whose range contains the lead byte, in order.
+ uint8_t * src = (uint8_t *)aSrc;
+ int32_t srcLen = *aSrcLength;
+ char16_t * dest = aDest;
+ char16_t * destEnd = aDest + *aDestLength;
+
+ char16_t med; // intermediate code produced by the scan step
+ int32_t bcr; // byte count for read
+ nsresult res = NS_OK;
+ int32_t i;
+
+ while ((srcLen > 0) && (dest < destEnd))
+ {
+ bool done= false; // set once some table mapped the character
+ bool passRangeCheck = false; // some table's range contains the lead byte
+ bool passScan = false; // some in-range table scanned successfully
+ for (i=0; (!done) && (i<aTableCount); i++)
+ {
+ if ((aRangeArray[i].min <= *src) && (*src <= aRangeArray[i].max))
+ {
+ passRangeCheck = true;
+ if (uScan(aScanClassArray[i], nullptr, src,
+ reinterpret_cast<uint16_t*>(&med), srcLen,
+ (uint32_t *)&bcr))
+ {
+ passScan = true;
+ done = uMapCode((uTable*) aMappingTable[i],
+ static_cast<uint16_t>(med),
+ reinterpret_cast<uint16_t*>(dest));
+ } // if (uScan ... )
+ } // if Range
+ } // for loop
+
+ if(passRangeCheck && (! passScan))
+ {
+ if (res != NS_ERROR_ILLEGAL_INPUT) // always true here: every NS_ERROR_ILLEGAL_INPUT assignment in this loop is followed by break
+ res = NS_OK_UDEC_MOREINPUT;
+ break;
+ }
+ if(! done)
+ {
+ bcr = 1; // default: skip a single byte
+ if ((uint8_t)*src < 0x20) {
+ // some tables are missing the part below 0x20; pass such bytes through unchanged
+ *dest = *src;
+ } else if(*src == (uint8_t) 0xa0) {
+ // handle nbsp
+ *dest = 0x00a0;
+ } else {
+ // we need to decide how many byte we skip. We can use uScan to do this
+ for (i=0; i<aTableCount; i++)
+ {
+ if ((aRangeArray[i].min <= *src) && (*src <= aRangeArray[i].max))
+ {
+ if (uScan(aScanClassArray[i], nullptr, src,
+ reinterpret_cast<uint16_t*>(&med), srcLen,
+ (uint32_t*)&bcr))
+ {
+ // match the pattern
+
+ int32_t k;
+ for(k = 1; k < bcr; k++)
+ {
+ if(0 == (src[k] & 0x80))
+ { // only skip the whole sequence if every trail byte has its high bit set
+ // if a trail byte has the high bit clear, skip only one byte
+ bcr = 1;
+ break;
+ }
+ }
+ break;
+ }
+ }
+ }
+ // treat it as NBSP if bcr == 1 and it is 0xa0 (never true here: 0xa0 was handled by the else-if above)
+ if ((1==bcr)&&(*src == (uint8_t)0xa0 )) {
+ *dest = 0x00a0;
+ } else {
+ if (aErrorSignal) { // stop without consuming the offending input
+ res = NS_ERROR_ILLEGAL_INPUT;
+ break;
+ }
+ *dest = 0xfffd; // Unicode replacement character
+ }
+ }
+ }
+
+ src += bcr;
+ srcLen -= bcr;
+ dest++;
+ } // while
+
+ if ((srcLen > 0) && (res == NS_OK)) res = NS_OK_UDEC_MOREOUTPUT; // input remains only because the output filled up
+
+ *aSrcLength = src - (uint8_t *)aSrc; // out: source bytes consumed
+ *aDestLength = dest - aDest; // out: char16_t units written
+ return res;
+}
+
+nsresult nsUnicodeDecodeHelper::ConvertByFastTable(
+ const char * aSrc,
+ int32_t * aSrcLength,
+ char16_t * aDest,
+ int32_t * aDestLength,
+ const char16_t * aFastTable,
+ int32_t aTableSize,
+ bool aErrorSignal)
+{ // Decodes one byte to one char16_t by indexing aFastTable; aTableSize is not used in this function.
+ uint8_t * src = (uint8_t *)aSrc;
+ uint8_t * srcEnd = src;
+ char16_t * dest = aDest;
+
+ nsresult res;
+ if (*aSrcLength > *aDestLength) { // output is the limit: convert only as many bytes as fit
+ srcEnd += (*aDestLength);
+ res = NS_PARTIAL_MORE_OUTPUT;
+ } else {
+ srcEnd += (*aSrcLength);
+ res = NS_OK;
+ }
+
+ for (; src<srcEnd;) {
+ *dest = aFastTable[*src]; // index is a uint8_t, i.e. at most 255
+ if (*dest == 0xfffd && aErrorSignal) { // the table entry marks an unmappable byte
+ res = NS_ERROR_ILLEGAL_INPUT;
+ break;
+ }
+ src++;
+ dest++;
+ }
+
+ *aSrcLength = src - (uint8_t *)aSrc; // out: source bytes consumed
+ *aDestLength = dest - aDest; // out: char16_t units counted as written
+ return res;
+}
+
+nsresult nsUnicodeDecodeHelper::CreateFastTable(
+ uMappingTable * aMappingTable,
+ char16_t * aFastTable,
+ int32_t aTableSize)
+{ // Fills aFastTable by decoding the byte sequence 0, 1, ..., aTableSize-1 with ConvertByTable.
+ int32_t tableSize = aTableSize;
+ int32_t buffSize = aTableSize;
+ auto buff = mozilla::MakeUnique<char[]>(buffSize);
+
+ char * p = buff.get();
+ for (int32_t i=0; i<aTableSize; i++) *(p++) = i; // byte i of the scratch input holds the value i
+ return ConvertByTable(buff.get(), &buffSize, aFastTable, &tableSize,
+ u1ByteCharset, nullptr, aMappingTable); // aErrorSignal is left at its default
+}
+
diff --git a/intl/uconv/util/nsUnicodeDecodeHelper.h b/intl/uconv/util/nsUnicodeDecodeHelper.h
new file mode 100644
index 000000000..f1ce56b19
--- /dev/null
+++ b/intl/uconv/util/nsUnicodeDecodeHelper.h
@@ -0,0 +1,55 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef nsUnicodeDecodeHelper_h__
+#define nsUnicodeDecodeHelper_h__
+
+#include "nsError.h"
+#include "uconvutil.h"
+//----------------------------------------------------------------------
+// Class nsUnicodeDecodeHelper [declaration]
+
+/**
+ *
+ * @created 18/Mar/1998
+ * @author Catalin Rotaru [CATA]
+ */
+class nsUnicodeDecodeHelper
+{
+public:
+ /**
+ * Converts data using a lookup table and optional shift table. On return *aSrcLength and *aDestLength hold the counts consumed and written.
+ */
+ static nsresult ConvertByTable(const char * aSrc, int32_t * aSrcLength,
+ char16_t * aDest, int32_t * aDestLength,
+ uScanClassID aScanClass,
+ uShiftInTable * aShiftInTable,
+ uMappingTable * aMappingTable,
+ bool aErrorSignal = false);
+
+ /**
+ * Converts data using a set of lookup tables, each paired with a lead-byte range and a scan class.
+ */
+ static nsresult ConvertByMultiTable(const char * aSrc, int32_t * aSrcLength,
+ char16_t * aDest, int32_t * aDestLength, int32_t aTableCount,
+ const uRange * aRangeArray, uScanClassID * aScanClassArray,
+ uMappingTable ** aMappingTable, bool aErrorSignal = false);
+
+ /**
+ * Converts data using a fast lookup table indexed directly by the source byte.
+ */
+ static nsresult ConvertByFastTable(const char * aSrc, int32_t * aSrcLength,
+ char16_t * aDest, int32_t * aDestLength, const char16_t * aFastTable,
+ int32_t aTableSize, bool aErrorSignal);
+
+ /**
+ * Create a cache-like fast lookup table (aTableSize entries) from a normal one.
+ */
+ static nsresult CreateFastTable(uMappingTable * aMappingTable,
+ char16_t * aFastTable, int32_t aTableSize);
+};
+
+#endif // nsUnicodeDecodeHelper_h__
+
+
diff --git a/intl/uconv/util/nsUnicodeEncodeHelper.cpp b/intl/uconv/util/nsUnicodeEncodeHelper.cpp
new file mode 100644
index 000000000..f801e8166
--- /dev/null
+++ b/intl/uconv/util/nsUnicodeEncodeHelper.cpp
@@ -0,0 +1,121 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "unicpriv.h"
+#include "nsUnicodeEncodeHelper.h"
+#include "nsDebug.h"
+
+//----------------------------------------------------------------------
+// Class nsUnicodeEncodeHelper [implementation]
+nsresult nsUnicodeEncodeHelper::ConvertByTable(
+ const char16_t * aSrc,
+ int32_t * aSrcLength,
+ char * aDest,
+ int32_t * aDestLength,
+ uScanClassID aScanClass,
+ uShiftOutTable * aShiftOutTable,
+ uMappingTable * aMappingTable)
+{ // Encodes one character per iteration: map the char16_t to a code, then generate its bytes.
+ const char16_t * src = aSrc;
+ const char16_t * srcEnd = aSrc + *aSrcLength;
+ char * dest = aDest;
+ int32_t destLen = *aDestLength;
+
+ char16_t med; // intermediate code produced by the mapping step
+ int32_t bcw; // byte count for write;
+ nsresult res = NS_OK;
+
+ while (src < srcEnd) {
+ if (!uMapCode((uTable*) aMappingTable, static_cast<uint16_t>(*(src++)), reinterpret_cast<uint16_t*>(&med))) {
+ if (aScanClass == u1ByteCharset && *(src - 1) < 0x20) {
+ // some tables are missing the 0x00 - 0x20 part
+ med = *(src - 1);
+ } else {
+ res = NS_ERROR_UENC_NOMAPPING; // src stays one past the unmappable character
+ break;
+ }
+ }
+
+ bool charFound;
+ if (aScanClass == uMultibytesCharset) {
+ NS_ASSERTION(aShiftOutTable, "shift table missing");
+ charFound = uGenerateShift(aShiftOutTable, 0, med,
+ (uint8_t *)dest, destLen,
+ (uint32_t *)&bcw);
+ } else {
+ charFound = uGenerate(aScanClass, 0, med,
+ (uint8_t *)dest, destLen,
+ (uint32_t *)&bcw);
+ }
+ if (!charFound) { // generation failure is reported as a need for more output space
+ src--; // the character was not written, so it is not counted as consumed
+ res = NS_OK_UENC_MOREOUTPUT;
+ break;
+ }
+
+ dest += bcw;
+ destLen -= bcw;
+ }
+
+ *aSrcLength = src - aSrc; // out: char16_t units consumed
+ *aDestLength = dest - aDest; // out: bytes written
+ return res;
+}
+
+nsresult nsUnicodeEncodeHelper::ConvertByMultiTable(
+ const char16_t * aSrc,
+ int32_t * aSrcLength,
+ char * aDest,
+ int32_t * aDestLength,
+ int32_t aTableCount,
+ uScanClassID * aScanClassArray,
+ uShiftOutTable ** aShiftOutTable,
+ uMappingTable ** aMappingTable)
+{ // Encodes one character per iteration using the first table, in order, that maps it.
+ const char16_t * src = aSrc;
+ const char16_t * srcEnd = aSrc + *aSrcLength;
+ char * dest = aDest;
+ int32_t destLen = *aDestLength;
+
+ char16_t med; // intermediate code produced by the mapping step
+ int32_t bcw; // byte count for write;
+ nsresult res = NS_OK;
+ int32_t i;
+
+ while (src < srcEnd) {
+ for (i=0; i<aTableCount; i++) // after the loop i is the index of the matching table, or aTableCount if none matched
+ if (uMapCode((uTable*) aMappingTable[i], static_cast<uint16_t>(*src), reinterpret_cast<uint16_t*>(&med))) break;
+
+ src++;
+ if (i == aTableCount) {
+ res = NS_ERROR_UENC_NOMAPPING; // src stays one past the unmappable character
+ break;
+ }
+
+ bool charFound;
+ if (aScanClassArray[i] == uMultibytesCharset) {
+ NS_ASSERTION(aShiftOutTable[i], "shift table missing");
+ charFound = uGenerateShift(aShiftOutTable[i], 0, med,
+ (uint8_t *)dest, destLen,
+ (uint32_t *)&bcw);
+ }
+ else
+ charFound = uGenerate(aScanClassArray[i], 0, med,
+ (uint8_t *)dest, destLen,
+ (uint32_t *)&bcw);
+ if (!charFound) { // generation failure is reported as a need for more output space
+ src--; // the character was not written, so it is not counted as consumed
+ res = NS_OK_UENC_MOREOUTPUT;
+ break;
+ }
+
+ dest += bcw;
+ destLen -= bcw;
+ }
+
+ *aSrcLength = src - aSrc; // out: char16_t units consumed
+ *aDestLength = dest - aDest; // out: bytes written
+ return res;
+}
diff --git a/intl/uconv/util/nsUnicodeEncodeHelper.h b/intl/uconv/util/nsUnicodeEncodeHelper.h
new file mode 100644
index 000000000..456c277ba
--- /dev/null
+++ b/intl/uconv/util/nsUnicodeEncodeHelper.h
@@ -0,0 +1,42 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef nsUnicodeEncodeHelper_h__
+#define nsUnicodeEncodeHelper_h__
+
+#include "nsError.h"
+#include "uconvutil.h"
+//----------------------------------------------------------------------
+// Class nsUnicodeEncodeHelper [declaration]
+
+/**
+ *
+ * @created 22/Nov/1998
+ * @author Catalin Rotaru [CATA]
+ */
+class nsUnicodeEncodeHelper
+{
+
+public:
+ //--------------------------------------------------------------------
+
+ /**
+ * Converts data using a lookup table and optional shift table.
+ *
+ * The shift table is consulted only when aScanClass is uMultibytesCharset;
+ * otherwise the generator selected by aScanClass emits the bytes.
+ *
+ * On return *aSrcLength holds the number of source units consumed and
+ * *aDestLength the number of bytes written. NS_OK_UENC_MOREOUTPUT is
+ * returned when the generator could not emit the current unit; that unit
+ * is then not counted as consumed.
+ */
+ static nsresult ConvertByTable(const char16_t * aSrc, int32_t * aSrcLength,
+ char * aDest, int32_t * aDestLength, uScanClassID aScanClass,
+ uShiftOutTable * aShiftOutTable, uMappingTable * aMappingTable);
+
+ /**
+ * Converts data using a set of lookup tables and optional shift tables.
+ *
+ * aScanClassArray, aShiftOutTable and aMappingTable are parallel arrays of
+ * aTableCount entries. For each source unit the mapping tables are tried
+ * in order and the first one that maps the unit decides which generator or
+ * shift table is used.
+ *
+ * On return *aSrcLength holds the number of source units consumed and
+ * *aDestLength the number of bytes written. Returns
+ * NS_ERROR_UENC_NOMAPPING when no table maps a unit (that unit is counted
+ * as consumed) and NS_OK_UENC_MOREOUTPUT when the generator could not emit
+ * the unit (that unit is not counted as consumed).
+ */
+ static nsresult ConvertByMultiTable(const char16_t * aSrc, int32_t * aSrcLength,
+ char * aDest, int32_t * aDestLength, int32_t aTableCount,
+ uScanClassID * aScanClassArray,
+ uShiftOutTable ** aShiftOutTable, uMappingTable ** aMappingTable);
+};
+
+#endif // nsUnicodeEncodeHelper_h__
+
+
diff --git a/intl/uconv/util/ugen.c b/intl/uconv/util/ugen.c
new file mode 100644
index 000000000..9a11b9f39
--- /dev/null
+++ b/intl/uconv/util/ugen.c
@@ -0,0 +1,712 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "unicpriv.h"
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Unchecked byte generator: writes the encoding of `in` to `out` without
+ * looking at the buffer size. These are invoked from uGenerateShift(), which
+ * checks the available space first.
+ */
+typedef int (*uSubGeneratorFunc) (uint16_t in, unsigned char* out);
+/*=================================================================================
+
+=================================================================================*/
+
+/*
+ * Checked byte generator.
+ *   state     - passed through by uGenerate(); the generators in this file
+ *               never read it
+ *   in        - 16-bit value to encode
+ *   out       - destination buffer
+ *   outbuflen - bytes available at `out`
+ *   outlen    - receives the number of bytes written on success
+ * Returns 1 on success and 0 when nothing was written.
+ */
+typedef int (*uGeneratorFunc) (
+ int32_t* state,
+ uint16_t in,
+ unsigned char* out,
+ uint32_t outbuflen,
+ uint32_t* outlen
+ );
+
+/* Forward declaration; dispatches through m_generator[scanClass]. */
+int uGenerate(
+ uScanClassID scanClass,
+ int32_t* state,
+ uint16_t in,
+ unsigned char* out,
+ uint32_t outbuflen,
+ uint32_t* outlen
+ );
+
+/* Calls the unchecked sub-generator stored at m_subgenerator[sub]. */
+#define uSubGenerator(sub,in,out) (* m_subgenerator[sub])((in),(out))
+
+/*
+ * Forward declarations of the checked generators defined later in this file.
+ * All of them share the uGeneratorFunc signature except uGenerateShift(),
+ * which takes the shift table as an extra leading argument.
+ */
+int uCheckAndGenAlways1Byte(int32_t* state, uint16_t in, unsigned char* out,
+                            uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGenAlways2Byte(int32_t* state, uint16_t in, unsigned char* out,
+                            uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGenAlways2ByteShiftGR(int32_t* state, uint16_t in, unsigned char* out,
+                                   uint32_t outbuflen, uint32_t* outlen);
+int uGenerateShift(uShiftOutTable *shift, int32_t* state, uint16_t in,
+                   unsigned char* out, uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGen2ByteGRPrefix8F(int32_t* state, uint16_t in, unsigned char* out,
+                                uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGen2ByteGRPrefix8EA2(int32_t* state, uint16_t in, unsigned char* out,
+                                  uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGen2ByteGRPrefix8EA3(int32_t* state, uint16_t in, unsigned char* out,
+                                  uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGen2ByteGRPrefix8EA4(int32_t* state, uint16_t in, unsigned char* out,
+                                  uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGen2ByteGRPrefix8EA5(int32_t* state, uint16_t in, unsigned char* out,
+                                  uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGen2ByteGRPrefix8EA6(int32_t* state, uint16_t in, unsigned char* out,
+                                  uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGen2ByteGRPrefix8EA7(int32_t* state, uint16_t in, unsigned char* out,
+                                  uint32_t outbuflen, uint32_t* outlen);
+int uCnGAlways8BytesDecomposedHangul(int32_t* state, uint16_t in, unsigned char* out,
+                                     uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGenJohabHangul(int32_t* state, uint16_t in, unsigned char* out,
+                            uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGenJohabSymbol(int32_t* state, uint16_t in, unsigned char* out,
+                            uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGen4BytesGB18030(int32_t* state, uint16_t in, unsigned char* out,
+                              uint32_t outbuflen, uint32_t* outlen);
+
+/* Forward declarations of the unchecked sub-generators (uSubGeneratorFunc). */
+int uGenAlways2Byte(uint16_t in, unsigned char* out);
+int uGenAlways2ByteShiftGR(uint16_t in, unsigned char* out);
+int uGenAlways1Byte(uint16_t in, unsigned char* out);
+int uGenAlways1BytePrefix8E(uint16_t in, unsigned char* out);
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Dispatch table used by uGenerate(): the entry at index `scanClass` is the
+ * checked generator for that scan class. The entry order therefore has to
+ * match the numeric order of the uScanClassID values (declared outside this
+ * file -- NOTE(review): confirm against that enum before reordering).
+ */
+const uGeneratorFunc m_generator[uNumOfCharsetType] =
+{
+ uCheckAndGenAlways1Byte,
+ uCheckAndGenAlways2Byte,
+ uCheckAndGenAlways2ByteShiftGR,
+ uCheckAndGen2ByteGRPrefix8F,
+ uCheckAndGen2ByteGRPrefix8EA2,
+ uCheckAndGen2ByteGRPrefix8EA3,
+ uCheckAndGen2ByteGRPrefix8EA4,
+ uCheckAndGen2ByteGRPrefix8EA5,
+ uCheckAndGen2ByteGRPrefix8EA6,
+ uCheckAndGen2ByteGRPrefix8EA7,
+ uCnGAlways8BytesDecomposedHangul,
+ uCheckAndGenJohabHangul,
+ uCheckAndGenJohabSymbol,
+ uCheckAndGen4BytesGB18030,
+ uCheckAndGenAlways2Byte /* place-holder for GR128 */
+};
+
+/*=================================================================================
+
+=================================================================================*/
+
+/*
+ * Dispatch table behind the uSubGenerator() macro. uGenerateShift() indexes
+ * it with the classID of the matching shift cell, so the entry order has to
+ * match the numeric values used for classID (declared outside this file --
+ * NOTE(review): confirm before reordering).
+ */
+const uSubGeneratorFunc m_subgenerator[uNumOfCharType] =
+{
+ uGenAlways1Byte,
+ uGenAlways2Byte,
+ uGenAlways2ByteShiftGR,
+ uGenAlways1BytePrefix8E
+};
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Encodes `in` with the generator registered for `scanClass` in m_generator
+ * and returns that generator's result (1 on success, 0 when nothing was
+ * written). `scanClass` is used as an array index without a range check.
+ */
+int uGenerate(
+    uScanClassID scanClass,
+    int32_t* state,
+    uint16_t in,
+    unsigned char* out,
+    uint32_t outbuflen,
+    uint32_t* outlen
+    )
+{
+  const uGeneratorFunc generate = m_generator[scanClass];
+
+  return generate(state, in, out, outbuflen, outlen);
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Sub-generator: stores the low 8 bits of `in` as one byte.
+ * Always returns 1; the value is a success flag, not a byte count.
+ */
+int uGenAlways1Byte(uint16_t in, unsigned char* out)
+{
+  *out = (unsigned char)in;
+  return 1;
+}
+
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Sub-generator: stores `in` as two bytes, high byte first.
+ * Always returns 1; the value is a success flag, not a byte count.
+ */
+int uGenAlways2Byte(uint16_t in, unsigned char* out)
+{
+  const unsigned char high = (unsigned char)((in >> 8) & 0xff);
+  const unsigned char low = (unsigned char)(in & 0xff);
+
+  out[0] = high;
+  out[1] = low;
+  return 1;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Sub-generator: stores `in` as two bytes, high byte first, with bit 7 set
+ * on both bytes. Always returns 1 (success flag, not a byte count).
+ */
+int uGenAlways2ByteShiftGR(uint16_t in, unsigned char* out)
+{
+  const unsigned char high = (unsigned char)(((in >> 8) & 0xff) | 0x80);
+  const unsigned char low = (unsigned char)((in & 0xff) | 0x80);
+
+  out[0] = high;
+  out[1] = low;
+  return 1;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Sub-generator: stores the prefix byte 0x8E followed by the low 8 bits of
+ * `in`. Always returns 1 (success flag, not a byte count).
+ */
+int uGenAlways1BytePrefix8E(uint16_t in, unsigned char* out)
+{
+  const unsigned char low = (unsigned char)(in & 0xff);
+
+  out[0] = 0x8E;
+  out[1] = low;
+  return 1;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Checked generator: writes the low 8 bits of `in` as one byte.
+ * The output space is verified on every call rather than being left to the
+ * caller. Returns 0 and leaves *outlen untouched when the buffer is empty;
+ * otherwise sets *outlen to 1 and returns 1. `state` is not used.
+ */
+int uCheckAndGenAlways1Byte(
+    int32_t* state,
+    uint16_t in,
+    unsigned char* out,
+    uint32_t outbuflen,
+    uint32_t* outlen
+    )
+{
+  if (outbuflen == 0)
+    return 0;
+
+  out[0] = in & 0xff;
+  *outlen = 1;
+  return 1;
+}
+
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Checked generator: writes `in` as two bytes, high byte first.
+ * Returns 0 and leaves *outlen untouched when fewer than 2 bytes are
+ * available; otherwise sets *outlen to 2 and returns 1. `state` is not used.
+ */
+int uCheckAndGenAlways2Byte(
+    int32_t* state,
+    uint16_t in,
+    unsigned char* out,
+    uint32_t outbuflen,
+    uint32_t* outlen
+    )
+{
+  if (outbuflen < 2)
+    return 0;
+
+  out[0] = ((in >> 8) & 0xff);
+  out[1] = in & 0xff;
+  *outlen = 2;
+  return 1;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Checked generator: writes `in` as two bytes, high byte first, with bit 7
+ * set on both bytes. Returns 0 and leaves *outlen untouched when fewer than
+ * 2 bytes are available; otherwise sets *outlen to 2 and returns 1.
+ * `state` is not used.
+ */
+int uCheckAndGenAlways2ByteShiftGR(
+    int32_t* state,
+    uint16_t in,
+    unsigned char* out,
+    uint32_t outbuflen,
+    uint32_t* outlen
+    )
+{
+  if (outbuflen < 2)
+    return 0;
+
+  out[0] = ((in >> 8) & 0xff) | 0x80;
+  out[1] = (in & 0xff) | 0x80;
+  *outlen = 2;
+  return 1;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Encodes `in` through a shift-out table.
+ *
+ * The cells of `shift` are searched in order for the first one whose
+ * low-byte and high-byte ranges both contain the corresponding byte of `in`.
+ * If the buffer holds at least that cell's reserveLen bytes, *outlen is set
+ * to reserveLen and the cell's sub-generator writes the bytes; its result is
+ * returned. Returns 0 when the buffer is too small for the matching cell or
+ * when no cell matches. `state` is not used.
+ */
+int uGenerateShift(
+    uShiftOutTable *shift,
+    int32_t* state,
+    uint16_t in,
+    unsigned char* out,
+    uint32_t outbuflen,
+    uint32_t* outlen
+    )
+{
+  const uShiftOutCell* cells = &(shift->shiftcell[0]);
+  const int16_t cellCount = shift->numOfItem;
+  const unsigned char highByte = (in >> 8) & 0xff;
+  const unsigned char lowByte = (in & 0xff);
+  int16_t idx;
+
+  for (idx = 0; idx < cellCount; idx++)
+  {
+    const uShiftOutCell* candidate = &cells[idx];
+
+    /* Skip cells whose byte ranges do not contain this value. */
+    if ((lowByte < candidate->shiftout_MinLB) ||
+        (lowByte > candidate->shiftout_MaxLB) ||
+        (highByte < candidate->shiftout_MinHB) ||
+        (highByte > candidate->shiftout_MaxHB))
+      continue;
+
+    /* Only the first matching cell is considered. */
+    if (outbuflen < candidate->reserveLen)
+      return 0;
+
+    *outlen = candidate->reserveLen;
+    return (uSubGenerator(candidate->classID, in, out));
+  }
+  return 0;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Checked generator: writes the prefix byte 0x8F followed by the two bytes
+ * of `in` (high byte first), each with bit 7 set. Returns 0 and leaves
+ * *outlen untouched when fewer than 3 bytes are available; otherwise sets
+ * *outlen to 3 and returns 1. `state` is not used.
+ */
+int uCheckAndGen2ByteGRPrefix8F(int32_t* state,
+                                uint16_t in,
+                                unsigned char* out,
+                                uint32_t outbuflen,
+                                uint32_t* outlen
+                                )
+{
+  if (outbuflen < 3)
+    return 0;
+
+  out[0] = 0x8F;
+  out[1] = ((in >> 8) & 0xff) | 0x80;
+  out[2] = (in & 0xff) | 0x80;
+  *outlen = 3;
+  return 1;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Checked generator: writes the prefix bytes 0x8E 0xA2 followed by the two
+ * bytes of `in` (high byte first), each with bit 7 set. Returns 0 and leaves
+ * *outlen untouched when fewer than 4 bytes are available; otherwise sets
+ * *outlen to 4 and returns 1. `state` is not used.
+ */
+int uCheckAndGen2ByteGRPrefix8EA2(int32_t* state,
+                                  uint16_t in,
+                                  unsigned char* out,
+                                  uint32_t outbuflen,
+                                  uint32_t* outlen
+                                  )
+{
+  if (outbuflen < 4)
+    return 0;
+
+  out[0] = 0x8E;
+  out[1] = 0xA2;
+  out[2] = ((in >> 8) & 0xff) | 0x80;
+  out[3] = (in & 0xff) | 0x80;
+  *outlen = 4;
+  return 1;
+}
+
+
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Checked generator: writes the prefix bytes 0x8E 0xA3 followed by the two
+ * bytes of `in` (high byte first), each with bit 7 set. Returns 0 and leaves
+ * *outlen untouched when fewer than 4 bytes are available; otherwise sets
+ * *outlen to 4 and returns 1. `state` is not used.
+ */
+int uCheckAndGen2ByteGRPrefix8EA3(int32_t* state,
+                                  uint16_t in,
+                                  unsigned char* out,
+                                  uint32_t outbuflen,
+                                  uint32_t* outlen
+                                  )
+{
+  if (outbuflen < 4)
+    return 0;
+
+  out[0] = 0x8E;
+  out[1] = 0xA3;
+  out[2] = ((in >> 8) & 0xff) | 0x80;
+  out[3] = (in & 0xff) | 0x80;
+  *outlen = 4;
+  return 1;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Checked generator: writes the prefix bytes 0x8E 0xA4 followed by the two
+ * bytes of `in` (high byte first), each with bit 7 set. Returns 0 and leaves
+ * *outlen untouched when fewer than 4 bytes are available; otherwise sets
+ * *outlen to 4 and returns 1. `state` is not used.
+ */
+int uCheckAndGen2ByteGRPrefix8EA4(int32_t* state,
+                                  uint16_t in,
+                                  unsigned char* out,
+                                  uint32_t outbuflen,
+                                  uint32_t* outlen
+                                  )
+{
+  if (outbuflen < 4)
+    return 0;
+
+  out[0] = 0x8E;
+  out[1] = 0xA4;
+  out[2] = ((in >> 8) & 0xff) | 0x80;
+  out[3] = (in & 0xff) | 0x80;
+  *outlen = 4;
+  return 1;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Checked generator: writes the prefix bytes 0x8E 0xA5 followed by the two
+ * bytes of `in` (high byte first), each with bit 7 set. Returns 0 and leaves
+ * *outlen untouched when fewer than 4 bytes are available; otherwise sets
+ * *outlen to 4 and returns 1. `state` is not used.
+ */
+int uCheckAndGen2ByteGRPrefix8EA5(int32_t* state,
+                                  uint16_t in,
+                                  unsigned char* out,
+                                  uint32_t outbuflen,
+                                  uint32_t* outlen
+                                  )
+{
+  if (outbuflen < 4)
+    return 0;
+
+  out[0] = 0x8E;
+  out[1] = 0xA5;
+  out[2] = ((in >> 8) & 0xff) | 0x80;
+  out[3] = (in & 0xff) | 0x80;
+  *outlen = 4;
+  return 1;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Checked generator: writes the prefix bytes 0x8E 0xA6 followed by the two
+ * bytes of `in` (high byte first), each with bit 7 set. Returns 0 and leaves
+ * *outlen untouched when fewer than 4 bytes are available; otherwise sets
+ * *outlen to 4 and returns 1. `state` is not used.
+ */
+int uCheckAndGen2ByteGRPrefix8EA6(int32_t* state,
+                                  uint16_t in,
+                                  unsigned char* out,
+                                  uint32_t outbuflen,
+                                  uint32_t* outlen
+                                  )
+{
+  if (outbuflen < 4)
+    return 0;
+
+  out[0] = 0x8E;
+  out[1] = 0xA6;
+  out[2] = ((in >> 8) & 0xff) | 0x80;
+  out[3] = (in & 0xff) | 0x80;
+  *outlen = 4;
+  return 1;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Checked generator: writes the prefix bytes 0x8E 0xA7 followed by the two
+ * bytes of `in` (high byte first), each with bit 7 set. Returns 0 and leaves
+ * *outlen untouched when fewer than 4 bytes are available; otherwise sets
+ * *outlen to 4 and returns 1. `state` is not used.
+ */
+int uCheckAndGen2ByteGRPrefix8EA7(int32_t* state,
+                                  uint16_t in,
+                                  unsigned char* out,
+                                  uint32_t outbuflen,
+                                  uint32_t* outlen
+                                  )
+{
+  if (outbuflen < 4)
+    return 0;
+
+  out[0] = 0x8E;
+  out[1] = 0xA7;
+  out[2] = ((in >> 8) & 0xff) | 0x80;
+  out[3] = (in & 0xff) | 0x80;
+  *outlen = 4;
+  return 1;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Constants of the Hangul syllable decomposition arithmetic:
+ * SBase is the first precomposed syllable, LCount/VCount/TCount are the
+ * number of leading consonants, vowels and trailing consonants (the trailing
+ * count includes "no trailing consonant"), NCount is the number of syllables
+ * per leading consonant.
+ */
+#define SBase 0xAC00
+#define LCount 19
+#define VCount 21
+#define TCount 28
+#define NCount (VCount * TCount)
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Checked generator: writes a Hangul syllable as the 8-byte decomposed
+ * sequence 0xA4 0xD4, 0xA4 <leading>, 0xA4 <vowel>, 0xA4 <trailing>.
+ *
+ * Returns 0 and leaves *outlen untouched when fewer than 8 bytes are
+ * available or when `in` is not one of the LCount * NCount syllables that
+ * start at SBase (such a value would otherwise index lMap out of bounds).
+ * On success sets *outlen to 8 and returns 1. `state` is not used.
+ */
+int uCnGAlways8BytesDecomposedHangul(
+    int32_t* state,
+    uint16_t in,
+    unsigned char* out,
+    uint32_t outbuflen,
+    uint32_t* outlen
+    )
+{
+  /* Second byte of the leading-consonant Jamo, indexed by LIndex. */
+  static const uint8_t lMap[LCount] = {
+    0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2, 0xb3, 0xb5,
+    0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
+  };
+
+  /* Second byte of the trailing-consonant Jamo, indexed by TIndex;
+   * entry 0 (no trailing consonant) is the filler byte 0xd4. */
+  static const uint8_t tMap[TCount] = {
+    0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa9, 0xaa,
+    0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb4, 0xb5,
+    0xb6, 0xb7, 0xb8, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
+  };
+
+  uint16_t SIndex, LIndex, VIndex, TIndex;
+
+  if(outbuflen < 8)
+    return 0;
+
+  /* the following line is copied from Unicode 2.0 page 3-13 */
+  /* item 1 of Hangul Syllable Decomposition */
+  SIndex = in - SBase;
+
+  /* Values below SBase wrap around to large indices, so a single upper
+   * bound rejects everything outside the precomposed syllable block. */
+  if(SIndex >= LCount * NCount)
+    return 0;
+
+  /* the following lines are copied from Unicode 2.0 page 3-14 */
+  /* item 2 of Hangul Syllable Decomposition w/ modification */
+  LIndex = SIndex / NCount;
+  VIndex = (SIndex % NCount) / TCount;
+  TIndex = SIndex % TCount;
+
+  /*
+   * A Hangul syllable not enumerated in KS X 1001 is represented
+   * by a sequence of 8 bytes beginning with Hangul-filler
+   * (0xA4D4 in EUC-KR and 0x2454 in ISO-2022-KR) followed by three
+   * Jamos (2 bytes each the first of which is 0xA4 in EUC-KR) making
+   * up the syllable. ref. KS X 1001:1998 Annex 3
+   */
+  *outlen = 8;
+  out[0] = out[2] = out[4] = out[6] = 0xa4;
+  out[1] = 0xd4;
+  out[3] = lMap[LIndex] ;
+  out[5] = (VIndex + 0xbf);
+  out[7] = tMap[TIndex];
+
+  return 1;
+}
+
+/*
+ * Checked generator: packs a Hangul syllable into a 2-byte Johab code.
+ *
+ * The syllable is split into leading-consonant, vowel and trailing-consonant
+ * indices, each index is translated to its 5-bit Johab field, and the fields
+ * are combined below a set top bit. Returns 0 and leaves *outlen untouched
+ * when fewer than 2 bytes are available; otherwise sets *outlen to 2 and
+ * returns 1. `in` is not range-checked and `state` is not used.
+ */
+int uCheckAndGenJohabHangul(
+    int32_t* state,
+    uint16_t in,
+    unsigned char* out,
+    uint32_t outbuflen,
+    uint32_t* outlen
+    )
+{
+  /*
+   * See Table 4-45 (page 183) of CJKV Information Processing for a
+   * detailed explanation of these tables. The leading-consonant table
+   * would be {2,3,...,20}, i.e. index + 2, so it is computed inline.
+   */
+  static const uint8_t vowelField[VCount] = {
+    /* no 0,1,2 */
+    3,4,5,6,7,          /* no 8,9 */
+    10,11,12,13,14,15,  /* no 16,17 */
+    18,19,20,21,22,23,  /* no 24,25 */
+    26,27,28,29
+  };
+  static const uint8_t trailField[TCount] = {
+    1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17, /* no 18 */
+    19,20,21,22,23,24,25,26,27,28,29
+  };
+  uint16_t syllable, lead, vowel, trail, johab;
+
+  if (outbuflen < 2)
+    return 0;
+
+  /* Hangul syllable decomposition, Unicode 2.0 pages 3-13 and 3-14. */
+  syllable = in - SBase;
+  lead = syllable / NCount;
+  vowel = (syllable % NCount) / TCount;
+  trail = syllable % TCount;
+
+  johab = 0x8000 |
+          ((lead + 2) << 10) |
+          (vowelField[vowel] << 5) |
+          trailField[trail];
+
+  *outlen = 2;
+  out[0] = (johab >> 8);
+  out[1] = johab & 0x00FF;
+  return 1;
+}
+/*
+ * Checked generator: converts a 2-byte code (one 7-bit value per byte after
+ * masking) to its 2-byte Johab form. Returns 0 and leaves *outlen untouched
+ * when fewer than 2 bytes are available; otherwise sets *outlen to 2 and
+ * returns 1. `state` is not used.
+ *
+ * The arithmetic follows the Perl code listed under
+ * "ISO-2022-KR or EUC-KR to Johab Conversion" (page 1013)
+ * in the book "CJKV Information Processing" by
+ * Ken Lunde <lunde@adobe.com>:
+ *
+ * sub convert2johab($) { # Convert ISO-2022-KR or EUC-KR to Johab
+ *   my @euc = unpack("C*", $_[0]);
+ *   my ($fe_off, $hi_off, $lo_off) = (0,0,1);
+ *   my @out = ();
+ *   while(($hi, $lo) = splice(@euc, 0, 2)) {
+ *     $hi &= 127; $lo &= 127;
+ *     $fe_off = 21 if $hi == 73;
+ *     $fe_off = 34 if $hi == 126;
+ *     ($hi_off, $lo_off) = ($lo_off, $hi_off) if ($hi <74 or $hi >125);
+ *     push(@out, ((($hi+$hi_off) >> 1)+ ($hi <74 ? 200:187)- $fe_off),
+ *          $lo + ((($hi+$lo_off) & 1) ? ($lo > 110 ? 34:16):128));
+ *   }
+ *   return pack("C*", @out);
+ */
+int uCheckAndGenJohabSymbol(
+    int32_t* state,
+    uint16_t in,
+    unsigned char* out,
+    uint32_t outbuflen,
+    uint32_t* outlen
+    )
+{
+  unsigned char row, col, rowAdjust, colAdjust, leadAdjust;
+
+  if (outbuflen < 2)
+    return 0;
+
+  row = (in >> 8) & 0x7F;
+  col = in & 0x7F;
+
+  /* Rows 73 and 126 get an extra downward shift of the lead byte. */
+  if (126 == row)
+    leadAdjust = 34;
+  else if (73 == row)
+    leadAdjust = 21;
+  else
+    leadAdjust = 0;
+
+  /* Rows outside 74..125 swap the two parity offsets. */
+  if ((row < 74) || (row > 125))
+  {
+    rowAdjust = 1;
+    colAdjust = 0;
+  }
+  else
+  {
+    rowAdjust = 0;
+    colAdjust = 1;
+  }
+
+  *outlen = 2;
+  out[0] = ((row + rowAdjust) >> 1) + ((row < 74) ? 200 : 187) - leadAdjust;
+  if ((row + colAdjust) & 1)
+    out[1] = col + ((col > 110) ? 34 : 16);
+  else
+    out[1] = col + 128;
+  return 1;
+}
+/*
+ * Checked generator: writes `in` as four bytes by splitting it into
+ * mixed-radix digits (radix 10, 126, 10 from the last byte upwards) and
+ * adding 0x81 to the first and third digit and 0x30 to the second and
+ * fourth. Presumably `in` is a linear index into the GB18030 four-byte
+ * space -- confirm against the mapping tables. Returns 0 and leaves *outlen
+ * untouched when fewer than 4 bytes are available; otherwise sets *outlen
+ * to 4 and returns 1. `state` is not used.
+ */
+int uCheckAndGen4BytesGB18030(
+    int32_t* state,
+    uint16_t in,
+    unsigned char* out,
+    uint32_t outbuflen,
+    uint32_t* outlen
+    )
+{
+  const uint16_t digit1 = in / (10*126*10);
+  const uint16_t rest1 = in % (10*126*10);
+  const uint16_t digit2 = rest1 / (10*126);
+  const uint16_t rest2 = rest1 % (10*126);
+
+  if (outbuflen < 4)
+    return 0;
+
+  out[0] = digit1 + 0x81;
+  out[1] = digit2 + 0x30;
+  out[2] = (rest2 / 10) + 0x81;
+  out[3] = (rest2 % 10) + 0x30;
+  *outlen = 4;
+  return 1;
+}
diff --git a/intl/uconv/util/umap.c b/intl/uconv/util/umap.c
new file mode 100644
index 000000000..43a41e591
--- /dev/null
+++ b/intl/uconv/util/umap.c
@@ -0,0 +1,175 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/* #include "PRIntlpriv.h" */
+#include "unicpriv.h"
+
+
+/* Maps `in` to its destination value using one map cell of table uT. */
+typedef uint16_t (* MapFormatFunc)(uint16_t in,const uTable *uT,const uMapCell *cell);
+/* Returns non-zero when `in` falls inside the source range of `cell`. */
+typedef int (* HitFormateFunc)(uint16_t in,const uMapCell *cell);
+/* Sets, in the bitmap `info`, the bit of every source value covered by `cell`. */
+typedef void (* FillInfoFormateFunc)(const uTable *uT, const uMapCell *cell, uint32_t* info);
+
+
+/* Forward declarations of the per-format handlers defined later in this file. */
+int uHitFormate0(uint16_t in,const uMapCell *cell);
+int uHitFormate2(uint16_t in,const uMapCell *cell);
+uint16_t uMapFormate0(uint16_t in,const uTable *uT,const uMapCell *cell);
+uint16_t uMapFormate1(uint16_t in,const uTable *uT,const uMapCell *cell);
+uint16_t uMapFormate2(uint16_t in,const uTable *uT,const uMapCell *cell);
+void uFillInfoFormate0(const uTable *uT,const uMapCell *cell,uint32_t* aInfo);
+void uFillInfoFormate1(const uTable *uT,const uMapCell *cell,uint32_t* aInfo);
+void uFillInfoFormate2(const uTable *uT,const uMapCell *cell,uint32_t* aInfo);
+
+
+/*
+ * NOTE(review): these two prototypes have no function definition in this
+ * file; macros with the same names are defined further down and are what
+ * uMapCode() actually uses.
+ */
+const uMapCell *uGetMapCell(const uTable *uT, int16_t item);
+char uGetFormat(const uTable *uT, int16_t item);
+
+
+/*=================================================================================
+
+=================================================================================*/
+/* Mapping function for each cell format, indexed by the format tag (uMap macro). */
+const MapFormatFunc m_map[uNumFormatTag] =
+{
+ uMapFormate0,
+ uMapFormate1,
+ uMapFormate2,
+};
+
+/*=================================================================================
+
+=================================================================================*/
+/* Bitmap-filling function for each cell format, in format-tag order. */
+const FillInfoFormateFunc m_fillinfo[uNumFormatTag] =
+{
+ uFillInfoFormate0,
+ uFillInfoFormate1,
+ uFillInfoFormate2,
+};
+
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Range test for each cell format, indexed by the format tag (uHit macro).
+ * Formats 0 and 1 share uHitFormate0 (srcBegin..srcEnd range test); format 2
+ * matches a single source value.
+ */
+const HitFormateFunc m_hit[uNumFormatTag] =
+{
+ uHitFormate0,
+ uHitFormate0,
+ uHitFormate2,
+};
+
+/* Calls the range test registered for `format`. */
+#define uHit(format,in,cell) (* m_hit[(format)])((in),(cell))
+/* Calls the mapping function registered for `format`. */
+#define uMap(format,in,uT,cell) (* m_map[(format)])((in),(uT),(cell))
+/*
+ * Address of map cell number `item`: the cell array starts
+ * offsetToMapCellArray 16-bit words after the start of the table and each
+ * cell occupies UMAPCELL_SIZE bytes.
+ */
+#define uGetMapCell(uT, item) ((uMapCell *)(((uint16_t *)uT) + (uT)->offsetToMapCellArray + (item)*(UMAPCELL_SIZE/sizeof(uint16_t))))
+/*
+ * Format tag of cell number `item`: the format array starts
+ * offsetToFormatArray 16-bit words after the start of the table and packs
+ * four 4-bit tags per word, lowest nibble first.
+ */
+#define uGetFormat(uT, item) (((((uint16_t *)uT) + (uT)->offsetToFormatArray)[(item)>> 2 ] >> (((item)% 4 ) << 2)) & 0x0f)
+
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Looks up `in` in mapping table `uT`.
+ *
+ * The map cells are scanned in order and the first cell whose source range
+ * contains `in` supplies the result. *out is always written: it holds the
+ * mapped value on success and NOMAPPING otherwise.
+ *
+ * Cells whose 4-bit format tag is not a known format (>= uNumFormatTag) are
+ * skipped instead of being used to index the 3-entry dispatch tables.
+ *
+ * Returns 1 when a cell matched and mapped `in` to something other than
+ * NOMAPPING, 0 otherwise.
+ */
+int uMapCode(const uTable *uT, uint16_t in, uint16_t* out)
+{
+  const uint16_t itemOfList = uT->itemOfList;
+  uint16_t i;
+
+  *out = NOMAPPING;
+  for(i=0;i<itemOfList;i++)
+  {
+    const uint16_t format = uGetFormat(uT,i);
+    const uMapCell* uCell;
+
+    if(format >= uNumFormatTag)
+      continue;
+
+    uCell = uGetMapCell(uT,i);
+    if(uHit(format, in, uCell))
+    {
+      /* Only the first matching cell is consulted. */
+      *out = uMap(format, in, uT,uCell);
+      return (*out != NOMAPPING);
+    }
+  }
+  return 0;
+}
+
+
+/*
+member function
+*/
+/*=================================================================================
+
+=================================================================================*/
+/* Returns 1 when srcBegin <= in <= srcEnd for the cell's range, else 0. */
+int uHitFormate0(uint16_t in,const uMapCell *cell)
+{
+  const uFormat0 *range = &cell->fmt.format0;
+
+  if (in < range->srcBegin)
+    return 0;
+  return (in <= range->srcEnd);
+}
+/*=================================================================================
+
+=================================================================================*/
+/* Returns 1 when `in` equals the single source value of the cell, else 0. */
+int uHitFormate2(uint16_t in,const uMapCell *cell)
+{
+  const uint16_t source = cell->fmt.format2.srcBegin;
+
+  return (source == in) ? 1 : 0;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Format 0 maps a contiguous source range onto a contiguous destination
+ * range: the result is destBegin plus the offset of `in` from srcBegin
+ * (truncated to 16 bits). `uT` is not used.
+ */
+uint16_t uMapFormate0(uint16_t in,const uTable *uT,const uMapCell *cell)
+{
+  const uint16_t offset = in - cell->fmt.format0.srcBegin;
+
+  return (offset + cell->fmt.format0.destBegin);
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Format 1 looks the result up in an explicit list: the list starts
+ * offsetToMappingTable + mappingOffset 16-bit words after the start of the
+ * table and is indexed by the offset of `in` from srcBegin.
+ */
+uint16_t uMapFormate1(uint16_t in,const uTable *uT,const uMapCell *cell)
+{
+  const uint16_t *mapping = ((const uint16_t *)uT)
+                            + uT->offsetToMappingTable
+                            + cell->fmt.format1.mappingOffset;
+
+  return mapping[in - cell->fmt.format1.srcBegin];
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Format 2 maps a single source value: the result is the cell's destBegin
+ * regardless of `in` and `uT`.
+ */
+uint16_t uMapFormate2(uint16_t in,const uTable *uT,const uMapCell *cell)
+{
+  const uint16_t mapped = cell->fmt.format2.destBegin;
+
+  return mapped;
+}
+
+/*
+ * Marks value `c` in the bitmap `info`: bit (c & 0x1f) of 32-bit word
+ * (c >> 5). The mask is built from an unsigned 32-bit one so that setting
+ * bit 31 does not overflow a signed 32-bit long, and the whole expansion is
+ * parenthesised so it behaves as a single expression.
+ */
+#define SET_REPRESENTABLE(info, c) ((info)[(c) >> 5] |= ((uint32_t)1 << ((c) & 0x1f)))
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Marks every source value of a format 0 cell (srcBegin..srcEnd inclusive)
+ * in the bitmap `info`, where value c is bit (c & 0x1f) of word (c >> 5).
+ * `uT` is not used.
+ *
+ * The range bounds are widened to 32 bits so the per-bit loop terminates
+ * even when srcEnd is 0xFFFF; a 16-bit counter would wrap to 0 and never
+ * exceed the bound.
+ */
+void uFillInfoFormate0(const uTable *uT,const uMapCell *cell,uint32_t* info)
+{
+  const uint32_t begin = cell->fmt.format0.srcBegin;
+  const uint32_t end = cell->fmt.format0.srcEnd;
+  uint32_t b = begin >> 5;
+  const uint32_t e = end >> 5;
+
+  if(b == e) /* both ends fall in the same 32-bit word */
+  {
+    uint32_t i;
+    for(i = begin; i <= end; i++)
+      SET_REPRESENTABLE(info, i);
+  }
+  else {
+    /* Partial first word, partial last word, then the full words between. */
+    info[ b ] |= ((uint32_t)0xFFFFFFFF << (begin & 0x1f));
+    info[ e ] |= ((uint32_t)0xFFFFFFFF >> (31 - (end & 0x1f)));
+    for(b++ ; b < e ; b++)
+      info[b] |= (uint32_t)0xFFFFFFFF;
+  }
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Marks, in the bitmap `info`, every source value of a format 1 cell whose
+ * entry in the cell's mapping list is not NOMAPPING. The list starts
+ * offsetToMappingTable + mappingOffset 16-bit words after the start of the
+ * table and is indexed by the offset from srcBegin.
+ *
+ * The loop counter is 32 bits wide so the loop terminates even when srcEnd
+ * is 0xFFFF; a 16-bit counter would wrap to 0 and never exceed the bound.
+ */
+void uFillInfoFormate1(const uTable *uT,const uMapCell *cell,uint32_t* info)
+{
+  const uint32_t begin = cell->fmt.format1.srcBegin;
+  const uint32_t end = cell->fmt.format1.srcEnd;
+  const uint16_t *base = ((const uint16_t *)uT) + uT->offsetToMappingTable
+                         + cell->fmt.format1.mappingOffset;
+  uint32_t i;
+
+  for(i = begin; i <= end; i++)
+  {
+    if(NOMAPPING != base[i - begin]) /* check every item */
+      SET_REPRESENTABLE(info, i);
+  }
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Marks the single source value of a format 2 cell in the bitmap `info`.
+ * `uT` is not used.
+ */
+void uFillInfoFormate2(const uTable *uT,const uMapCell *cell,uint32_t* info)
+{
+  const uint16_t source = cell->fmt.format2.srcBegin;
+
+  SET_REPRESENTABLE(info, source);
+}
+
diff --git a/intl/uconv/util/umap.h b/intl/uconv/util/umap.h
new file mode 100644
index 000000000..24266e7bf
--- /dev/null
+++ b/intl/uconv/util/umap.h
@@ -0,0 +1,53 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef __UMAP__
+#define __UMAP__
+
+/* Value stored by uMapCode() in *out when the input has no mapping. */
+#define NOMAPPING 0xfffd
+
+/*
+ * Map cell formats. uNumFormatTag is the number of formats and sizes the
+ * per-format dispatch tables in umap.c.
+ */
+enum {
+ uFormat0Tag = 0,
+ uFormat1Tag,
+ uFormat2Tag,
+ uNumFormatTag
+};
+
+/* Format 0: contiguous source range mapped onto a contiguous destination range. */
+typedef struct {
+ uint16_t srcBegin; /* 2 byte - first source value (inclusive) */
+ uint16_t srcEnd; /* 2 byte - last source value (inclusive) */
+ uint16_t destBegin; /* 2 byte - destination of srcBegin */
+} uFormat0;
+
+/* Format 1: source range mapped through an explicit list of destination values. */
+typedef struct {
+ uint16_t srcBegin; /* 2 byte - first source value (inclusive) */
+ uint16_t srcEnd; /* 2 byte - last source value (inclusive) */
+ uint16_t mappingOffset; /* 2 byte - start of the list, in 16-bit words past offsetToMappingTable */
+} uFormat1;
+
+/* Format 2: a single source value mapped to a single destination value. */
+typedef struct {
+ uint16_t srcBegin; /* 2 byte - the source value */
+ uint16_t srcEnd; /* 2 byte -waste */
+ uint16_t destBegin; /* 2 byte - the destination value */
+} uFormat2;
+
+/* One map cell; which member is valid is given by the cell's format tag. */
+typedef struct {
+ union {
+ uFormat0 format0;
+ uFormat1 format1;
+ uFormat2 format2;
+ } fmt;
+} uMapCell;
+
+/* Size in bytes of one map cell inside a table (three 16-bit fields). */
+#define UMAPCELL_SIZE (3*sizeof(uint16_t))
+
+/*
+ * Header of a mapping table. The three offsets are counted in 16-bit words
+ * from the start of the table.
+ */
+typedef struct {
+ uint16_t itemOfList; /* number of map cells */
+ uint16_t offsetToFormatArray; /* packed 4-bit format tags, four per word */
+ uint16_t offsetToMapCellArray; /* array of map cells */
+ uint16_t offsetToMappingTable; /* base of the format 1 mapping lists */
+ uint16_t data[1]; /* presumably the first word of the table payload - confirm */
+} uTable;
+
+#endif
diff --git a/intl/uconv/util/unicpriv.h b/intl/uconv/util/unicpriv.h
new file mode 100644
index 000000000..b14f10c35
--- /dev/null
+++ b/intl/uconv/util/unicpriv.h
@@ -0,0 +1,52 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef __UNIPRIV__
+#define __UNIPRIV__
+
+#include <stdint.h>
+#include "umap.h"
+#include "uconvutil.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Looks up `in` in mapping table uT. Always writes *out; returns non-zero
+ * only when a mapping other than NOMAPPING was found.
+ */
+int uMapCode(const uTable *uT,
+ uint16_t in,
+ uint16_t* out);
+
+/*
+ * Encodes `in` into at most outbuflen bytes at `out` using the generator for
+ * scanClass. Returns non-zero and sets *outlen to the number of bytes
+ * written on success; returns 0 when nothing was written.
+ */
+int uGenerate(uScanClassID scanClass,
+ int32_t* state,
+ uint16_t in,
+ unsigned char* out,
+ uint32_t outbuflen,
+ uint32_t* outlen);
+
+/*
+ * Decoding counterpart of uGenerate(), implemented in uscan.c.
+ * NOTE(review): presumably reads up to inbuflen bytes from `in`, stores one
+ * 16-bit value in *out and the number of bytes consumed in *inscanlen --
+ * confirm against the definition.
+ */
+int uScan(uScanClassID scanClass,
+ int32_t *state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen);
+
+/*
+ * Encodes `in` using the first cell of the shift-out table whose byte ranges
+ * contain it. Returns 0 when no cell matches or the buffer is smaller than
+ * the cell's reserved length; otherwise sets *outlen and returns the
+ * sub-generator's result.
+ */
+int uGenerateShift(uShiftOutTable *shift,
+ int32_t* state,
+ uint16_t in,
+ unsigned char* out,
+ uint32_t outbuflen,
+ uint32_t* outlen);
+
+/*
+ * Decoding counterpart of uGenerateShift(), implemented in uscan.c.
+ * NOTE(review): parameter meanings presumably mirror uScan() -- confirm
+ * against the definition.
+ */
+int uScanShift(uShiftInTable *shift,
+ int32_t *state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __UNIPRIV__ */
diff --git a/intl/uconv/util/uscan.c b/intl/uconv/util/uscan.c
new file mode 100644
index 000000000..0abdd0c00
--- /dev/null
+++ b/intl/uconv/util/uscan.c
@@ -0,0 +1,759 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "unicpriv.h"
+#define CHK_GR94(b) ( (uint8_t) 0xa0 < (uint8_t) (b) && (uint8_t) (b) < (uint8_t) 0xff ) /* true when b is in [0xa1,0xfe]; evaluates b twice */
+#define CHK_GR94_2Byte(b1,b2) (CHK_GR94(b1) && CHK_GR94(b2)) /* true when both bytes are in [0xa1,0xfe] */
+/*=================================================================================
+ uSubScannerFunc: signature of a sub-scanner, the element type of m_subscanner.
+ A sub-scanner reads bytes from `in` and stores one 16-bit value in *out.
+ It receives no buffer length and reports no consumed length.
+=================================================================================*/
+typedef int (*uSubScannerFunc) (unsigned char* in, uint16_t* out);
+/*=================================================================================
+ uScannerFunc: signature shared by every entry of m_scanner.
+   state     - passed through by uScan; no scanner in this file reads or
+               writes it
+   in        - input bytes
+   out       - receives the scanned 16-bit value
+   inbuflen  - number of bytes available at `in`
+   inscanlen - receives the number of input bytes consumed
+ Returns 1 when *out and *inscanlen were set, 0 otherwise.
+=================================================================================*/
+
+typedef int (*uScannerFunc) (
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ );
+/* Prototype of uScan, repeated from unicpriv.h which is included above. */
+int uScan(
+ uScanClassID scanClass,
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ );
+/* Call entry `sub` of m_subscanner with (in, out). `sub` is not range-checked. */
+#define uSubScanner(sub,in,out) (* m_subscanner[sub])((in),(out))
+/* Forward declarations of the fixed-length scanners and of uScanShift, all
+ * defined later in this file. */
+int uCheckAndScanAlways1Byte(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ );
+int uCheckAndScanAlways2Byte(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ );
+int uCheckAndScanAlways2ByteShiftGR(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ );
+int uCheckAndScanAlways2ByteGR128(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ );
+int uScanShift(
+ uShiftInTable *shift,
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ );
+/* Forward declarations of the prefix-byte scanners (8F and 8E A2..A7) and of
+ * the Hangul/Johab scanners, all defined later in this file. */
+int uCheckAndScan2ByteGRPrefix8F(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ );
+int uCheckAndScan2ByteGRPrefix8EA2(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ );
+int uCheckAndScan2ByteGRPrefix8EA3(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ );
+int uCheckAndScan2ByteGRPrefix8EA4(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ );
+int uCheckAndScan2ByteGRPrefix8EA5(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ );
+int uCheckAndScan2ByteGRPrefix8EA6(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ );
+int uCheckAndScan2ByteGRPrefix8EA7(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ );
+int uCnSAlways8BytesDecomposedHangul(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ );
+int uCheckAndScanJohabHangul(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ );
+int uCheckAndScanJohabSymbol(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ );
+/* Forward declaration of the four-byte GB18030 scanner defined later in this file. */
+int uCheckAndScan4BytesGB18030(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ );
+/* Forward declarations of the sub-scanners stored in m_subscanner. */
+int uScanAlways2Byte(
+ unsigned char* in,
+ uint16_t* out
+ );
+int uScanAlways2ByteShiftGR(
+ unsigned char* in,
+ uint16_t* out
+ );
+int uScanAlways1Byte(
+ unsigned char* in,
+ uint16_t* out
+ );
+int uScanAlways1BytePrefix8E(
+ unsigned char* in,
+ uint16_t* out
+ );
+/*=================================================================================
+ m_scanner: dispatch table used by uScan(); m_scanner[scanClass] is the
+ function called for a given uScanClassID value.
+ NOTE(review): the order of the 15 entries therefore has to agree with the
+ uScanClassID enumeration, which is declared outside this file - confirm
+ whenever an entry is added or moved.
+=================================================================================*/
+const uScannerFunc m_scanner[uNumOfCharsetType] =
+{
+ uCheckAndScanAlways1Byte, /* index 0 */
+ uCheckAndScanAlways2Byte, /* index 1 */
+ uCheckAndScanAlways2ByteShiftGR, /* index 2 */
+ uCheckAndScan2ByteGRPrefix8F, /* index 3 */
+ uCheckAndScan2ByteGRPrefix8EA2, /* index 4 */
+ uCheckAndScan2ByteGRPrefix8EA3, /* index 5 */
+ uCheckAndScan2ByteGRPrefix8EA4, /* index 6 */
+ uCheckAndScan2ByteGRPrefix8EA5, /* index 7 */
+ uCheckAndScan2ByteGRPrefix8EA6, /* index 8 */
+ uCheckAndScan2ByteGRPrefix8EA7, /* index 9 */
+ uCnSAlways8BytesDecomposedHangul, /* index 10 */
+ uCheckAndScanJohabHangul, /* index 11 */
+ uCheckAndScanJohabSymbol, /* index 12 */
+ uCheckAndScan4BytesGB18030, /* index 13 */
+ uCheckAndScanAlways2ByteGR128 /* index 14 */
+};
+
+/*=================================================================================
+ m_subscanner: table of sub-scanners reached through the uSubScanner() macro;
+ uScanShift indexes it with the classID field of the matching shift cell.
+ NOTE(review): the order of the 4 entries has to agree with the enumeration
+ that provides uNumOfCharType, declared outside this file - confirm.
+=================================================================================*/
+
+const uSubScannerFunc m_subscanner[uNumOfCharType] =
+{
+ uScanAlways1Byte, /* index 0 */
+ uScanAlways2Byte, /* index 1 */
+ uScanAlways2ByteShiftGR, /* index 2 */
+ uScanAlways1BytePrefix8E /* index 3 */
+};
+/*=================================================================================
+ uScan: public entry point. Forwards all arguments to m_scanner[scanClass]
+ and returns that scanner's result (1 = value produced, 0 = nothing produced).
+ scanClass is used as an array index without a range check, so the caller
+ must pass a value below uNumOfCharsetType.
+=================================================================================*/
+int uScan(
+ uScanClassID scanClass,
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ )
+{
+ return (* m_scanner[scanClass]) (state,in,out,inbuflen,inscanlen);
+}
+/*=================================================================================
+ uScanAlways1Byte: sub-scanner that stores in[0], widened to 16 bits, in *out.
+ Always returns 1.
+=================================================================================*/
+int uScanAlways1Byte(
+ unsigned char* in,
+ uint16_t* out
+ )
+{
+ *out = (uint16_t) in[0];
+ return 1;
+}
+
+/*=================================================================================
+ uScanAlways2Byte: sub-scanner that stores the two bytes in[0], in[1] in *out
+ with in[0] as the high byte. Reads both bytes without any length check.
+ Always returns 1.
+=================================================================================*/
+int uScanAlways2Byte(
+ unsigned char* in,
+ uint16_t* out
+ )
+{
+ *out = (uint16_t) (( in[0] << 8) | (in[1]));
+ return 1;
+}
+/*=================================================================================
+ uScanAlways2ByteShiftGR: sub-scanner that combines in[0] (high byte) and
+ in[1] (low byte) and clears the top bit of each byte with the 0x7F7F mask
+ before storing the value in *out. No length or range check. Always returns 1.
+=================================================================================*/
+int uScanAlways2ByteShiftGR(
+ unsigned char* in,
+ uint16_t* out
+ )
+{
+ *out = (uint16_t) ((( in[0] << 8) | (in[1])) & 0x7F7F);
+ return 1;
+}
+
+/*=================================================================================
+ uScanAlways1BytePrefix8E: sub-scanner that stores the second byte, in[1],
+ in *out. in[0] is skipped and not examined here.
+ NOTE(review): per the function name in[0] is presumably a 0x8E prefix that
+ the caller has already matched - confirm against the shift tables.
+ Always returns 1.
+=================================================================================*/
+int uScanAlways1BytePrefix8E(
+ unsigned char* in,
+ uint16_t* out
+ )
+{
+ *out = (uint16_t) in[1];
+ return 1;
+}
+/*=================================================================================
+ uCheckAndScanAlways1Byte: consume exactly one byte and store it, widened to
+ 16 bits, in *out. state and inbuflen are not used. Always returns 1.
+=================================================================================*/
+int uCheckAndScanAlways1Byte(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ )
+{
+ /* inbuflen is deliberately not checked: the caller must guarantee that at
+ * least one byte is available. */
+ *inscanlen = 1;
+ *out = (uint16_t) in[0];
+
+ return 1;
+}
+
+/*=================================================================================
+ uCheckAndScanAlways2Byte: consume two bytes and store them in *out with
+ in[0] as the high byte.
+ Returns 0, leaving *out and *inscanlen untouched, when fewer than two bytes
+ are available; returns 1 otherwise. state is not used.
+=================================================================================*/
+int uCheckAndScanAlways2Byte(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ )
+{
+ if(inbuflen < 2) /* not enough input yet */
+ return 0;
+ else
+ {
+ *inscanlen = 2;
+ *out = ((in[0] << 8) | ( in[1])) ;
+ return 1;
+ }
+}
+/*=================================================================================
+ uCheckAndScanAlways2ByteShiftGR: consume two bytes; when the second byte is
+ in [0xa1,0xfe] store both bytes with their top bits cleared (mask 0x7F7F),
+ otherwise store the marker value 0xFF.
+ Returns 0, leaving the outputs untouched, when fewer than two bytes are
+ available; returns 1 otherwise. state is not used.
+=================================================================================*/
+int uCheckAndScanAlways2ByteShiftGR(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ )
+{
+ /*
+ * Both bytes should be in the range of [0xa1,0xfe] for 94x94 character sets
+ * invoked on GR. No encoding implemented in Mozilla uses 96x96 char. sets.
+ * Only the 2nd byte range needs to be checked here because the
+ * 1st byte is checked before calling this in nsUnicodeDecodeHelper.cpp
+ */
+ if(inbuflen < 2) /* will lead to NS_OK_UDEC_MOREINPUT */
+ return 0;
+ else if (! CHK_GR94(in[1]))
+ {
+ *inscanlen = 2; /* the invalid pair is still consumed */
+ *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
+ return 1;
+ }
+ else
+ {
+ *inscanlen = 2;
+ *out = (((in[0] << 8) | ( in[1])) & 0x7F7F);
+ return 1;
+ }
+}
+/*=================================================================================
+ uCheckAndScanAlways2ByteGR128: consume two bytes; when the second byte is
+ at least 0x41 store both bytes unmasked (in[0] high, in[1] low), otherwise
+ store the marker value 0xFF.
+ Returns 0, leaving the outputs untouched, when fewer than two bytes are
+ available; returns 1 otherwise. state is not used.
+=================================================================================*/
+int uCheckAndScanAlways2ByteGR128(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ )
+{
+ /*
+ * The first byte should be in [0xa1,0xfe]
+ * and the second byte in [0x41,0xfe]
+ * Used by CP949 -> Unicode converter.
+ * Only the 2nd byte range needs to be checked here because the
+ * 1st byte is checked before calling this in nsUnicodeDecodeHelper.cpp
+ * NOTE(review): only the lower bound 0x41 is enforced below; a second byte
+ * of 0xff is passed on as part of the 16-bit value. Presumably the mapping
+ * table has no entry for it - confirm.
+ */
+ if(inbuflen < 2) /* will lead to NS_OK_UDEC_MOREINPUT */
+ return 0;
+ else if (in[1] < 0x41) /* 2nd byte range check */
+ {
+ *inscanlen = 2; /* the invalid pair is still consumed */
+ *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
+ return 1;
+ }
+ else
+ {
+ *inscanlen = 2;
+ *out = (in[0] << 8) | in[1];
+ return 1;
+ }
+}
+/*=================================================================================
+ uScanShift: table-driven scan. Walks the numOfItem cells of `shift` in
+ order and uses the first cell whose [shiftin_Min, shiftin_Max] range
+ contains in[0]:
+   - fewer than reserveLen bytes available: return 0;
+   - otherwise set *inscanlen to reserveLen and return the result of the
+     sub-scanner selected by the cell's classID.
+ Returns 0 when no cell matches. in[0] is read before inbuflen is looked at,
+ so the caller must supply at least one byte. state is not used.
+=================================================================================*/
+int uScanShift(
+ uShiftInTable *shift,
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ )
+{
+ int16_t i;
+ const uShiftInCell* cell = &(shift->shiftcell[0]);
+ int16_t itemnum = shift->numOfItem;
+ for(i=0;i<itemnum;i++)
+ {
+ if( ( in[0] >= cell[i].shiftin_Min) &&
+ ( in[0] <= cell[i].shiftin_Max))
+ {
+ if(inbuflen < cell[i].reserveLen) /* sequence not complete yet */
+ return 0;
+ else
+ {
+ *inscanlen = cell[i].reserveLen;
+ return (uSubScanner(cell[i].classID,in,out));
+ }
+ }
+ }
+ return 0; /* first byte matched no cell */
+}
+/*=================================================================================
+ uCheckAndScan2ByteGRPrefix8F: scan a three-byte sequence 8F b1 b2.
+   - fewer than 3 bytes, or in[0] != 0x8F: return 0, outputs untouched;
+   - b1 outside [0xa1,0xfe]: consume 2 bytes, store marker 0xFF, return 1;
+   - b2 outside [0xa1,0xfe]: consume 3 bytes, store marker 0xFF, return 1;
+   - otherwise consume 3 bytes and store (b1 << 8 | b2) & 0x7F7F, return 1.
+ state is not used.
+=================================================================================*/
+int uCheckAndScan2ByteGRPrefix8F(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ )
+{
+ if((inbuflen < 3) ||(in[0] != 0x8F))
+ return 0;
+ else if (! CHK_GR94(in[1])) /* 2nd byte range check */
+ {
+ *inscanlen = 2; /* the third byte is left for the next scan */
+ *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
+ return 1;
+ }
+ else if (! CHK_GR94(in[2])) /* 3rd byte range check */
+ {
+ *inscanlen = 3;
+ *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
+ return 1;
+ }
+ else
+ {
+ *inscanlen = 3;
+ *out = (((in[1] << 8) | ( in[2])) & 0x7F7F);
+ return 1;
+ }
+}
+/*=================================================================================
+
+=================================================================================*/
+
+/* Macro definition to use for uCheckAndScan2ByteGRPrefix8EAX()
+ * where X is 2,3,4,5,6,7
+ *
+ * The macro is the complete function body for a four-byte sequence
+ * 8E PREFIX b2 b3 and relies on the enclosing function's parameters
+ * in, out, inbuflen and inscanlen:
+ *   - fewer than 4 bytes, or in[0] != 0x8E: return 0, outputs untouched;
+ *   - in[1] != PREFIX: consume 2 bytes, store marker 0xFF, return 1;
+ *   - b2 outside [0xa1,0xfe]: consume 3 bytes, store marker 0xFF, return 1;
+ *   - b3 outside [0xa1,0xfe]: consume 4 bytes, store marker 0xFF, return 1;
+ *   - otherwise consume 4 bytes, store (b2 << 8 | b3) & 0x7F7F, return 1.
+ */
+#define CNS_8EAX_4BYTE(PREFIX) \
+ if((inbuflen < 4) || (in[0] != 0x8E)) \
+ return 0; \
+ else if((in[1] != (PREFIX))) \
+ { \
+ *inscanlen = 2; \
+ *out = 0xFF; \
+ return 1; \
+ } \
+ else if(! CHK_GR94(in[2])) \
+ { \
+ *inscanlen = 3; \
+ *out = 0xFF; \
+ return 1; \
+ } \
+ else if(! CHK_GR94(in[3])) \
+ { \
+ *inscanlen = 4; \
+ *out = 0xFF; \
+ return 1; \
+ } \
+ else \
+ { \
+ *inscanlen = 4; \
+ *out = (((in[2] << 8) | ( in[3])) & 0x7F7F); \
+ return 1; \
+ }
+/* uCheckAndScan2ByteGRPrefix8EA2: scan a four-byte sequence 8E A2 b2 b3.
+ * The whole body is the CNS_8EAX_4BYTE macro with prefix 0xA2: on success it
+ * consumes 4 bytes and stores (b2 << 8 | b3) & 0x7F7F; it returns 0 when
+ * fewer than 4 bytes are available or in[0] is not 0x8E. */
+int uCheckAndScan2ByteGRPrefix8EA2(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ )
+{
+ CNS_8EAX_4BYTE(0xA2)
+}
+
+/*=================================================================================
+ uCheckAndScan2ByteGRPrefix8EA3: scan a four-byte sequence 8E A3 b2 b3.
+ The whole body is the CNS_8EAX_4BYTE macro with prefix 0xA3: on success it
+ consumes 4 bytes and stores (b2 << 8 | b3) & 0x7F7F; it returns 0 when
+ fewer than 4 bytes are available or in[0] is not 0x8E.
+=================================================================================*/
+int uCheckAndScan2ByteGRPrefix8EA3(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ )
+{
+ CNS_8EAX_4BYTE(0xA3)
+}
+/*=================================================================================
+ uCheckAndScan2ByteGRPrefix8EA4: scan a four-byte sequence 8E A4 b2 b3.
+ The whole body is the CNS_8EAX_4BYTE macro with prefix 0xA4: on success it
+ consumes 4 bytes and stores (b2 << 8 | b3) & 0x7F7F; it returns 0 when
+ fewer than 4 bytes are available or in[0] is not 0x8E.
+=================================================================================*/
+int uCheckAndScan2ByteGRPrefix8EA4(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ )
+{
+ CNS_8EAX_4BYTE(0xA4)
+}
+/*=================================================================================
+ uCheckAndScan2ByteGRPrefix8EA5: scan a four-byte sequence 8E A5 b2 b3.
+ The whole body is the CNS_8EAX_4BYTE macro with prefix 0xA5: on success it
+ consumes 4 bytes and stores (b2 << 8 | b3) & 0x7F7F; it returns 0 when
+ fewer than 4 bytes are available or in[0] is not 0x8E.
+=================================================================================*/
+int uCheckAndScan2ByteGRPrefix8EA5(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ )
+{
+ CNS_8EAX_4BYTE(0xA5)
+}
+/*=================================================================================
+ uCheckAndScan2ByteGRPrefix8EA6: scan a four-byte sequence 8E A6 b2 b3.
+ The whole body is the CNS_8EAX_4BYTE macro with prefix 0xA6: on success it
+ consumes 4 bytes and stores (b2 << 8 | b3) & 0x7F7F; it returns 0 when
+ fewer than 4 bytes are available or in[0] is not 0x8E.
+=================================================================================*/
+int uCheckAndScan2ByteGRPrefix8EA6(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ )
+{
+ CNS_8EAX_4BYTE(0xA6)
+}
+/*=================================================================================
+ uCheckAndScan2ByteGRPrefix8EA7: scan a four-byte sequence 8E A7 b2 b3.
+ The whole body is the CNS_8EAX_4BYTE macro with prefix 0xA7: on success it
+ consumes 4 bytes and stores (b2 << 8 | b3) & 0x7F7F; it returns 0 when
+ fewer than 4 bytes are available or in[0] is not 0x8E.
+=================================================================================*/
+int uCheckAndScan2ByteGRPrefix8EA7(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ )
+{
+ CNS_8EAX_4BYTE(0xA7)
+}
+/*=================================================================================
+ Constants for composing a Hangul syllable code from a leading-consonant
+ index L, a vowel index V and a trailing-consonant index T:
+     (L * VCount + V) * TCount + T + SBase
+ Only SBase, VCount and TCount are referenced in this file; SCount, LCount
+ and NCount are defined but unused here.
+=================================================================================*/
+#define SBase 0xAC00 /* code of the syllable with L = V = T = 0 */
+#define SCount 11172 /* equals LCount * VCount * TCount */
+#define LCount 19 /* number of leading-consonant indices */
+#define VCount 21 /* number of vowel indices */
+#define TCount 28 /* number of trailing-consonant indices, index 0 included */
+#define NCount (VCount * TCount)
+/* uCnSAlways8BytesDecomposedHangul: scan an 8-byte sequence of the form
+ *     A4 D4  A4 l  A4 v  A4 t
+ * and compose the three trailing bytes into one Hangul syllable code.
+ *   l (in[3]) must be in [0xa1,0xbe] and have an entry in lMap;
+ *   v (in[5]) must be in [0xbf,0xd3];
+ *   t (in[7]) is either 0xd4 (trailing index 0) or in [0xa1,0xbe] with an
+ *             entry in tMap.
+ * Returns 0, leaving *out and *inscanlen untouched, on any violation;
+ * otherwise consumes 8 bytes, stores the composed code and returns 1.
+ * state is not used. */
+int uCnSAlways8BytesDecomposedHangul(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ )
+{
+
+ uint16_t LIndex, VIndex, TIndex;
+ /* reject input shorter than 8 bytes, input that does not start with A4 D4,
+ * and input whose bytes 2, 4 and 6 are not all A4 */
+ if((inbuflen < 8) || (0xa4 != in[0]) || (0xd4 != in[1]) ||
+ (0xa4 != in[2] ) || (0xa4 != in[4]) || (0xa4 != in[6]))
+ return 0;
+
+ /* Compute LIndex */
+ if((in[3] < 0xa1) || (in[3] > 0xbe)) { /* illegal leading consonant */
+ return 0;
+ }
+ else {
+ /* indexed by in[3] - 0xa1; 0xff marks a byte with no leading index */
+ static const uint8_t lMap[] = {
+ /* A1 A2 A3 A4 A5 A6 A7 */
+ 0, 1,0xff, 2,0xff,0xff, 3,
+ /* A8 A9 AA AB AC AD AE AF */
+ 4, 5,0xff,0xff,0xff,0xff,0xff,0xff,
+ /* B0 B1 B2 B3 B4 B5 B6 B7 */
+ 0xff, 6, 7, 8,0xff, 9, 10, 11,
+ /* B8 B9 BA BB BC BD BE */
+ 12, 13, 14, 15, 16, 17, 18
+ };
+
+ LIndex = lMap[in[3] - 0xa1];
+ if(0xff == (0xff & LIndex))
+ return 0;
+ }
+
+ /* Compute VIndex */
+ if((in[5] < 0xbf) || (in[5] > 0xd3)) { /* illegal medial vowel */
+ return 0;
+ }
+ else {
+ VIndex = in[5] - 0xbf; /* 0xbf..0xd3 maps linearly to 0..20 */
+ }
+
+ /* Compute TIndex */
+ if(0xd4 == in[7]) /* 0xd4 in the last position selects trailing index 0 */
+ {
+ TIndex = 0;
+ }
+ else if((in[7] < 0xa1) || (in[7] > 0xbe)) {/* illegal trailing consonant */
+ return 0;
+ }
+ else {
+ /* indexed by in[7] - 0xa1; 0xff marks a byte with no trailing index */
+ static const uint8_t tMap[] = {
+ /* A1 A2 A3 A4 A5 A6 A7 */
+ 1, 2, 3, 4, 5, 6, 7,
+ /* A8 A9 AA AB AC AD AE AF */
+ 0xff, 8, 9, 10, 11, 12, 13, 14,
+ /* B0 B1 B2 B3 B4 B5 B6 B7 */
+ 15, 16, 17,0xff, 18, 19, 20, 21,
+ /* B8 B9 BA BB BC BD BE */
+ 22,0xff, 23, 24, 25, 26, 27
+ };
+ TIndex = tMap[in[7] - 0xa1];
+ if(0xff == (0xff & TIndex))
+ return 0;
+ }
+
+ *inscanlen = 8;
+ /* the following line is from Unicode 2.0 page 3-13 item 5 */
+ *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase;
+
+ return 1;
+}
+/*=================================================================================
+ uCheckAndScanJohabHangul: scan a two-byte code whose top bit is set and
+ whose remaining 15 bits are three 5-bit fields (bits 14-10, 9-5, 4-0).
+ The fields are translated through lMap, vMap and tMap into leading, vowel
+ and trailing indices, which are composed into one Hangul syllable code.
+ Returns 0, leaving the outputs untouched, when fewer than 2 bytes are
+ available, when the top bit is clear, or when any field maps to 0xff;
+ otherwise consumes 2 bytes and returns 1. state is not used.
+=================================================================================*/
+
+int uCheckAndScanJohabHangul(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ )
+{
+/* Historical note: an earlier comment here said this code was to be wrapped *
+ * in #if 0 until it was fully tested; no #if 0 is present, it is compiled. */
+ if(inbuflen < 2)
+ return 0;
+ else {
+ /*
+ * See Table 4-45 Johab Encoding's Five-Bit Binary Patterns in page 183
+ * of "CJKV Information Processing" for details
+ * In all three tables 0xff marks a 5-bit pattern that is rejected.
+ */
+ static const uint8_t lMap[32]={ /* 19 valid entries */
+ 0xff,0xff,0, 1, 2, 3, 4, 5, /* 0-7 */
+ 6, 7, 8, 9, 10, 11, 12, 13, /* 8-15 */
+ 14, 15, 16, 17, 18, 0xff,0xff,0xff, /* 16-23 */
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff /* 24-31 */
+ };
+ static const uint8_t vMap[32]={ /* 21 valid entries */
+ 0xff,0xff,0xff,0, 1, 2, 3, 4, /* 0-7 */
+ 0xff,0xff,5, 6, 7, 8, 9, 10, /* 8-15 */
+ 0xff,0xff,11, 12, 13, 14, 15, 16, /* 16-23 */
+ 0xff,0xff,17, 18, 19, 20, 0xff,0xff /* 24-31 */
+ };
+ static const uint8_t tMap[32]={ /* 28 valid entries (0..27) */
+ 0xff,0, 1, 2, 3, 4, 5, 6, /* 0-7 */
+ 7, 8, 9, 10, 11, 12, 13, 14, /* 8-15 */
+ 15, 16, 0xff,17, 18, 19, 20, 21, /* 16-23 */
+ 22, 23, 24, 25, 26, 27, 0xff,0xff /* 24-31 */
+ };
+ uint16_t ch = (in[0] << 8) | in[1];
+ uint16_t LIndex, VIndex, TIndex;
+ if(0 == (0x8000 & ch)) /* the top bit of the first byte must be set */
+ return 0;
+ LIndex=lMap[(ch>>10)& 0x1F];
+ VIndex=vMap[(ch>>5) & 0x1F];
+ TIndex=tMap[(ch>>0) & 0x1F];
+ if((0xff==(LIndex)) ||
+ (0xff==(VIndex)) ||
+ (0xff==(TIndex)))
+ return 0;
+ /* the following line is from Unicode 2.0 page 3-13 item 5 */
+ *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase;
+ *inscanlen = 2;
+ return 1;
+ }
+}
+int uCheckAndScanJohabSymbol(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ )
+{
+ if(inbuflen < 2) /* need both bytes; outputs are left untouched */
+ return 0;
+ else {
+ /*
+ * Consume two bytes (hi, lo) and store a 16-bit value whose high byte is
+ * computed from hi (adjusted by lo, offset and d8_off) and whose low
+ * byte is computed from lo, following the Perl routine quoted below.
+ * Always returns 1 once two bytes are available. state is not used.
+ * NOTE(review): hi and lo are not range-checked here; for hi below 200
+ * the subtraction is negative before the left shift. Callers presumably
+ * restrict the first byte - confirm.
+ *
+ * The following code is based on the Perl code listed under
+ * "Johab to ISO-2022-KR or EUC-KR Conversion" in page 1014 of
+ * "CJKV Information Processing" by Ken Lunde <lunde@adobe.com>
+ *
+ * sub johab2ks ($) { # Convert Johab to ISO-2022-KR
+ * my @johab = unpack("C*", $_[0]);
+ * my ($offset, $d8_off) = (0,0);
+ * my @out = ();
+ * while(($hi, $lo) = splice($johab, 0, 2)) {
+ * $offset = 1 if ($hi > 223 and $hi < 250);
+ * $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42));
+ * push (@out, (((($hi - ($hi < 223 ? 200 : 187)) << 1) -
+ * ($lo < 161 ? 1 : 0) + $offset) + $d8_off),
+ * $lo - ($lo < 161 ? ($lo > 126 ? 34 : 16) : 128 ));
+ * }
+ * return pack ("C*", @out);
+ * }
+ * additional comments from Ken Lunde
+ * $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42));
+ * has three possible return values:
+ * 0 if $hi is not equal to 216
+ * 94 if $hi is equal to 216 and if $lo is greater than 160
+ * 42 if $hi is equal to 216 and if $lo is not greater than 160
+ */
+ unsigned char hi = in[0];
+ unsigned char lo = in[1];
+ uint16_t offset = (( hi > 223 ) && ( hi < 250)) ? 1 : 0;
+ uint16_t d8_off = 0;
+ if(216 == hi) {
+ if( lo > 160)
+ d8_off = 94;
+ else
+ d8_off = 42;
+ }
+
+ *out = (((((hi - ((hi < 223) ? 200 : 187)) << 1) -
+ (lo < 161 ? 1 : 0) + offset) + d8_off) << 8 ) |
+ (lo - ((lo < 161) ? ((lo > 126) ? 34 : 16) :
+ 128));
+ *inscanlen = 2;
+ return 1;
+ }
+}
+int uCheckAndScan4BytesGB18030(
+ int32_t* state,
+ unsigned char *in,
+ uint16_t *out,
+ uint32_t inbuflen,
+ uint32_t* inscanlen
+ )
+{ /* Scan one four-byte sequence b0 b1 b2 b3 and turn it into a linear index.
+ * b0 and b2 must be in [0x81,0xfe]; b1 and b3 must be in [0x30,0x39].
+ * Returns 0, leaving *out and *inscanlen untouched, when fewer than 4
+ * bytes are available or any byte is out of range. Otherwise consumes
+ * 4 bytes, stores the index (or 0xFFFD when it does not fit in 16 bits)
+ * and returns 1. state is not used. */
+ uint32_t data;
+ if(inbuflen < 4)
+ return 0;
+
+ if((in[0] < 0x81 ) || (0xfe < in[0]))
+ return 0;
+ if((in[1] < 0x30 ) || (0x39 < in[1]))
+ return 0;
+ if((in[2] < 0x81 ) || (0xfe < in[2]))
+ return 0;
+ if((in[3] < 0x30 ) || (0x39 < in[3]))
+ return 0;
+ /* mixed-radix index: b0 has 126 values, b1 10, b2 126, b3 10 */
+ data = (((((in[0] - 0x81) * 10 + (in[1] - 0x30)) * 126) +
+ (in[2] - 0x81)) * 10 ) + (in[3] - 0x30);
+
+ *inscanlen = 4;
+ *out = (data < 0x00010000) ? data : 0xFFFD; /* indices of 0x10000 and above become 0xFFFD */
+ return 1;
+}