summary | refs | log | tree | commit | diff | stats
path: root/intl/uconv/ucvtw/nsBIG5ToUnicode.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/uconv/ucvtw/nsBIG5ToUnicode.cpp')
-rw-r--r-- intl/uconv/ucvtw/nsBIG5ToUnicode.cpp 165
1 files changed, 165 insertions, 0 deletions
diff --git a/intl/uconv/ucvtw/nsBIG5ToUnicode.cpp b/intl/uconv/ucvtw/nsBIG5ToUnicode.cpp
new file mode 100644
index 000000000..8dbf84a14
--- /dev/null
+++ b/intl/uconv/ucvtw/nsBIG5ToUnicode.cpp
@@ -0,0 +1,165 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsBIG5ToUnicode.h"
+#include "mozilla/BinarySearch.h"
+#include "mozilla/ArrayUtils.h"
+#include "nsBIG5Data.h"
+
+// A fresh decoder carries no state: there is neither a second code unit
+// waiting to be written nor a lead byte waiting for its trail byte.
+nsBIG5ToUnicode::nsBIG5ToUnicode()
+  : mPendingTrail(0), mBig5Lead(0)
+{
+}
+
+NS_IMETHODIMP
+nsBIG5ToUnicode::Convert(const char* aSrc,
+ int32_t* aSrcLength,
+ char16_t* aDest,
+ int32_t* aDestLength)
+{ /* Decodes BIG5 bytes from aSrc into UTF-16 code units written to aDest.
+   *
+   * aSrcLength:  in  - number of bytes readable at aSrc;
+   *              out - number of bytes consumed. On the error-signalling
+   *                    paths this can be -1 (see the note near the end).
+   * aDestLength: in  - number of char16_t slots available at aDest;
+   *              out - number of code units written.
+   *
+   * State carried between calls: mBig5Lead holds a lead byte whose trail
+   * byte has not been seen yet; mPendingTrail holds the second code unit of
+   * a two-unit result that did not fit in the previous output buffer.
+   *
+   * Returns NS_OK when all input was consumed with no lead byte outstanding,
+   * NS_OK_UDEC_MOREINPUT when input ended right after a lead byte,
+   * NS_OK_UDEC_MOREOUTPUT when aDest filled up, and NS_ERROR_ILLEGAL_INPUT
+   * for undecodable input when mErrBehavior == kOnError_Signal (otherwise
+   * U+FFFD is written and decoding continues). */
+ // We'll be doing comparisons as unsigned.
+ const uint8_t* in = reinterpret_cast<const uint8_t*>(aSrc);
+ const uint8_t* inEnd = in + *aSrcLength;
+ char16_t* out = aDest;
+ char16_t* outEnd = out + *aDestLength;
+
+ if (mPendingTrail) { // A second code unit was left over from the previous call; write it first.
+ if (out == outEnd) {
+ *aSrcLength = 0; // No room even for the leftover unit: nothing consumed, nothing written.
+ *aDestLength = 0;
+ return NS_OK_UDEC_MOREOUTPUT;
+ }
+ *out++ = mPendingTrail;
+ mPendingTrail = 0;
+ }
+ for (;;) { // One input byte per iteration; every way out of the loop is a return.
+ if (in == inEnd) { // Input exhausted.
+ *aSrcLength = in - reinterpret_cast<const uint8_t*>(aSrc);
+ *aDestLength = out - aDest;
+ return mBig5Lead ? NS_OK_UDEC_MOREINPUT : NS_OK; // A stored lead byte still needs its trail byte.
+ }
+ if (out == outEnd) { // Output full; checked before reading so a byte is never consumed without room for its result.
+ *aSrcLength = in - reinterpret_cast<const uint8_t*>(aSrc);
+ *aDestLength = out - aDest;
+ return NS_OK_UDEC_MOREOUTPUT;
+ }
+ uint8_t b = *in++;
+ if (!mBig5Lead) { // No lead byte outstanding: b starts a new character.
+ if (b <= 0x7F) { // ASCII range is copied through unchanged.
+ *out++ = (char16_t)b;
+ continue;
+ }
+ if (b >= 0x81 && b <= 0xFE) { // Lead byte: remember it and wait for the trail byte.
+ mBig5Lead = b;
+ continue;
+ }
+ if (mErrBehavior == kOnError_Signal) { // Only 0x80 and 0xFF reach this point; both are errors.
+ --in; // Un-read the offending byte so the reported length stops just before it.
+ *aSrcLength = in - reinterpret_cast<const uint8_t*>(aSrc);
+ *aDestLength = out - aDest;
+ return NS_ERROR_ILLEGAL_INPUT;
+ }
+ *out++ = 0xFFFD; // Non-signalling mode: substitute U+FFFD and keep going.
+ continue;
+ }
+ size_t lead = mBig5Lead; // From here on, b is the byte that follows a stored lead byte.
+ mBig5Lead = 0;
+ size_t offset = (b < 0x7F) ? 0x40 : 0x62; // Maps trail ranges 0x40..0x7E and 0xA1..0xFE onto 0..62 and 63..156.
+ if ((b >= 0x40 && b <= 0x7E) || (b >= 0xA1 && b <= 0xFE)) { // b is a well-formed trail byte.
+ size_t pointer = (lead - 0x81) * 157L + (b - offset); // Table index: 157 trail values per lead byte.
+ char16_t outTrail; // Second code unit, set only on the paths that leave the switch via break.
+ switch (pointer) { // Four pointers decode to a base letter followed by a combining mark.
+ case 1133:
+ *out++ = 0x00CA; // U+00CA (E with circumflex) ...
+ outTrail = 0x0304; // ... followed by U+0304 (combining macron).
+ break;
+ case 1135:
+ *out++ = 0x00CA; // U+00CA ...
+ outTrail = 0x030C; // ... followed by U+030C (combining caron).
+ break;
+ case 1164:
+ *out++ = 0x00EA; // U+00EA (e with circumflex) ...
+ outTrail = 0x0304; // ... followed by U+0304 (combining macron).
+ break;
+ case 1166:
+ *out++ = 0x00EA; // U+00EA ...
+ outTrail = 0x030C; // ... followed by U+030C (combining caron).
+ break;
+ default:
+ char16_t lowBits = nsBIG5Data::LowBits(pointer); // Table lookup; a result of 0 is handled below as "no mapping".
+ if (!lowBits) {
+ if (b <= 0x7F) {
+ // prepend byte to stream
+ // Always legal, since we've always just read a byte
+ // if we come here.
+ --in;
+ }
+ if (mErrBehavior == kOnError_Signal) {
+ --in; // Same rewind as in the "pointer is null" path at the end of the loop.
+ *aSrcLength = in - reinterpret_cast<const uint8_t*>(aSrc);
+ *aDestLength = out - aDest;
+ return NS_ERROR_ILLEGAL_INPUT;
+ }
+ *out++ = 0xFFFD;
+ continue;
+ }
+ if (nsBIG5Data::IsAstral(pointer)) { // NOTE(review): presumably marks entries outside the BMP - confirm in nsBIG5Data.
+ uint32_t codePoint = uint32_t(lowBits) | 0x20000; // The table value supplies the low 16 bits of a code point at or above U+20000.
+ *out++ = char16_t(0xD7C0 + (codePoint >> 10)); // High surrogate; 0xD7C0 is 0xD800 - (0x10000 >> 10).
+ outTrail = char16_t(0xDC00 + (codePoint & 0x3FF)); // Low surrogate.
+ break;
+ }
+ *out++ = lowBits; // Result fits in a single code unit.
+ continue;
+ }
+ if (out == outEnd) { // Reached only through the break statements above, with outTrail set.
+ mPendingTrail = outTrail; // Keep the second unit; the next call writes it before reading input.
+ *aSrcLength = in - reinterpret_cast<const uint8_t*>(aSrc);
+ *aDestLength = out - aDest;
+ return NS_OK_UDEC_MOREOUTPUT;
+ }
+ *out++ = outTrail;
+ continue;
+ }
+ // pointer is null: b is not a valid trail byte for the stored lead byte.
+ if (b <= 0x7F) {
+ // prepend byte to stream
+ // Always legal, since we've always just read a byte
+ // if we come here.
+ --in;
+ }
+ if (mErrBehavior == kOnError_Signal) {
+ // Moving in one past the start of aSrc is actually OK per API contract,
+ // since assigning -1 to aSrcLength means that we want the caller to
+ // record one U+FFFD and repush the same input buffer.
+ --in;
+ *aSrcLength = in - reinterpret_cast<const uint8_t*>(aSrc);
+ *aDestLength = out - aDest;
+ return NS_ERROR_ILLEGAL_INPUT;
+ }
+ *out++ = 0xFFFD;
+ continue;
+ }
+}
+
+// Reports an upper bound, in UTF-16 code units, on what decoding aSrcLength
+// more bytes can produce. aSrc itself is not inspected.
+NS_IMETHODIMP
+nsBIG5ToUnicode::GetMaxLength(const char* aSrc,
+                              int32_t aSrcLength,
+                              int32_t* aDestLength)
+{
+  // Fresh input yields at most one code unit per byte. State carried over
+  // from an earlier Convert() call can add to that: one slot for a code unit
+  // still waiting to be written, and one for a lead byte already consumed.
+  int32_t carriedOver = 0;
+  if (mPendingTrail) {
+    ++carriedOver;
+  }
+  if (mBig5Lead) {
+    ++carriedOver;
+  }
+  *aDestLength = aSrcLength + carriedOver;
+  return NS_OK;
+}
+
+// Discards all state carried between Convert() calls, so the next call
+// starts as if on a newly constructed decoder.
+NS_IMETHODIMP
+nsBIG5ToUnicode::Reset()
+{
+  mBig5Lead = 0;      // forget any lead byte awaiting its trail byte
+  mPendingTrail = 0;  // drop any code unit that was waiting for output room
+  return NS_OK;
+}