1 files changed, 271 insertions, 0 deletions
diff --git a/intl/uconv/ucvja/nsUnicodeToISO2022JP.cpp b/intl/uconv/ucvja/nsUnicodeToISO2022JP.cpp
new file mode 100644
index 000000000..9328afdee
--- /dev/null
+++ b/intl/uconv/ucvja/nsUnicodeToISO2022JP.cpp
@@ -0,0 +1,271 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsUnicodeToISO2022JP.h"
+#include "nsUCVJADll.h"
+#include "nsUnicodeEncodeHelper.h"
+
+//----------------------------------------------------------------------
+// Global functions and data [declaration]
+
+// Basic mapping from Hankaku (half-width) to Zenkaku (full-width) code points,
+// indexed by (u - 0xff60). Nigori and Maru are taken care of outside of it.
+static const char16_t gBasicMapping[0x40] =
+{
+// 0xff60 (entry 0 maps to itself; IS_HANKAKU only admits 0xff61 and up)
+0xff60,0x3002,0x300c,0x300d,0x3001,0x30fb,0x30f2,0x30a1,
+// 0xff68
+0x30a3,0x30a5,0x30a7,0x30a9,0x30e3,0x30e5,0x30e7,0x30c3,
+// 0xff70
+0x30fc,0x30a2,0x30a4,0x30a6,0x30a8,0x30aa,0x30ab,0x30ad,
+// 0xff78
+0x30af,0x30b1,0x30b3,0x30b5,0x30b7,0x30b9,0x30bb,0x30bd,
+// 0xff80
+0x30bf,0x30c1,0x30c4,0x30c6,0x30c8,0x30ca,0x30cb,0x30cc,
+// 0xff88
+0x30cd,0x30ce,0x30cf,0x30d2,0x30d5,0x30d8,0x30db,0x30de,
+// 0xff90
+0x30df,0x30e0,0x30e1,0x30e2,0x30e4,0x30e6,0x30e8,0x30e9,
+// 0xff98 (the last two entries are for the marks IS_NIGORI / IS_MARU test)
+0x30ea,0x30eb,0x30ec,0x30ed,0x30ef,0x30f3,0x309b,0x309c
+};
+
+// True when u lies in 0xff61..0xff9f, the range gBasicMapping is used for.
+#define IS_HANKAKU(u) (((u) >= 0xff61) && ((u) <= 0xff9f))
+// The two half-width marks, and the offset each one adds to a mapped base.
+#define IS_NIGORI(u) ((u) == 0xff9e)
+#define IS_MARU(u)   ((u) == 0xff9f)
+#define NIGORI_MODIFIER 1
+#define MARU_MODIFIER   2
+
+// Can u combine with a Maru that follows it? (0xff8a..0xff8e)
+#define NEED_TO_CHECK_MARU(u) (((u) >= 0xff8a) && ((u) <= 0xff8e))
+// Can u combine with a Nigori that follows it? (0xff76..0xff84 or a Maru base)
+#define NEED_TO_CHECK_NIGORI(u) ((((u) >= 0xff76) && ((u) <= 0xff84)) || NEED_TO_CHECK_MARU(u))
+
+static const uint16_t g_ufAsciiMapping [] = { // ASCII row of g_ufMappingTables
+  0x0001, 0x0004, 0x0005, 0x0008, 0x0000, 0x0000, 0x007F, 0x0000
+}; // NOTE(review): raw words, cast to uMappingTable at use; layout not shown here
+
+#define SIZE_OF_ISO2022JP_TABLES 5 // rows probed in order by ConvertNoBuffNoErr
+static const uint16_t * g_ufMappingTables[SIZE_OF_ISO2022JP_TABLES] = {
+  g_ufAsciiMapping,             // 0: ASCII           ISOREG 6
+  g_uf0201GLMapping,            // 1: JIS X 0201-1976 ISOREG 14
+  g_uf0208Mapping,              // 2: JIS X 0208-1983 ISOREG 87
+  g_uf0208extMapping,           // 3: JIS X 0208 - cp932 ext
+  g_uf0208Mapping,              // 4: JIS X 0208-1978 ISOREG 42 (same table as row 2)
+}; // the row index is the charset number handed to ChangeCharset
+
+static const uScanClassID g_ufScanClassIDs[SIZE_OF_ISO2022JP_TABLES] = { // parallel to g_ufMappingTables
+  u1ByteCharset,                // 0: ASCII           ISOREG 6
+  u1ByteCharset,                // 1: JIS X 0201-1976 ISOREG 14
+  u2BytesCharset,               // 2: JIS X 0208-1983 ISOREG 87
+  u2BytesCharset,               // 3: JIS X 0208- cp932 ext
+  u2BytesCharset,               // 4: JIS X 0208-1978 ISOREG 42
+};
+#define JIS_X_208_INDEX 2 // row of both tables that ConvertHankaku converts with
+
+//----------------------------------------------------------------------
+// Class nsUnicodeToISO2022JP [implementation]
+
+// worst case max length (the 8 handed to nsEncoderSupport below):
+//  1  2 3  4  5  6  7 8
+// ESC $ B XX XX ESC ( B   <- escape in, one 2-byte character, escape to ASCII
+nsUnicodeToISO2022JP::nsUnicodeToISO2022JP() 
+: nsEncoderSupport(8)
+{
+  Reset(); // starts with mCharset = 0, the ASCII row
+}
+
+nsUnicodeToISO2022JP::~nsUnicodeToISO2022JP() 
+{ // Empty body: nothing is released by this class here.
+}
+
+// Makes aCharset the active character set, writing the 3-byte designation
+// escape sequence to aDest when a switch is actually needed.
+// Moving between indexes 2 and 3 never produces output (same escape).
+//   aCharset    - index into g_ufMappingTables (0..4).
+//   aDest       - where the escape sequence is written.
+//   aDestLength - in: bytes available in aDest; out: bytes written (0 or 3).
+// Returns NS_OK, or NS_OK_UENC_MOREOUTPUT (nothing written, mCharset kept)
+// when a switch is needed but fewer than 3 bytes are available.
+nsresult nsUnicodeToISO2022JP::ChangeCharset(int32_t aCharset,
+                                             char * aDest, 
+                                             int32_t * aDestLength)
+{
+  // Indexes 2 (standard JIS X 0208 table) and 3 (CP932 extensions) generate
+  // the same escape sequence, so moving between the two needs no output:
+  // adopt the requested index and let the check below return early.
+  const bool stdToExt = (2 == mCharset) && (3 == aCharset);
+  const bool extToStd = (3 == mCharset) && (2 == aCharset);
+  if (stdToExt || extToStd) {
+    mCharset = aCharset;
+  }
+
+  // Requested set is already active: nothing to emit.
+  if (mCharset == aCharset) {
+    *aDestLength = 0;
+    return NS_OK;
+  }
+
+  // Every escape sequence below is exactly three bytes long.
+  if (*aDestLength < 3) {
+    *aDestLength = 0;
+    return NS_OK_UENC_MOREOUTPUT;
+  }
+
+  const char * escape = nullptr;
+  switch (aCharset) {
+    case 0:  escape = "\033(B"; break;  // ASCII ISOREG 6
+    case 1:  escape = "\033(J"; break;  // JIS X 0201-1976 ("Roman" set) ISOREG 14
+    case 2:                             // JIS X 0208-1983 ISOREG 87
+    case 3:  escape = "\033$B"; break;  // same set; used for the CP932 ext table
+    case 4:  escape = "\033$@"; break;  // JIS X 0208-1978 ISOREG 42 (no own mapping)
+  }
+
+  // For any other index no bytes are stored, but the state and the reported
+  // length are still updated below.
+  if (escape) {
+    for (int32_t k = 0; k < 3; ++k) {
+      aDest[k] = escape[k];
+    }
+  }
+
+  mCharset = aCharset;
+  *aDestLength = 3;
+  return NS_OK;
+}
+
+// Converts a run of half-width katakana (IS_HANKAKU) to JIS X 0208 output,
+// folding a following Nigori/Maru mark into the base character when allowed.
+//   aSrc/aSrcLength   - input; length is units available in, consumed out.
+//   aDest/aDestLength - output; length is bytes available in, written out.
+// Returns the first non-NS_OK status of ChangeCharset or ConvertByTable. The
+// out-lengths are valid on every path, including an early escape failure.
+nsresult nsUnicodeToISO2022JP::ConvertHankaku(const char16_t * aSrc,
+                                              int32_t * aSrcLength,
+                                              char * aDest,
+                                              int32_t * aDestLength)
+{
+  const char16_t * src = aSrc;
+  const char16_t * const srcEnd = aSrc + *aSrcLength;
+  char * dest = aDest;
+  char * const destEnd = aDest + *aDestLength;
+
+  // Designate JIS X 0208. If the escape does not fit, fall through with
+  // nothing consumed or written instead of returning the stale in-values,
+  // which the caller would otherwise add to its src/dest cursors.
+  int32_t bcw = destEnd - dest;
+  nsresult res = ChangeCharset(JIS_X_208_INDEX, dest, &bcw);
+  dest += bcw;
+
+  while ((res == NS_OK) && (src < srcEnd) && IS_HANKAKU(*src)) {
+    const char16_t * const charStart = src;
+    const char16_t srcChar = *src++;
+    char16_t tempChar = gBasicMapping[srcChar - 0xff60];
+    // If the character could take a modifier, and the next unit is that
+    // modifier, apply its offset to the mapped value and eat one char16_t.
+    if (src < srcEnd) {
+      if (NEED_TO_CHECK_NIGORI(srcChar) && IS_NIGORI(*src)) {
+        tempChar += NIGORI_MODIFIER;
+        ++src;
+      } else if (NEED_TO_CHECK_MARU(srcChar) && IS_MARU(*src)) {
+        tempChar += MARU_MODIFIER;
+        ++src;
+      }
+    }
+
+    int32_t bcr = 1;
+    bcw = destEnd - dest;
+    res = nsUnicodeEncodeHelper::ConvertByTable(
+             &tempChar, &bcr, dest, &bcw, g_ufScanClassIDs[JIS_X_208_INDEX],
+             nullptr, (uMappingTable *) g_ufMappingTables[JIS_X_208_INDEX]);
+    dest += bcw;
+    // Output full: un-consume this character (and its modifier) so the caller
+    // retries it with more room rather than reporting it as converted.
+    if (res == NS_OK_UENC_MOREOUTPUT) src = charStart;
+  }
+  *aDestLength = dest - aDest;
+  *aSrcLength = src - aSrc;
+  return res;
+}
+
+//----------------------------------------------------------------------
+// Subclassing of nsTableEncoderSupport class [implementation]
+
+NS_IMETHODIMP nsUnicodeToISO2022JP::ConvertNoBuffNoErr(
+                                    const char16_t * aSrc, 
+                                    int32_t * aSrcLength, // in: units available; out: consumed
+                                    char * aDest, 
+                                    int32_t * aDestLength) // in: bytes available; out: written
+{ // Encodes aSrc into aDest, switching sets with ChangeCharset as needed.
+  nsresult res = NS_OK;
+  // res is returned as is: the status of the last step that ran.
+  const char16_t * src = aSrc;
+  const char16_t * srcEnd = aSrc + *aSrcLength;
+  char * dest = aDest;
+  char * destEnd = aDest + *aDestLength;
+  int32_t bcr, bcw;
+  int32_t i;
+  // Each pass: find a table for *src, switch to it, convert a run with it.
+  while (src < srcEnd) {
+    for (i=0; i< SIZE_OF_ISO2022JP_TABLES ; i++) { // probe with one char
+      bcr = 1;
+      bcw = destEnd - dest;
+      res = nsUnicodeEncodeHelper::ConvertByTable(src, &bcr, dest, &bcw, 
+                                      g_ufScanClassIDs[i], nullptr,
+                                      (uMappingTable *) g_ufMappingTables[i]);
+      if (res != NS_ERROR_UENC_NOMAPPING) break; // keep i: first table not refusing *src
+    }
+    // All tables refused *src: only half-width kana can still be converted.
+    if ( i>=  SIZE_OF_ISO2022JP_TABLES) {
+      if (IS_HANKAKU(*src)) {
+        bcr = srcEnd - src;
+        bcw = destEnd - dest;
+        res = ConvertHankaku(src, &bcr, dest, &bcw);
+        dest += bcw; // bcr/bcw are trusted as consumed/written counts here
+        src += bcr;
+        if (res == NS_OK) continue;
+      } else {
+        res = NS_ERROR_UENC_NOMAPPING;
+        src++; // the unmappable character is counted as consumed
+      }
+    }
+    if (res != NS_OK) break;
+    // The probe never advanced dest, so the escape overwrites its output.
+    bcw = destEnd - dest;
+    res = ChangeCharset(i, dest, &bcw);
+    dest += bcw;
+    if (res != NS_OK) break;
+    // Now convert for real, as far as table i can take the input.
+    bcr = srcEnd - src;
+    bcw = destEnd - dest;
+    res = nsUnicodeEncodeHelper::ConvertByTable(src, &bcr, dest, &bcw, 
+                                      g_ufScanClassIDs[i], nullptr,
+                                      (uMappingTable *) g_ufMappingTables[i]);
+    src += bcr;
+    dest += bcw;
+
+    if ((res != NS_OK) && (res != NS_ERROR_UENC_NOMAPPING)) break;
+    if (res == NS_ERROR_UENC_NOMAPPING) src--; // presumably bcr included the refused char; retry it
+  }
+
+  *aSrcLength = src - aSrc;
+  *aDestLength  = dest - aDest;
+  return res;
+}
+
+NS_IMETHODIMP nsUnicodeToISO2022JP::FinishNoBuff(char * aDest, 
+                                                 int32_t * aDestLength)
+{ // Ends the stream in ASCII; *aDestLength: in = room, out = bytes written.
+  // Propagate the status: NS_OK_UENC_MOREOUTPUT must not be masked as NS_OK.
+  return ChangeCharset(0, aDest, aDestLength);
+}
+
+NS_IMETHODIMP nsUnicodeToISO2022JP::Reset()
+{ // Puts the encoder back in its initial state and resets the base class.
+  mCharset = 0; // row 0 (ASCII): ChangeCharset(0, ...) then emits nothing
+  return nsEncoderSupport::Reset();
+}