summaryrefslogtreecommitdiffstats
path: root/intl/chardet/tools/GenCyrillicClass.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/chardet/tools/GenCyrillicClass.cpp')
-rw-r--r--intl/chardet/tools/GenCyrillicClass.cpp135
1 files changed, 135 insertions, 0 deletions
diff --git a/intl/chardet/tools/GenCyrillicClass.cpp b/intl/chardet/tools/GenCyrillicClass.cpp
new file mode 100644
index 000000000..180651a49
--- /dev/null
+++ b/intl/chardet/tools/GenCyrillicClass.cpp
@@ -0,0 +1,135 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "nsICharsetConverterManager.h"
+#include <iostream.h>
+#include "nsISupports.h"
+#include "nsIComponentManager.h"
+#include "nsIServiceManager.h"
+#include "nsIUnicodeDecoder.h"
+#include "nsIUnicodeEncoder.h"
+#include "nsCRT.h"
+#include <stdio.h>
+#include <stdlib.h>
+#if defined(XP_WIN)
+#include <io.h>
+#endif
+#ifdef XP_UNIX
+#include <unistd.h>
+#endif
+
+//---------------------------------------------------------------------------
+void header()  // Prints the include-guard opening and the "generated file" notice of the output header to stdout.
+{
+  const char *text=  // const: a string literal may not bind to a plain char* in C++11
+"#ifndef nsCyrillicClass_h__\n"
+"#define nsCyrillicClass_h__\n"
+"/* PLEASE DO NOT EDIT THIS FILE DIRECTLY. THIS FILE IS GENERATED BY \n"
+" GenCyrllicClass found in mozilla/intl/chardet/tools\n"
+" */\n";
+  fputs(text, stdout);  // fputs, not printf(text): the text is data and must never be parsed as a format string
+}
+//---------------------------------------------------------------------------
+void footer()  // Writes the closing "#endif" line of the generated header to stdout.
+{
+  fputs("#endif\n", stdout);
+}
+//---------------------------------------------------------------------------
+void npl()  // Prints the editor mode line and the MPL 2.0 license comment that open the generated file.
+{
+  const char *text=  // const: a string literal may not bind to a plain char* in C++11
+"/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */\n"
+"/* This Source Code Form is subject to the terms of the Mozilla Public\n"
+" * License, v. 2.0. If a copy of the MPL was not distributed with this\n"
+" * file, You can obtain one at http://mozilla.org/MPL/2.0/. */\n";
+  fputs(text, stdout);  // fputs, not printf(text): the text is data and must never be parsed as a format string
+}
+//---------------------------------------------------------------------------
+static nsIUnicodeEncoder* gKOI8REncoder = nullptr;  // "KOI8-R" encoder: requested and released in main(), used by CyrillicClass()
+static nsICharsetConverterManager* gCCM = nullptr;  // converter manager that genCyrillicClass() asks for decoders
+
+//---------------------------------------------------------------------------
+// Classifies one byte of a single-byte charset: decodes it with `decoder`,
+// applies nsCRT::ToUpper, and re-encodes the result with gKOI8REncoder.
+// Returns the KOI8-R byte minus 0xdf (1..32) when that byte is >= 0xe0, and 0
+// when it is lower or when either conversion fails or yields other than 1 unit.
+uint8_t CyrillicClass(nsIUnicodeDecoder* decoder, uint8_t byte)
+{
+  char16_t wide[2];
+  uint8_t koi8[2];
+  int32_t byteCount = 1;
+  int32_t wideCount = 1;
+
+  nsresult rv = decoder->Convert(reinterpret_cast<char*>(&byte), &byteCount, wide, &wideCount);
+  if (NS_FAILED(rv) || 1 != wideCount)
+    return 0;
+
+  wide[0] = nsCRT::ToUpper(wide[0]);
+  byteCount = 1;  // reused as the encoder's output capacity
+  rv = gKOI8REncoder->Convert(wide, &wideCount, reinterpret_cast<char*>(koi8), &byteCount);
+  if (NS_FAILED(rv) || 1 != byteCount || koi8[0] < 0xe0)
+    return 0;
+
+  return static_cast<uint8_t>(koi8[0] - 0xdf);
+}
+//---------------------------------------------------------------------------
+// Prints to stdout the table "static const uint8_t <name>Map [128]": the
+// CyrillicClass() value of each byte 0x80..0xff under `charset`, 16 per row.
+// If no decoder for `charset` can be obtained, prints a "cannot locate" line
+// instead and emits no table.
+void genCyrillicClass(const char* name, const char* charset)
+{
+  nsIUnicodeDecoder *decoder = nullptr;
+  nsAutoString str(charset);
+  nsresult res = gCCM->GetUnicodeDecoder(&str, &decoder);
+  if (NS_FAILED(res) || (nullptr == decoder))  // a success code without a decoder is just as unusable
+  {
+    printf("cannot locate %s Decoder\n", charset);
+    return;
+  }
+
+  printf("static const uint8_t %sMap [128] = {\n", name);
+  for (int b = 0x80; b <= 0xff; b++)  // int counter: termination does not depend on uint8_t wrap-around
+  {
+    printf(" %2d, ", CyrillicClass(decoder, static_cast<uint8_t>(b)));
+    if ((b & 0x0f) == 0x0f)  // last entry of a 16-value row
+      printf("\n");
+  }
+  printf("};\n");
+  NS_IF_RELEASE(decoder);
+}
+//---------------------------------------------------------------------------
+
+
+// Tool entry point: obtains the charset converter manager and the KOI8-R encoder,
+// then prints the generated header (license, include guard, one table per charset,
+// "#endif") to stdout. Returns 0 on success, -1 if either service is unavailable.
+int main(int argc, char** argv) {
+  nsresult res = NS_OK;  // nsresult is a status code, not a pointer
+  // The local must not be named gCCM: that would shadow the file-level gCCM read by genCyrillicClass().
+  nsCOMPtr<nsICharsetConverterManager> ccm = do_GetService(kCharsetConverterManagerCID, &res);
+  if(NS_FAILED(res) || (nullptr == ccm))  // either a failure code or a null pointer is fatal
+  {
+    printf("cannot locate CharsetConverterManager\n");
+    return(-1);
+  }
+  gCCM = ccm.get();  // non-owning copy; ccm keeps the reference alive until main returns
+  nsAutoString koi8r("KOI8-R");
+  res = gCCM->GetUnicodeEncoder(&koi8r,&gKOI8REncoder);
+  if(NS_FAILED(res) || (nullptr == gKOI8REncoder))  // either a failure code or a null pointer is fatal
+  {
+    printf("cannot locate KOI8-R Encoder\n");
+    return(-1);
+  }
+  npl();
+  header();
+  genCyrillicClass("KOI8", "KOI8-R");
+  genCyrillicClass("CP1251", "windows-1251");
+  genCyrillicClass("IBM866", "IBM866");
+  genCyrillicClass("ISO88595", "ISO-8859-5");
+  genCyrillicClass("MacCyrillic", "x-mac-cyrillic");
+  footer();
+  NS_IF_RELEASE(gKOI8REncoder);
+  return(0);
+}