summaryrefslogtreecommitdiffstats
path: root/intl/chardet/tools
diff options
context:
space:
mode:
authorMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
committerMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
commit5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree10027f336435511475e392454359edea8e25895d /intl/chardet/tools
parent49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
downloadUXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip
Add m-esr52 at 52.6.0
Diffstat (limited to 'intl/chardet/tools')
-rw-r--r--intl/chardet/tools/GenCyrillicClass.cpp135
-rw-r--r--intl/chardet/tools/charfreq.pl50
-rw-r--r--intl/chardet/tools/charfreqtostat.pl95
-rwxr-xr-xintl/chardet/tools/gen.cmd18
-rw-r--r--intl/chardet/tools/genbig5.pl42
-rw-r--r--intl/chardet/tools/gencp1252.pl55
-rw-r--r--intl/chardet/tools/gencyrillic.pl65
-rw-r--r--intl/chardet/tools/geneucjp.pl47
-rw-r--r--intl/chardet/tools/geneuckr.pl42
-rw-r--r--intl/chardet/tools/geneuctw.pl49
-rw-r--r--intl/chardet/tools/gengb18030.pl44
-rw-r--r--intl/chardet/tools/gengb2312.pl41
-rw-r--r--intl/chardet/tools/genhz.pl57
-rw-r--r--intl/chardet/tools/geniso2022cn.pl58
-rw-r--r--intl/chardet/tools/geniso2022jp.pl49
-rw-r--r--intl/chardet/tools/geniso2022kr.pl55
-rw-r--r--intl/chardet/tools/gensjis.pl46
-rw-r--r--intl/chardet/tools/genutf8.pl189
-rw-r--r--intl/chardet/tools/genverifier.pm175
19 files changed, 1312 insertions, 0 deletions
diff --git a/intl/chardet/tools/GenCyrillicClass.cpp b/intl/chardet/tools/GenCyrillicClass.cpp
new file mode 100644
index 000000000..180651a49
--- /dev/null
+++ b/intl/chardet/tools/GenCyrillicClass.cpp
@@ -0,0 +1,135 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "nsICharsetConverterManager.h"
+#include <iostream.h>
+#include "nsISupports.h"
+#include "nsIComponentManager.h"
+#include "nsIServiceManager.h"
+#include "nsIUnicodeDecoder.h"
+#include "nsIUnicodeEncoder.h"
+#include "nsCRT.h"
+#include <stdio.h>
+#include <stdlib.h>
+#if defined(XP_WIN)
+#include <io.h>
+#endif
+#ifdef XP_UNIX
+#include <unistd.h>
+#endif
+
+//---------------------------------------------------------------------------
+// Prints the opening of the generated header to stdout: the include guard
+// and the "generated file, do not edit" notice.
+void header()
+{
+  // Held as const data: binding a string literal to a non-const char* is
+  // ill-formed in C++11 and later.
+  static const char kHeaderText[] =
+    "#ifndef nsCyrillicClass_h__\n"
+    "#define nsCyrillicClass_h__\n"
+    "/* PLEASE DO NOT EDIT THIS FILE DIRECTLY. THIS FILE IS GENERATED BY \n"
+    " GenCyrllicClass found in mozilla/intl/chardet/tools\n"
+    " */\n";
+  // fputs() writes the text verbatim; using it as a printf format string
+  // would misbehave as soon as the text contained a '%'.
+  fputs(kHeaderText, stdout);
+}
+//---------------------------------------------------------------------------
+// Prints the "#endif" line that ends the generated header on stdout.
+void footer()
+{
+  fputs("#endif\n", stdout);
+}
+//---------------------------------------------------------------------------
+// Prints the editor mode line and the MPL 2.0 license banner that start the
+// generated header on stdout.
+void npl()
+{
+  // Held as const data: binding a string literal to a non-const char* is
+  // ill-formed in C++11 and later.
+  static const char kLicenseText[] =
+    "/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */\n"
+    "/* This Source Code Form is subject to the terms of the Mozilla Public\n"
+    " * License, v. 2.0. If a copy of the MPL was not distributed with this\n"
+    " * file, You can obtain one at http://mozilla.org/MPL/2.0/. */\n";
+  // Written verbatim rather than interpreted as a printf format string.
+  fputs(kLicenseText, stdout);
+}
+//---------------------------------------------------------------------------
+// Encoder that main() requests for "KOI8-R"; CyrillicClass() uses it to
+// re-encode each decoded character, and main() releases it before returning.
+static nsIUnicodeEncoder* gKOI8REncoder = nullptr;
+// Converter manager that genCyrillicClass() asks for per-charset decoders.
+static nsICharsetConverterManager* gCCM = nullptr;
+
+//---------------------------------------------------------------------------
+// Classifies one byte of a legacy charset.
+//
+// The byte is decoded with |decoder|, upper-cased, and re-encoded with
+// gKOI8REncoder. When the re-encoded byte is 0xe0 or above the result is
+// that byte minus 0xdf (so 0xe0 maps to 1); in every other case, including
+// any conversion failure or a conversion that does not yield exactly one
+// unit, the result is 0.
+uint8_t CyrillicClass(nsIUnicodeDecoder* decoder, uint8_t byte)
+{
+  char16_t unicode[2];
+  uint8_t koi8[2];
+
+  int32_t srcLen = 1;
+  int32_t unicodeLen = 1;
+  nsresult rv = decoder->Convert((char*)&byte, &srcLen, unicode, &unicodeLen);
+  if (NS_FAILED(rv) || 1 != unicodeLen) {
+    return 0;
+  }
+
+  unicode[0] = nsCRT::ToUpper(unicode[0]);
+
+  int32_t koi8Len = 1;
+  rv = gKOI8REncoder->Convert(unicode, &unicodeLen, (char*)koi8, &koi8Len);
+  if (NS_FAILED(rv) || 1 != koi8Len) {
+    return 0;
+  }
+
+  if (koi8[0] < 0xe0) {
+    return 0;
+  }
+  return koi8[0] - (uint8_t)0xdf;
+}
+//---------------------------------------------------------------------------
+// Prints, on stdout, a C array "static const uint8_t <name>Map [128]" holding
+// CyrillicClass() of every byte 0x80..0xff decoded as |charset|, sixteen
+// entries per output line.
+//
+// name    - prefix of the generated array identifier.
+// charset - charset name handed to the converter manager.
+//
+// When no decoder can be obtained a message is written to stderr and nothing
+// is printed on stdout.
+void genCyrillicClass(const char* name, const char* charset)
+{
+  // gCCM is a plain global that may never have been set; calling through a
+  // null pointer would crash instead of reporting the problem.
+  if (!gCCM)
+  {
+    // stdout carries the generated header, so diagnostics go to stderr.
+    fprintf(stderr, "cannot locate %s Decoder\n", charset);
+    return;
+  }
+
+  nsIUnicodeDecoder* decoder = nullptr;
+  nsAutoString str(charset);
+  nsresult res = gCCM->GetUnicodeDecoder(&str, &decoder);
+  if (NS_FAILED(res) || !decoder)
+  {
+    fprintf(stderr, "cannot locate %s Decoder\n", charset);
+    return;
+  }
+
+  printf("static const uint8_t %sMap [128] = {\n", name);
+  // Plain unsigned counters with an explicit upper bound replace the old
+  // uint8_t loop that terminated only by wrapping around to 0.
+  for (unsigned row = 0x80; row < 0x100; row += 0x10)
+  {
+    for (unsigned col = 0; col <= 0x0f; col++)
+    {
+      uint8_t cls = CyrillicClass(decoder, (uint8_t)(row + col));
+      printf(" %2d, ", cls);
+    }
+    printf("\n");
+  }
+  printf("};\n");
+  NS_IF_RELEASE(decoder);
+}
+//---------------------------------------------------------------------------
+
+
+// Entry point: obtains the charset converter manager and a KOI8-R encoder,
+// then prints the complete generated header (license, include guard, one
+// class map per charset, closing #endif) on stdout.
+//
+// Returns 0 on success and -1 when either service cannot be obtained.
+int main(int argc, char** argv) {
+  nsresult res = NS_OK;
+
+  // Named differently from the file-level gCCM on purpose: a local called
+  // gCCM would shadow the global and leave it null for genCyrillicClass().
+  nsCOMPtr<nsICharsetConverterManager> ccm =
+    do_GetService(kCharsetConverterManagerCID, &res);
+
+  // Fail when the call failed OR produced no object.
+  if (NS_FAILED(res) || !ccm)
+  {
+    // stdout carries the generated header, so diagnostics go to stderr.
+    fprintf(stderr, "cannot locate CharsetConverterManager\n");
+    return -1;
+  }
+  // |ccm| keeps the service alive until main() returns.
+  gCCM = ccm.get();
+
+  nsAutoString koi8r("KOI8-R");
+  res = gCCM->GetUnicodeEncoder(&koi8r, &gKOI8REncoder);
+  if (NS_FAILED(res) || !gKOI8REncoder)
+  {
+    fprintf(stderr, "cannot locate KOI8-R Encoder\n");
+    gCCM = nullptr;
+    return -1;
+  }
+
+  npl();
+  header();
+
+  genCyrillicClass("KOI8", "KOI8-R");
+  genCyrillicClass("CP1251", "windows-1251");
+  genCyrillicClass("IBM866", "IBM866");
+  genCyrillicClass("ISO88595", "ISO-8859-5");
+  genCyrillicClass("MacCyrillic", "x-mac-cyrillic");
+  footer();
+
+  NS_IF_RELEASE(gKOI8REncoder);
+  // Do not leave the raw global pointing at an object |ccm| is about to drop.
+  gCCM = nullptr;
+  return 0;
+}
diff --git a/intl/chardet/tools/charfreq.pl b/intl/chardet/tools/charfreq.pl
new file mode 100644
index 000000000..4232d4765
--- /dev/null
+++ b/intl/chardet/tools/charfreq.pl
@@ -0,0 +1,50 @@
+#!/usr/bin/perl
+#!/usr/bin/perl
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+# Preload the counters from the statistics file named on the command line.
+# Each line is split on whitespace: field 0 is the key, field 1 its count.
+# Three-argument open: the file name is never interpreted as a mode or pipe.
+open(STAT, '<', $ARGV[0]) || die " cannot open data file $ARGV[0]\n";
+%count = ();
+while(<STAT>)
+{
+  @k = split(/\s+/, $_);
+  $count{$k[0]} = $k[1];
+}
+close(STAT);
+
+# Scan STDIN line by line. split /\s*/ yields one element per non-whitespace
+# byte. $s is the scanner state:
+#   0 - between characters
+#   1 - entered after a byte in 0x81..0xa0, or after a lead byte followed by
+#       a byte <= 0xa0; it has no transitions, so the rest of the line is
+#       ignored
+#   2 - holding a lead byte (> 0xa0) in $fb, waiting for the trail byte
+while(<STDIN>)
+{
+  @ck = split /\s*/, $_;
+  $s = 0;
+  $fb = 0;
+  # Visit every element. The previous bound ($j < $#ck) stopped one short,
+  # so a two-byte character ending the line was never counted.
+  for($j = 0; $j <= $#ck; $j++) {
+    $cc = unpack("C", $ck[$j]);
+    if($s == 0) {
+      if($cc > 0x80) {
+        if($cc > 0xa0) {
+          $fb = $ck[$j];
+          $s = 2;
+        } else {
+          $s = 1;
+        }
+      }
+    } elsif($s == 1) {
+      # intentionally empty: see the state description above
+    } else {
+      if($cc > 0xa0) {
+        $fb .= $ck[$j];
+        $count{$fb}++;
+        # running count, printed each time the character is seen
+        print $fb . " " .$count{$fb} . "\n";
+        $s = 0;
+      } else {
+        $s = 1;
+      }
+    }
+  }
+}
+
+# Final totals, sorted by key.
+foreach $c (sort(keys( %count )))
+{
+  print $c . " ". $count{$c} . "\n";
+}
diff --git a/intl/chardet/tools/charfreqtostat.pl b/intl/chardet/tools/charfreqtostat.pl
new file mode 100644
index 000000000..04af0c82c
--- /dev/null
+++ b/intl/chardet/tools/charfreqtostat.pl
@@ -0,0 +1,95 @@
+#!/usr/bin/perl
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+# Returns the editor mode line and MPL 2.0 license banner that start the
+# generated output.
+sub GenNPL {
+  return << "END_NPL";
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+END_NPL
+}
+
+print GenNPL();
+$total=0;
+@h = ();   # counts per lead byte,  indexed by (byte - 0xA1)
+@l = ();   # counts per trail byte, indexed by (byte - 0xA1)
+
+# Each STDIN line is split on whitespace: field 0 is unpacked as raw bytes
+# and field 1 is added as its count. Only entries whose first two bytes are
+# both >= 0xA1 contribute.
+while(<STDIN>)
+{
+  @k = split(/\s+/, $_);
+  @i = unpack("CCCC", $k[0]);
+  if((0xA1 <= $i[0]) && (0xA1 <= $i[1])){
+    $total += $k[1];
+    $v = $i[0] - 0x00A1;
+    $h[$v] += $k[1];
+    $u = $i[1] - 0x00A1;
+    $l[$u] += $k[1];
+  }
+}
+
+# Every frequency below is a count divided by $total; with no usable input
+# that would abort with a bare "Illegal division by zero".
+if($total == 0) {
+  die "charfreqtostat.pl: no double-byte character counts read from STDIN\n";
+}
+
+# Relative frequencies for the 94 byte values 0xA1..0xFE, and their means.
+$ffh = 0.0;
+$ffl = 0.0;
+for($i=0x00A1;$i< 0x00FF ; $i++)
+{
+  $fh[$i - 0x00a1] = $h[$i- 0x00a1] / $total;
+  $ffh += $fh[$i - 0x00a1];
+
+  $fl[$i - 0x00a1] = $l[$i- 0x00a1] / $total;
+  $ffl += $fl[$i - 0x00a1];
+}
+$mh = $ffh / 94.0;
+$ml = $ffl / 94.0;
+
+# Standard deviation of the lead-byte and trail-byte frequencies.
+$sumh=0.0;
+$suml=0.0;
+for($i=0x00A1;$i< 0x00FF ; $i++)
+{
+  $sh = $fh[$i - 0x00a1] - $mh;
+  $sh *= $sh;
+  $sumh += $sh;
+
+  $sl = $fl[$i - 0x00a1] - $ml;
+  $sl *= $sl;
+  $suml += $sl;
+}
+$sumh /= 94.0;
+$suml /= 94.0;
+$stdh = sqrt($sumh);
+$stdl = sqrt($suml);
+
+# Emit the C initializer. The last array element (0xfe) gets no trailing
+# comma; the comparison is numeric, so "==" rather than the string "eq".
+print "{\n";
+print " {\n";
+for($i=0x00A1;$i< 0x00FF ; $i++)
+{
+  if($i == 0xfe) {
+    printf(" %.6ff \/\/ FreqH[%2x]\n", $fh[$i - 0x00a1] , $i);
+  } else {
+    printf(" %.6ff, \/\/ FreqH[%2x]\n", $fh[$i - 0x00a1] , $i);
+  }
+}
+print " },\n";
+printf ("%.6ff, \/\/ Lead Byte StdDev\n", $stdh);
+printf ("%.6ff, \/\/ Lead Byte Mean\n", $mh);
+printf ("%.6ff, \/\/ Lead Byte Weight\n", $stdh / ($stdh + $stdl));
+print " {\n";
+for($i=0x00A1;$i< 0x00FF ; $i++)
+{
+  if($i == 0xfe) {
+    printf(" %.6ff \/\/ FreqL[%2x]\n", $fl[$i - 0x00a1] , $i);
+  } else {
+    printf(" %.6ff, \/\/ FreqL[%2x]\n", $fl[$i - 0x00a1] , $i);
+  }
+}
+print " },\n";
+printf ("%.6ff, \/\/ Trail Byte StdDev\n", $stdl);
+printf ("%.6ff, \/\/ Trail Byte Mean\n", $ml);
+printf ("%.6ff \/\/ Trial Byte Weight\n", $stdl / ($stdh + $stdl));
+print "};\n";
diff --git a/intl/chardet/tools/gen.cmd b/intl/chardet/tools/gen.cmd
new file mode 100755
index 000000000..56ca34bc9
--- /dev/null
+++ b/intl/chardet/tools/gen.cmd
@@ -0,0 +1,18 @@
+REM This Source Code Form is subject to the terms of the Mozilla Public
+REM License, v. 2.0. If a copy of the MPL was not distributed with this
+REM file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+REM Regenerate each verifier header in ..\src from its generator script.
+perl gencp1252.pl > ..\src\nsCP1252Verifier.h
+perl geneucjp.pl > ..\src\nsEUCJPVerifier.h
+perl geniso2022jp.pl > ..\src\nsISO2022JPVerifier.h
+perl gensjis.pl > ..\src\nsSJISVerifier.h
+perl genutf8.pl > ..\src\nsUTF8Verifier.h
+perl geneuckr.pl > ..\src\nsEUCKRVerifier.h
+perl gengb2312.pl > ..\src\nsGB2312Verifier.h
+perl genbig5.pl > ..\src\nsBIG5Verifier.h
+perl geneuctw.pl > ..\src\nsEUCTWVerifier.h
+REM The UCS-2 generators may be absent from this directory. Run them only
+REM when present, so the redirect cannot replace an existing header with an
+REM empty file.
+if exist genucs2be.pl (perl genucs2be.pl > ..\src\nsUCS2BEVerifier.h)
+if exist genucs2le.pl (perl genucs2le.pl > ..\src\nsUCS2LEVerifier.h)
+perl genhz.pl > ..\src\nsHZVerifier.h
+perl geniso2022kr.pl > ..\src\nsISO2022KRVerifier.h
+perl geniso2022cn.pl > ..\src\nsISO2022CNVerifier.h
diff --git a/intl/chardet/tools/genbig5.pl b/intl/chardet/tools/genbig5.pl
new file mode 100644
index 000000000..8e3a777cb
--- /dev/null
+++ b/intl/chardet/tools/genbig5.pl
@@ -0,0 +1,42 @@
+#!/usr/local/bin/perl
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+use strict;
+# Load genverifier.pm from the directory holding this script. perl 5.26 and
+# later no longer search the current directory ('.') by default.
+use FindBin qw($Bin);
+use lib $Bin;
+use genverifier;
+
+# Byte classes as [ first byte, last byte, class ] rows. The generator takes
+# the first row that contains a byte, so narrow rows precede the wider rows
+# that overlap them.
+my @big5_cls = (
+  [ 0x00 , 0x00 , 1 ],
+  [ 0x0e , 0x0f , 0 ],
+  [ 0x1b , 0x1b , 0 ],
+  [ 0x01 , 0x3f , 1 ],
+  [ 0x40 , 0x7e , 2 ],
+  [ 0x7f , 0x7f , 1 ],
+  [ 0xff , 0xff , 0 ],
+  [ 0x80 , 0xa0 , 4 ],
+  [ 0xa1 , 0xfe , 3 ],
+);
+
+# Transition table: one row per state, one column per class; each cell is
+# the next state (0 = start, 1 = error, 2 = ItsMe).
+my @big5_st = (
+# 0  1  2  3  4
+  1, 0, 0, 3, 1, # state 0
+  1, 1, 1, 1, 1, # Error State - 1
+  2, 2, 2, 2, 2, # ItsMe State - 2
+  1, 1, 0, 0, 0, # state 3
+);
+
+# Write the generated verifier header to stdout.
+print genverifier::GenVerifier("BIG5", "Big5", \@big5_cls, 5, \@big5_st);
+
+
+
diff --git a/intl/chardet/tools/gencp1252.pl b/intl/chardet/tools/gencp1252.pl
new file mode 100644
index 000000000..debc53ca5
--- /dev/null
+++ b/intl/chardet/tools/gencp1252.pl
@@ -0,0 +1,55 @@
+#!/usr/local/bin/perl
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+use strict;
+# Load genverifier.pm from the directory holding this script. perl 5.26 and
+# later no longer search the current directory ('.') by default.
+use FindBin qw($Bin);
+use lib $Bin;
+use genverifier;
+
+# Byte classes as [ first byte, last byte, class ] rows. The generator takes
+# the first row that contains a byte, so the catch-all 0x00-0xff row is last.
+my @cp1252_cls = (
+  [ 0x00 , 0x00 , 1 ],
+  [ 0x0e , 0x0f , 0 ],
+  [ 0x1b , 0x1b , 0 ],
+  [ 0x81 , 0x81 , 0 ],
+  [ 0x8d , 0x8d , 0 ],
+  [ 0x8f , 0x8f , 0 ],
+  [ 0x90 , 0x90 , 0 ],
+  [ 0x9d , 0x9d , 0 ],
+  [ 0xc0 , 0xd6 , 1 ],
+  [ 0xd8 , 0xf6 , 1 ],
+  [ 0xf8 , 0xff , 1 ],
+  [ 0x8a , 0x8a , 1 ],
+  [ 0x8c , 0x8c , 1 ],
+  [ 0x8e , 0x8e , 1 ],
+  [ 0x9a , 0x9a , 1 ],
+  [ 0x9c , 0x9c , 1 ],
+  [ 0x9e , 0x9e , 1 ],
+  [ 0x9f , 0x9f , 1 ],
+  [ 0x00 , 0xff , 2 ],
+);
+
+# Transition table: one row per state, one column per class; each cell is
+# the next state (0 = start, 1 = error, 2 = ItsMe).
+my @cp1252_st = (
+# 0  1  2
+  1, 3, 0, # Start State - 0
+  1, 1, 1, # Error State - 1
+  2, 2, 2, # ItsMe State - 2
+  1, 4, 0, # State - 3
+  1, 5, 4, # State - 4
+  1, 1, 4, # State - 5
+);
+
+# Write the generated verifier header to stdout.
+print genverifier::GenVerifier("CP1252", "windows-1252",
+                               \@cp1252_cls, 3, \@cp1252_st);
+
+
+
diff --git a/intl/chardet/tools/gencyrillic.pl b/intl/chardet/tools/gencyrillic.pl
new file mode 100644
index 000000000..51bd6e456
--- /dev/null
+++ b/intl/chardet/tools/gencyrillic.pl
@@ -0,0 +1,65 @@
+#!/usr/local/bin/perl
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+use StatKoi '.' ;
+
+# Output file, relative to the current directory. Naming it once keeps the
+# error message in step with the path that is actually opened.
+my $outfile = "../src/nsCyrillicProb.h";
+open(FILE, '>', $outfile) or die "cannot open $outfile: $!";
+
+print FILE <<EOF;
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsCyrillicDetector_h__
+#define nsCyrillicDetector_h__
+/*
+ DO NOT EDIT THIS FILE !!!
+ This file is generated by the perl script in
+ mozilla/intl/chardet/tools/gencyrillic.pl
+
+ To ues that script, you need to grab StatKoi.pm file from
+ the "Cyrillic Software Suite" written by John Neystdt.
+ http://www.neystadt.org/cyrillic (You can also find it from CPAN)
+ */
+EOF
+$table = \%Lingua::DetectCharset::StatKoi::StatsTableKoi;
+print FILE "const uint16_t gCyrillicProb[33][33] = {";
+
+# Row 0 is all zeros; rows 1..32 hold the table value for each pair of bytes
+# in 0xc0..0xdf, or 0 when the pair has no entry.
+print_row(sub { 0 });
+for($i = 0xc0; $i < 0xe0; $i++)
+{
+  print_row(sub {
+    my $key = chr($i) . chr($_[0]);
+    return exists($table->{$key}) ? $table->{$key} : 0;
+  });
+}
+print FILE "};\n";
+print FILE "#endif\n";
+# Buffered write errors only surface at close.
+close(FILE) or die "cannot write $outfile: $!";
+
+# Prints one 33-entry row: a leading 0, then $cell->($j) for every byte
+# $j in 0xc0..0xdf, eight values per output line.
+sub print_row {
+  my($cell) = @_;
+  print FILE "{ \n";
+  print FILE "0,\n";
+  for(my $j = 0xc0; $j < 0xe0; $j++)
+  {
+    print FILE $cell->($j) . ", \t";
+    if( 7 == ( $j % 8) )
+    {
+      print FILE "\n";
+    }
+  }
+  print FILE "\n}, \n";
+}
diff --git a/intl/chardet/tools/geneucjp.pl b/intl/chardet/tools/geneucjp.pl
new file mode 100644
index 000000000..692be15ab
--- /dev/null
+++ b/intl/chardet/tools/geneucjp.pl
@@ -0,0 +1,47 @@
+#!/usr/local/bin/perl
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+use strict;
+# Load genverifier.pm from the directory holding this script. perl 5.26 and
+# later no longer search the current directory ('.') by default.
+use FindBin qw($Bin);
+use lib $Bin;
+use genverifier;
+
+# Byte classes as [ first byte, last byte, class ] rows. The generator takes
+# the first row that contains a byte, so the 0x80-0xff catch-all row is last.
+my @eucjp_cls = (
+  [ 0x0e , 0x0f , 5 ],
+  [ 0xe0 , 0xfe , 0 ],
+  [ 0x8e , 0x8e , 1 ],
+  [ 0xa1 , 0xdf , 2 ],
+  [ 0x8f , 0x8f , 3 ],
+  [ 0x01 , 0x1a , 4 ],
+  [ 0x1c , 0x7f , 4 ],
+  [ 0x00 , 0x00 , 4 ],
+  [ 0x1b , 0x1b , 5 ],
+  [ 0x80 , 0x8d , 5 ],
+  [ 0xa0 , 0xa0 , 5 ],
+  [ 0x80 , 0xff , 5 ]
+);
+
+# Transition table: one row per state, one column per class; each cell is
+# the next state (0 = start, 1 = error, 2 = ItsMe).
+my @eucjp_st = (
+# 0  1  2  3  4  5
+  3, 4, 3, 5, 0, 1, # state 0
+  1, 1, 1, 1, 1, 1, # Error State - 1
+  2, 2, 2, 2, 2, 2, # ItsMe State - 2
+  0, 1, 0, 1, 1, 1, # state 3
+  1, 1, 0, 1, 1, 1, # state 4
+  3, 1, 3, 1, 1, 1, # state 5
+);
+
+# Write the generated verifier header to stdout.
+print genverifier::GenVerifier("EUCJP", "EUC-JP", \@eucjp_cls, 6, \@eucjp_st);
+
+
+
diff --git a/intl/chardet/tools/geneuckr.pl b/intl/chardet/tools/geneuckr.pl
new file mode 100644
index 000000000..007810a6a
--- /dev/null
+++ b/intl/chardet/tools/geneuckr.pl
@@ -0,0 +1,42 @@
+#!/usr/local/bin/perl
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+use strict;
+# Load genverifier.pm from the directory holding this script. perl 5.26 and
+# later no longer search the current directory ('.') by default.
+use FindBin qw($Bin);
+use lib $Bin;
+use genverifier;
+
+# Byte classes as [ first byte, last byte, class ] rows. The generator takes
+# the first row that contains a byte, so 0xad-0xaf and 0xc9 precede the wider
+# 0xa1-0xfe row.
+my @euckr_cls = (
+  [ 0x00 , 0x00 , 1 ],
+  [ 0x0e , 0x0f , 0 ],
+  [ 0x1b , 0x1b , 0 ],
+  [ 0x01 , 0x7f , 1 ],
+  [ 0x80 , 0xa0 , 0 ],
+  [ 0xff , 0xff , 0 ],
+  [ 0xad , 0xaf , 3 ],
+  [ 0xc9 , 0xc9 , 3 ],
+  [ 0xa1 , 0xfe , 2 ],
+);
+
+# Transition table: one row per state, one column per class; each cell is
+# the next state (0 = start, 1 = error, 2 = ItsMe).
+my @euckr_st = (
+# 0  1  2  3
+  1, 0, 3, 1, # state 0
+  1, 1, 1, 1, # Error State - 1
+  2, 2, 2, 2, # ItsMe State - 2
+  1, 1, 0, 0, # state 3
+);
+
+# Write the generated verifier header to stdout.
+print genverifier::GenVerifier("EUCKR", "EUC-KR", \@euckr_cls, 4, \@euckr_st);
+
+
+
diff --git a/intl/chardet/tools/geneuctw.pl b/intl/chardet/tools/geneuctw.pl
new file mode 100644
index 000000000..88453155e
--- /dev/null
+++ b/intl/chardet/tools/geneuctw.pl
@@ -0,0 +1,49 @@
+#!/usr/local/bin/perl
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+use strict;
+# Load genverifier.pm from the directory holding this script. perl 5.26 and
+# later no longer search the current directory ('.') by default.
+use FindBin qw($Bin);
+use lib $Bin;
+use genverifier;
+
+# Byte classes as [ first byte, last byte, class ] rows. The generator takes
+# the first row that contains a byte, so 0x8e precedes the wider 0x80-0xa0
+# row.
+my @euctw_cls = (
+  [ 0x00 , 0x00 , 2 ],
+  [ 0x0e , 0x0f , 0 ],
+  [ 0x1b , 0x1b , 0 ],
+  [ 0x01 , 0x7f , 2 ],
+  [ 0x8e , 0x8e , 6 ],
+  [ 0x80 , 0xa0 , 0 ],
+  [ 0xff , 0xff , 0 ],
+  [ 0xa1 , 0xa1 , 3 ],
+  [ 0xa2 , 0xa7 , 4 ],
+  [ 0xa8 , 0xa9 , 5 ],
+  [ 0xaa , 0xc1 , 1 ],
+  [ 0xc2 , 0xc2 , 3 ],
+  [ 0xc3 , 0xc3 , 1 ],
+  [ 0xc4 , 0xfe , 3 ],
+);
+
+# Transition table: one row per state, one column per class; each cell is
+# the next state (0 = start, 1 = error, 2 = ItsMe).
+my @euctw_st = (
+# 0  1  2  3  4  5  6
+  1, 1, 0, 3, 3, 3, 4, # state 0
+  1, 1, 1, 1, 1, 1, 1, # Error State - 1
+  2, 2, 2, 2, 2, 2, 2, # ItsMe State - 2
+  1, 0, 1, 0, 0, 0, 1, # state 3
+  1, 1, 1, 1, 5, 1, 1, # state 4
+  1, 0, 1, 0, 0, 0, 1, # state 5
+);
+
+# Write the generated verifier header to stdout.
+print genverifier::GenVerifier("EUCTW", "x-euc-tw", \@euctw_cls, 7, \@euctw_st);
+
+
+
diff --git a/intl/chardet/tools/gengb18030.pl b/intl/chardet/tools/gengb18030.pl
new file mode 100644
index 000000000..654710b2c
--- /dev/null
+++ b/intl/chardet/tools/gengb18030.pl
@@ -0,0 +1,44 @@
+#!/usr/local/bin/perl
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+use strict;
+# Load genverifier.pm from the directory holding this script. perl 5.26 and
+# later no longer search the current directory ('.') by default.
+use FindBin qw($Bin);
+use lib $Bin;
+use genverifier;
+
+# Byte classes as [ first byte, last byte, class ] rows. The generator takes
+# the first row that contains a byte, so 0x0e-0x0f, 0x1b and the digits
+# 0x30-0x39 precede the wider 0x00-0x3f row.
+my @gb18030_cls = (
+  [ 0x0e , 0x0f , 0 ],
+  [ 0x1b , 0x1b , 0 ],
+  [ 0x30 , 0x39 , 3 ],
+  [ 0x00 , 0x3f , 1 ],
+  [ 0x40 , 0x7e , 2 ],
+  [ 0x7f , 0x7f , 4 ],
+  [ 0x80 , 0x80 , 5 ],
+  [ 0x81 , 0xfe , 6 ],
+  [ 0xff , 0xff , 0 ],
+);
+
+# Transition table: one row per state, one column per class; each cell is
+# the next state (0 = start, 1 = error, 2 = ItsMe).
+my @gb18030_st = (
+# 0  1  2  3  4  5  6
+  1, 0, 0, 0, 0, 0, 3, # state 0
+  1, 1, 1, 1, 1, 1, 1, # Error State - 1
+  2, 2, 2, 2, 2, 2, 2, # ItsMe State - 2
+  1, 1, 0, 4, 1, 0, 0, # state 3, multibytes, 1st byte identified
+  1, 1, 1, 1, 1, 1, 5, # state 4, multibytes, 2nd byte identified
+  1, 1, 1, 2, 1, 1, 1, # state 5, multibytes, 3rd byte identified
+);
+
+# Write the generated verifier header to stdout.
+print genverifier::GenVerifier("gb18030", "gb18030", \@gb18030_cls, 7, \@gb18030_st);
+
+
+
diff --git a/intl/chardet/tools/gengb2312.pl b/intl/chardet/tools/gengb2312.pl
new file mode 100644
index 000000000..57d86926b
--- /dev/null
+++ b/intl/chardet/tools/gengb2312.pl
@@ -0,0 +1,41 @@
+#!/usr/local/bin/perl
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+use strict;
+# Load genverifier.pm from the directory holding this script. perl 5.26 and
+# later no longer search the current directory ('.') by default.
+use FindBin qw($Bin);
+use lib $Bin;
+use genverifier;
+
+# Byte classes as [ first byte, last byte, class ] rows. The generator takes
+# the first row that contains a byte, so 0xaa-0xaf precedes the wider
+# 0xa1-0xfe row.
+my @gb2312_cls = (
+  [ 0x00 , 0x00 , 1 ],
+  [ 0x0e , 0x0f , 0 ],
+  [ 0x1b , 0x1b , 0 ],
+  [ 0x01 , 0x7f , 1 ],
+  [ 0x80 , 0xa0 , 0 ],
+  [ 0xff , 0xff , 0 ],
+  [ 0xaa , 0xaf , 3 ],
+  [ 0xa1 , 0xfe , 2 ],
+);
+
+# Transition table: one row per state, one column per class; each cell is
+# the next state (0 = start, 1 = error, 2 = ItsMe).
+my @gb2312_st = (
+# 0  1  2  3
+  1, 0, 3, 1, # state 0
+  1, 1, 1, 1, # Error State - 1
+  2, 2, 2, 2, # ItsMe State - 2
+  1, 1, 0, 0, # state 3
+);
+
+# Write the generated verifier header to stdout.
+print genverifier::GenVerifier("GB2312", "GB2312", \@gb2312_cls, 4, \@gb2312_st);
+
+
+
diff --git a/intl/chardet/tools/genhz.pl b/intl/chardet/tools/genhz.pl
new file mode 100644
index 000000000..c58eb4675
--- /dev/null
+++ b/intl/chardet/tools/genhz.pl
@@ -0,0 +1,57 @@
+#!/usr/local/bin/perl
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+use strict;
+# Load genverifier.pm from the directory holding this script. perl 5.26 and
+# later no longer search the current directory ('.') by default.
+use FindBin qw($Bin);
+use lib $Bin;
+use genverifier;
+
+#
+# Byte classes:
+#   > 0x80 - 1
+#   ~      - 2
+#   LF     - 3
+#   {      - 4
+#   }      - 5
+#
+# Rows are [ first byte, last byte, class ]. The generator takes the FIRST
+# row that contains a byte, so every single-purpose row must come before the
+# two wide class-0 rows. Previously [0x01,0x1a,0] came first and swallowed LF
+# (0x0a) and SO/SI (0x0e-0x0f), so class 3 was never produced and "~" followed
+# by LF ran into the error state.
+# NOTE(review): this changes the generated class table for 0x0a, 0x0e and
+# 0x0f; diff the regenerated header before committing it.
+my @hz_cls = (
+  [ 0x7e , 0x7e , 2 ],   # ~
+  [ 0x0a , 0x0a , 3 ],   # LF
+  [ 0x7b , 0x7b , 4 ],   # {
+  [ 0x7d , 0x7d , 5 ],   # }
+  [ 0x0e , 0x0f , 1 ],
+  [ 0x1b , 0x1b , 1 ],
+  [ 0x00 , 0x00 , 1 ],
+  [ 0x80 , 0xff , 1 ],
+  [ 0x01 , 0x1a , 0 ],
+  [ 0x1c , 0x7f , 0 ]
+);
+
+# Transition table: one row per state, one column per class; each cell is
+# the next state (0 = start, 1 = error, 2 = ItsMe).
+my @hz_st = (
+# 0  1  2  3  4  5
+  0, 1, 3, 0, 0, 0, # Start State - 0
+  1, 1, 1, 1, 1, 1, # Error State - 1
+  2, 2, 2, 2, 2, 2, # ItsMe State - 2
+  1, 1, 0, 0, 4, 1, # state 3 - got ~
+  5, 1, 6, 1, 5, 5, # state 4 - got ~ {
+  4, 1, 4, 1, 4, 4, # state 5 - got ~ { X
+  4, 1, 4, 1, 4, 2, # state 6 - got ~ { [X X]* ~
+);
+
+# Write the generated verifier header to stdout.
+print genverifier::GenVerifier("HZ", "HZ-GB-2312",
+                               \@hz_cls, 6, \@hz_st);
+
+
+
diff --git a/intl/chardet/tools/geniso2022cn.pl b/intl/chardet/tools/geniso2022cn.pl
new file mode 100644
index 000000000..c4a43caae
--- /dev/null
+++ b/intl/chardet/tools/geniso2022cn.pl
@@ -0,0 +1,58 @@
+#!/usr/local/bin/perl
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+use strict;
+# Load genverifier.pm from the directory holding this script. perl 5.26 and
+# later no longer search the current directory ('.') by default.
+use FindBin qw($Bin);
+use lib $Bin;
+use genverifier;
+
+#
+# Byte classes:
+#   ESC    - 1
+#   > 0x80 - 2
+#   $      - 3
+#   )      - 4
+#   *      - 5
+#   A G    - 6
+#   H      - 7
+#   N O    - 8
+#
+# Rows are [ first byte, last byte, class ]; the generator takes the first
+# row that contains a byte, so the single-byte rows precede the wide class-0
+# rows. The table used to define only classes 0-4 (with ')' as 3 and 'C' as
+# 4), so columns 5-8 of the state table below could never be selected and no
+# escape sequence could reach ItsMe. It now follows the class list above.
+# NOTE(review): this changes the generated class table; diff the regenerated
+# header before committing it.
+my @iso2022cn_cls = (
+  [ 0x24 , 0x24 , 3 ],   # $
+  [ 0x29 , 0x29 , 4 ],   # )
+  [ 0x2a , 0x2a , 5 ],   # *
+  [ 0x41 , 0x41 , 6 ],   # A
+  [ 0x47 , 0x47 , 6 ],   # G
+  [ 0x48 , 0x48 , 7 ],   # H
+  [ 0x4e , 0x4f , 8 ],   # N O
+  [ 0x01 , 0x1a , 0 ],
+  [ 0x1c , 0x7f , 0 ],
+  [ 0x1b , 0x1b , 1 ],
+  [ 0x00 , 0x00 , 2 ],
+  [ 0x80 , 0xff , 2 ]
+);
+
+#
+# ESC$((([)][AG])|([*]H))|[NO])
+#
+# Transition table: one row per state, one column per class; each cell is
+# the next state (0 = start, 1 = error, 2 = ItsMe).
+my @iso2022cn_st = (
+# 0  1  2  3  4  5  6  7  8
+  0, 3, 1, 0, 0, 0, 0, 0, 0, # Start State - 0
+  1, 1, 1, 1, 1, 1, 1, 1, 1, # Error State - 1
+  2, 2, 2, 2, 2, 2, 2, 2, 2, # ItsMe State - 2
+  1, 1, 1, 4, 1, 1, 1, 1, 2, # state 3 - got ESC
+  1, 1, 1, 1, 5, 6, 1, 1, 1, # state 4 - got ESC $
+  1, 1, 1, 1, 1, 1, 2, 1, 1, # state 5 - got ESC $ )
+  1, 1, 1, 1, 1, 1, 1, 2, 1, # state 6 - got ESC $ *
+);
+
+# Write the generated verifier header to stdout.
+print genverifier::GenVerifier("ISO2022CN", "ISO-2022-CN",
+                               \@iso2022cn_cls, 9, \@iso2022cn_st);
+
+
+
diff --git a/intl/chardet/tools/geniso2022jp.pl b/intl/chardet/tools/geniso2022jp.pl
new file mode 100644
index 000000000..4408fbeb0
--- /dev/null
+++ b/intl/chardet/tools/geniso2022jp.pl
@@ -0,0 +1,49 @@
+#!/usr/local/bin/perl
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+use strict;
+# Load genverifier.pm from the directory holding this script. perl 5.26 and
+# later no longer search the current directory ('.') by default.
+use FindBin qw($Bin);
+use lib $Bin;
+use genverifier;
+
+# 1:ESC 3:'(' 4:'B' 5:'J' 6:'@' 7:'$' 8:'D' 9:'I'
+# Rows are [ first byte, last byte, class ]; the generator takes the first
+# row that contains a byte, so the single-byte rows precede the wide class-0
+# rows.
+my @iso2022jp_cls = (
+  [ 0x0e , 0x0f , 2 ],
+  [ 0x28 , 0x28 , 3 ],
+  [ 0x42 , 0x42 , 4 ],
+  [ 0x4a , 0x4a , 5 ],
+  [ 0x40 , 0x40 , 6 ],
+  [ 0x24 , 0x24 , 7 ],
+  [ 0x44 , 0x44 , 8 ],
+  [ 0x49 , 0x49 , 9 ],
+  [ 0x01 , 0x1a , 0 ],
+  [ 0x1c , 0x7f , 0 ],
+  [ 0x1b , 0x1b , 1 ],
+  [ 0x00 , 0x00 , 2 ],
+  [ 0x80 , 0xff , 2 ]
+);
+
+# Transition table: one row per state, one column per class; each cell is
+# the next state (0 = start, 1 = error, 2 = ItsMe).
+my @iso2022jp_st = (
+# 0  1  2  3  4  5  6  7  8  9
+  0, 3, 1, 0, 0, 0, 0, 0, 0, 0, # Start State - 0
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # Error State - 1
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # ItsMe State - 2
+  1, 1, 1, 5, 1, 1, 1, 4, 1, 1, # got ESC
+  1, 1, 1, 6, 2, 1, 2, 1, 1, 1, # got ESC $
+  1, 1, 1, 1, 2, 2, 1, 1, 1, 2, # got ESC (
+  1, 1, 1, 1, 1, 1, 1, 1, 2, 1, # got ESC $ (
+);
+
+# Write the generated verifier header to stdout.
+print genverifier::GenVerifier("ISO2022JP", "ISO-2022-JP",
+                               \@iso2022jp_cls, 10, \@iso2022jp_st);
+
+
+
diff --git a/intl/chardet/tools/geniso2022kr.pl b/intl/chardet/tools/geniso2022kr.pl
new file mode 100644
index 000000000..f56bcf9fb
--- /dev/null
+++ b/intl/chardet/tools/geniso2022kr.pl
@@ -0,0 +1,55 @@
+#!/usr/local/bin/perl
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+use strict;
+# Load genverifier.pm from the directory holding this script. perl 5.26 and
+# later no longer search the current directory ('.') by default.
+use FindBin qw($Bin);
+use lib $Bin;
+use genverifier;
+
+#
+# Byte classes:
+#   ESC    - 1
+#   > 0x80 - 2
+#   $      - 3
+#   )      - 4
+#   C      - 5
+#
+# Rows are [ first byte, last byte, class ]; the generator takes the first
+# row that contains a byte, so '$', ')' and 'C' precede the 0x1c-0x7f row.
+my @iso2022kr_cls = (
+  [ 0x01 , 0x1a , 0 ],
+  [ 0x24 , 0x24 , 3 ],
+  [ 0x29 , 0x29 , 4 ],
+  [ 0x43 , 0x43 , 5 ],
+  [ 0x1c , 0x7f , 0 ],
+  [ 0x1b , 0x1b , 1 ],
+  [ 0x00 , 0x00 , 2 ],
+  [ 0x80 , 0xff , 2 ]
+);
+
+#
+# ESC$)C
+#
+# Transition table: one row per state, one column per class; each cell is
+# the next state (0 = start, 1 = error, 2 = ItsMe).
+my @iso2022kr_st = (
+# 0  1  2  3  4  5
+  0, 3, 1, 0, 0, 0, # Start State - 0
+  1, 1, 1, 1, 1, 1, # Error State - 1
+  2, 2, 2, 2, 2, 2, # ItsMe State - 2
+  1, 1, 1, 4, 1, 1, # state 3 - got ESC
+  1, 1, 1, 1, 5, 1, # state 4 - got ESC $
+  1, 1, 1, 1, 1, 2, # state 5 - got ESC $ )
+);
+
+# Write the generated verifier header to stdout.
+print genverifier::GenVerifier("ISO2022KR", "ISO-2022-KR",
+                               \@iso2022kr_cls, 6, \@iso2022kr_st);
+
+
+
diff --git a/intl/chardet/tools/gensjis.pl b/intl/chardet/tools/gensjis.pl
new file mode 100644
index 000000000..20966d03e
--- /dev/null
+++ b/intl/chardet/tools/gensjis.pl
@@ -0,0 +1,46 @@
+#!/usr/local/bin/perl
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+use strict;
+# Load genverifier.pm from the directory holding this script. perl 5.26 and
+# later no longer search the current directory ('.') by default.
+use FindBin qw($Bin);
+use lib $Bin;
+use genverifier;
+
+# Byte classes as [ first byte, last byte, class ] rows. The generator takes
+# the first row that contains a byte, so the narrow rows at the top win over
+# the wider rows that overlap them further down.
+my @sjis_cls = (
+  [ 0x00 , 0x00 , 0 ],
+  [ 0x0e , 0x0f , 0 ],
+  [ 0x1b , 0x1b , 0 ],
+  [ 0xfd , 0xff , 0 ],
+  [ 0x85 , 0x86 , 3 ],
+  [ 0xeb , 0xec , 5 ],
+  [ 0x01 , 0x1a , 1 ],
+  [ 0x1c , 0x3f , 1 ],
+  [ 0x7f , 0x7f , 1 ],
+  [ 0x40 , 0x7e , 2 ],
+  [ 0xa1 , 0xdf , 2 ],
+  [ 0x80 , 0x9f , 3 ],
+  [ 0xa0 , 0xa0 , 4 ],
+  [ 0xe0 , 0xea , 3 ],
+  [ 0xed , 0xfc , 4 ],
+);
+
+# Transition table: one row per state, one column per class; each cell is
+# the next state (0 = start, 1 = error, 2 = ItsMe).
+my @sjis_st = (
+# 0  1  2  3  4  5
+  1, 0, 0, 3, 1, 1, # Start State - 0
+  1, 1, 1, 1, 1, 1, # Error State - 1
+  2, 2, 2, 2, 2, 2, # ItsMe State - 2
+  1, 1, 0, 0, 0, 0, # State - 3
+);
+
+# Write the generated verifier header to stdout.
+print genverifier::GenVerifier("SJIS", "Shift_JIS", \@sjis_cls, 6, \@sjis_st);
+
+
+
diff --git a/intl/chardet/tools/genutf8.pl b/intl/chardet/tools/genutf8.pl
new file mode 100644
index 000000000..437dd535b
--- /dev/null
+++ b/intl/chardet/tools/genutf8.pl
@@ -0,0 +1,189 @@
+#!/usr/local/bin/perl
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+use strict;
+# Load genverifier.pm from the directory holding this script. perl 5.26 and
+# later no longer search the current directory ('.') by default.
+use FindBin qw($Bin);
+use lib $Bin;
+use genverifier;
+
+
+my(@utf8_cls);   # byte-range -> class rows, filled in below
+my(@utf8_st);    # state transition table, filled in below
+my($utf8_ver);   # text of the generated verifier header
+
+#
+#
+# UTF8 encode the UCS4 into 1 to 4 bytes
+#
+# 1 byte 00 00 00 00 00 00 00 7f
+# 2 bytes 00 00 00 80 00 00 07 ff
+# 3 bytes 00 00 08 00 00 00 ff ff
+# 4 bytes 00 01 00 00 00 10 ff ff
+#
+# However, since Surrogate area should not be encoded into UTF8 as
+# a Surrogate pair, we can remove the surrogate area from UTF8
+#
+# 1 byte 00 00 00 00 00 00 00 7f
+# 2 bytes 00 00 00 80 00 00 07 ff
+# 3 bytes 00 00 08 00 00 00 d7 ff
+# 00 00 e0 00 00 00 ff ff
+# 4 bytes 00 01 00 00 00 10 ff ff
+#
+# Now we break them into 6 bits group for 2-4 bytes UTF8
+#
+# 1 byte 00 7f
+# 2 bytes 02 00 1f 3f
+# 3 bytes 00 20 00 0d 1f 3f
+# 0e 00 00 0f 3f 3f
+# 4 bytes 00 10 00 00 04 0f 3f 3f
+#
+# Break down more
+#
+# 1 byte 00 7f
+# 2 bytes 02 00 1f 3f
+# 3 bytes 00 20 00 00 3f 3f
+# 01 00 00 0c 3f 3f
+# 0d 00 00 0d 1f 3f
+# 0e 00 00 0f 3f 3f
+# 4 bytes 00 10 00 00 00 3f 3f 3f
+# 01 00 00 00 03 3f 3f 3f
+# 04 00 00 00 04 0f 3f 3f
+#
+# Now, add
+# c0 to the lead byte of 2 bytes UTF8
+# e0 to the lead byte of 3 bytes UTF8
+# f0 to the lead byte of 4 bytes UTF8
+# 80 to the trail bytes
+#
+# 1 byte 00 7f
+# 2 bytes c2 80 df bf
+# 3 bytes e0 a0 80 e0 bf bf
+# e1 80 80 ec bf bf
+# ed 80 80 ed 9f bf
+# ee 80 80 ef bf bf
+# 4 bytes f0 90 80 80 f0 bf bf bf
+# f1 80 80 80 f3 bf bf bf
+# f4 80 80 80 f4 8f bf bf
+#
+#
+# Now we can construct our state diagram
+#
+# 0:0x0e,0x0f,0x1b->Error
+# 0:[0-0x7f]->0
+# 0:[c2-df]->3
+# 0:e0->4
+# 0:[e1-ec, ee-ef]->5
+# 0:ed->6
+# 0:f0->7
+# 0:[f1-f3]->8
+# 0:f4->9
+# 0:*->Error
+# 3:[80-bf]->0
+# 3:*->Error
+# 4:[a0-bf]->3
+# 4:*->Error
+# 5:[80-bf]->3
+# 5:*->Error
+# 6:[80-9f]->3
+# 6:*->Error
+# 7:[90-bf]->5
+# 7:*->Error
+# 8:[80-bf]->5
+# 8:*->Error
+# 9:[80-8f]->5
+# 9:*->Error
+#
+# Now, we classified chars into class
+#
+# 0e,0f,1b:k0
+# 00-0d,10-1a,1c-7f:k1
+# 80-8f:k2
+# 90-9f:k3
+# a0-bf:k4
+# c0-c1:k0
+# c2-df:k5
+# e0:k6
+# e1-ec:k7
+# ed:k8
+# ee-ef:k7
+# f0:k9
+# f1-f3:k10
+# f4:k11
+# f5-ff:k0
+#
+# Now, let's put them into array form
+
+# UTF-8 byte classes as [ first byte, last byte, class ] rows.
+@utf8_cls = (
+  [0x00, 0x00,  1],
+  [0x0e, 0x0f,  0],
+  [0x1b, 0x1b,  0],
+  [0x01, 0x0d,  1],
+  [0x10, 0x1a,  1],
+  [0x1c, 0x7f,  1],
+  [0x80, 0x8f,  2],   # trail bytes 80-8f
+  [0x90, 0x9f,  3],   # trail bytes 90-9f
+  [0xa0, 0xbf,  4],   # trail bytes a0-bf
+  [0xc0, 0xc1,  0],
+  [0xc2, 0xdf,  5],   # lead byte of a 2-byte sequence
+  [0xe0, 0xe0,  6],
+  [0xe1, 0xec,  7],
+  [0xed, 0xed,  8],
+  [0xee, 0xef,  7],
+  [0xf0, 0xf0,  9],
+  [0xf1, 0xf3, 10],
+  [0xf4, 0xf4, 11],
+  [0xf5, 0xff,  0],
+);
+#
+# Now, we write the state diagram in class
+#
+# 0:k0->Error
+# 0:k1->0
+# 0:k5->3
+# 0:k6->4
+# 0:k7->5
+# 0:k8->6
+# 0:k9->7
+# 0:k10->8
+# 0:k11->9
+# 0:*->Error
+# 3:k2,k3,k4->0
+# 3:*->Error
+# 4:k4->3
+# 4:*->Error
+# 5:k2,k3,k4->3
+# 5:*->Error
+# 6:k2,k3->3
+# 6:*->Error
+# 7:k3,k4->5
+# 7:*->Error
+# 8:k2,k3,k4->5
+# 8:*->Error
+# 9:k2->5
+# 9:*->Error
+#
+# Now, let's put them into array
+#
+package genverifier;
+# Transition table: one row per state, one column per class k0..k11; each
+# cell is the next state (0 = start, 1 = error, 2 = ItsMe).
+@utf8_st = (
+# 0  1  2  3  4  5  6  7  8  9 10 11
+  1, 0, 1, 1, 1, 3, 4, 5, 6, 7, 8, 9, # state 0 Start
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # state 1 Error
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # state 2 ItsMe
+  1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, # state 3
+  1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, # state 4
+  1, 1, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, # state 5
+  1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, # state 6
+# State 7 follows lead byte f0, whose second byte must be 90-bf (k3,k4).
+# The row used to accept k2,k3 (80-9f): it let the overlong f0 80-8f
+# through and rejected the valid f0 a0-bf.
+  1, 1, 1, 5, 5, 1, 1, 1, 1, 1, 1, 1, # state 7
+  1, 1, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, # state 8
+  1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, # state 9
+);
+
+
+
+# Build the verifier header and write it to stdout.
+$utf8_ver = genverifier::GenVerifier("UTF8", "UTF-8", \@utf8_cls, 12, \@utf8_st);
+print $utf8_ver;
+
+
+
diff --git a/intl/chardet/tools/genverifier.pm b/intl/chardet/tools/genverifier.pm
new file mode 100644
index 000000000..8ccfef4d6
--- /dev/null
+++ b/intl/chardet/tools/genverifier.pm
@@ -0,0 +1,175 @@
+#!/usr/local/bin/perl
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Module boilerplate: declares the package and exports GenVerifier by
+# default through Exporter.
+package genverifier;
+use strict;
+use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION);
+
+use Exporter;
+$VERSION = 1.00;
+@ISA = qw(Exporter);
+
+# Names imported into the caller by a plain "use genverifier;".
+@EXPORT = qw(
+ GenVerifier
+ );
+# Nothing is exported on request only.
+@EXPORT_OK = qw();
+
+# Returns the editor mode line and MPL 2.0 license banner placed at the top
+# of every generated verifier header.
+sub GenNPL {
+  return << "END_MPL";
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+END_MPL
+}
+
+##--------------------------------------------------------------
+# Returns the class that the range table assigns to a byte value.
+#
+#   $char   - byte value to classify
+#   $clstbl - array ref of [ first byte, last byte, class ] rows
+#
+# Rows are tried in order and the first one containing $char wins. When no
+# row matches, a warning is written to STDERR and 1 is returned.
+sub GetClass {
+  my($char, $clstbl) = @_;
+  # Walk the existing rows only. The old index loop ran to "<= @$clstbl",
+  # one past the end, which autovivified an empty row in the caller's table
+  # on every unmatched lookup.
+  foreach my $row (@$clstbl) {
+    if(($row->[0] <= $char) && ($char <= $row->[1]))
+    {
+      return $row->[2];
+    }
+  }
+  # STDOUT is where the generator scripts print the header text, so the
+  # warning must not be mixed into it.
+  print STDERR "WARNING- there are no class for $char\n";
+  # The sub used to fall off the end and return print's result (1); keep
+  # that value, now stated explicitly.
+  return 1;
+};
+##--------------------------------------------------------------
+# Returns the packed-table initializer that refers to the "<name>_cls" array.
+sub GenClassPkg {
+  my($name, $bits) = @_;
+  my $suffix = "_cls";
+  return GenPkg($name, $bits, $suffix);
+}
+##--------------------------------------------------------------
+# Returns the packed-table initializer that refers to the "<name>_st" array.
+sub GenStatePkg {
+  my($name, $bits) = @_;
+  my $suffix = "_st";
+  return GenPkg($name, $bits, $suffix);
+};
+##--------------------------------------------------------------
+# Builds the brace-enclosed initializer naming the four "<N>bits" constants
+# followed by the table identifier "<name><tbl>".
+#
+#   $name - verifier name prefix
+#   $bits - bit width used in the constant names
+#   $tbl  - table suffix appended to $name
+sub GenPkg {
+  my($name, $bits, $tbl) = @_;
+  my @fields = map { $_ . $bits . "bits" }
+               ("eIdxSft", "eSftMsk", "eBitSft", "eUnitMsk");
+  push @fields, $name . $tbl;
+  return " {" . join(", ", @fields) . " }";
+};
+##--------------------------------------------------------------
+# Returns the C source of "static const uint32_t <name>_cls [ 256 / 8 ]":
+# the class of every byte 0x00..0xff, eight per PCK4BITS() entry, each entry
+# followed by a comment giving its byte range.
+#
+#   $name   - verifier name prefix
+#   $clstbl - array ref of [ first byte, last byte, class ] rows
+sub Gen4BitsClass {
+  my($name, $clstbl) = @_;
+  my $out = "static const uint32_t " . $name . "_cls [ 256 / 8 ] = {\n";
+  for(my $base = 0; $base < 0x100; $base += 8) {
+    my @cells;
+    foreach my $byte ($base .. $base + 7) {
+      my $cls = &GetClass($byte, $clstbl);
+      push @cells, sprintf("%2d", $cls);
+    }
+    $out .= "PCK4BITS(" . join(",", @cells);
+    # The final entry of the array takes no trailing comma.
+    $out .= ($base + 8 >= 0x100) ? ") " : "),";
+    $out .= sprintf(" // %02x - %02x\n", $base, ($base + 7));
+  }
+  $out .= "};\n";
+  return $out;
+};
+##--------------------------------------------------------------
+# Returns the full text of a generated verifier header: license banner,
+# do-not-edit note, #include, class table, state table and the
+# "<name>SMModel" definition tying them together.
+#
+#   $name    - identifier prefix for the generated tables and model
+#   $charset - charset name stored as a string in the model
+#   $cls     - array ref of [ first byte, last byte, class ] rows
+#   $numcls  - number of classes, written into the model
+#   $st      - array ref holding the flattened state transition table
+sub GenVerifier {
+  my($name, $charset, $cls, $numcls, $st) = @_;
+  my @parts = (
+    GenNPL(),
+    GenNote(),
+    GenHeader(),
+    Gen4BitsClass($name, $cls),
+    "\n\n",
+    Gen4BitsState($name, $st),
+    "\n\n",
+    "const SMModel " . $name . "SMModel = {\n",
+    GenClassPkg($name, 4),
+    ",\n",
+    " " . $numcls,
+    ",\n",
+    GenStatePkg($name, 4),
+    ",\n",
+    " " . "CHAR_LEN_TABLE(" . $name . "CharLenTable),\n",
+    ' "' . $charset . '",' . "\n",
+    "};\n",
+  );
+  return join("", @parts);
+};
+##--------------------------------------------------------------
+# Returns the C source of "static const uint32_t <name>_st [ N]": the state
+# table packed eight cells per PCK4BITS() entry. Cell values 0, 1 and 2 are
+# written as eStart, eError and eItsMe, anything else as a number. The table
+# is padded to a multiple of eight cells; padding cells come out as eStart.
+#
+#   $name  - verifier name prefix
+#   $sttbl - array ref holding the flattened state transition table
+sub Gen4BitsState {
+  my($name, $sttbl) = @_;
+  # Cell count rounded up to the next multiple of 8.
+  my $padded = (((@$sttbl-1) >> 3) + 1) << 3;
+  my $out = "static const uint32_t " . $name . "_st [ " . ($padded >> 3) . "] = {\n";
+  for(my $base = 0; $base < $padded; $base += 8) {
+    my @cells;
+    foreach my $idx ($base .. $base + 7) {
+      my $state = $sttbl->[$idx];
+      if(0 == $state) {
+        push @cells, "eStart";
+      } elsif(1 == $state) {
+        push @cells, "eError";
+      } elsif(2 == $state) {
+        push @cells, "eItsMe";
+      } else {
+        push @cells, sprintf(" %d", $state);
+      }
+    }
+    $out .= "PCK4BITS(" . join(",", @cells);
+    # The final entry of the array takes no trailing comma.
+    $out .= ($base + 8 >= $padded) ? ") " : "),";
+    $out .= sprintf(" // %02x - %02x\n", $base, ($base + 7));
+  }
+  $out .= "};\n";
+  return $out;
+};
+##--------------------------------------------------------------
+
+# Returns the "do not edit, this file is generated" comment block.
+sub GenNote {
+  return << "END_NOTE";
+/*
+ * DO NOT EDIT THIS DOCUMENT MANUALLY !!!
+ * THIS FILE IS AUTOMATICALLY GENERATED BY THE TOOLS UNDER
+ * mozilla/intl/chardet/tools/
+ * Please contact ftang\@netscape.com or mozilla-i18n\@mozilla.org
+ * if you have any question. Thanks
+ */
+END_NOTE
+}
+
+##--------------------------------------------------------------
+# Returns the #include line every generated verifier header starts with.
+sub GenHeader {
+  return << "END_HEADER";
+#include "nsVerifier.h"
+END_HEADER
+}
+##--------------------------------------------------------------
+1; # this should be the last line