diff options
author | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
---|---|---|
committer | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
commit | 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch) | |
tree | 10027f336435511475e392454359edea8e25895d /intl/chardet/tools | |
parent | 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff) | |
download | UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip |
Add m-esr52 at 52.6.0
Diffstat (limited to 'intl/chardet/tools')
-rw-r--r-- | intl/chardet/tools/GenCyrillicClass.cpp | 135 | ||||
-rw-r--r-- | intl/chardet/tools/charfreq.pl | 50 | ||||
-rw-r--r-- | intl/chardet/tools/charfreqtostat.pl | 95 | ||||
-rwxr-xr-x | intl/chardet/tools/gen.cmd | 18 | ||||
-rw-r--r-- | intl/chardet/tools/genbig5.pl | 42 | ||||
-rw-r--r-- | intl/chardet/tools/gencp1252.pl | 55 | ||||
-rw-r--r-- | intl/chardet/tools/gencyrillic.pl | 65 | ||||
-rw-r--r-- | intl/chardet/tools/geneucjp.pl | 47 | ||||
-rw-r--r-- | intl/chardet/tools/geneuckr.pl | 42 | ||||
-rw-r--r-- | intl/chardet/tools/geneuctw.pl | 49 | ||||
-rw-r--r-- | intl/chardet/tools/gengb18030.pl | 44 | ||||
-rw-r--r-- | intl/chardet/tools/gengb2312.pl | 41 | ||||
-rw-r--r-- | intl/chardet/tools/genhz.pl | 57 | ||||
-rw-r--r-- | intl/chardet/tools/geniso2022cn.pl | 58 | ||||
-rw-r--r-- | intl/chardet/tools/geniso2022jp.pl | 49 | ||||
-rw-r--r-- | intl/chardet/tools/geniso2022kr.pl | 55 | ||||
-rw-r--r-- | intl/chardet/tools/gensjis.pl | 46 | ||||
-rw-r--r-- | intl/chardet/tools/genutf8.pl | 189 | ||||
-rw-r--r-- | intl/chardet/tools/genverifier.pm | 175 |
19 files changed, 1312 insertions, 0 deletions
diff --git a/intl/chardet/tools/GenCyrillicClass.cpp b/intl/chardet/tools/GenCyrillicClass.cpp new file mode 100644 index 000000000..180651a49 --- /dev/null +++ b/intl/chardet/tools/GenCyrillicClass.cpp @@ -0,0 +1,135 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "nsICharsetConverterManager.h" +#include <iostream.h> +#include "nsISupports.h" +#include "nsIComponentManager.h" +#include "nsIServiceManager.h" +#include "nsIUnicodeDecoder.h" +#include "nsIUnicodeEncoder.h" +#include "nsCRT.h" +#include <stdio.h> +#include <stdlib.h> +#if defined(XP_WIN) +#include <io.h> +#endif +#ifdef XP_UNIX +#include <unistd.h> +#endif + +//--------------------------------------------------------------------------- +void header() +{ +char *header= +"#ifndef nsCyrillicClass_h__\n" +"#define nsCyrillicClass_h__\n" +"/* PLEASE DO NOT EDIT THIS FILE DIRECTLY. THIS FILE IS GENERATED BY \n" +" GenCyrllicClass found in mozilla/intl/chardet/tools\n" +" */\n"; + printf(header); +} +//--------------------------------------------------------------------------- +void footer() +{ + printf("#endif\n"); +} +//--------------------------------------------------------------------------- +void npl() +{ +char *npl= +"/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */\n" +"/* This Source Code Form is subject to the terms of the Mozilla Public\n" +" * License, v. 2.0. If a copy of the MPL was not distributed with this\n" +" * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/\n"; + printf(npl); +} +//--------------------------------------------------------------------------- +static nsIUnicodeEncoder* gKOI8REncoder = nullptr; +static nsICharsetConverterManager* gCCM = nullptr; + +//--------------------------------------------------------------------------- +uint8_t CyrillicClass(nsIUnicodeDecoder* decoder, uint8_t byte) +{ + char16_t ubuf[2]; + uint8_t bbuf[2]; + + int32_t blen = 1; + int32_t ulen = 1; + nsresult res = decoder->Convert((char*)&byte, &blen, ubuf, &ulen); + if(NS_SUCCEEDED(res) && (1 == ulen )) + { + ubuf[0] = nsCRT::ToUpper(ubuf[0]); + blen=1; + res = gKOI8REncoder->Convert(ubuf,&ulen,(char*)bbuf,&blen); + if(NS_SUCCEEDED(res) && (1 == blen)) + { + if(0xe0 <= bbuf[0]) + { + return bbuf[0] - (uint8_t)0xdf; + } + } + } + return 0; +} +//--------------------------------------------------------------------------- +void genCyrillicClass(const char* name, const char* charset) +{ + nsIUnicodeDecoder *decoder = nullptr; + nsresult res = NS_OK; + nsAutoString str(charset); + res = gCCM->GetUnicodeDecoder(&str, &decoder); + if(NS_FAILED(res)) + { + printf("cannot locate %s Decoder\n", charset); + return; + } + printf("static const uint8_t %sMap [128] = {\n",name); + uint8_t i,j; + for(i=0x80;i!=0x00;i+=0x10) + { + for(j=0;j<=0x0f;j++) + { + uint8_t cls = CyrillicClass(decoder, i+j); + printf(" %2d, ",cls); + } + printf("\n"); + } + printf("};\n"); + NS_IF_RELEASE(decoder); +} +//--------------------------------------------------------------------------- + + +int main(int argc, char** argv) { + nsresult res = nullptr; + + nsCOMPtr<nsICharsetConverterManager> gCCM = do_GetService(kCharsetConverterManagerCID, &res); + + if(NS_FAILED(res) && (nullptr != gCCM)) + { + printf("cannot locate CharsetConverterManager\n"); + return(-1); + } + nsAutoString koi8r("KOI8-R"); + res = gCCM->GetUnicodeEncoder(&koi8r,&gKOI8REncoder); + if(NS_FAILED(res) && (nullptr != gKOI8REncoder)) + { + printf("cannot locate KOI8-R Encoder\n"); + return(-1); 
+ } + + + npl(); + header(); + + genCyrillicClass("KOI8", "KOI8-R"); + genCyrillicClass("CP1251", "windows-1251"); + genCyrillicClass("IBM866", "IBM866"); + genCyrillicClass("ISO88595", "ISO-8859-5"); + genCyrillicClass("MacCyrillic", "x-mac-cyrillic"); + footer(); + NS_IF_RELEASE(gKOI8REncoder); + return(0); +}; diff --git a/intl/chardet/tools/charfreq.pl b/intl/chardet/tools/charfreq.pl new file mode 100644 index 000000000..4232d4765 --- /dev/null +++ b/intl/chardet/tools/charfreq.pl @@ -0,0 +1,50 @@ +#!/usr/bin/perl +#!/usr/bin/perl +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +open (STAT,$ARGV[0]) || die " cannot open data file $ARGV[0]\n"; +@count; +while(<STAT>) +{ + @k = split(/\s+/, $_); + $count{$k[0]} = $k[1]; +} +$count = 0; +while(<STDIN>) +{ + @ck = split /\s*/, $_; + $s = 0; + $fb = 0; + $cl = $#ck; + $j = 0; + while($j < $cl) { + $cc = unpack("C", $ck[$j]); + if(0 eq $s ) { + if($cc > 0x80) { + if($cc > 0xa0) { + $fb = $ck[$j]; + $s = 2; + } else { + $s = 1; + } + } + } elsif (1 eq $s) { + } else { + if($cc > 0xa0) { + $fb .= $ck[$j]; + $count{$fb}++; + print $fb . " " .$count{$fb} . "\n"; + $s = 0; + } else { + $s = 1; + } + } + $j = $j + 1; + } +} +foreach $c (sort(keys( %count ))) +{ + print $c . " ". $count{$c} . "\n"; +} diff --git a/intl/chardet/tools/charfreqtostat.pl b/intl/chardet/tools/charfreqtostat.pl new file mode 100644 index 000000000..04af0c82c --- /dev/null +++ b/intl/chardet/tools/charfreqtostat.pl @@ -0,0 +1,95 @@ +#!/usr/bin/perl +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
# Converts "<two-byte char> <count>" lines read from STDIN into a C
# initializer holding, for lead and trail bytes 0xa1..0xfe, the relative
# frequency of each byte value plus the standard deviation, mean and weight
# of each distribution. The result is written to STDOUT.

# Returns the mode line and license comment placed at the top of the output.
sub GenNPL {
  my($ret) = << "END_NPL";
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
END_NPL

  return $ret;
}

print GenNPL();
$total = 0;   # sum of all accepted counts
@h = ();      # per lead byte (index = byte - 0xa1) summed counts
@l = ();      # per trail byte (index = byte - 0xa1) summed counts

while(<STDIN>)
{
  @k = split(/\s+/, $_);
  @i = unpack("CCCC", $k[0]);
# printf("%x %x %s",$i[0] , $i[1] , "[" . $k[0] . "] " . $i . " " . $j . " " . $k[1] ."\n");
  # Only characters whose two bytes are both >= 0xa1 contribute.
  if((0xA1 <= $i[0]) && (0xA1 <= $i[1])){
    $total += $k[1];
    $v = $i[0] - 0x00A1;
    $h[$v] += $k[1];
    $u = $i[1] - 0x00A1;
    $l[$u] += $k[1];
# print "hello $v $h[$v] $u $l[$u]\n";
  }
}

# Every statistic below divides by $total; fail with a clear message rather
# than with Perl's bare "Illegal division by zero".
die "charfreqtostat: no two-byte character counts found on STDIN\n"
  unless $total > 0;

# Relative frequencies and their means over the 94 byte values 0xa1..0xfe.
$ffh = 0.0;
$ffl = 0.0;
for($i=0x00A1;$i< 0x00FF ; $i++)
{
  $fh[$i - 0x00a1] = $h[$i- 0x00a1] / $total;
  $ffh += $fh[$i - 0x00a1];

  $fl[$i - 0x00a1] = $l[$i- 0x00a1] / $total;
  $ffl += $fl[$i - 0x00a1];
}
$mh = $ffh / 94.0;
$ml = $ffl / 94.0;

# Standard deviation of each distribution.
$sumh=0.0;
$suml=0.0;
for($i=0x00A1;$i< 0x00FF ; $i++)
{
  $sh = $fh[$i - 0x00a1] - $mh;
  $sh *= $sh;
  $sumh += $sh;

  $sl = $fl[$i - 0x00a1] - $ml;
  $sl *= $sl;
  $suml += $sl;
}
$sumh /= 94.0;
$suml /= 94.0;
$stdh = sqrt($sumh);
$stdl = sqrt($suml);

print "{\n";
print " {\n";
for($i=0x00A1;$i< 0x00FF ; $i++)
{
  # The last element of the C array takes no trailing comma.
  if($i == 0xfe) {
    printf(" %.6ff \/\/ FreqH[%2x]\n", $fh[$i - 0x00a1] , $i);
  } else {
    printf(" %.6ff, \/\/ FreqH[%2x]\n", $fh[$i - 0x00a1] , $i);
  }
}
print " },\n";
printf ("%.6ff, \/\/ Lead Byte StdDev\n", $stdh);
printf ("%.6ff, \/\/ Lead Byte Mean\n", $mh);
printf ("%.6ff, \/\/ Lead Byte Weight\n", $stdh / ($stdh + $stdl));
print " {\n";
for($i=0x00A1;$i< 0x00FF ; $i++)
{
  if($i == 0xfe) {
    printf(" %.6ff \/\/ FreqL[%2x]\n", $fl[$i - 0x00a1] , $i);
  } else {
    printf(" %.6ff, \/\/ FreqL[%2x]\n", $fl[$i - 0x00a1] , $i);
  }
}
print " },\n";
printf ("%.6ff, \/\/ Trail Byte StdDev\n", $stdl);
printf ("%.6ff, \/\/ Trail Byte Mean\n", $ml);
printf ("%.6ff \/\/ Trial Byte Weight\n", $stdl / ($stdh + $stdl));
print "};\n";
diff --git a/intl/chardet/tools/gen.cmd b/intl/chardet/tools/gen.cmd
new file mode 100755
index 000000000..56ca34bc9
--- /dev/null
+++ b/intl/chardet/tools/gen.cmd
@@ -0,0 +1,18 @@
REM This Source Code Form is subject to the terms of the Mozilla Public
REM License, v. 2.0. If a copy of the MPL was not distributed with this
REM file, You can obtain one at http://mozilla.org/MPL/2.0/.

REM Regenerates the verifier headers in ..\src from the generator scripts.
REM NOTE(review): genucs2be.pl and genucs2le.pl are not among the files this
REM commit adds to intl/chardet/tools, and gengb18030.pl is added but never
REM invoked here - confirm whether that is intended.
perl gencp1252.pl > ..\src\nsCP1252Verifier.h
perl geneucjp.pl > ..\src\nsEUCJPVerifier.h
perl geniso2022jp.pl > ..\src\nsISO2022JPVerifier.h
perl gensjis.pl > ..\src\nsSJISVerifier.h
perl genutf8.pl > ..\src\nsUTF8Verifier.h
perl geneuckr.pl > ..\src\nsEUCKRVerifier.h
perl gengb2312.pl > ..\src\nsGB2312Verifier.h
perl genbig5.pl > ..\src\nsBIG5Verifier.h
perl geneuctw.pl > ..\src\nsEUCTWVerifier.h
perl genucs2be.pl > ..\src\nsUCS2BEVerifier.h
perl genucs2le.pl > ..\src\nsUCS2LEVerifier.h
perl genhz.pl > ..\src\nsHZVerifier.h
perl geniso2022kr.pl > ..\src\nsISO2022KRVerifier.h
perl geniso2022cn.pl > ..\src\nsISO2022CNVerifier.h
diff --git a/intl/chardet/tools/genbig5.pl b/intl/chardet/tools/genbig5.pl
new file mode 100644
index 000000000..8e3a777cb
--- /dev/null
+++ b/intl/chardet/tools/genbig5.pl
@@ -0,0 +1,42 @@
#!/usr/local/bin/perl
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Prints the Big5 verifier source built by genverifier::GenVerifier from the
# byte-class table and state table defined below.

use strict;
require "genverifier.pm";
use genverifier;


my(@big5_cls);
my(@big5_st);
my($big5_ver);


# Each entry is [ first byte, last byte, class ]. genverifier::GetClass walks
# the list in order and returns the class of the first range containing the
# byte, so 0x0e-0x0f and 0x1b get class 0 even though the later 0x01-0x3f
# range also covers them.
@big5_cls = (
  [ 0x00 , 0x00 , 1 ],
  [ 0x0e , 0x0f , 0 ],
  [ 0x1b , 0x1b , 0 ],
  [ 0x01 , 0x3f , 1 ],
  [ 0x40 , 0x7e , 2 ],
  [ 0x7f , 0x7f , 1 ],
  [ 0xff , 0xff , 0 ],
  [ 0x80 , 0xa0 , 4 ],
  [ 0xa1 , 0xfe , 3 ],
);

# One row per state, one column per class. genverifier::Gen4BitsState emits
# 0, 1 and 2 as eStart, eError and eItsMe and any other value as a number.
package genverifier;
@big5_st = (
# 0  1  2  3  4
  1, 0, 0, 3, 1, # state 0
  1, 1, 1, 1, 1, # Error State - 1
  2, 2, 2, 2, 2, # ItsMe State - 2
  1, 1, 0, 0, 0, # state 3
);


# Arguments: table name prefix, charset name, class table, number of classes,
# state table.
$big5_ver = genverifier::GenVerifier("BIG5", "Big5", \@big5_cls, 5, \@big5_st);
print $big5_ver;



diff --git a/intl/chardet/tools/gencp1252.pl b/intl/chardet/tools/gencp1252.pl
new file mode 100644
index 000000000..debc53ca5
--- /dev/null
+++ b/intl/chardet/tools/gencp1252.pl
@@ -0,0 +1,55 @@
#!/usr/local/bin/perl
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Prints the windows-1252 verifier source built by genverifier::GenVerifier
# from the byte-class table and state table defined below.

use strict;
require "genverifier.pm";
use genverifier;


my(@cp1252_cls);
my(@cp1252_st);
my($cp1252_ver);


# Each entry is [ first byte, last byte, class ]. genverifier::GetClass
# returns the class of the first matching range, so the final 0x00-0xff entry
# only classifies bytes that no earlier entry claimed.
@cp1252_cls = (
  [ 0x00 , 0x00 , 1 ],
  [ 0x0e , 0x0f , 0 ],
  [ 0x1b , 0x1b , 0 ],
  [ 0x81 , 0x81 , 0 ],
  [ 0x8d , 0x8d , 0 ],
  [ 0x8f , 0x8f , 0 ],
  [ 0x90 , 0x90 , 0 ],
  [ 0x9d , 0x9d , 0 ],
  [ 0xc0 , 0xd6 , 1 ],
  [ 0xd8 , 0xf6 , 1 ],
  [ 0xf8 , 0xff , 1 ],
  [ 0x8a , 0x8a , 1 ],
  [ 0x8c , 0x8c , 1 ],
  [ 0x8e , 0x8e , 1 ],
  [ 0x9a , 0x9a , 1 ],
  [ 0x9c , 0x9c , 1 ],
  [ 0x9e , 0x9e , 1 ],
  [ 0x9f , 0x9f , 1 ],
  [ 0x00 , 0xff , 2 ],
);

# One row per state, one column per class.
package genverifier;
@cp1252_st = (
# 0  1  2
  1, 3, 0, # Start State - 0
  1, 1, 1, # Error State - 1
  2, 2, 2, # ItsMe State - 2
  1, 4, 0, # State - 3
  1, 5, 4, # State - 4
  1, 1, 4, # State - 5
);


$cp1252_ver = genverifier::GenVerifier("CP1252", "windows-1252",
                                       \@cp1252_cls, 3, \@cp1252_st);
print $cp1252_ver;



diff --git a/intl/chardet/tools/gencyrillic.pl b/intl/chardet/tools/gencyrillic.pl
new file mode 100644
index 000000000..51bd6e456
--- /dev/null
+++ b/intl/chardet/tools/gencyrillic.pl
@@ -0,0 +1,65 @@
#!/usr/local/bin/perl

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Writes ../src/nsCyrillicProb.h: a 33x33 uint16_t table built from the
# byte-pair statistics in StatKoi.pm. Row 0 and column 0 are all zeros; the
# remaining 32x32 cells hold the StatKoi value for each pair of bytes in
# 0xc0..0xdf, or 0 when StatKoi has no entry for the pair.

use StatKoi '.' ;

my $outfile = "../src/nsCyrillicProb.h";
# Name the file that is actually being opened and include the OS error.
open(FILE, "> $outfile") or die "cannot open $outfile: $!";

# NOTE(review): the include guard below says nsCyrillicDetector_h__ although
# the generated file is nsCyrillicProb.h - confirm it cannot collide with
# another header's guard.
print FILE <<EOF;
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef nsCyrillicDetector_h__
#define nsCyrillicDetector_h__
/*
 DO NOT EDIT THIS FILE !!!
 This file is generated by the perl script in
 mozilla/intl/chardet/tools/gencyrillic.pl

 To ues that script, you need to grab StatKoi.pm file from
 the "Cyrillic Software Suite" written by John Neystdt.
 http://www.neystadt.org/cyrillic (You can also find it from CPAN)
 */
EOF
$table = \%Lingua::DetectCharset::StatKoi::StatsTableKoi;
print FILE "const uint16_t gCyrillicProb[33][33] = {";

# Row 0: 33 zeros.
print FILE "{ \n";
print FILE "0,\n";
for($j = 0xc0; $j < 0xe0; $j++)
{
  print FILE "0, \t";
  if( 7 == ( $j % 8) )
  {
    print FILE "\n";   # eight values per output line
  }
}
print FILE "\n}, \n";

# Rows 1..32: a leading zero, then one value per second byte 0xc0..0xdf.
for($i = 0xc0; $i < 0xe0; $i++)
{
  print FILE "{ \n";
  print FILE "0,\n";
  for($j = 0xc0; $j < 0xe0; $j++)
  {
    $key = chr($i) . chr($j);
    if(exists($table->{$key}))
    {
      $v = $table->{$key};
    } else {
      $v = 0;
    }
    print FILE $v . ", \t";
    if( 7 == ( $j % 8) )
    {
      print FILE "\n";
    }
  }
  print FILE "\n}, \n";
}
print FILE "};\n";
print FILE "#endif\n";
# Buffered write errors only surface at close time.
close(FILE) or die "cannot write $outfile: $!";
diff --git a/intl/chardet/tools/geneucjp.pl b/intl/chardet/tools/geneucjp.pl
new file mode 100644
index 000000000..692be15ab
--- /dev/null
+++ b/intl/chardet/tools/geneucjp.pl
@@ -0,0 +1,47 @@
#!/usr/local/bin/perl
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Prints the EUC-JP verifier source built by genverifier::GenVerifier from
# the byte-class table and state table defined below.

use strict;
require "genverifier.pm";
use genverifier;


my(@eucjp_cls);
my(@eucjp_st);
my($eucjp_ver);


# Each entry is [ first byte, last byte, class ]; genverifier::GetClass uses
# the first range that contains the byte. The closing 0x80-0xff entry is
# therefore a catch-all for high bytes not claimed above it; the 0x80-0x8d
# and 0xa0 entries assign the same class 5 that the catch-all would.
@eucjp_cls = (
  [ 0x0e , 0x0f , 5 ],
  [ 0xe0 , 0xfe , 0 ],
  [ 0x8e , 0x8e , 1 ],
  [ 0xa1 , 0xdf , 2 ],
  [ 0x8f , 0x8f , 3 ],
  [ 0x01 , 0x1a , 4 ],
  [ 0x1c , 0x7f , 4 ],
  [ 0x00 , 0x00 , 4 ],
  [ 0x1b , 0x1b , 5 ],
  [ 0x80 , 0x8d , 5 ],
  [ 0xa0 , 0xa0 , 5 ],
  [ 0x80 , 0xff , 5 ]
);

# One row per state, one column per class.
package genverifier;
@eucjp_st = (
# 0  1  2  3  4  5
  3, 4, 3, 5, 0, 1, # state 0
  1, 1, 1, 1, 1, 1, # Error State - 1
  2, 2, 2, 2, 2, 2, # ItsMe State - 2
  0, 1, 0, 1, 1, 1, # state 3
  1, 1, 0, 1, 1, 1, # state 4
  3, 1, 3, 1, 1, 1, # state 5
);


$eucjp_ver = genverifier::GenVerifier("EUCJP", "EUC-JP", \@eucjp_cls, 6, \@eucjp_st);
print $eucjp_ver;



diff --git a/intl/chardet/tools/geneuckr.pl b/intl/chardet/tools/geneuckr.pl
new file mode 100644
index 000000000..007810a6a
--- /dev/null
+++ b/intl/chardet/tools/geneuckr.pl
@@ -0,0 +1,42 @@
#!/usr/local/bin/perl
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Prints the EUC-KR verifier source built by genverifier::GenVerifier from
# the byte-class table and state table defined below.

use strict;
require "genverifier.pm";
use genverifier;


my(@euckr_cls);
my(@euckr_st);
my($euckr_ver);


# Each entry is [ first byte, last byte, class ]; genverifier::GetClass uses
# the first range that contains the byte. 0xad-0xaf and 0xc9 are listed ahead
# of 0xa1-0xfe so they get class 3, which the state table rejects in state 0
# but accepts in state 3.
@euckr_cls = (
  [ 0x00 , 0x00 , 1 ],
  [ 0x0e , 0x0f , 0 ],
  [ 0x1b , 0x1b , 0 ],
  [ 0x01 , 0x7f , 1 ],
  [ 0x80 , 0xa0 , 0 ],
  [ 0xff , 0xff , 0 ],
  [ 0xad , 0xaf , 3 ],
  [ 0xc9 , 0xc9 , 3 ],
  [ 0xa1 , 0xfe , 2 ],
);

# One row per state, one column per class.
package genverifier;
@euckr_st = (
# 0  1  2  3
  1, 0, 3, 1, # state 0
  1, 1, 1, 1, # Error State - 1
  2, 2, 2, 2, # ItsMe State - 2
  1, 1, 0, 0, # state 3
);


$euckr_ver = genverifier::GenVerifier("EUCKR", "EUC-KR", \@euckr_cls, 4, \@euckr_st);
print $euckr_ver;



diff --git a/intl/chardet/tools/geneuctw.pl b/intl/chardet/tools/geneuctw.pl
new file mode 100644
index 000000000..88453155e
--- /dev/null
+++ b/intl/chardet/tools/geneuctw.pl
@@ -0,0 +1,49 @@
#!/usr/local/bin/perl
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Prints the x-euc-tw verifier source built by genverifier::GenVerifier from
# the byte-class table and state table defined below.

use strict;
require "genverifier.pm";
use genverifier;


my(@euctw_cls);
my(@euctw_st);
my($euctw_ver);


# Each entry is [ first byte, last byte, class ]; genverifier::GetClass uses
# the first range that contains the byte, which is why 0x8e (class 6) is
# listed before the 0x80-0xa0 range that also covers it.
@euctw_cls = (
  [ 0x00 , 0x00 , 2 ],
  [ 0x0e , 0x0f , 0 ],
  [ 0x1b , 0x1b , 0 ],
  [ 0x01 , 0x7f , 2 ],
  [ 0x8e , 0x8e , 6 ],
  [ 0x80 , 0xa0 , 0 ],
  [ 0xff , 0xff , 0 ],
  [ 0xa1 , 0xa1 , 3 ],
  [ 0xa2 , 0xa7 , 4 ],
  [ 0xa8 , 0xa9 , 5 ],
  [ 0xaa , 0xc1 , 1 ],
  [ 0xc2 , 0xc2 , 3 ],
  [ 0xc3 , 0xc3 , 1 ],
  [ 0xc4 , 0xfe , 3 ],
);

# One row per state, one column per class.
package genverifier;
@euctw_st = (
# 0  1  2  3  4  5  6
  1, 1, 0, 3, 3, 3, 4, # state 0
  1, 1, 1, 1, 1, 1, 1, # Error State - 1
  2, 2, 2, 2, 2, 2, 2, # ItsMe State - 2
  1, 0, 1, 0, 0, 0, 1, # state 3
  1, 1, 1, 1, 5, 1, 1, # state 4
  1, 0, 1, 0, 0, 0, 1, # state 5
);


$euctw_ver = genverifier::GenVerifier("EUCTW", "x-euc-tw", \@euctw_cls, 7, \@euctw_st);
print $euctw_ver;



diff --git a/intl/chardet/tools/gengb18030.pl b/intl/chardet/tools/gengb18030.pl
new file mode 100644
index 000000000..654710b2c
--- /dev/null
+++ b/intl/chardet/tools/gengb18030.pl
@@ -0,0 +1,44 @@
#!/usr/local/bin/perl
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Prints the gb18030 verifier source built by genverifier::GenVerifier from
# the byte-class table and state table defined below.

use strict;
require "genverifier.pm";
use genverifier;


my(@gb18030_cls);
my(@gb18030_st);
my($gb18030_ver);


# Each entry is [ first byte, last byte, class ]; genverifier::GetClass uses
# the first range that contains the byte, so 0x30-0x39 (class 3) must stay
# ahead of the wider 0x00-0x3f range.
@gb18030_cls = (
  [ 0x0e , 0x0f , 0 ],
  [ 0x1b , 0x1b , 0 ],
  [ 0x30 , 0x39 , 3 ],
  [ 0x00 , 0x3f , 1 ],
  [ 0x40 , 0x7e , 2 ],
  [ 0x7f , 0x7f , 4 ],
  [ 0x80 , 0x80 , 5 ],
  [ 0x81 , 0xfe , 6 ],
  [ 0xff , 0xff , 0 ],
);

# One row per state, one column per class.
package genverifier;
@gb18030_st = (
# 0  1  2  3  4  5  6
  1, 0, 0, 0, 0, 0, 3, # state 0
  1, 1, 1, 1, 1, 1, 1, # Error State - 1
  2, 2, 2, 2, 2, 2, 2, # ItsMe State - 2
  1, 1, 0, 4, 1, 0, 0, # state 3, multibytes, 1st byte identified
  1, 1, 1, 1, 1, 1, 5, # state 4, multibytes, 2nd byte identified
  1, 1, 1, 2, 1, 1, 1, # state 5, multibytes, 3rd byte identified
);


$gb18030_ver = genverifier::GenVerifier("gb18030", "gb18030", \@gb18030_cls, 7, \@gb18030_st);
print $gb18030_ver;



diff --git a/intl/chardet/tools/gengb2312.pl b/intl/chardet/tools/gengb2312.pl
new file mode 100644
index 000000000..57d86926b
--- /dev/null
+++ b/intl/chardet/tools/gengb2312.pl
@@ -0,0 +1,41 @@
#!/usr/local/bin/perl
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Prints the GB2312 verifier source built by genverifier::GenVerifier from
# the byte-class table and state table defined below.

use strict;
require "genverifier.pm";
use genverifier;


my(@gb2312_cls);
my(@gb2312_st);
my($gb2312_ver);


# Each entry is [ first byte, last byte, class ]; genverifier::GetClass uses
# the first range that contains the byte. 0xaa-0xaf precedes 0xa1-0xfe so it
# gets class 3, which the state table rejects in state 0 but accepts in
# state 3.
@gb2312_cls = (
  [ 0x00 , 0x00 , 1 ],
  [ 0x0e , 0x0f , 0 ],
  [ 0x1b , 0x1b , 0 ],
  [ 0x01 , 0x7f , 1 ],
  [ 0x80 , 0xa0 , 0 ],
  [ 0xff , 0xff , 0 ],
  [ 0xaa , 0xaf , 3 ],
  [ 0xa1 , 0xfe , 2 ],
);

# One row per state, one column per class.
package genverifier;
@gb2312_st = (
# 0  1  2  3
  1, 0, 3, 1, # state 0
  1, 1, 1, 1, # Error State - 1
  2, 2, 2, 2, # ItsMe State - 2
  1, 1, 0, 0, # state 3
);


$gb2312_ver = genverifier::GenVerifier("GB2312", "GB2312", \@gb2312_cls, 4, \@gb2312_st);
print $gb2312_ver;



diff --git a/intl/chardet/tools/genhz.pl b/intl/chardet/tools/genhz.pl
new file mode 100644
index 000000000..c58eb4675
--- /dev/null
+++ b/intl/chardet/tools/genhz.pl
@@ -0,0 +1,57 @@
#!/usr/local/bin/perl
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Prints the HZ-GB-2312 verifier source built by genverifier::GenVerifier
# from the byte-class table and state table defined below.

use strict;
require "genverifier.pm";
use genverifier;


my(@hz_cls);
my(@hz_st);
my($hz_ver);


#
# Intended character classes:
#
# > 0x80 - 1
# ~      - 2
# LF     - 3
# {      - 4
# }      - 5
#
# Each entry is [ first byte, last byte, class ]; genverifier::GetClass uses
# the first range that contains the byte.
# NOTE(review): because 0x01-0x1a comes first, LF (0x0a) and 0x0e-0x0f are
# classified as 0; the later 0x0a (class 3) and 0x0e-0x0f (class 1) entries
# can never match. Confirm whether that ordering is intended.
@hz_cls = (
  [ 0x01 , 0x1a , 0 ],
  [ 0x7e , 0x7e , 2 ],
  [ 0x0a , 0x0a , 3 ],
  [ 0x7b , 0x7b , 4 ],
  [ 0x7d , 0x7d , 5 ],
  [ 0x1c , 0x7f , 0 ],
  [ 0x0e , 0x0f , 1 ],
  [ 0x1b , 0x1b , 1 ],
  [ 0x00 , 0x00 , 1 ],
  [ 0x80 , 0xff , 1 ]
);


#
# One row per state, one column per class.
#
package genverifier;
@hz_st = (
# 0  1  2  3  4  5
  0, 1, 3, 0, 0, 0, # Start State - 0
  1, 1, 1, 1, 1, 1, # Error State - 1
  2, 2, 2, 2, 2, 2, # ItsMe State - 2
  1, 1, 0, 0, 4, 1, # state 3 - got ~
  5, 1, 6, 1, 5, 5, # state 4 - got ~ {
  4, 1, 4, 1, 4, 4, # state 5 - got ~ { X
  4, 1, 4, 1, 4, 2, # state 6 - got ~ { [X X]* ~
);

$hz_ver = genverifier::GenVerifier("HZ", "HZ-GB-2312",
                                   \@hz_cls, 6, \@hz_st);
print $hz_ver;



diff --git a/intl/chardet/tools/geniso2022cn.pl b/intl/chardet/tools/geniso2022cn.pl
new file mode 100644
index 000000000..c4a43caae
--- /dev/null
+++ b/intl/chardet/tools/geniso2022cn.pl
@@ -0,0 +1,58 @@
#!/usr/local/bin/perl
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Prints the ISO-2022-CN verifier source built by genverifier::GenVerifier
# from the byte-class table and state table defined below.

use strict;
require "genverifier.pm";
use genverifier;


my(@iso2022cn_cls);
my(@iso2022cn_st);
my($iso2022cn_ver);


#
# Intended character classes:
#
# ESC    - 1
# > 0x80 - 2
# $      - 3
# )      - 4
# *      - 5
# A G    - 6
# H      - 7
# N O    - 8
#
# Each entry is [ first byte, last byte, class ]; genverifier::GetClass uses
# the first range that contains the byte.
# NOTE(review): the table does not match the list above. It gives ')' (0x29)
# class 3 and 'C' (0x43) class 4, has no entry for '$', '*', 'A', 'G', 'H',
# 'N' or 'O', and never assigns classes 5-8, so the state-table columns for
# those classes cannot be reached. Confirm the intended mapping before
# regenerating the verifier.
@iso2022cn_cls = (
  [ 0x01 , 0x1a , 0 ],
  [ 0x29 , 0x29 , 3 ],
  [ 0x43 , 0x43 , 4 ],
  [ 0x1c , 0x7f , 0 ],
  [ 0x1b , 0x1b , 1 ],
  [ 0x00 , 0x00 , 2 ],
  [ 0x80 , 0xff , 2 ]
);


#
# Sequences to recognise: ESC$((([)][AG])|([*]H))|[NO])
#
# One row per state, one column per class.
package genverifier;
@iso2022cn_st = (
# 0  1  2  3  4  5  6  7  8
  0, 3, 1, 0, 0, 0, 0, 0, 0, # Start State - 0
  1, 1, 1, 1, 1, 1, 1, 1, 1, # Error State - 1
  2, 2, 2, 2, 2, 2, 2, 2, 2, # ItsMe State - 2
  1, 1, 1, 4, 1, 1, 1, 1, 2, # state 3 - got ESC
  1, 1, 1, 1, 5, 6, 1, 1, 1, # state 4 - got ESC $
  1, 1, 1, 1, 1, 1, 2, 1, 1, # state 5 - got ESC $ )
  1, 1, 1, 1, 1, 1, 1, 2, 1, # state 6 - got ESC $ *
);

$iso2022cn_ver = genverifier::GenVerifier("ISO2022CN", "ISO-2022-CN",
                                          \@iso2022cn_cls, 9, \@iso2022cn_st);
print $iso2022cn_ver;



diff --git a/intl/chardet/tools/geniso2022jp.pl b/intl/chardet/tools/geniso2022jp.pl
new file mode 100644
index 000000000..4408fbeb0
--- /dev/null
+++ b/intl/chardet/tools/geniso2022jp.pl
@@ -0,0 +1,49 @@
#!/usr/local/bin/perl
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Prints the ISO-2022-JP verifier source built by genverifier::GenVerifier
# from the byte-class table and state table defined below.

use strict;
require "genverifier.pm";
use genverifier;


my(@iso2022jp_cls);
my(@iso2022jp_st);
my($iso2022jp_ver);

# 1:ESC 3:'(' 4:'B' 5:'J' 6:'@' 7:'$' 8:'D' 9:'I'
#
# Each entry is [ first byte, last byte, class ]; genverifier::GetClass uses
# the first range that contains the byte, so the single-character entries
# must stay ahead of the 0x01-0x1a and 0x1c-0x7f ranges that also cover them.
@iso2022jp_cls = (
  [ 0x0e , 0x0f , 2 ],
  [ 0x28 , 0x28 , 3 ],
  [ 0x42 , 0x42 , 4 ],
  [ 0x4a , 0x4a , 5 ],
  [ 0x40 , 0x40 , 6 ],
  [ 0x24 , 0x24 , 7 ],
  [ 0x44 , 0x44 , 8 ],
  [ 0x49 , 0x49 , 9 ],
  [ 0x01 , 0x1a , 0 ],
  [ 0x1c , 0x7f , 0 ],
  [ 0x1b , 0x1b , 1 ],
  [ 0x00 , 0x00 , 2 ],
  [ 0x80 , 0xff , 2 ]
);

# One row per state, one column per class.
package genverifier;
@iso2022jp_st = (
# 0  1  2  3  4  5  6  7  8  9
  0, 3, 1, 0, 0, 0, 0, 0, 0, 0, # Start State - 0
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # Error State - 1
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # ItsMe State - 2
  1, 1, 1, 5, 1, 1, 1, 4, 1, 1, # got ESC
  1, 1, 1, 6, 2, 1, 2, 1, 1, 1, # got ESC $
  1, 1, 1, 1, 2, 2, 1, 1, 1, 2, # got ESC (
  1, 1, 1, 1, 1, 1, 1, 1, 2, 1, # got ESC $ (
);

$iso2022jp_ver = genverifier::GenVerifier("ISO2022JP", "ISO-2022-JP",
                                          \@iso2022jp_cls, 10, \@iso2022jp_st);
print $iso2022jp_ver;



diff --git a/intl/chardet/tools/geniso2022kr.pl b/intl/chardet/tools/geniso2022kr.pl
new file mode 100644
index 000000000..f56bcf9fb
--- /dev/null
+++ b/intl/chardet/tools/geniso2022kr.pl
@@ -0,0 +1,55 @@
#!/usr/local/bin/perl
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Prints the ISO-2022-KR verifier source built by genverifier::GenVerifier
# from the byte-class table and state table defined below.

use strict;
require "genverifier.pm";
use genverifier;


my(@iso2022kr_cls);
my(@iso2022kr_st);
my($iso2022kr_ver);


#
# Character classes:
#
# ESC    - 1
# > 0x80 - 2
# $      - 3
# )      - 4
# C      - 5
#
# Each entry is [ first byte, last byte, class ]; genverifier::GetClass uses
# the first range that contains the byte, so '$', ')' and 'C' must stay ahead
# of the 0x1c-0x7f range that also covers them.
@iso2022kr_cls = (
  [ 0x01 , 0x1a , 0 ],
  [ 0x24 , 0x24 , 3 ],
  [ 0x29 , 0x29 , 4 ],
  [ 0x43 , 0x43 , 5 ],
  [ 0x1c , 0x7f , 0 ],
  [ 0x1b , 0x1b , 1 ],
  [ 0x00 , 0x00 , 2 ],
  [ 0x80 , 0xff , 2 ]
);


#
# Sequence to recognise: ESC$)C
#
# One row per state, one column per class.
package genverifier;
@iso2022kr_st = (
# 0  1  2  3  4  5
  0, 3, 1, 0, 0, 0, # Start State - 0
  1, 1, 1, 1, 1, 1, # Error State - 1
  2, 2, 2, 2, 2, 2, # ItsMe State - 2
  1, 1, 1, 4, 1, 1, # state 3 - got ESC
  1, 1, 1, 1, 5, 1, # state 4 - got ESC $
  1, 1, 1, 1, 1, 2, # state 5 - got ESC $ )
);

$iso2022kr_ver = genverifier::GenVerifier("ISO2022KR", "ISO-2022-KR",
                                          \@iso2022kr_cls, 6, \@iso2022kr_st);
print $iso2022kr_ver;



diff --git a/intl/chardet/tools/gensjis.pl b/intl/chardet/tools/gensjis.pl
new file mode 100644
index 000000000..20966d03e
--- /dev/null
+++ b/intl/chardet/tools/gensjis.pl
@@ -0,0 +1,46 @@
#!/usr/local/bin/perl
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Prints the Shift_JIS verifier source built by genverifier::GenVerifier from
# the byte-class table and state table defined below.

use strict;
require "genverifier.pm";
use genverifier;


my(@sjis_cls);
my(@sjis_st);
my($sjis_ver);

# Each entry is [ first byte, last byte, class ]; genverifier::GetClass uses
# the first range that contains the byte. The 0x85-0x86 entry assigns the
# same class 3 that the later 0x80-0x9f range would.
@sjis_cls = (
  [ 0x00 , 0x00 , 0 ],
  [ 0x0e , 0x0f , 0 ],
  [ 0x1b , 0x1b , 0 ],
  [ 0xfd , 0xff , 0 ],
  [ 0x85 , 0x86 , 3 ],
  [ 0xeb , 0xec , 5 ],
  [ 0x01 , 0x1a , 1 ],
  [ 0x1c , 0x3f , 1 ],
  [ 0x7f , 0x7f , 1 ],
  [ 0x40 , 0x7e , 2 ],
  [ 0xa1 , 0xdf , 2 ],
  [ 0x80 , 0x9f , 3 ],
  [ 0xa0 , 0xa0 , 4 ],
  [ 0xe0 , 0xea , 3 ],
  [ 0xed , 0xfc , 4 ],
);

# One row per state, one column per class.
package genverifier;
@sjis_st = (
# 0  1  2  3  4  5
  1, 0, 0, 3, 1, 1, # Start State - 0
  1, 1, 1, 1, 1, 1, # Error State - 1
  2, 2, 2, 2, 2, 2, # ItsMe State - 2
  1, 1, 0, 0, 0, 0, # State - 3
);

$sjis_ver = genverifier::GenVerifier("SJIS", "Shift_JIS", \@sjis_cls, 6, \@sjis_st);
print $sjis_ver;



diff --git a/intl/chardet/tools/genutf8.pl b/intl/chardet/tools/genutf8.pl
new file mode 100644
index 000000000..437dd535b
--- /dev/null
+++ b/intl/chardet/tools/genutf8.pl
@@ -0,0 +1,189 @@
#!/usr/local/bin/perl
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Prints the UTF-8 verifier source built by genverifier::GenVerifier from the
# byte-class table and state table derived step by step below.

use strict;
require "genverifier.pm";
use genverifier;


my(@utf8_cls);
my(@utf8_st);
my($utf8_ver);

#
#
# UTF8 encode the UCS4 into 1 to 4 bytes
#
# 1 byte    00 00 00 00    00 00 00 7f
# 2 bytes   00 00 00 80    00 00 07 ff
# 3 bytes   00 00 08 00    00 00 ff ff
# 4 bytes   00 01 00 00    00 10 ff ff
#
# However, since Surrogate area should not be encoded into UTF8 as
# a Surrogate pair, we can remove the surrogate area from UTF8
#
# 1 byte    00 00 00 00    00 00 00 7f
# 2 bytes   00 00 00 80    00 00 07 ff
# 3 bytes   00 00 08 00    00 00 d7 ff
#           00 00 e0 00    00 00 ff ff
# 4 bytes   00 01 00 00    00 10 ff ff
#
# Now we break them into 6 bits group for 2-4 bytes UTF8
#
# 1 byte    00             7f
# 2 bytes   02 00          1f 3f
# 3 bytes   00 20 00       0d 1f 3f
#           0e 00 00       0f 3f 3f
# 4 bytes   00 10 00 00    04 0f 3f 3f
#
# Break down more
#
# 1 byte    00             7f
# 2 bytes   02 00          1f 3f
# 3 bytes   00 20 00       00 3f 3f
#           01 00 00       0c 3f 3f
#           0d 00 00       0d 1f 3f
#           0e 00 00       0f 3f 3f
# 4 bytes   00 10 00 00    00 3f 3f 3f
#           01 00 00 00    03 3f 3f 3f
#           04 00 00 00    04 0f 3f 3f
#
# Now, add
# c0 to the lead byte of 2 bytes UTF8
# e0 to the lead byte of 3 bytes UTF8
# f0 to the lead byte of 4 bytes UTF8
# 80 to the trail bytes
#
# 1 byte    00             7f
# 2 bytes   c2 80          df bf
# 3 bytes   e0 a0 80       e0 bf bf
#           e1 80 80       ec bf bf
#           ed 80 80       ed 9f bf
#           ee 80 80       ef bf bf
# 4 bytes   f0 90 80 80    f0 bf bf bf
#           f1 80 80 80    f3 bf bf bf
#           f4 80 80 80    f4 8f bf bf
#
#
# Now we can construct our state diagram
#
# 0:0x0e,0x0f,0x1b->Error
# 0:[0-0x7f]->0
# 0:[c2-df]->3
# 0:e0->4
# 0:[e1-ec, ee-ef]->5
# 0:ed->6
# 0:f0->7
# 0:[f1-f3]->8
# 0:f4->9
# 0:*->Error
# 3:[80-bf]->0
# 3:*->Error
# 4:[a0-bf]->3
# 4:*->Error
# 5:[80-bf]->3
# 5:*->Error
# 6:[80-9f]->3
# 6:*->Error
# 7:[90-bf]->5
# 7:*->Error
# 8:[80-bf]->5
# 8:*->Error
# 9:[80-8f]->5
# 9:*->Error
#
# Now, we classified chars into class
#
# 00,0e,0f,1b:k0
# 01-0d,10-1a,1c-7f:k1
# 80-8f:k2
# 90-9f:k3
# a0-bf:k4
# c0-c1:k0
# c2-df:k5
# e0:k6
# e1-ec:k7
# ed:k8
# ee-ef:k7
# f0:k9
# f1-f3:k10
# f4:k11
# f5-ff:k0
#
# Now, let's put them into array form
#
# Each entry is [ first byte, last byte, class ]; genverifier::GetClass uses
# the first range that contains the byte.
# NOTE(review): the list above puts 0x00 in k0, but the table assigns it
# class 1 - confirm which one is intended.

@utf8_cls = (
  [ 0x00 , 0x00 , 1 ],
  [ 0x0e , 0x0f , 0 ],
  [ 0x1b , 0x1b , 0 ],
  [ 0x01 , 0x0d , 1 ],
  [ 0x10 , 0x1a , 1 ],
  [ 0x1c , 0x7f , 1 ],
  [ 0x80 , 0x8f , 2 ],
  [ 0x90 , 0x9f , 3 ],
  [ 0xa0 , 0xbf , 4 ],
  [ 0xc0 , 0xc1 , 0 ],
  [ 0xc2 , 0xdf , 5 ],
  [ 0xe0 , 0xe0 , 6 ],
  [ 0xe1 , 0xec , 7 ],
  [ 0xed , 0xed , 8 ],
  [ 0xee , 0xef , 7 ],
  [ 0xf0 , 0xf0 , 9 ],
  [ 0xf1 , 0xf3 , 10 ],
  [ 0xf4 , 0xf4 , 11 ],
  [ 0xf5 , 0xff , 0 ],
);
#
# Now, we write the state diagram in class
#
# 0:k0->Error
# 0:k1->0
# 0:k5->3
# 0:k6->4
# 0:k7->5
# 0:k8->6
# 0:k9->7
# 0:k10->8
# 0:k11->9
# 0:*->Error
# 3:k2,k3,k4->0
# 3:*->Error
# 4:k4->3
# 4:*->Error
# 5:k2,k3,k4->3
# 5:*->Error
# 6:k2,k3->3
# 6:*->Error
# 7:k3,k4->5
# 7:*->Error
# 8:k2,k3,k4->5
# 8:*->Error
# 9:k2->5
# 9:*->Error
#
# Now, let's put them into array
#
# One row per state, one column per class. State 7 (after an f0 lead byte)
# accepts only k3 and k4, i.e. second bytes 0x90-0xbf, exactly as derived
# above; accepting k2 (0x80-0x8f) would let overlong four-byte forms through.
package genverifier;
@utf8_st = (
# 0  1  2  3  4  5  6  7  8  9  10 11
  1, 0, 1, 1, 1, 3, 4, 5, 6, 7, 8, 9, # state 0 Start
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # state 1 Error
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # state 2 ItsMe
  1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, # state 3
  1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, # state 4
  1, 1, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, # state 5
  1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, # state 6
  1, 1, 1, 5, 5, 1, 1, 1, 1, 1, 1, 1, # state 7
  1, 1, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, # state 8
  1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, # state 9
);



$utf8_ver = genverifier::GenVerifier("UTF8", "UTF-8", \@utf8_cls, 12, \@utf8_st);
print $utf8_ver;



diff --git a/intl/chardet/tools/genverifier.pm b/intl/chardet/tools/genverifier.pm
new file mode 100644
index 000000000..8ccfef4d6
--- /dev/null
+++ b/intl/chardet/tools/genverifier.pm
@@ -0,0 +1,175 @@
#!/usr/local/bin/perl

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0.
# If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Builds the C source of a charset verifier (packed class table, packed state
# table and the SMModel initializer) from Perl tables supplied by the caller.

package genverifier;
use strict;
use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION);

use Exporter;
$VERSION = 1.00;
@ISA = qw(Exporter);

@EXPORT = qw(
  GenVerifier
  );
@EXPORT_OK = qw();

# Returns the mode line and license comment placed at the top of the output.
sub GenNPL {
  my($ret) = << "END_MPL";
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
END_MPL

  return $ret;
}

##--------------------------------------------------------------
# GetClass($char, \@clstbl)
#   Looks up the class of byte value $char. @clstbl holds
#   [ first, last, class ] entries; the class of the first entry whose
#   inclusive range contains $char is returned, so entry order matters.
#   When no entry matches, a warning line is printed to STDOUT.
sub GetClass {
  my($char, $clstbl) = @_;
  my($l);
  # Stop at the last real entry: indexing one past the end would autovivify
  # an empty entry in the caller's table and compare against undef.
  for($l = 0; $l < @$clstbl; $l++) {
    if(($clstbl->[$l][0] <= $char) && ($char <= $clstbl->[$l][1]))
    {
      return $clstbl->[$l][2];
    }
  }
  print "WARNING- there are no class for $char\n";
};
##--------------------------------------------------------------
# GenClassPkg($name, $bits): initializer fragment for the "<name>_cls" table.
sub GenClassPkg {
  my($name, $bits) = @_;
  return GenPkg($name, $bits, "_cls");
}
##--------------------------------------------------------------
# GenStatePkg($name, $bits): initializer fragment for the "<name>_st" table.
sub GenStatePkg {
  my($name, $bits) = @_;
  return GenPkg($name, $bits, "_st");
};
##--------------------------------------------------------------
# GenPkg($name, $bits, $tbl)
#   Returns "{eIdxSft<bits>bits, eSftMsk<bits>bits, eBitSft<bits>bits,
#   eUnitMsk<bits>bits, <name><tbl> }" as a string.
sub GenPkg {
  my($name, $bits, $tbl) = @_;
  my($ret);
  $ret = " {" .
         "eIdxSft" . $bits . "bits, " .
         "eSftMsk" . $bits . "bits, " .
         "eBitSft" . $bits . "bits, " .
         "eUnitMsk" . $bits . "bits, " .
         $name . $tbl . "" .
         " }";
  return $ret;
};
##--------------------------------------------------------------
# Gen4BitsClass($name, \@clstbl)
#   Returns the C definition of "<name>_cls": the class of each of the 256
#   byte values, eight per PCK4BITS(...) row.
sub Gen4BitsClass {
  my($name, $clstbl) = @_;
  my($i,$j);
  my($cls);
  my($ret);
  $ret = "";
  $ret .= "static const uint32_t " . $name . "_cls [ 256 / 8 ] = {\n";
  for($i = 0; $i < 0x100; $i+= 8) {
    $ret .= "PCK4BITS(";
    for($j = $i; $j < $i + 8; $j++) {
      $cls = &GetClass($j,$clstbl);
      $ret .= sprintf("%2d", $cls) ;
      if($j != ($i+7)) {
        $ret .= ",";
      }
    }
    # No comma after the final row.
    if( $i+8 >= 0x100) {
      $ret .= ") ";
    } else {
      $ret .= "),";
    }
    $ret .= sprintf(" // %02x - %02x\n", $i, ($i+7));
  }
  $ret .= "};\n";
  return $ret;
};
##--------------------------------------------------------------
# GenVerifier($name, $charset, \@cls, $numcls, \@st)
#   Returns the complete generated source as one string: license, note,
#   #include, class table, state table and the "<name>SMModel" initializer.
#   The initializer refers to "<name>CharLenTable", which this module does
#   not generate.
sub GenVerifier {
  my($name, $charset, $cls, $numcls, $st) = @_;
  my($ret);
  $ret = GenNPL();
  $ret .= GenNote();
  $ret .= GenHeader();
  $ret .= Gen4BitsClass($name, $cls);
  $ret .= "\n\n";
  $ret .= Gen4BitsState($name, $st);
  $ret .= "\n\n";
  $ret .= "const SMModel " . $name . "SMModel = {\n";
  $ret .= GenClassPkg($name, 4);
  $ret .= ",\n";
  $ret .= " " . $numcls;
  $ret .= ",\n";
  $ret .= GenStatePkg($name, 4);
  $ret .= ",\n";
  $ret .= " " . "CHAR_LEN_TABLE(" . $name . "CharLenTable),\n";
  $ret .= ' "' . $charset . '",' . "\n";
  $ret .= "};\n";
  return $ret;

};
##--------------------------------------------------------------
# Gen4BitsState($name, \@sttbl)
#   Returns the C definition of "<name>_st": the state table, eight entries
#   per PCK4BITS(...) row. 0, 1 and 2 are written as eStart, eError and
#   eItsMe, other values as numbers. The table is padded to a multiple of
#   eight; padding slots are undefined, compare equal to 0 and are therefore
#   written as eStart.
sub Gen4BitsState {
  my($name, $sttbl) = @_;
  my($lenafterpad) = (((@$sttbl-1) >> 3) + 1) << 3;
  my($i,$j);
  my($ret);
  $ret = "";
  $ret .= "static const uint32_t " . $name . "_st [ " . ($lenafterpad >> 3) . "] = {\n";
  for($i = 0; $i < $lenafterpad ; $i+= 8) {
    $ret .= "PCK4BITS(";
    for($j = $i; $j < $i + 8; $j++) {
      if(0 == $sttbl->[$j]) {
        $ret .= "eStart";
      } else { if(1 == $sttbl->[$j]) {
        $ret .= "eError";
      } else { if(2 == $sttbl->[$j]) {
        $ret .= "eItsMe";
      } else {
        $ret .= sprintf(" %d", $sttbl->[$j]) ;
      }}}
      if($j != ($i+7)) {
        $ret .= ",";
      }
    }
    if( $i+8 >= $lenafterpad ) {
      $ret .= ") ";
    } else {
      $ret .= "),";
    }
    $ret .= sprintf(" // %02x - %02x\n", $i, ($i+7));
  }
  $ret .= "};\n";
  return $ret;
};
##--------------------------------------------------------------

# Returns the "do not edit, generated file" comment.
sub GenNote {
  my($ret) = << "END_NOTE";
/*
 * DO NOT EDIT THIS DOCUMENT MANUALLY !!!
 * THIS FILE IS AUTOMATICALLY GENERATED BY THE TOOLS UNDER
 * mozilla/intl/chardet/tools/
 * Please contact ftang\@netscape.com or mozilla-i18n\@mozilla.org
 * if you have any question. Thanks
 */
END_NOTE
  return $ret;
}

##--------------------------------------------------------------
# Returns the #include line needed by the generated tables.
sub GenHeader {
  my($ret) = << "END_HEADER";
#include "nsVerifier.h"
END_HEADER

  return $ret;
}
##--------------------------------------------------------------
1; # this should be the last line