summaryrefslogtreecommitdiffstats
path: root/intl/uconv/tools/mkjpconv.pl
diff options
context:
space:
mode:
Diffstat (limited to 'intl/uconv/tools/mkjpconv.pl')
-rwxr-xr-xintl/uconv/tools/mkjpconv.pl323
1 files changed, 323 insertions, 0 deletions
diff --git a/intl/uconv/tools/mkjpconv.pl b/intl/uconv/tools/mkjpconv.pl
new file mode 100755
index 000000000..1394a6bc6
--- /dev/null
+++ b/intl/uconv/tools/mkjpconv.pl
@@ -0,0 +1,323 @@
+#!/usr/bin/perl
+# Identification string written into the headers of the generated
+# IBMNEC.map and jis0208.ump files ("generated by ..."); it interpolates
+# the command-line arguments as they are when the script starts.
+$ID = "mkjpconv.pl @ARGV (Time-stamp: <2001-08-08 18:54:54 shom>)";
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#
+# based on CP932.TXT from unicode.org
+# additional information from SHIFTJIS.TXT from unicode.org
+#
+# mapping policy:
+# jis0208 to unicode : based on CP932
+# unicode to jis0208 : based on CP932
+# the lowest code is used for dual mapping to jis0208
+# ascii region : based on ISO8859-1 ( same as CP932 ) IGNORE?
+# kana region : based on CP932
+# IBM Ext(0xFxxx>) : premap to NEC region ( mappable to JIS )
+
+# Without at least one (non-empty) argument, show the usage line and stop.
+unless (defined($ARGV[0]) && $ARGV[0] ne "") {
+    print STDERR "usage: mkjpconv.pl SHIFTJIS.TXT <INFILE(ex:CP932.TXT)> [Another check]\n";
+    exit 1;
+}
+
+# Record in %defined every code listed in SHIFTJIS.TXT (read from the
+# current directory).  Keys have the form "0x" . hi . lo, where hi is a
+# single space for codes that have only two hex digits.
+open(SI, "SHIFTJIS.TXT") or die;
+while (<SI>) {
+    ($hi, $lo) = /^0x(..)?(..)\s/;
+    next if $lo eq "";          # not a mapping line
+    $hi = " " if $hi eq "";     # two-digit code: pad the missing high part
+    $defined{"0x$hi$lo"} = 1;
+}
+close(SI);
+
+# Discard the first argument; what remains in @ARGV is the list of mapping
+# tables to read, and the first of those names every generated file.
+shift @ARGV;
+$src = $ARGV[0];
+
+# Directory that receives the generated tables.
+$gendir = "$src.d";
+mkdir($gendir);
+
+# Shift_JIS <-> Unicode maps.
+$sufile      = "sjis2ucs-$src.map";
+$usfile      = "ucs2sjis-$src.map";
+
+# JIS -> Unicode maps (main table, extension, ascii and kana regions).
+$jufile      = "jis2ucs-$src.map";
+$jeufile     = "jisext2ucs-$src.map";
+$jaufile     = "jisasc2ucs-$src.map";
+$jrkufile    = "jiskana2ucs-$src.map";
+
+# Unicode -> JIS maps, same four groups.
+$ujfile      = "ucs2jis-$src.map";
+$ujefile     = "ucs2jisext-$src.map";
+$ujafile     = "ucs2jisasc-$src.map";
+$ujrkfile    = "ucs2jiskana-$src.map";
+
+# Generated tables and the comment listing.
+$ibmnecfile  = "$gendir/IBMNEC.map";
+$jdxfile     = "$gendir/jis0208.ump";
+$jdxextfile  = "jis0208ext.ump";
+$commentfile = "comment-$src.txt";
+
+# Slurp the license header that is prepended to the generated tables.
+open(IN, "NPL.header") or die;
+$NPL .= $_ while <IN>;
+close(IN);
+
+# Read every mapping table named on the command line.
+#
+# For each "0xFROM 0xTO[+0x...] [# comment]" line:
+#   %fromto / @fromlist / %commentbody / %commentseq record the first
+#     mapping seen for each source code;
+#   %tofrom collects, per Unicode value, the comma-separated source codes
+#     that map to it.
+# Only the first table feeds %tofrom.  Later tables still add source codes
+# that are new, but are otherwise only used to report mappings that differ
+# ("Another map in ...").
+foreach $infile ( @ARGV ) {
+
+    open (IN, "$infile") || die;
+
+    while(<IN>) {
+        ($from, $to, $seq, $dum, $comment) =
+            /^\s*(0x[0-9a-fA-F]+)\s+(0x[0-9a-fA-F]+)(\+0x\S+)?(\s+\#\s*(\S.*))?$/;
+        if ( $seq ne "" ) {
+            print "Warning: Unicode Seq:\t$from\t$to$seq\t# $comment\n";
+        }
+
+        # Lines that do not match the pattern leave $from empty.
+        if ( $from eq "" ) { next; }
+
+        # Two-digit codes get a padded key.
+        if ( $from =~ /0x(..)$/ ) {
+            $from = " 0x$1";
+        }
+
+        if ( $fromto{$from} eq "" ) {
+            push(@fromlist, $from);
+            $fromto{$from} = $to;
+            $commentbody{$from} = $comment;
+            $commentseq{$from} = $seq
+        } elsif ( $fromto{$from} ne $to ) {
+            # another mapping SJIS:UCS2 = 1:N
+            print "Another map in $infile\t$from\t$fromto{$from},$to\n";
+        }
+
+        # Tables after the first are only checked, not merged into %tofrom.
+        if ($checkanother==1) {
+            next;
+        }
+
+        if ( $tofrom{$to} eq "" ) {
+            $tofrom{$to} = $from;
+        } else {
+            # NOTE(review): this uses the existing list as the pattern and
+            # $from as the subject, which looks reversed for a duplicate
+            # check; left as is pending confirmation of the intent.
+            if ( $from !~ /$tofrom{$to}/ ){
+                $tofrom{$to} = "$tofrom{$to},$from";
+            }
+        }
+
+        # print "$from $to\n";
+    }
+
+    close (IN);
+
+    # Was "$checkanother == 1;", a comparison with no effect, so the flag
+    # was never set and the check-only mode never started.
+    $checkanother = 1;
+}
+
+# Dump one line per source code, in sorted order, to the comment file:
+# code, Unicode value (plus any sequence suffix), and the table's comment.
+open(COMMENT, ">$commentfile") or die;
+for my $code (sort @fromlist) {
+    print COMMENT join("\t",
+                       $code,
+                       "$fromto{$code}$commentseq{$code}",
+                       "$commentbody{$code}"), "\n";
+}
+close(COMMENT);
+
+
+# Open every intermediate map file, and IBMNEC.map, for writing.
+open(SU,     ">$sufile")     or die;
+open(US,     ">$usfile")     or die;
+open(JU,     ">$jufile")     or die;
+open(JEU,    ">$jeufile")    or die;
+open(JAU,    ">$jaufile")    or die;
+open(JRKU,   ">$jrkufile")   or die;
+open(UJ,     ">$ujfile")     or die;
+open(UJE,    ">$ujefile")    or die;
+open(UJA,    ">$ujafile")    or die;
+open(UJRK,   ">$ujrkfile")   or die;
+open(IBMNEC, ">$ibmnecfile") or die;
+
+# Heading of the report written to standard output.  (The map files
+# themselves get no heading line.)
+print "Generated from $src\n"
+    . "Command: mkjpconv.pl @ARGV\n"
+    . "SJIS(JIS)\tUCS2\tSJIS\tS:U:S\tSJIS lower\n";
+
+# Walk every source code in sorted order.  For each one this prints a
+# report line to standard output, writes the Shift_JIS -> Unicode pair to
+# SU, and routes the pair to the JIS, JIS-extension, kana or ascii map.
+# The Unicode-keyed lines are only collected in lists here.
+foreach $i (sort(@fromlist)) {
+
+ $ucs = "";
+
+ # Remove the padding that two-digit codes were given while reading.
+ $sjis = $i;
+ $sjis =~ s/\s+//;
+ # Empty unless $sjis is "0x" followed by four characters and the
+ # converted value falls inside the 94x94 range.
+ $jis = sjistojis($sjis);
+
+ print "$i($jis)\t$fromto{$i}\t$tofrom{$fromto{$i}}";
+ $ucs = $fromto{$i};
+
+ if ( $i eq $tofrom{$fromto{$i}} ) {
+ # This code is the only one recorded for its Unicode value.
+ print "\t1:1:1";
+ print "\t$i";
+ } else {
+ # Several codes share the Unicode value (or none was recorded);
+ # report the first one in the recorded list.
+ print "\t1:1:N";
+ @tolist = split(/,/,$tofrom{$fromto{$i}});
+ print "\t$tolist[0]";
+ #$ucs = $tolist[0];
+ # Remember where codes with lead byte 0xFA-0xFD map to, for the
+ # IBMNEC table written later.
+ if ( $sjis =~ /0xF[A-D]../ ) {
+ $ibmnec{$sjis} = $tolist[0];
+ #print IBMNEC "$sjis\t$tolist[0]\n";
+ }
+
+ }
+ print SU "$sjis\t$ucs\n";
+ push(@uslist, "$ucs\t$sjis\n");
+
+ #print US "$ucs\t$sjis\n";
+ if ( $jis ne "") {
+ #if ($sjis =~ /^0x87../ || $sjis =~ /^0xED../ ) {
+ # cp932 ext
+ # Codes that were not listed in SHIFTJIS.TXT go to the extension map.
+ if ($sjis =~ /0x..../ && $defined{$sjis} != 1) {
+ # jis not define
+ print JEU "$jis\t$ucs\n";
+ push(@ujelist, "$ucs\t$jis\n");
+ $jisextucs{$jis} = $ucs;
+ } else {
+ print JU "$jis\t$ucs\n";
+ push(@ujlist, "$ucs\t$jis\n");
+ $jisucs{$jis} = $ucs;
+ }
+
+ #print UJ "$ucs\t$jis\n";
+ } elsif ( $sjis =~ /\s*0x([8-9A-D].)/ ) {
+ # No JIS code and first hex digit 8-9 or A-D: goes to the kana map.
+ $code = $1;
+ print JRKU "0x00$code\t$ucs\n";
+ push(@ujrklist, "$ucs\t0x00$code\n");
+ } elsif ( $sjis =~ /\s*0x([0-7].)/ ) {
+ # No JIS code and first hex digit 0-7: goes to the ascii map.
+ $code = $1;
+ print JAU "0x00$code\t$ucs\n";
+ push(@ujalist, "$ucs\t0x00$code\n");
+ }
+ #print "\t# $comment{$i}\n";
+ print "\n";
+}
+
+# Write the collected Unicode-keyed lines, each list in sorted order.
+print US   sort @uslist;
+print UJ   sort @ujlist;
+print UJE  sort @ujelist;
+print UJA  sort @ujalist;
+print UJRK sort @ujrklist;
+
+# make ibmnec mapping
+#
+# Writes one table entry per code from 0xFA40 through 0xFCFF: the code
+# recorded in %ibmnec for it, or 0 when nothing was recorded.
+
+print IBMNEC $NPL;
+print IBMNEC "/* generated by $ID */\n";
+print IBMNEC "/* IBM ext codes to NEC sel (in CP932) */\n\n";
+
+foreach $i (0xFA, 0xFB, 0xFC) {
+    for ($j=( ($i==0xFA) ? 0x40 : 0x00 ); $j<=0xFF; $j++) {
+        $ibm = sprintf("0x%02X%02X", $i, $j);
+        $raw = substr($ibm, 2,6);
+        # Must be a string comparison.  The values are strings such as
+        # "0xED40", which are 0 in numeric context, so the previous
+        # '"" == ...' test was true for every entry and the whole table
+        # was written as UNDEF.
+        if ($ibmnec{$ibm} eq "") {
+            print IBMNEC "/* $raw:UNDEF */ 0, \n";
+        } else {
+            print IBMNEC "/* $raw */ $ibmnec{$ibm}, \n";
+        }
+    }
+}
+
+close(IBMNEC);
+
+# make jdx
+#
+# Writes the 94 x 94 JIS-to-Unicode table: for every cell the value from
+# %jisucs, else the value from %jisextucs, else 0xFFFD.
+
+open (JDX, ">$jdxfile") || die;
+
+print JDX $NPL;
+print JDX "/* generated by $ID */\n";
+print JDX "/* JIS X 0208 (with CP932 ext) to Unicode mapping */\n";
+
+for ($i=0; $i<94; $i++) {
+    printf JDX "/* 0x%2XXX */\n", ($i+0x21);
+    printf JDX " ";
+    for ($j=0; $j<94; $j++) {
+        $jis = sprintf("0x%02X%02X", ($i+0x21), $j+0x21);
+        # get JIS
+        $ucs = $jisucs{$jis};
+        # These tests must be string comparisons.  The values are strings
+        # such as "0x3000", which are 0 in numeric context, so the previous
+        # '"" == $ucs' tests were always true and every cell came out as
+        # 0xFFFD.
+        if ($ucs eq "") {
+            # try CP932 ext
+            # try jis ext
+            $ucs = $jisextucs{$jis}
+        }
+        if ($ucs eq "") {
+            # undefined
+            print JDX "0xFFFD,";
+        } else {
+            print JDX "$ucs,";
+        }
+        # Break the row and append a position comment.
+        if (7 == ( ($j+1) % 8 )) {
+            printf JDX "/* 0x%2X%1X%1X*/\n", $i+0x21, 2+($j/16), (6==($j%16))?0:8;
+        }
+    }
+    printf JDX " /* 0x%2X%1X%1X*/\n", $i+0x21, 2+($j/16), (6==($j%16))?0:8;
+}
+
+close (JDX);
+
+
+# Close the intermediate map files; the umaptable runs below read them.
+for my $handle (\*SU, \*US, \*JU, \*JEU, \*JAU, \*JRKU,
+                \*UJ, \*UJE, \*UJA, \*UJRK) {
+    close($handle);
+}
+
+# generate uf files
+
+# Feed one or more map files through ./umaptable and store the result in
+# $gendir.
+#   $infile  - input file name(s).  The string is passed to the shell
+#              as is, so several space-separated names are all handed
+#              to cat.
+#   $outfile - name of the file to create inside $gendir.
+# Returns the status value from system() (0 on success).  A failure is
+# reported on STDERR instead of being silently ignored; the script keeps
+# going so the remaining tables are still attempted.
+sub genuf {
+    my ($infile, $outfile) = @_;
+    my $com = "cat $infile | ./umaptable -uf > $gendir/$outfile";
+    print "Executing $com\n";
+    my $status = system($com);
+    if ($status != 0) {
+        print STDERR "Warning: command failed (status $status): $com\n";
+    }
+    return $status;
+}
+
+# Build the .uf tables from the intermediate map files.
+genuf($sufile, "sjis.uf");
+genuf($jufile, "jis0208.uf");
+# Test for a non-empty list.  The previous "$#ujelist > 0" compared the
+# last index, so a single extension mapping was treated as "none".
+if ( @ujelist > 0 ) {
+    genuf($jeufile, "jis0208ext.uf");
+} else {
+    print "Extension is not found. jis0208ext.uf is not generated.\n";
+}
+# The ascii and kana maps are combined into one table.
+genuf("$jaufile $jrkufile", "jis0201.uf");
+# genuf($jaufile, "jis0201.uf");
+# genuf($jrkufile, "jis0201gl.uf");
+
+
+# generate test page
+
+
+exit;
+
+# Convert a two-byte Shift_JIS code to the corresponding JIS code.
+#   $sjis - string of the form "0xHHLL" (lead byte HH, trail byte LL)
+# Returns the JIS code as "0xHHLL", or "" when the argument is not "0x"
+# followed by exactly four characters or the result lies beyond the
+# 94 x 94 grid.
+sub sjistojis {
+    my($sjis) = (@_);
+
+    if ( $sjis !~ /^0x....$/ ) {
+        return "";
+    }
+
+    my $first  = hex(substr($sjis,2,2));
+    my $second = hex(substr($sjis,4,2));
+
+    # Number of cells per lead byte: trail bytes 0x80-0xFC plus 0x40-0x7E.
+    my $cells = (0xfd - 0x80) + (0x7f - 0x40);
+
+    # Linear cell index.  Lead bytes from 0xE0 up continue right after the
+    # 0x81-0x9F block; trail bytes from 0x80 up continue right after the
+    # 0x40-0x7E block.  $jnum is lexical now (it used to leak as a global).
+    my $jnum;
+    if ($first < 0xE0) {
+        $jnum = ($first - 0x81) * $cells;
+    } else {
+        $jnum = ($first - 0xe0 + (0xa0-0x81)) * $cells;
+    }
+    if ($second >= 0x80) {
+        $jnum += $second - 0x80 + (0x7f-0x40);
+    } else {
+        $jnum += $second - 0x40;
+    }
+
+    # Row = index / 94, column = index % 94, both offset by 0x21.  The %X
+    # conversion drops the fractional part of the row.
+    if (($jnum / 94 ) < 94) {
+        return sprintf "0x%02X%02X", (($jnum / 94) + 0x21), (($jnum % 94)+0x21);
+    }
+    return "";
+}
+