diff options
Diffstat (limited to 'intl/uconv/tools/mkjpconv.pl')
-rwxr-xr-x | intl/uconv/tools/mkjpconv.pl | 323 |
1 files changed, 323 insertions, 0 deletions
#!/usr/bin/perl
use strict;
use warnings;

my $ID = "mkjpconv.pl @ARGV (Time-stamp: <2001-08-08 18:54:54 shom>)";

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

#
# based on CP932.TXT from unicode.org
# additional information from SHIFTJIS.TXT from unicode.org
#
# mapping policy:
#   jis0208 to unicode : based on CP932
#   unicode to jis0208 : based on CP932
#                        the lowest code is used for dual mapping to jis0208
#   ascii region       : based on ISO8859-1 ( same as CP932 ) IGNORE?
#   kana region        : based on CP932
#   IBM Ext(0xFxxx>)   : premap to NEC region ( mappable to JIS )
#
# usage: mkjpconv.pl SHIFTJIS.TXT <INFILE(ex:CP932.TXT)> [Another check]
#
# The first argument is the table listing the codes that count as
# "defined" (not CP932 extensions).  The second is the main mapping
# table; any further tables are only cross-checked against it.
# NPL.header and ./umaptable are expected in the current directory.

# Directory that receives the generated tables; set by main() and read
# by genuf().
my $gendir;

# Run the generator only when executed as a script, so the conversion
# helpers can be loaded (e.g. by a test) without side effects.
exit main() unless caller;

# Entry point: reads the tables named in @ARGV and writes every map file.
# Returns the process exit status (0 on success, 1 on a usage error).
sub main {
    if (@ARGV < 2 || $ARGV[0] eq "") {
        print STDERR "usage: mkjpconv.pl SHIFTJIS.TXT <INFILE(ex:CP932.TXT)> [Another check]\n";
        return 1;
    }

    # The first argument names the SHIFTJIS table; it used to be ignored
    # in favour of a hard-coded file name.
    my $defined = load_defined_codes(shift @ARGV);

    my $src = $ARGV[0];
    $gendir = "$src.d";
    mkdir($gendir)
        or -d $gendir
        or die "mkjpconv.pl: cannot create directory $gendir: $!\n";

    my %path = (
        su   => "sjis2ucs-$src.map",
        us   => "ucs2sjis-$src.map",
        ju   => "jis2ucs-$src.map",
        jeu  => "jisext2ucs-$src.map",
        jau  => "jisasc2ucs-$src.map",
        jrku => "jiskana2ucs-$src.map",
        uj   => "ucs2jis-$src.map",
        uje  => "ucs2jisext-$src.map",
        uja  => "ucs2jisasc-$src.map",
        ujrk => "ucs2jiskana-$src.map",
    );
    my $ibmnecfile  = "$gendir/IBMNEC.map";
    my $jdxfile     = "$gendir/jis0208.ump";
    my $commentfile = "comment-$src.txt";

    my $NPL = slurp_file("NPL.header");

    my $map = read_mappings(@ARGV);
    write_comment_file($commentfile, $map);

    # Open every map file up front so a permission problem is reported
    # before any output is produced.
    my %fh;
    foreach my $key (sort keys %path) {
        $fh{$key} = open_or_die('>', $path{$key});
    }

    print "Generated from $src\n";
    print "Command: mkjpconv.pl @ARGV\n";
    print "SJIS(JIS)\tUCS2\tSJIS\tS:U:S\tSJIS lower\n";

    my $tables = write_code_maps($map, $defined, \%fh);

    # The map files are read back by genuf() below, so they must be
    # flushed and closed first.
    foreach my $key (sort keys %fh) {
        close_or_die($fh{$key}, $path{$key});
    }

    write_ibmnec($ibmnecfile, $NPL, $tables->{ibmnec});
    write_jdx($jdxfile, $NPL, $tables->{jisucs}, $tables->{jisextucs});

    # generate uf files
    genuf($path{su}, "sjis.uf");
    genuf($path{ju}, "jis0208.uf");
    if ($tables->{ext_count} > 0) {
        genuf($path{jeu}, "jis0208ext.uf");
    }
    else {
        print "Extension is not found. jis0208ext.uf is not generated.\n";
    }
    genuf("$path{jau} $path{jrku}", "jis0201.uf");
    # genuf($path{jau}, "jis0201.uf");
    # genuf($path{jrku}, "jis0201gl.uf");

    return 0;
}

# Opens $path with the given 3-arg open mode and returns the handle; dies
# with the file name and OS error on failure.
sub open_or_die {
    my ($mode, $path) = @_;
    open(my $handle, $mode, $path)
        or die "mkjpconv.pl: cannot open $path: $!\n";
    return $handle;
}

# Closes a write handle, dying if buffered output could not be flushed.
sub close_or_die {
    my ($handle, $path) = @_;
    close($handle) or die "mkjpconv.pl: cannot close $path: $!\n";
    return;
}

# Returns the whole content of $path as one string.
sub slurp_file {
    my ($path) = @_;
    my $in      = open_or_die('<', $path);
    my $content = "";
    while (my $line = <$in>) {
        $content .= $line;
    }
    close($in);
    return $content;
}

# Reads the SHIFTJIS table and returns a hash ref whose keys are the codes
# found at the start of its lines ("0xHHLL", or "0x LL" for 1-byte codes).
sub load_defined_codes {
    my ($path) = @_;
    my %defined;
    my $in = open_or_die('<', $path);
    while (my $line = <$in>) {
        my ($hi, $lo) = $line =~ /^0x(..)?(..)\s/;
        next unless defined $lo && $lo ne "";
        $hi = " " unless defined $hi && $hi ne "";
        $defined{"0x$hi$lo"} = 1;
    }
    close($in);
    return \%defined;
}

# Parses the mapping tables and returns a hash ref with:
#   fromlist    - source codes in first-seen order
#   fromto      - source code  => Unicode code (first mapping wins)
#   tofrom      - Unicode code => comma-joined source codes, first file only
#   commentbody - source code  => trailing "# ..." comment text
#   commentseq  - source code  => "+0x..." sequence suffix, if any
# Conflicting mappings and Unicode sequences are reported on STDOUT.
sub read_mappings {
    my (@infiles) = @_;
    my (@fromlist, %fromto, %tofrom, %commentbody, %commentseq);
    my $checkanother = 0;

    foreach my $infile (@infiles) {
        my $in = open_or_die('<', $infile);

        while (my $line = <$in>) {
            my ($from, $to, $seq, undef, $comment) = $line =~
                /^\s*(0x[0-9a-fA-F]+)\s+(0x[0-9a-fA-F]+)(\+0x\S+)?(\s+\#\s*(\S.*))?$/;
            next unless defined $from;    # not a mapping line
            $seq     = "" unless defined $seq;
            $comment = "" unless defined $comment;

            if ($seq ne "") {
                print "Warning: Unicode Seq:\t$from\t$to$seq\t# $comment\n";
            }

            # Two-digit codes get a leading blank.
            if ($from =~ /0x(..)$/) {
                $from = " 0x$1";
            }

            if (!exists $fromto{$from}) {
                push(@fromlist, $from);
                $fromto{$from}      = $to;
                $commentbody{$from} = $comment;
                $commentseq{$from}  = $seq;
            }
            elsif ($fromto{$from} ne $to) {
                # another mapping SJIS:UCS2 = 1:N
                print "Another map in $infile\t$from\t$fromto{$from},$to\n";
            }

            # Files after the first are only cross-checked; they must not
            # contribute to the reverse map.
            next if $checkanother;

            if (!exists $tofrom{$to}) {
                $tofrom{$to} = $from;
            }
            elsif (!grep { $_ eq $from } split(/,/, $tofrom{$to})) {
                # Exact membership test: append only codes not yet listed.
                $tofrom{$to} = "$tofrom{$to},$from";
            }
        }

        close($in);

        # This was "$checkanother == 1;", a no-op comparison.
        $checkanother = 1;
    }

    return {
        fromlist    => \@fromlist,
        fromto      => \%fromto,
        tofrom      => \%tofrom,
        commentbody => \%commentbody,
        commentseq  => \%commentseq,
    };
}

# Writes one "code<TAB>ucs[+seq]<TAB>comment" line per source code, sorted.
sub write_comment_file {
    my ($path, $map) = @_;
    my $out = open_or_die('>', $path);
    foreach my $from (sort @{ $map->{fromlist} }) {
        print {$out}
            "$from\t$map->{fromto}{$from}$map->{commentseq}{$from}\t$map->{commentbody}{$from}\n";
    }
    close_or_die($out, $path);
    return;
}

# Walks every source code in sorted order, prints the round-trip report to
# STDOUT and fills the forward/reverse map files behind the handles in $fh
# (keys su us ju jeu jau jrku uj uje uja ujrk).  Returns a hash ref with
# the ibmnec, jisucs and jisextucs lookup tables and ext_count, the number
# of extension mappings found.
sub write_code_maps {
    my ($map, $defined, $fh) = @_;
    my (%ibmnec, %jisucs, %jisextucs);
    my (@uslist, @ujlist, @ujelist, @ujalist, @ujrklist);

    foreach my $from (sort @{ $map->{fromlist} }) {
        (my $sjis = $from) =~ s/\s+//;
        my $jis = sjistojis($sjis);
        my $ucs = $map->{fromto}{$from};

        # Codes that only occur in a cross-check file have no reverse entry.
        my $back = $map->{tofrom}{$ucs};
        $back = "" unless defined $back;

        print "$from($jis)\t$ucs\t$back";

        if ($from eq $back) {
            print "\t1:1:1";
            print "\t$from";
        }
        else {
            print "\t1:1:N";
            my @tolist = split(/,/, $back);
            my $lowest = $tolist[0];
            print "\t", (defined $lowest ? $lowest : "");
            if ($sjis =~ /0xF[A-D]../ && defined $lowest) {
                $ibmnec{$sjis} = $lowest;
            }
        }

        print { $fh->{su} } "$sjis\t$ucs\n";
        push(@uslist, "$ucs\t$sjis\n");

        if ($jis ne "") {
            if ($sjis =~ /0x..../ && !$defined->{$sjis}) {
                # cp932 ext: not defined by the SHIFTJIS table
                print { $fh->{jeu} } "$jis\t$ucs\n";
                push(@ujelist, "$ucs\t$jis\n");
                $jisextucs{$jis} = $ucs;
            }
            else {
                print { $fh->{ju} } "$jis\t$ucs\n";
                push(@ujlist, "$ucs\t$jis\n");
                $jisucs{$jis} = $ucs;
            }
        }
        elsif ($sjis =~ /\s*0x([8-9A-D].)/) {
            my $code = $1;
            print { $fh->{jrku} } "0x00$code\t$ucs\n";
            push(@ujrklist, "$ucs\t0x00$code\n");
        }
        elsif ($sjis =~ /\s*0x([0-7].)/) {
            my $code = $1;
            print { $fh->{jau} } "0x00$code\t$ucs\n";
            push(@ujalist, "$ucs\t0x00$code\n");
        }
        print "\n";
    }

    # Reverse maps are emitted sorted by their Unicode column.
    foreach my $pair (
        [ us   => \@uslist ],
        [ uj   => \@ujlist ],
        [ uje  => \@ujelist ],
        [ uja  => \@ujalist ],
        [ ujrk => \@ujrklist ],
        )
    {
        my ($key, $rows) = @{$pair};
        print { $fh->{$key} } sort(@{$rows});
    }

    return {
        ibmnec    => \%ibmnec,
        jisucs    => \%jisucs,
        jisextucs => \%jisextucs,
        ext_count => scalar(@ujelist),
    };
}

# Writes the IBM-extension to NEC-selected table for lead bytes 0xFA-0xFC.
sub write_ibmnec {
    my ($path, $header, $ibmnec) = @_;
    my $out = open_or_die('>', $path);

    print {$out} $header;
    print {$out} "/* generated by $ID */\n";
    print {$out} "/* IBM ext codes to NEC sel (in CP932) */\n\n";

    foreach my $lead (0xFA, 0xFB, 0xFC) {
        my $start = ($lead == 0xFA) ? 0x40 : 0x00;
        foreach my $trail ($start .. 0xFF) {
            my $ibm = sprintf("0x%02X%02X", $lead, $trail);
            my $raw = substr($ibm, 2, 6);
            my $nec = $ibmnec->{$ibm};

            # String test on purpose: "0x...." numifies to 0, so the old
            # numeric '"" == ...' test reported every entry as UNDEF.
            if (!defined $nec || $nec eq "") {
                print {$out} "/* $raw:UNDEF */ 0, \n";
            }
            else {
                print {$out} "/* $raw */ $nec, \n";
            }
        }
    }

    close_or_die($out, $path);
    return;
}

# Formats the trailing position comment of a jis0208.ump output line.
sub jdx_position_comment {
    my ($lead, $cell) = @_;
    return sprintf("/* 0x%2X%1X%1X*/\n",
        $lead, 2 + int($cell / 16), (6 == ($cell % 16)) ? 0 : 8);
}

# Writes the 94x94 JIS X 0208 to Unicode table; cells without a mapping
# are filled with 0xFFFD.
sub write_jdx {
    my ($path, $header, $jisucs, $jisextucs) = @_;
    my $out = open_or_die('>', $path);

    print {$out} $header;
    print {$out} "/* generated by $ID */\n";
    print {$out} "/* JIS X 0208 (with CP932 ext) to Unicode mapping */\n";

    foreach my $row (0 .. 93) {
        my $lead = $row + 0x21;
        printf {$out} "/* 0x%2XXX */\n", $lead;
        print {$out} " ";

        my $cell;
        for ($cell = 0; $cell < 94; $cell++) {
            my $jis = sprintf("0x%02X%02X", $lead, $cell + 0x21);

            # Plain JIS first, then the CP932 extension table.  The tests
            # are string tests: the old numeric '"" == $ucs' was always
            # true and turned every cell into 0xFFFD.
            my $ucs = $jisucs->{$jis};
            if (!defined $ucs || $ucs eq "") {
                $ucs = $jisextucs->{$jis};
            }

            if (!defined $ucs || $ucs eq "") {
                print {$out} "0xFFFD,";    # undefined
            }
            else {
                print {$out} "$ucs,";
            }

            if (7 == (($cell + 1) % 8)) {
                print {$out} jdx_position_comment($lead, $cell);
            }
        }

        # $cell is 94 here; this labels the final, shorter line of the row.
        print {$out} " ", jdx_position_comment($lead, $cell);
    }

    close_or_die($out, $path);
    return;
}

# Pipes the map file(s) named in $infile (blank separated) through
# "./umaptable -uf" into $gendir/$outfile.  The command goes through the
# shell; the file names are not quoted.  Returns the system() status and
# warns when it is non-zero.
sub genuf {
    my ($infile, $outfile) = @_;
    my $com = "cat $infile | ./umaptable -uf > $gendir/$outfile";
    print "Executing $com\n";
    my $status = system($com);
    if ($status != 0) {
        warn "mkjpconv.pl: command failed (status $status): $com\n";
    }
    return $status;
}

# Converts a double-byte Shift_JIS code written as "0xHHLL" to the JIS
# code "0xHHLL".  Returns "" when the argument is not a four-digit hex
# code, when the lead byte is below 0x81 or the trail byte below 0x40, or
# when the result falls outside the 94x94 JIS plane (e.g. the IBM
# extension rows).
sub sjistojis {
    my ($sjis) = @_;

    return "" unless defined $sjis && $sjis =~ /^0x[0-9A-Fa-f]{4}$/;

    my $first  = hex(substr($sjis, 2, 2));
    my $second = hex(substr($sjis, 4, 2));

    # Bytes below these bounds would give a negative cell number.
    return "" if $first < 0x81 || $second < 0x40;

    # Cells addressed by one lead byte: trail 0x40-0x7E plus 0x80-0xFC.
    my $cells_per_lead = (0xfd - 0x80) + (0x7f - 0x40);

    # Lead bytes resume at 0xE0 right after the 0x81-0x9F block.
    my $lead_index =
        ($first < 0xE0)
        ? $first - 0x81
        : $first - 0xe0 + (0xa0 - 0x81);

    # Trail bytes skip 0x7F.
    my $trail_index =
        ($second >= 0x80)
        ? $second - 0x80 + (0x7f - 0x40)
        : $second - 0x40;

    my $jnum = $lead_index * $cells_per_lead + $trail_index;
    my $row  = int($jnum / 94);

    return "" if $row >= 94;
    return sprintf("0x%02X%02X", $row + 0x21, ($jnum % 94) + 0x21);
}