diff options
Diffstat (limited to 'intl/uconv/tools/jamap.pl')
-rw-r--r-- | intl/uconv/tools/jamap.pl | 334 |
1 files changed, 334 insertions, 0 deletions
#!/usr/local/bin/perl
#
# jamap.pl - generate the Japanese character-set mapping tables.
#
# Reads CP932.TXT and JIS0212.TXT from the current directory and writes:
#
#   jis0201-uf-unify, jis0208-uf-unify, jis0208ext-uf-unify, sjis-uf-unify
#       "code <TAB> Unicode" lists, one pair per line.
#   japanese.map
#       C source: index arrays for Shift_JIS / JIS X 0208 / JIS X 0212 plus
#       one shared array (gJapaneseMap) of 94-cell rows of Unicode values.
#
# When the file is run as a script the generation happens immediately.  When
# it is loaded with require/do only the helpers are defined, so they can be
# exercised without the input files being present.
use strict;
use warnings;

# Names of the mapping files read so far; echoed in the "generated by"
# comment of japanese.map.
my @source_files;

# Shift_JIS lead byte -> base value for the JIS code of that lead byte.
#   -1     single-byte code (the "lead byte" of a value below 0x100 is 0)
#   -2     lead bytes 0xF0..0xF9, reported as 'private'
#   undef  lead byte with no entry, reported as 'sjis2undef'
my @sjis_h;
$sjis_h[0] = -1;
@sjis_h[0x81..0x9f] = map { 0x2100 + $_ * 0x200 } (0 .. 30);
@sjis_h[0xe0..0xef] = map { 0x5F00 + $_ * 0x200 } (0 .. 15);
@sjis_h[0xf0..0xf9] = (-2) x 10;

# Shift_JIS trail byte -> offset added to the lead-byte base.  Trail bytes
# 0x9F..0xFC carry an extra 0x100 so that they land in the following JIS row.
# Trail bytes outside 0x40..0x7E / 0x80..0xFC have no entry.
my @sjis_l;
@sjis_l[0x40..0x7e] = (0x21..0x5f);
@sjis_l[0x80..0xfc] = (0x60..0x7e, 0x121..0x17e);

# sjis_to_jis($s)
#
# Classify a Shift_JIS code and convert it to a JIS code where possible.
#
#   $s   Shift_JIS code as an integer (one or two bytes).
#
# Returns the list ($j, $type).  $type is one of
#   'jis0208'       $j is the converted JIS code (below 0x2900, or in
#                   0x3000..0x74FF)
#   'jis0208undef'  $j is the converted JIS code, outside those ranges
#   'jis0201'       single byte 0x00..0x7F or 0xA1..0xDF; $j == $s
#   'sjis1undef'    any other single byte; $j == $s
#   'private'       lead byte 0xF0..0xF9; $j == $s
#   'sjis2undef'    unknown lead byte or invalid trail byte; $j == $s
sub sjis_to_jis {
    my ($s) = @_;

    my $h = $sjis_h[ ($s >> 8) & 0xff ];

    # Lead byte without a table entry.
    return ($s, 'sjis2undef') unless defined $h;

    if ( $h == -1 ) {    # single byte
        return ($s, 'jis0201') if $s <= 0x7f;                   # jis0201 roman
        return ($s, 'jis0201') if $s >= 0xa1 && $s <= 0xdf;     # jis0201 kana
        return ($s, 'sjis1undef');
    }

    return ($s, 'private') if $h == -2;

    # Two-byte code with a valid lead byte.  No defined offset is ever 0, so
    # a false value here always means "no entry for this trail byte".
    my $low = $sjis_l[ $s & 0xff ];
    return ($s, 'sjis2undef') unless $low;

    my $j = $h + $low;
    my $is_0208 = ( $j >= 0x3000 && $j < 0x7500 )    # jis0208 kanji
               || ( $j < 0x2900 );                   # jis0208 non-kanji
    return ( $j, $is_0208 ? 'jis0208' : 'jis0208undef' );
}


# read_sjis_map($filename, $s_col, $u_col)
#
# Read a whitespace-separated mapping file keyed by Shift_JIS code.
#
#   $filename  file to read
#   $s_col     index of the column holding the Shift_JIS code (0x... hex)
#   $u_col     index of the column holding the Unicode value (0x... hex)
#
# Lines whose two columns are not both of the form 0xHEX are skipped.
# Returns a hash (as a flat list) mapping each type reported by sjis_to_jis
# to an array ref of [$jis, $sjis, $unicode] triples in file order.
# Dies if the file cannot be opened.
sub read_sjis_map {
    my ($filename, $s_col, $u_col) = @_;
    my %map;

    # Three-argument open: the file name is never interpreted as a mode.
    open my $in, '<', $filename or die "Cannot open $filename: $!";
    while ( my $line = <$in> ) {
        my @cols = split /\s+/, $line;
        my ($s, $u) = @cols[$s_col, $u_col];
        next unless defined $s && defined $u;
        next unless $s =~ /^0x[0-9A-Fa-f]+$/ && $u =~ /^0x[0-9A-Fa-f]+$/;

        # oct() understands the 0x prefix.
        $s = oct($s);
        $u = oct($u);

        my ($j, $type) = sjis_to_jis($s);
        push @{ $map{$type} }, [$j, $s, $u];
    }
    close $in or warn "Error closing $filename: $!";

    push @source_files, $filename;
    return %map;
}

# read_0212_map($filename, $j_col, $u_col)
#
# Read a whitespace-separated mapping file keyed by JIS code.
#
#   $filename  file to read
#   $j_col     index of the column holding the JIS code (0x... hex)
#   $u_col     index of the column holding the Unicode value (0x... hex)
#
# Lines whose two columns are not both of the form 0xHEX are skipped.
# A Unicode value of U+007E is replaced by U+FF5E.
# Returns an array ref of [$jis, 0, $unicode] triples in file order (empty if
# the file holds no usable line).  Dies if the file cannot be opened.
sub read_0212_map {
    my ($filename, $j_col, $u_col) = @_;
    my $map = [];

    open my $in, '<', $filename or die "Cannot open $filename: $!";
    while ( my $line = <$in> ) {
        my @cols = split /\s+/, $line;
        my ($j, $u) = @cols[$j_col, $u_col];
        next unless defined $j && defined $u;
        next unless $j =~ /^0x[0-9A-Fa-f]+$/ && $u =~ /^0x[0-9A-Fa-f]+$/;

        $j = oct($j);
        $u = oct($u);
        $u = 0xff5e if $u == 0x007e;

        push @$map, [$j, 0, $u];
    }
    close $in or warn "Error closing $filename: $!";

    push @source_files, $filename;
    return $map;
}


# Unicode value -> code already written for it.  Shared by successive
# write_fromu_map calls; the driver clears it between groups of output files.
my %printed;

# write_fromu_map($filename, $code, @maps)
#
# Write "code <TAB> Unicode" lines for every triple in @maps.
#
#   $filename  output file (overwritten)
#   $code      'sjis' to write the Shift_JIS code of each triple; anything
#              else writes the JIS code
#   @maps      array refs of [$jis, $sjis, $unicode] triples; undefined
#              entries are ignored
#
# Only the first code seen for a Unicode value (per %printed) is written.  A
# later, different code for the same Unicode value is reported on STDOUT as a
# conflict and not written.  Dies if the file cannot be opened.
sub write_fromu_map {
    my ($filename, $code, @maps) = @_;

    open my $out, '>', $filename or die "Cannot write $filename: $!";
    foreach my $map (@maps) {
        next unless defined $map;
        foreach my $pair (@$map) {
            my ($j, $s, $u) = @$pair;
            $j = $s if $code eq 'sjis';

            if ( defined $printed{$u} ) {
                if ( $printed{$u} ne $j ) {
                    printf "conflict 0x%04x to 0x%04x, 0x%04x\n", $u, $printed{$u}, $j;
                }
                next;
            }

            # Single-byte codes are written with two hex digits.
            if ( $j < 0x100 ) {
                printf {$out} "0x%02X\t0x%04X\n", $j, $u;
            } else {
                printf {$out} "0x%04X\t0x%04X\n", $j, $u;
            }
            $printed{$u} = $j;
        }
    }
    close $out or warn "Error closing $filename: $!";
}

# Pool of 94-cell rows that print_table emits as gJapaneseMap.
#   @table             row start index (a multiple of 94) -> array ref of cells
#   %table             comma-joined row contents -> start index, used to
#                      share identical rows
#   $table_next_count  start index the next new row will receive
my @table;
my %table;
my $table_next_count = 0;

# get_94table_index($map_table)
#
# Return the start index in the pool of a 94-cell row with the contents of
# $map_table (array ref; missing or undefined cells count as 0; may itself be
# undef for an all-empty row).  An identical row that is already pooled is
# reused, otherwise the row is appended.
sub get_94table_index {
    my ($map_table) = @_;
    $map_table = [] unless defined $map_table;

    my $key = join ',', map { int( $map_table->[$_] || 0 ) } (0 .. 93);
    my $table_index = $table{$key};
    if ( !defined $table_index ) {
        $table_index = $table_next_count;
        $table_next_count += 94;
        $table[$table_index] = $map_table;
        $table{$key} = $table_index;
    }
    return $table_index;
}

# get_188table_index($map_table)
#
# Like get_94table_index, but for a 188-cell block that is stored as two
# consecutive 94-cell rows.  Returns the start index of the first row.  When a
# new block is appended, each half is also registered under its own 94-cell
# key (unless that key is already known) so later 94-cell lookups can share it.
sub get_188table_index {
    my ($map_table) = @_;
    $map_table = [] unless defined $map_table;

    my $map_table1 = [ @{$map_table}[0 .. 93] ];
    my $map_table2 = [ @{$map_table}[94 .. 187] ];
    my $key1 = join ',', map { int( $map_table1->[$_] || 0 ) } (0 .. 93);
    my $key2 = join ',', map { int( $map_table2->[$_] || 0 ) } (0 .. 93);
    my $key  = "$key1,$key2";    # same text as joining all 188 cells

    my $table_index = $table{$key};
    if ( !defined $table_index ) {
        $table_index = $table_next_count;
        $table_next_count += 188;
        $table[$table_index]      = $map_table1;
        $table[$table_index + 94] = $map_table2;
        $table{$key} = $table_index;
        $table{$key1} = $table_index      unless defined $table{$key1};
        $table{$key2} = $table_index + 94 unless defined $table{$key2};
    }
    return $table_index;
}

# Reserve pool index 0 for the all-empty block, so lead bytes / rows without
# any mapping resolve to index 0.
get_188table_index([]);

# print_sjis_table_index(@maps)
#
# Write to the MAP filehandle (which the caller must have opened) the 128
# comma-separated entries of a Shift_JIS index array, one per first byte
# 0x80..0xFF, eight per output row.  @maps are array refs of
# [$jis, $sjis, $unicode] triples.
#
# Entries are either a literal value or a pool index:
#   0xA1..0xDF  byte + 0xFEC0
#   0xF0..0xF9  0xE000 + (byte - 0xF0) * 188
#   0x80        0xFFFD
#   0xA0        0xF8F0
#   0xFD..0xFF  0xF8F1..0xF8F3
#   otherwise   pool index of the 188-cell block built from the triples whose
#               Shift_JIS lead byte is this byte
#
# Two different Unicode values for the same cell are reported on STDOUT; the
# later one wins.
sub print_sjis_table_index {
    my @maps = @_;
    my %map_table;

    foreach my $map (@maps) {
        next unless defined $map;
        foreach my $pair (@$map) {
            my ($j, $s, $u) = @$pair;
            my $row  = $s >> 8;
            my $cell = $s & 0xff;

            # Pack the two trail-byte ranges into cells 0..187 (0x7F is skipped).
            if ( $cell >= 0x40 && $cell <= 0x7e ) {
                $cell -= 0x40;
            } elsif ( $cell >= 0x80 && $cell <= 0xfc ) {
                $cell -= 0x41;
            } else {
                next;
            }

            if ( defined( $map_table{$row}->[$cell] ) && $map_table{$row}->[$cell] != $u ) {
                print "conflict!\n";
            }
            $map_table{$row}->[$cell] = $u;
        }
    }

    for my $i ( 0x80 .. 0xff ) {
        if ( ($i & 0x7) == 0 ) {
            print MAP "\n ";
        }
        if ( $i >= 0xa1 && $i <= 0xdf ) {
            printf MAP " 0x%04X,", $i + 0xfec0;
        } elsif ( $i >= 0xf0 && $i <= 0xf9 ) {
            printf MAP " 0x%04X,", 0xe000 + ($i - 0xf0) * 188;
        } elsif ( $i == 0x80 ) {
            print MAP " 0xFFFD,";
        } elsif ( $i == 0xa0 ) {
            print MAP " 0xF8F0,";
        } elsif ( $i >= 0xfd ) {
            printf MAP " 0x%04X,", $i + (0xf8f1 - 0xfd);
        } else {
            my $table_index = get_188table_index( $map_table{$i} );
            printf MAP " %6d,", $table_index;
        }
    }
}

# print_jis_table_index(@maps)
#
# Write to the MAP filehandle (which the caller must have opened) the 128
# comma-separated entries of a JIS index array, one per first byte
# 0x00..0x7F, eight per output row.  First bytes 0x21..0x7E get the pool index
# of the 94-cell row built from the triples whose JIS code has that high byte;
# every other entry is 0xFFFD.
#
# Triples whose low byte falls outside 0x21..0x7E cannot be stored in a
# 94-cell row and are skipped.
sub print_jis_table_index {
    my @maps = @_;
    my %map_table;

    foreach my $map (@maps) {
        next unless defined $map;
        foreach my $pair (@$map) {
            my ($j, $s, $u) = @$pair;
            my $row  = $j >> 8;
            my $cell = ($j & 0xff) - 0x21;

            # A negative index would address the row from its end (or die on an
            # empty row); cells above 93 are never emitted.
            next if $cell < 0 || $cell > 93;

            if ( defined( $map_table{$row}->[$cell] ) && $map_table{$row}->[$cell] != $u ) {
                print "conflict!\n";
            }
            $map_table{$row}->[$cell] = $u;
        }
    }

    for my $i ( 0 .. 0x7f ) {
        if ( ($i & 0x7) == 0 ) {
            print MAP "\n ";
        }
        if ( $i >= 0x21 && $i <= 0x7e ) {
            my $table_index = get_94table_index( $map_table{$i} );
            printf MAP " %6d,", $table_index;
        } else {
            print MAP " 0xFFFD,";
        }
    }
}

# print_table_index($map_name, @maps)
#
# Write to MAP the C definitions g<name>IndexShiftJis, g<name>IndexJis0208 and
# the two-element pointer array g<name>Index for the given triples.
sub print_table_index {
    my ($map_name, @maps) = @_;
    print MAP "static const uint16_t g${map_name}IndexShiftJis[] = {";
    print_sjis_table_index(@maps);
    print MAP "\n};\n";
    print MAP "static const uint16_t g${map_name}IndexJis0208[] = {";
    print_jis_table_index(@maps);
    print MAP "\n};\n";
    print MAP "static const uint16_t * const g${map_name}Index[] = {";
    print MAP "\n g${map_name}IndexShiftJis, g${map_name}IndexJis0208";
    print MAP "\n};\n\n";
}

# print_0212_table_index($map_name, @maps)
#
# Write to MAP the C definition g<name>Index holding the JIS index array for
# the given triples.
sub print_0212_table_index {
    my ($map_name, @maps) = @_;
    print MAP "static const uint16_t g${map_name}Index[] = {";
    print_jis_table_index(@maps);
    print MAP "\n};\n\n";
}


# print_table()
#
# Write to MAP the C definition gJapaneseMap: every pooled 94-cell row in
# index order, each preceded by an "index N" comment.  Empty cells are written
# as 0xFFFD.
sub print_table {
    print MAP "static const uint16_t gJapaneseMap[] = {";
    for ( my $i = 0; $i < $table_next_count; $i += 94 ) {
        print MAP "\n /* index $i */\n ";
        my $map_table = $table[$i];

        # The counter starts at 1, so the first output row of each table holds
        # seven values and the following ones eight.  Kept as is so that the
        # generated file does not change.
        my $print_count = 1;
        for my $j ( 0 .. 93 ) {
            my $u = $map_table->[$j] || 0xfffd;
            printf MAP " 0x%04X,", $u;
            if ( ++$print_count == 8 ) {
                print MAP "\n ";
                $print_count = 0;
            }
        }
    }
    print MAP "\n};\n";
}


# generate_all()
#
# Driver: read the two source maps, write the four *-uf-unify files and
# japanese.map.  Dies if an input cannot be read or an output cannot be
# created.
sub generate_all {
    my %cp932   = read_sjis_map('CP932.TXT', 0, 1);
    my $jis0212 = read_0212_map('JIS0212.TXT', 0, 1);

    # jis0201 and jis0208 deliberately share one %printed, so a Unicode value
    # already written to the first file is not written again to the second.
    %printed = ();
    write_fromu_map('jis0201-uf-unify', 'jis',
        $cp932{jis0201},
    );
    write_fromu_map('jis0208-uf-unify', 'jis',
        $cp932{jis0208},
    );

    %printed = ();
    write_fromu_map('jis0208ext-uf-unify', 'jis',
        $cp932{jis0208undef},
    );

    %printed = ();
    write_fromu_map('sjis-uf-unify', 'sjis',
        @cp932{'jis0201', 'jis0208', 'jis0208undef', 'sjis1undef', 'sjis2undef'},
    );

    open MAP, '>', 'japanese.map' or die "Cannot write japanese.map: $!";
    binmode MAP;

    # Copy the header stored after __DATA__, up to the line starting with "!".
    while ( my $line = <DATA> ) {
        last if $line =~ /^!/;
        print MAP $line;
    }
    print MAP "/* generated by jamap.pl @source_files */\n\n";

    # NOTE(review): the text below says 0x80 --> U+0080, but
    # print_sjis_table_index emits 0xFFFD for 0x80 -- confirm which is intended.
    print MAP <<EOM;
// IE-compatible handling of undefined codepoints:
// 0x80 --> U+0080
// 0xa0 --> U+F8F0
// 0xfd --> U+F8F1
// 0xfe --> U+F8F2
// 0xff --> U+F8F3
EOM

    print_table_index('CP932', @cp932{'jis0208', 'jis0208undef', 'sjis2undef'});
    print_0212_table_index('JIS0212', $jis0212);
    print_table();

    close MAP or warn "Error closing japanese.map: $!";
    return;
}

# Run the generator only when executed as a script, not when loaded with
# require/do.
generate_all() unless caller;

__DATA__
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

!