diff options
Diffstat (limited to 'intl/lwbrk/tools')
-rw-r--r-- | intl/lwbrk/tools/anzx4051.html | 669 | ||||
-rw-r--r-- | intl/lwbrk/tools/anzx4051.pl | 356 | ||||
-rw-r--r-- | intl/lwbrk/tools/jisx4051class.txt | 159 | ||||
-rw-r--r-- | intl/lwbrk/tools/jisx4051simp.txt | 24 | ||||
-rw-r--r-- | intl/lwbrk/tools/spec_table.html | 127 |
5 files changed, 1335 insertions, 0 deletions
diff --git a/intl/lwbrk/tools/anzx4051.html b/intl/lwbrk/tools/anzx4051.html new file mode 100644 index 000000000..d894ce811 --- /dev/null +++ b/intl/lwbrk/tools/anzx4051.html @@ -0,0 +1,669 @@ +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this + - file, You can obtain one at http://mozilla.org/MPL/2.0/. --> + +<HTML> +<HEAD> +<TITLE> +Analysis of JIS X 4051 to Unicode General Category Mapping +</TITLE> +</HEAD> +<BODY> +<H1> +Analysis of JIS X 4051 to Unicode General Category Mapping +</H1> +<TABLE BORDER=3> +<TR BGCOLOR=blue><TH><TH> +<TD BGCOLOR=red>C</TD> +<TD BGCOLOR=red>L</TD> +<TD BGCOLOR=red>M</TD> +<TD BGCOLOR=red>N</TD> +<TD BGCOLOR=red>P</TD> +<TD BGCOLOR=red>S</TD> +<TD BGCOLOR=red>Z</TD> +<TD BGCOLOR=white>Total</TD> +<TD BGCOLOR=yellow>Cc</TD> +<TD BGCOLOR=yellow>Cf</TD> +<TD BGCOLOR=yellow>Co</TD> +<TD BGCOLOR=yellow>Cs</TD> +<TD BGCOLOR=yellow>Ll</TD> +<TD BGCOLOR=yellow>Lm</TD> +<TD BGCOLOR=yellow>Lo</TD> +<TD BGCOLOR=yellow>Lt</TD> +<TD BGCOLOR=yellow>Lu</TD> +<TD BGCOLOR=yellow>Mc</TD> +<TD BGCOLOR=yellow>Me</TD> +<TD BGCOLOR=yellow>Mn</TD> +<TD BGCOLOR=yellow>Nd</TD> +<TD BGCOLOR=yellow>Nl</TD> +<TD BGCOLOR=yellow>No</TD> +<TD BGCOLOR=yellow>Pc</TD> +<TD BGCOLOR=yellow>Pd</TD> +<TD BGCOLOR=yellow>Pe</TD> +<TD BGCOLOR=yellow>Pf</TD> +<TD BGCOLOR=yellow>Pi</TD> +<TD BGCOLOR=yellow>Po</TD> +<TD BGCOLOR=yellow>Ps</TD> +<TD BGCOLOR=yellow>Sc</TD> +<TD BGCOLOR=yellow>Sk</TD> +<TD BGCOLOR=yellow>Sm</TD> +<TD BGCOLOR=yellow>So</TD> +<TD BGCOLOR=yellow>Zl</TD> +<TD BGCOLOR=yellow>Zp</TD> +<TD BGCOLOR=yellow>Zs</TD> +</TR> +<TR><TH>00_1<TH> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>14</TD> +<TD>1</TD> +<TD></TD> +<TD BGCOLOR=white>15</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>1</TD> 
+<TD>2</TD> +<TD>11</TD> +<TD>1</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +</TR> +<TR><TH>01_[a]<TH> +<TD></TD> +<TD>32</TD> +<TD>2</TD> +<TD></TD> +<TD>28</TD> +<TD>3</TD> +<TD></TD> +<TD BGCOLOR=white>65</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>8</TD> +<TD>24</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>2</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>1</TD> +<TD>12</TD> +<TD>1</TD> +<TD></TD> +<TD>14</TD> +<TD></TD> +<TD></TD> +<TD>2</TD> +<TD>1</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +</TR> +<TR><TH>02_7<TH> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>1</TD> +<TD></TD> +<TD></TD> +<TD BGCOLOR=white>1</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>1</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +</TR> +<TR><TH>03_8<TH> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>1</TD> +<TD></TD> +<TD BGCOLOR=white>1</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>1</TD> +<TD></TD> +<TD></TD> +<TD></TD> +</TR> +<TR><TH>04_9<TH> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>5</TD> +<TD></TD> +<TD></TD> +<TD BGCOLOR=white>5</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>5</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +</TR> +<TR><TH>05_[b]<TH> +<TD>33</TD> +<TD>153</TD> +<TD></TD> +<TD>33</TD> 
+<TD>2</TD> +<TD>5</TD> +<TD>13</TD> +<TD BGCOLOR=white>239</TD> +<TD>32</TD> +<TD>1</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>153</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>33</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>2</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>5</TD> +<TD></TD> +<TD></TD> +<TD>13</TD> +</TR> +<TR><TH>06_15<TH> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>30</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD BGCOLOR=white>30</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>30</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +</TR> +<TR><TH>07_18<TH> +<TD>18</TD> +<TD>157</TD> +<TD></TD> +<TD>33</TD> +<TD>56</TD> +<TD>125</TD> +<TD>2</TD> +<TD BGCOLOR=white>391</TD> +<TD></TD> +<TD>18</TD> +<TD></TD> +<TD></TD> +<TD>64</TD> +<TD>7</TD> +<TD>5</TD> +<TD></TD> +<TD>81</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>3</TD> +<TD>30</TD> +<TD>4</TD> +<TD>5</TD> +<TD>2</TD> +<TD></TD> +<TD>5</TD> +<TD>36</TD> +<TD>4</TD> +<TD></TD> +<TD>3</TD> +<TD>24</TD> +<TD>98</TD> +<TD>1</TD> +<TD>1</TD> +<TD></TD> +</TR> +<TR><TH>08_COMPLEX<TH> +<TD></TD> +<TD>54</TD> +<TD>33</TD> +<TD>20</TD> +<TD>2</TD> +<TD>1</TD> +<TD></TD> +<TD BGCOLOR=white>110</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>1</TD> +<TD>53</TD> +<TD></TD> +<TD></TD> +<TD>11</TD> +<TD></TD> +<TD>22</TD> +<TD>10</TD> +<TD></TD> +<TD>10</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>2</TD> +<TD></TD> +<TD>1</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +</TR> +<TR><TH>09_[c]<TH> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>3</TD> +<TD>4</TD> +<TD></TD> +<TD BGCOLOR=white>7</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> 
+<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>3</TD> +<TD>2</TD> +<TD></TD> +<TD>2</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +</TR> +<TR><TH>0A_[d]<TH> +<TD>1</TD> +<TD>2</TD> +<TD></TD> +<TD>6</TD> +<TD>28</TD> +<TD>14</TD> +<TD></TD> +<TD BGCOLOR=white>51</TD> +<TD></TD> +<TD>1</TD> +<TD></TD> +<TD></TD> +<TD>1</TD> +<TD></TD> +<TD>1</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>6</TD> +<TD></TD> +<TD></TD> +<TD>3</TD> +<TD>3</TD> +<TD></TD> +<TD>22</TD> +<TD></TD> +<TD>2</TD> +<TD>3</TD> +<TD>7</TD> +<TD>2</TD> +<TD></TD> +<TD></TD> +<TD></TD> +</TR> +<TR><TH>0B_[e]<TH> +<TD>1</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>1</TD> +<TD>1</TD> +<TD>3</TD> +<TD BGCOLOR=white>6</TD> +<TD></TD> +<TD>1</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>1</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>1</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>3</TD> +</TR> +<TR><TH>X<TH> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD BGCOLOR=white>0</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +</TR> +</TABLE> +<TABLE BORDER=3> +<TR BGCOLOR=blue><TH><TH> +<TD BGCOLOR=red>00_1</TD> +<TD BGCOLOR=red>01_[a]</TD> +<TD BGCOLOR=red>02_7</TD> +<TD BGCOLOR=red>03_8</TD> +<TD BGCOLOR=red>04_9</TD> +<TD BGCOLOR=red>05_[b]</TD> +<TD BGCOLOR=red>06_15</TD> +<TD BGCOLOR=red>07_18</TD> +<TD BGCOLOR=red>08_COMPLEX</TD> +<TD BGCOLOR=red>09_[c]</TD> +<TD 
BGCOLOR=red>0A_[d]</TD> +<TD BGCOLOR=red>0B_[e]</TD> +<TD BGCOLOR=red>X</TD> +</TR> +<TR><TH>00<TH> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>33</TD> +<TD>10</TD> +<TD>127</TD> +<TD></TD> +<TD>7</TD> +<TD>44</TD> +<TD>2</TD> +<TD></TD> +</TR> +<TR><TH>0E<TH> +<TD>1</TD> +<TD>6</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>20</TD> +<TD>1</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +</TR> +<TR><TH>17<TH> +<TD>2</TD> +<TD>4</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>110</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +</TR> +<TR><TH>20<TH> +<TD>2</TD> +<TD>8</TD> +<TD>1</TD> +<TD></TD> +<TD>5</TD> +<TD>13</TD> +<TD></TD> +<TD>100</TD> +<TD></TD> +<TD></TD> +<TD>7</TD> +<TD>4</TD> +<TD></TD> +</TR> +<TR><TH>21<TH> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>1</TD> +<TD></TD> +<TD>32</TD> +<TD></TD> +<TD>163</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +</TR> +<TR><TH>30<TH> +<TD>10</TD> +<TD>47</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD>161</TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +<TD></TD> +</TR> +</TABLE> diff --git a/intl/lwbrk/tools/anzx4051.pl b/intl/lwbrk/tools/anzx4051.pl new file mode 100644 index 000000000..b13315b38 --- /dev/null +++ b/intl/lwbrk/tools/anzx4051.pl @@ -0,0 +1,356 @@ +#!/usr/bin/perl +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+
+######################################################################
+#
+# Initialize global variables
+#
+######################################################################
+%utot = ();
+$ui=0;
+$li=0;
+
+######################################################################
+#
+# Open the unicode database file
+#
+######################################################################
+open ( UNICODATA , "< ../../unicharutil/tools/UnicodeData-Latest.txt")
+  || die "cannot find UnicodeData-Latest.txt";
+
+######################################################################
+#
+# Open the JIS X 4051 Class file
+#
+######################################################################
+open ( CLASS , "< jisx4051class.txt")
+  || die "cannot find jisx4051class.txt";
+
+######################################################################
+#
+# Open the JIS X 4051 Class simplified mapping
+#
+######################################################################
+open ( SIMP , "< jisx4051simp.txt")
+  || die "cannot find jisx4051simp.txt";
+
+######################################################################
+#
+# Open the HTML report output file
+#
+######################################################################
+open ( OUT , "> anzx4051.html")
+  || die "cannot open output anzx4051.html file";
+
+######################################################################
+#
+# Open the generated C header output file
+#
+######################################################################
+open ( HEADER , "> ../src/jisx4051class.h")
+  || die "cannot open output ../src/jisx4051class.h file";
+
+######################################################################
+#
+# Generate license and header of the HTML report
+#
+######################################################################
+$hthmlheader = <<END_OF_HTML;
+<!-- This Source Code Form is subject to the terms of the Mozilla Public
+   - License, v. 2.0. If a copy of the MPL was not distributed with this
+   - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
+
+<HTML>
+<HEAD>
+<TITLE>
+Analysis of JIS X 4051 to Unicode General Category Mapping
+</TITLE>
+</HEAD>
+<BODY>
+<H1>
+Analysis of JIS X 4051 to Unicode General Category Mapping
+</H1>
+END_OF_HTML
+print OUT $hthmlheader;
+
+######################################################################
+#
+# Generate license and "do not edit" banner of the C header
+#
+######################################################################
+$npl = <<END_OF_NPL;
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/*
+    DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
+    mozilla/intl/lwbrk/tools/anzx4051.pl
+ */
+END_OF_NPL
+print HEADER $npl;
+
+# %occ        code point (decimal) -> "<simplified class> | <raw class>"
+# %gcat       4-digit hex code point -> general category (field 2 of the database)
+# %dcat       4-digit hex code point -> first letter of the general category
+# %simp       raw class -> simplified class (from jisx4051simp.txt)
+# %gcount     general category -> number of database lines carrying it
+# %dcount     first letter of general category -> number of database lines
+# %sccount    simplified class -> number of raw classes mapped to it
+# %rangecount " <high byte hex>:<simplified class>" -> number of code points
+%occ = ();
+%gcat = ();
+%dcat = ();
+%simp = ();
+%gcount = ();
+%dcount = ();
+%sccount = ();
+%rangecount = ();
+
+######################################################################
+#
+# Process the unicode database file line by line
+#
+######################################################################
+while(<UNICODATA>) {
+  # chomp (not chop) so a final line without a newline keeps its last character
+  chomp;
+  ######################################################################
+  #
+  # Get value from fields
+  #
+  ######################################################################
+  @f = split(/;/ , $_);
+  $c = $f[0];   # The unicode value
+  $g = $f[2];
+  $d = substr($g, 0, 1);
+
+  $gcat{$c} = $g;
+  $dcat{$c} = $d;
+  $gcount{$g}++;
+  $dcount{$d}++;
+}
+# Close the handle that was actually opened above (was the misspelt UNIDATA).
+close(UNICODATA);
+
+######################################################################
+#
+# Read the raw class -> simplified class mapping
+#
+######################################################################
+while(<SIMP>) {
+  chomp;
+  ######################################################################
+  #
+  # Get value from fields
+  #
+  ######################################################################
+  @f = split(/;/ , $_);
+
+  $simp{$f[0]} = $f[1];
+  $sccount{$f[1]}++;
+}
+close(SIMP);
+
+# GetClass($u)
+#   $u is a code point as a number. Returns the general category recorded
+#   in %gcat for it. When %gcat has no non-empty entry, falls back to the
+#   hard-coded ranges below. For a code point matching neither, a warning is
+#   printed to STDOUT and the sub falls off its end (no explicit return).
+sub GetClass{
+  my ($u) = @_;
+  my $hex = DecToHex($u);
+  my $g = $gcat{$hex};
+  if($g ne "") {
+    return $g;
+  } elsif (( 0x3400 <= $u) && ( $u <= 0x9fa5 ) ) {
+    return "Han";
+  } elsif (( 0xac00 <= $u) && ( $u <= 0xd7a3 ) ) {
+    return "Lo";
+  } elsif (( 0xd800 <= $u) && ( $u <= 0xdb7f ) ) {
+    return "Cs";
+  } elsif (( 0xdb80 <= $u) && ( $u <= 0xdbff ) ) {
+    return "Cs";
+  } elsif (( 0xdc00 <= $u) && ( $u <= 0xdfff ) ) {
+    return "Cs";
+  } elsif (( 0xe000 <= $u) && ( $u <= 0xf8ff ) ) {
+    return "Co";
+  } else {
+    printf "WARNING !!!! Cannot find General Category for U+%s \n" , $hex;
+  }
+}
+
+# GetDClass($u)
+#   Same lookup as GetClass but against %dcat, i.e. it returns only the
+#   first letter of the general category ("Han" for the 0x3400-0x9fa5 range).
+sub GetDClass{
+  my ($u) = @_;
+  my $hex = DecToHex($u);
+  my $g = $dcat{$hex};
+  if($g ne "") {
+    return $g;
+  } elsif (( 0x3400 <= $u) && ( $u <= 0x9fa5 ) ) {
+    return "Han";
+  } elsif (( 0xac00 <= $u) && ( $u <= 0xd7a3 ) ) {
+    return "L";
+  } elsif (( 0xd800 <= $u) && ( $u <= 0xdb7f ) ) {
+    return "C";
+  } elsif (( 0xdb80 <= $u) && ( $u <= 0xdbff ) ) {
+    return "C";
+  } elsif (( 0xdc00 <= $u) && ( $u <= 0xdfff ) ) {
+    return "C";
+  } elsif (( 0xe000 <= $u) && ( $u <= 0xf8ff ) ) {
+    return "C";
+  } else {
+    printf "WARNING !!!! Cannot find Detailed General Category for U+%s \n" , $hex;
+  }
+}
+
+# DecToHex($d)
+#   Formats a number as at least four uppercase hex digits, which is the
+#   key format used by %gcat and %dcat.
+sub DecToHex{
+  my ($d) = @_;
+  return sprintf("%04X", $d);
+}
+
+# %gtotal  "<simplified class><general category>" -> number of code points
+# %dtotal  "<simplified class><category first letter>" -> number of code points
+%gtotal = ();
+%dtotal = ();
+
+######################################################################
+#
+# Process the JIS X 4051 class file line by line.
+# Each line is "<first hex>;<last hex or empty>;<raw class>".
+#
+######################################################################
+while(<CLASS>) {
+  chomp;
+  ######################################################################
+  #
+  # Get value from fields
+  #
+  ######################################################################
+  @f = split(/;/ , $_);
+
+  # Entries whose class starts with "a" are skipped.
+  if( substr($f[2], 0, 1) ne "a")
+  {
+    $sc = $simp{$f[2]};
+    $l = hex($f[0]);
+    if($f[1] eq "")
+    {
+      $h = $l;
+    } else {
+      $h = hex($f[1]);
+    }
+    for($k = $l; $k <= $h ; $k++)
+    {
+      # The first entry seen for a code point wins; later ones are ignored.
+      if( exists($occ{$k}))
+      {
+        # printf "WARNING !! Conflict definition!!! U+%s -> [%s] [%s | %s]\n",
+        #   DecToHex($k), $occ{$k} , $f[2] , $sc;
+      }
+      else
+      {
+        $occ{$k} = $sc . " | " . $f[2];
+        $gclass = GetClass($k);
+        $dclass = GetDClass($k);
+        $gtotal{$sc . $gclass}++;
+        $dtotal{$sc . $dclass}++;
+        $u = DecToHex($k);
+        $rk = " " . substr($u,0,2) . ":" . $sc;
+        $rangecount{$rk}++;
+      }
+    }
+  }
+}
+
+#print %gtotal;
+#print %dtotal;
+
+# printreport()
+#   Writes two HTML tables to OUT.
+#   Table 1: one row per simplified class; columns are the counts per
+#   category first letter, the row total, then the counts per general category.
+#   Table 2: one row per high byte 00..4E; columns are the counts per
+#   simplified class. Rows whose counts sum to zero are omitted.
+sub printreport
+{
+  print OUT "<TABLE BORDER=3>\n";
+  print OUT "<TR BGCOLOR=blue><TH><TH>\n";
+
+  foreach $d (sort(keys %dcount)) {
+    print OUT "<TD BGCOLOR=red>$d</TD>\n";
+  }
+
+  print OUT "<TD BGCOLOR=white>Total</TD>\n";
+  foreach $g (sort(keys %gcount)) {
+    print OUT "<TD BGCOLOR=yellow>$g</TD>\n";
+  }
+  print OUT "</TR>\n";
+  foreach $sc (sort(keys %sccount)) {
+
+    print OUT "<TR><TH>$sc<TH>\n";
+
+    $total = 0;
+    foreach $d (sort (keys %dcount)) {
+      $count = $dtotal{$sc . $d};
+      $total += $count;
+      print OUT "<TD>$count</TD>\n";
+    }
+
+    print OUT "<TD BGCOLOR=white>$total</TD>\n";
+
+    foreach $g (sort(keys %gcount)) {
+      $count = $gtotal{$sc . $g};
+      print OUT "<TD>$count</TD>\n";
+    }
+
+
+    print OUT "</TR>\n";
+  }
+  print OUT "</TABLE>\n";
+
+
+  print OUT "<TABLE BORDER=3>\n";
+  print OUT "<TR BGCOLOR=blue><TH><TH>\n";
+
+  foreach $sc (sort(keys %sccount))
+  {
+    print OUT "<TD BGCOLOR=red>$sc</TD>\n";
+  }
+
+  print OUT "</TR>\n";
+
+
+  for($rr = 0; $rr < 0x4f; $rr++)
+  {
+    $empty = 0;
+    $r = sprintf("%02X" , $rr) ;
+    $tmp = "<TR><TH>" . $r . "<TH>\n";
+
+    foreach $sc (sort(keys %sccount)) {
+      $count = $rangecount{ " " .$r . ":" .$sc};
+      $tmp .= sprintf("<TD>%s</TD>\n", $count);
+      $empty += $count;
+    }
+
+    $tmp .= "</TR>\n";
+
+    # $empty is a numeric sum, so compare numerically.
+    if($empty != 0)
+    {
+      print OUT $tmp;
+    }
+  }
+  print OUT "</TABLE>\n";
+
+}
+printreport();
+
+# printarray($r, $def)
+#   $r   two hex digits naming the high byte of a 256 code point range
+#   $def one character used for code points that have no entry in %occ
+#   Prints "[$r || $def]" to STDOUT, then writes to HEADER a
+#   "static const uint32_t gLBClass<r>[32]" initializer. Each element is a
+#   "0x" followed by eight characters, one per code point, highest code point
+#   of the group first. The character is the second character of the %occ
+#   entry (i.e. of the simplified class name), or $def.
+sub printarray
+{
+  my($r, $def) = @_;
+  printf "[%s || %s]\n", $r, $def;
+  $k = hex($r) * 256;
+  printf HEADER "static const uint32_t gLBClass%s[32] = {\n", $r;
+  for($i = 0 ; $i < 256; $i+= 8)
+  {
+    for($j = 7 ; $j >= 0; $j-- )
+    {
+      $v = $k + $i + $j;
+      if( exists($occ{$v}))
+      {
+        $p = substr($occ{$v}, 1,1);
+      } else {
+        $p = $def;
+      }
+
+      # The first character of each group carries the "0x" prefix.
+      if($j == 7 )
+      {
+        printf HEADER "0x%s" , $p;
+      } else {
+        printf HEADER "%s", $p ;
+      }
+    }
+    printf HEADER ", // U+%04X - U+%04X\n", $k + $i ,( $k + $i + 7);
+  }
+  print HEADER "};\n\n";
+}
+printarray("00", "7");
+printarray("20", "7");
+printarray("21", "7");
+printarray("30", "5");
+printarray("0E", "8");
+printarray("17", "7");
+
+#print %rangecount;
+
+######################################################################
+#
+# Close files
+#
+######################################################################
+close(HEADER);
+close(CLASS);
+close(OUT);
+
diff --git a/intl/lwbrk/tools/jisx4051class.txt b/intl/lwbrk/tools/jisx4051class.txt
new file mode 100644
index 000000000..5b26b7267
--- /dev/null
+++ b/intl/lwbrk/tools/jisx4051class.txt
@@ -0,0 +1,159 @@
+0000;001f;17
+0020;;17
+0024;;24
+0027;;18
+0028;;22
+002D;;18
+002F;;18
+0021;002F;23
+0030;0039;15
+003C;;22
+003A;003F;23
+0040;;18
+0041;005A;18
+005B;;22
+005E;;18
+005F;;18
+005B;005F;23
+0060;;18
+0061;007A;18
+007B;;22
+007B;007E;23
+00A0;;24
+00A3;;22
+00A5;;22
+00A9;;18
+00AA;;18
+00AB;;18
+00AC;;22
+00AE;;18
+00AF;;18
+00A1;00BF;23
+00B0;;18
+00F7;;23
+00C0;00FF;18
+0E3F;;1
+0E2F;;4
+0E46;;4
+0E5A;0E5B;4
+0E50;0E59;15
+0E4F;;18
+0EAF;;4
+0EC6;;4
+0ED0;0ED9;15
+1735;1736;1
+17D4;17D5;4
+17D8;;4
+17DA;;4
+1780;17DD;21
+17E0;17E9;21
+17F0;17F9;21
+2007;;24
+2000;200B;17
+200C;200F;18
+2010;;18
+2011;;24
+2012;2013;18
+2014;;7
+2015;;18
+2016;2017;18
+2019;;23
+201D;;23 +2018;201F;18 +2020;2023;18 +2024;2026;23 +2027;;23 +2028;202E;18 +202F;;24 +2030;2034;9 +2035;2038;18 +2039;;1 +203A;;2 +203B;;12 +203C;203D;3 +203E;;23 +203F;2043;18 +2044;;3 +2045;;1 +2046;;2 +2047;2049;3 +204A;205E;18 +205F;;17 +2060;;24 +2061;2063;18 +206A;206F;18 +2070;2071;18 +2074;208E;18 +2090;2094;18 +2116;;8 +2160;217F;12 +2190;21EA;a12 +2126;;18 +2100;2138;18 +2153;2182;18 +2190;21EA;18 +3008;;1 +300A;;1 +300C;;1 +300E;;1 +3010;;1 +3014;;1 +3016;;1 +3018;;1 +301A;;1 +301D;;1 +3001;;2 +3009;;2 +300B;;2 +300D;;2 +300F;;2 +3011;;2 +3015;;2 +3017;;2 +3019;;2 +301B;;2 +301E;;2 +301F;;2 +3005;;3 +301C;;3 +3041;;3 +3043;;3 +3045;;3 +3047;;3 +3049;;3 +3063;;3 +3083;;3 +3085;;3 +3087;;3 +308E;;3 +309D;;3 +309E;;3 +30A1;;3 +30A3;;3 +30A5;;3 +30A7;;3 +30A9;;3 +30C3;;3 +30E3;;3 +30E5;;3 +30E7;;3 +30EE;;3 +30F5;;3 +30F6;;3 +30FC;;3 +30FD;;3 +30FE;;3 +30FB;;5 +3002;;6 +3000;;10 +3042;3094;11 +3099;309E;3 +3003;;12 +3004;;12 +3006;;12 +3007;;12 +3012;;12 +3013;;12 +3020;;12 +3036;;12 +30A2;30FA;12 diff --git a/intl/lwbrk/tools/jisx4051simp.txt b/intl/lwbrk/tools/jisx4051simp.txt new file mode 100644 index 000000000..e12a7fd80 --- /dev/null +++ b/intl/lwbrk/tools/jisx4051simp.txt @@ -0,0 +1,24 @@ +1;00_1 +2;01_[a] +3;01_[a] +4;01_[a] +5;01_[a] +6;01_[a] +7;02_7 +8;03_8 +9;04_9 +10;05_[b] +11;05_[b] +12;05_[b] +13;X +14;X +15;06_15 +16;X +17;05_[b] +18;07_18 +19;X +20;X +21;08_COMPLEX +22;09_[c] +23;0A_[d] +24;0B_[e] diff --git a/intl/lwbrk/tools/spec_table.html b/intl/lwbrk/tools/spec_table.html new file mode 100644 index 000000000..519f98c53 --- /dev/null +++ b/intl/lwbrk/tools/spec_table.html @@ -0,0 +1,127 @@ +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this + - file, You can obtain one at http://mozilla.org/MPL/2.0/. 
-->
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<title>Specification table for line breaking</title>
+<style type="text/css">
+table {
+  border: solid 1px;
+  border-collapse: collapse;
+}
+tbody, tfoot {
+  border-top: solid 2px;
+}
+td, th {
+  border: solid 1px;
+}
+td {
+  text-align: center;
+}
+</style>
+</head>
+<body>
+<p>This is a specification table for line breaking.</p>
+<p>In the IE7 and Opera9 columns, 'A' means that the line is breakable After the character, 'B' means Before, and 'BA' means both Before and After.</p>
+<p>The (C) at the end of the IE7 and Opera9 column names means Character, and (N) means Numeric.
+It indicates what surrounded the character during testing. E.g., "a$a" is a testcase for (C), and "0$0" is a testcase for (N).</p>
+<p>Gecko does not break lines in most Western-language contexts. But in file paths, URLs and very long words that are connected by hyphens,
+some characters might be breakable. These are marked 'breakable' in the table. However, they are not always breakable;
+it <em>depends on the context</em> in the word.</p>
+<table border="1">
+<thead>
+<tr><th colspan="2">character</th><th>Gecko</th><th>IE7(C)</th><th>IE7(N)</th><th>Opera9.2(C)</th><th>Opera9.2(N)</th></tr>
+</thead>
+<tfoot>
+<tr><th colspan="2">character</th><th>Gecko</th><th>IE7(C)</th><th>IE7(N)</th><th>Opera9.2(C)</th><th>Opera9.2(N)</th></tr>
+</tfoot>
+<tbody>
+<tr><th>0x21</th><th>!</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0x22</th><th>"</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x23</th><th>#</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x24</th><th>$</th><td></td><td></td><td>B</td><td></td><td></td></tr>
+<tr><th>0x25</th><th>%</th><td>breakable</td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0x26</th><th>&amp;</th><td>breakable</td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x27</th><th>'</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x28</th><th>(</th><td></td><td>B</td><td>B</td><td></td><td></td></tr>
+<tr><th>0x29</th><th>)</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0x2A</th><th>*</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x2B</th><th>+</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x2C</th><th>,</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x2D</th><th>-</th><td>breakable</td><td>BA</td><td>BA</td><td>A</td><td>A</td></tr>
+<tr><th>0x2E</th><th>.</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x2F</th><th>/</th><td>breakable</td><td></td><td></td><td>A</td><td>A</td></tr>
+</tbody>
+<tbody>
+<tr><th>0x3A</th><th>:</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x3B</th><th>;</th><td>breakable</td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x3C</th><th>&lt;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x3D</th><th>=</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x3E</th><th>&gt;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x3F</th><th>?</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0x40</th><th>@</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0x5B</th><th>[</th><td></td><td>B</td><td>B</td><td></td><td></td></tr>
+<tr><th>0x5C</th><th>\</th><td>breakable</td><td></td><td>B</td><td></td><td></td></tr>
+<tr><th>0x5D</th><th>]</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0x5E</th><th>^</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x5F</th><th>_</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0x60</th><th>`</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0x7B</th><th>{</th><td></td><td>B</td><td>B</td><td></td><td></td></tr>
+<tr><th>0x7C</th><th>|</th><td></td><td></td><td></td><td>A</td><td>A</td></tr>
+<tr><th>0x7D</th><th>}</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0x7E</th><th>~</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0xA1</th><th>¡</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xA2</th><th>¢</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0xA3</th><th>£</th><td></td><td></td><td>B</td><td></td><td></td></tr>
+<tr><th>0xA4</th><th>¤</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xA5</th><th>¥</th><td></td><td></td><td>B</td><td></td><td></td></tr>
+<tr><th>0xA6</th><th>¦</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xA7</th><th>§</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xA8</th><th>¨</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xA9</th><th>©</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xAA</th><th>ª</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xAB</th><th>«</th><td></td><td></td><td></td><td></td><td></td></tr> +<tr><th>0xAC</th><th>¬</th><td></td><td></td><td></td><td></td><td></td></tr> +<tr><th>0xAE</th><th>®</th><td></td><td></td><td></td><td></td><td></td></tr> +<tr><th>0xAF</th><th>¯</th><td></td><td></td><td></td><td></td><td></td></tr> +</tbody> +<tbody> +<tr><th>0xB0</th><th>°</th><td></td><td>A</td><td>A</td><td></td><td></td></tr> +<tr><th>0xB1</th><th>±</th><td></td><td></td><td></td><td></td><td></td></tr> +<tr><th>0xB2</th><th>²</th><td></td><td></td><td></td><td></td><td></td></tr> +<tr><th>0xB3</th><th>³</th><td></td><td></td><td></td><td></td><td></td></tr> +<tr><th>0xB4</th><th>´</th><td></td><td></td><td></td><td>B</td><td>B</td></tr> +<tr><th>0xB5</th><th>µ</th><td></td><td></td><td></td><td></td><td></td></tr> +<tr><th>0xB6</th><th>¶</th><td></td><td></td><td></td><td></td><td></td></tr> +<tr><th>0xB7</th><th>·</th><td></td><td></td><td></td><td></td><td></td></tr> +<tr><th>0xB8</th><th>¸</th><td></td><td></td><td></td><td></td><td></td></tr> +<tr><th>0xB9</th><th>¹</th><td></td><td></td><td></td><td></td><td></td></tr> +<tr><th>0xBA</th><th>º</th><td></td><td></td><td></td><td></td><td></td></tr> +<tr><th>0xBB</th><th>»</th><td></td><td></td><td></td><td></td><td></td></tr> +<tr><th>0xBC</th><th>¼</th><td></td><td></td><td></td><td></td><td></td></tr> +<tr><th>0xBD</th><th>½</th><td></td><td></td><td></td><td></td><td></td></tr> +<tr><th>0xBE</th><th>¾</th><td></td><td></td><td></td><td></td><td></td></tr> +<tr><th>0xBF</th><th>¿</th><td></td><td></td><td></td><td></td><td></td></tr> +</tbody> +<tbody> +<tr><th>0xD7</th><th>×</th><td></td><td></td><td></td><td></td><td></td></tr> +</tbody> +<tbody> +<tr><th>0xF7</th><th>÷</th><td></td><td></td><td></td><td></td><td></td></tr> +</tbody> +</table> +</body> +</html> |